feat: Add Playbook Manager, Saved Searches, and Timeline View components

- Implemented PlaybookManager for creating and managing investigation playbooks with templates.
- Added SavedSearches component for managing bookmarked queries and recurring scans.
- Introduced TimelineView for visualizing forensic event timelines with zoomable charts.
- Enhanced backend processing with auto-queued jobs for dataset uploads and improved database concurrency.
- Updated frontend components for better user experience and performance optimizations.
- Documented changes in update log for future reference.
This commit is contained in:
2026-02-23 14:23:07 -05:00
parent 37a9584d0c
commit 5a2ad8ec1c
110 changed files with 10537 additions and 1185 deletions

View File

@@ -0,0 +1,227 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
t=p.read_text(encoding='utf-8')
start=t.index('async def build_host_inventory(')
# find end of function by locating '\n\n' before EOF after ' }\n'
end=t.index('\n\n', start)
# need proper end: first double newline after function may occur in docstring? compute by searching for '\n\n' after ' }\n' near end
ret_idx=t.rfind(' }')
# safer locate end as last occurrence of '\n }\n' after start, then function ends next newline
end=t.find('\n\n', ret_idx)
if end==-1:
end=len(t)
new_func='''async def build_host_inventory(hunt_id: str, db: AsyncSession) -> dict:
"""Build a deduplicated host inventory from all datasets in a hunt.
Returns dict with 'hosts', 'connections', and 'stats'.
Each host has: id, hostname, fqdn, client_id, ips, os, users, datasets, row_count.
"""
ds_result = await db.execute(
select(Dataset).where(Dataset.hunt_id == hunt_id)
)
all_datasets = ds_result.scalars().all()
if not all_datasets:
return {"hosts": [], "connections": [], "stats": {
"total_hosts": 0, "total_datasets_scanned": 0,
"total_rows_scanned": 0,
}}
hosts: dict[str, dict] = {} # fqdn -> host record
ip_to_host: dict[str, str] = {} # local-ip -> fqdn
connections: dict[tuple, int] = defaultdict(int)
total_rows = 0
ds_with_hosts = 0
sampled_dataset_count = 0
total_row_budget = max(0, int(settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS))
max_connections = max(0, int(settings.NETWORK_INVENTORY_MAX_CONNECTIONS))
global_budget_reached = False
dropped_connections = 0
for ds in all_datasets:
if total_row_budget and total_rows >= total_row_budget:
global_budget_reached = True
break
cols = _identify_columns(ds)
if not cols['fqdn'] and not cols['host_id']:
continue
ds_with_hosts += 1
batch_size = 5000
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
rows_scanned_this_dataset = 0
sampled_dataset = False
last_row_index = -1
while True:
if total_row_budget and total_rows >= total_row_budget:
sampled_dataset = True
global_budget_reached = True
break
rr = await db.execute(
select(DatasetRow)
.where(DatasetRow.dataset_id == ds.id)
.where(DatasetRow.row_index > last_row_index)
.order_by(DatasetRow.row_index)
.limit(batch_size)
)
rows = rr.scalars().all()
if not rows:
break
for ro in rows:
if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
sampled_dataset = True
break
if total_row_budget and total_rows >= total_row_budget:
sampled_dataset = True
global_budget_reached = True
break
data = ro.data or {}
total_rows += 1
rows_scanned_this_dataset += 1
fqdn = ''
for c in cols['fqdn']:
fqdn = _clean(data.get(c))
if fqdn:
break
client_id = ''
for c in cols['host_id']:
client_id = _clean(data.get(c))
if client_id:
break
if not fqdn and not client_id:
continue
host_key = fqdn or client_id
if host_key not in hosts:
short = fqdn.split('.')[0] if fqdn and '.' in fqdn else fqdn
hosts[host_key] = {
'id': host_key,
'hostname': short or client_id,
'fqdn': fqdn,
'client_id': client_id,
'ips': set(),
'os': '',
'users': set(),
'datasets': set(),
'row_count': 0,
}
h = hosts[host_key]
h['datasets'].add(ds.name)
h['row_count'] += 1
if client_id and not h['client_id']:
h['client_id'] = client_id
for c in cols['username']:
u = _extract_username(_clean(data.get(c)))
if u:
h['users'].add(u)
for c in cols['local_ip']:
ip = _clean(data.get(c))
if _is_valid_ip(ip):
h['ips'].add(ip)
ip_to_host[ip] = host_key
for c in cols['os']:
ov = _clean(data.get(c))
if ov and not h['os']:
h['os'] = ov
for c in cols['remote_ip']:
rip = _clean(data.get(c))
if _is_valid_ip(rip):
rport = ''
for pc in cols['remote_port']:
rport = _clean(data.get(pc))
if rport:
break
conn_key = (host_key, rip, rport)
if max_connections and len(connections) >= max_connections and conn_key not in connections:
dropped_connections += 1
continue
connections[conn_key] += 1
if sampled_dataset:
sampled_dataset_count += 1
logger.info(
"Host inventory sampling for dataset %s (%d rows scanned)",
ds.id,
rows_scanned_this_dataset,
)
break
last_row_index = rows[-1].row_index
if len(rows) < batch_size:
break
if global_budget_reached:
logger.info(
"Host inventory global row budget reached for hunt %s at %d rows",
hunt_id,
total_rows,
)
break
# Post-process hosts
for h in hosts.values():
if not h['os'] and h['fqdn']:
h['os'] = _infer_os(h['fqdn'])
h['ips'] = sorted(h['ips'])
h['users'] = sorted(h['users'])
h['datasets'] = sorted(h['datasets'])
# Build connections, resolving IPs to host keys
conn_list = []
seen = set()
for (src, dst_ip, dst_port), cnt in connections.items():
if dst_ip in _IGNORE_IPS:
continue
dst_host = ip_to_host.get(dst_ip, '')
if dst_host == src:
continue
key = tuple(sorted([src, dst_host or dst_ip]))
if key in seen:
continue
seen.add(key)
conn_list.append({
'source': src,
'target': dst_host or dst_ip,
'target_ip': dst_ip,
'port': dst_port,
'count': cnt,
})
host_list = sorted(hosts.values(), key=lambda x: x['row_count'], reverse=True)
return {
"hosts": host_list,
"connections": conn_list,
"stats": {
"total_hosts": len(host_list),
"total_datasets_scanned": len(all_datasets),
"datasets_with_hosts": ds_with_hosts,
"total_rows_scanned": total_rows,
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
"hosts_with_users": sum(1 for h in host_list if h['users']),
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
"row_budget_total": settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS,
"connection_budget": settings.NETWORK_INVENTORY_MAX_CONNECTIONS,
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0 or settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS > 0,
"sampled_datasets": sampled_dataset_count,
"global_budget_reached": global_budget_reached,
"dropped_connections": dropped_connections,
},
}
'''
out=t[:start]+new_func+t[end:]
p.write_text(out,encoding='utf-8')
print('replaced build_host_inventory with hard-budget fast mode')