mirror of
https://github.com/mblanke/ThreatHunt.git
synced 2026-03-01 05:50:21 -05:00
feat: Add Playbook Manager, Saved Searches, and Timeline View components
- Implemented PlaybookManager for creating and managing investigation playbooks with templates. - Added SavedSearches component for managing bookmarked queries and recurring scans. - Introduced TimelineView for visualizing forensic event timelines with zoomable charts. - Enhanced backend processing with auto-queued jobs for dataset uploads and improved database concurrency. - Updated frontend components for better user experience and performance optimizations. - Documented changes in update log for future reference.
This commit is contained in:
227
_perf_replace_build_host_inventory.py
Normal file
227
_perf_replace_build_host_inventory.py
Normal file
@@ -0,0 +1,227 @@
|
||||
from pathlib import Path
|
||||
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
|
||||
t=p.read_text(encoding='utf-8')
|
||||
start=t.index('async def build_host_inventory(')
|
||||
# find end of function by locating '\n\n' before EOF after ' }\n'
|
||||
end=t.index('\n\n', start)
|
||||
# need proper end: first double newline after function may occur in docstring? compute by searching for '\n\n' after ' }\n' near end
|
||||
ret_idx=t.rfind(' }')
|
||||
# safer locate end as last occurrence of '\n }\n' after start, then function ends next newline
|
||||
end=t.find('\n\n', ret_idx)
|
||||
if end==-1:
|
||||
end=len(t)
|
||||
new_func='''async def build_host_inventory(hunt_id: str, db: AsyncSession) -> dict:
|
||||
"""Build a deduplicated host inventory from all datasets in a hunt.
|
||||
|
||||
Returns dict with 'hosts', 'connections', and 'stats'.
|
||||
Each host has: id, hostname, fqdn, client_id, ips, os, users, datasets, row_count.
|
||||
"""
|
||||
ds_result = await db.execute(
|
||||
select(Dataset).where(Dataset.hunt_id == hunt_id)
|
||||
)
|
||||
all_datasets = ds_result.scalars().all()
|
||||
|
||||
if not all_datasets:
|
||||
return {"hosts": [], "connections": [], "stats": {
|
||||
"total_hosts": 0, "total_datasets_scanned": 0,
|
||||
"total_rows_scanned": 0,
|
||||
}}
|
||||
|
||||
hosts: dict[str, dict] = {} # fqdn -> host record
|
||||
ip_to_host: dict[str, str] = {} # local-ip -> fqdn
|
||||
connections: dict[tuple, int] = defaultdict(int)
|
||||
total_rows = 0
|
||||
ds_with_hosts = 0
|
||||
sampled_dataset_count = 0
|
||||
total_row_budget = max(0, int(settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS))
|
||||
max_connections = max(0, int(settings.NETWORK_INVENTORY_MAX_CONNECTIONS))
|
||||
global_budget_reached = False
|
||||
dropped_connections = 0
|
||||
|
||||
for ds in all_datasets:
|
||||
if total_row_budget and total_rows >= total_row_budget:
|
||||
global_budget_reached = True
|
||||
break
|
||||
|
||||
cols = _identify_columns(ds)
|
||||
if not cols['fqdn'] and not cols['host_id']:
|
||||
continue
|
||||
ds_with_hosts += 1
|
||||
|
||||
batch_size = 5000
|
||||
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
|
||||
rows_scanned_this_dataset = 0
|
||||
sampled_dataset = False
|
||||
last_row_index = -1
|
||||
|
||||
while True:
|
||||
if total_row_budget and total_rows >= total_row_budget:
|
||||
sampled_dataset = True
|
||||
global_budget_reached = True
|
||||
break
|
||||
|
||||
rr = await db.execute(
|
||||
select(DatasetRow)
|
||||
.where(DatasetRow.dataset_id == ds.id)
|
||||
.where(DatasetRow.row_index > last_row_index)
|
||||
.order_by(DatasetRow.row_index)
|
||||
.limit(batch_size)
|
||||
)
|
||||
rows = rr.scalars().all()
|
||||
if not rows:
|
||||
break
|
||||
|
||||
for ro in rows:
|
||||
if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
|
||||
sampled_dataset = True
|
||||
break
|
||||
if total_row_budget and total_rows >= total_row_budget:
|
||||
sampled_dataset = True
|
||||
global_budget_reached = True
|
||||
break
|
||||
|
||||
data = ro.data or {}
|
||||
total_rows += 1
|
||||
rows_scanned_this_dataset += 1
|
||||
|
||||
fqdn = ''
|
||||
for c in cols['fqdn']:
|
||||
fqdn = _clean(data.get(c))
|
||||
if fqdn:
|
||||
break
|
||||
client_id = ''
|
||||
for c in cols['host_id']:
|
||||
client_id = _clean(data.get(c))
|
||||
if client_id:
|
||||
break
|
||||
|
||||
if not fqdn and not client_id:
|
||||
continue
|
||||
|
||||
host_key = fqdn or client_id
|
||||
|
||||
if host_key not in hosts:
|
||||
short = fqdn.split('.')[0] if fqdn and '.' in fqdn else fqdn
|
||||
hosts[host_key] = {
|
||||
'id': host_key,
|
||||
'hostname': short or client_id,
|
||||
'fqdn': fqdn,
|
||||
'client_id': client_id,
|
||||
'ips': set(),
|
||||
'os': '',
|
||||
'users': set(),
|
||||
'datasets': set(),
|
||||
'row_count': 0,
|
||||
}
|
||||
|
||||
h = hosts[host_key]
|
||||
h['datasets'].add(ds.name)
|
||||
h['row_count'] += 1
|
||||
if client_id and not h['client_id']:
|
||||
h['client_id'] = client_id
|
||||
|
||||
for c in cols['username']:
|
||||
u = _extract_username(_clean(data.get(c)))
|
||||
if u:
|
||||
h['users'].add(u)
|
||||
|
||||
for c in cols['local_ip']:
|
||||
ip = _clean(data.get(c))
|
||||
if _is_valid_ip(ip):
|
||||
h['ips'].add(ip)
|
||||
ip_to_host[ip] = host_key
|
||||
|
||||
for c in cols['os']:
|
||||
ov = _clean(data.get(c))
|
||||
if ov and not h['os']:
|
||||
h['os'] = ov
|
||||
|
||||
for c in cols['remote_ip']:
|
||||
rip = _clean(data.get(c))
|
||||
if _is_valid_ip(rip):
|
||||
rport = ''
|
||||
for pc in cols['remote_port']:
|
||||
rport = _clean(data.get(pc))
|
||||
if rport:
|
||||
break
|
||||
conn_key = (host_key, rip, rport)
|
||||
if max_connections and len(connections) >= max_connections and conn_key not in connections:
|
||||
dropped_connections += 1
|
||||
continue
|
||||
connections[conn_key] += 1
|
||||
|
||||
if sampled_dataset:
|
||||
sampled_dataset_count += 1
|
||||
logger.info(
|
||||
"Host inventory sampling for dataset %s (%d rows scanned)",
|
||||
ds.id,
|
||||
rows_scanned_this_dataset,
|
||||
)
|
||||
break
|
||||
|
||||
last_row_index = rows[-1].row_index
|
||||
if len(rows) < batch_size:
|
||||
break
|
||||
|
||||
if global_budget_reached:
|
||||
logger.info(
|
||||
"Host inventory global row budget reached for hunt %s at %d rows",
|
||||
hunt_id,
|
||||
total_rows,
|
||||
)
|
||||
break
|
||||
|
||||
# Post-process hosts
|
||||
for h in hosts.values():
|
||||
if not h['os'] and h['fqdn']:
|
||||
h['os'] = _infer_os(h['fqdn'])
|
||||
h['ips'] = sorted(h['ips'])
|
||||
h['users'] = sorted(h['users'])
|
||||
h['datasets'] = sorted(h['datasets'])
|
||||
|
||||
# Build connections, resolving IPs to host keys
|
||||
conn_list = []
|
||||
seen = set()
|
||||
for (src, dst_ip, dst_port), cnt in connections.items():
|
||||
if dst_ip in _IGNORE_IPS:
|
||||
continue
|
||||
dst_host = ip_to_host.get(dst_ip, '')
|
||||
if dst_host == src:
|
||||
continue
|
||||
key = tuple(sorted([src, dst_host or dst_ip]))
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
conn_list.append({
|
||||
'source': src,
|
||||
'target': dst_host or dst_ip,
|
||||
'target_ip': dst_ip,
|
||||
'port': dst_port,
|
||||
'count': cnt,
|
||||
})
|
||||
|
||||
host_list = sorted(hosts.values(), key=lambda x: x['row_count'], reverse=True)
|
||||
|
||||
return {
|
||||
"hosts": host_list,
|
||||
"connections": conn_list,
|
||||
"stats": {
|
||||
"total_hosts": len(host_list),
|
||||
"total_datasets_scanned": len(all_datasets),
|
||||
"datasets_with_hosts": ds_with_hosts,
|
||||
"total_rows_scanned": total_rows,
|
||||
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
|
||||
"hosts_with_users": sum(1 for h in host_list if h['users']),
|
||||
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
|
||||
"row_budget_total": settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS,
|
||||
"connection_budget": settings.NETWORK_INVENTORY_MAX_CONNECTIONS,
|
||||
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0 or settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS > 0,
|
||||
"sampled_datasets": sampled_dataset_count,
|
||||
"global_budget_reached": global_budget_reached,
|
||||
"dropped_connections": dropped_connections,
|
||||
},
|
||||
}
|
||||
'''
|
||||
out=t[:start]+new_func+t[end:]
|
||||
p.write_text(out,encoding='utf-8')
|
||||
print('replaced build_host_inventory with hard-budget fast mode')
|
||||
Reference in New Issue
Block a user