feat: Add Playbook Manager, Saved Searches, and Timeline View components

- Implemented PlaybookManager for creating and managing investigation playbooks with templates.
- Added SavedSearches component for managing bookmarked queries and recurring scans.
- Introduced TimelineView for visualizing forensic event timelines with zoomable charts.
- Enhanced backend processing with auto-queued jobs for dataset uploads and improved database concurrency.
- Updated frontend components for better user experience and performance optimizations.
- Documented changes in update log for future reference.
This commit is contained in:
2026-02-23 14:23:07 -05:00
parent 37a9584d0c
commit 5a2ad8ec1c
110 changed files with 10537 additions and 1185 deletions

View File

@@ -13,6 +13,7 @@ from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models import Dataset, DatasetRow
from app.config import settings
logger = logging.getLogger(__name__)
@@ -79,6 +80,55 @@ def _extract_username(raw: str) -> str:
return name or ''
# In-memory host inventory cache
# Pre-computed results stored per hunt_id, built in background after upload.
import time as _time
class _InventoryCache:
"""Simple in-memory cache for pre-computed host inventories."""
def __init__(self):
self._data: dict[str, dict] = {} # hunt_id -> result dict
self._timestamps: dict[str, float] = {} # hunt_id -> epoch
self._building: set[str] = set() # hunt_ids currently being built
def get(self, hunt_id: str) -> dict | None:
"""Return cached result if present. Never expires; only invalidated on new upload."""
return self._data.get(hunt_id)
def put(self, hunt_id: str, result: dict):
self._data[hunt_id] = result
self._timestamps[hunt_id] = _time.time()
self._building.discard(hunt_id)
logger.info(f"Cached host inventory for hunt {hunt_id} "
f"({result['stats']['total_hosts']} hosts)")
def invalidate(self, hunt_id: str):
self._data.pop(hunt_id, None)
self._timestamps.pop(hunt_id, None)
def is_building(self, hunt_id: str) -> bool:
return hunt_id in self._building
def set_building(self, hunt_id: str):
self._building.add(hunt_id)
def clear_building(self, hunt_id: str):
self._building.discard(hunt_id)
def status(self, hunt_id: str) -> str:
if hunt_id in self._building:
return "building"
if hunt_id in self._data:
return "ready"
return "none"
inventory_cache = _InventoryCache()
def _infer_os(fqdn: str) -> str:
u = fqdn.upper()
if 'W10-' in u or 'WIN10' in u:
@@ -151,33 +201,61 @@ async def build_host_inventory(hunt_id: str, db: AsyncSession) -> dict:
}}
hosts: dict[str, dict] = {} # fqdn -> host record
ip_to_host: dict[str, str] = {} # local-ip -> fqdn
ip_to_host: dict[str, str] = {} # local-ip -> fqdn
connections: dict[tuple, int] = defaultdict(int)
total_rows = 0
ds_with_hosts = 0
sampled_dataset_count = 0
total_row_budget = max(0, int(settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS))
max_connections = max(0, int(settings.NETWORK_INVENTORY_MAX_CONNECTIONS))
global_budget_reached = False
dropped_connections = 0
for ds in all_datasets:
if total_row_budget and total_rows >= total_row_budget:
global_budget_reached = True
break
cols = _identify_columns(ds)
if not cols['fqdn'] and not cols['host_id']:
continue
ds_with_hosts += 1
batch_size = 5000
offset = 0
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
rows_scanned_this_dataset = 0
sampled_dataset = False
last_row_index = -1
while True:
if total_row_budget and total_rows >= total_row_budget:
sampled_dataset = True
global_budget_reached = True
break
rr = await db.execute(
select(DatasetRow)
.where(DatasetRow.dataset_id == ds.id)
.where(DatasetRow.row_index > last_row_index)
.order_by(DatasetRow.row_index)
.offset(offset).limit(batch_size)
.limit(batch_size)
)
rows = rr.scalars().all()
if not rows:
break
for ro in rows:
if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
sampled_dataset = True
break
if total_row_budget and total_rows >= total_row_budget:
sampled_dataset = True
global_budget_reached = True
break
data = ro.data or {}
total_rows += 1
rows_scanned_this_dataset += 1
fqdn = ''
for c in cols['fqdn']:
@@ -239,12 +317,33 @@ async def build_host_inventory(hunt_id: str, db: AsyncSession) -> dict:
rport = _clean(data.get(pc))
if rport:
break
connections[(host_key, rip, rport)] += 1
conn_key = (host_key, rip, rport)
if max_connections and len(connections) >= max_connections and conn_key not in connections:
dropped_connections += 1
continue
connections[conn_key] += 1
offset += batch_size
if sampled_dataset:
sampled_dataset_count += 1
logger.info(
"Host inventory sampling for dataset %s (%d rows scanned)",
ds.id,
rows_scanned_this_dataset,
)
break
last_row_index = rows[-1].row_index
if len(rows) < batch_size:
break
if global_budget_reached:
logger.info(
"Host inventory global row budget reached for hunt %s at %d rows",
hunt_id,
total_rows,
)
break
# Post-process hosts
for h in hosts.values():
if not h['os'] and h['fqdn']:
@@ -286,5 +385,12 @@ async def build_host_inventory(hunt_id: str, db: AsyncSession) -> dict:
"total_rows_scanned": total_rows,
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
"hosts_with_users": sum(1 for h in host_list if h['users']),
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
"row_budget_total": settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS,
"connection_budget": settings.NETWORK_INVENTORY_MAX_CONNECTIONS,
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0 or settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS > 0,
"sampled_datasets": sampled_dataset_count,
"global_budget_reached": global_budget_reached,
"dropped_connections": dropped_connections,
},
}
}