mirror of
https://github.com/mblanke/ThreatHunt.git
synced 2026-03-01 05:50:21 -05:00
feat: Add Playbook Manager, Saved Searches, and Timeline View components
- Implemented PlaybookManager for creating and managing investigation playbooks with templates.
- Added SavedSearches component for managing bookmarked queries and recurring scans.
- Introduced TimelineView for visualizing forensic event timelines with zoomable charts.
- Enhanced backend processing with auto-queued jobs for dataset uploads and improved database concurrency.
- Updated frontend components for better user experience and performance optimizations.
- Documented changes in update log for future reference.
This commit is contained in:
206
_apply_phase1_patch.py
Normal file
206
_apply_phase1_patch.py
Normal file
@@ -0,0 +1,206 @@
|
||||
import os
from pathlib import Path
|
||||
|
||||
# Repository checkout that every patch below is applied to.  The default is
# the original hard-coded location; set THREATHUNT_ROOT to run the script
# against a checkout that lives somewhere else.
root = Path(os.environ.get("THREATHUNT_ROOT", r"d:\Projects\Dev\ThreatHunt"))
|
||||
|
||||
# 1) config.py additions
# Append the job-queue settings right after the scanner settings block.
cfg = root / "backend/app/config.py"
text = cfg.read_text(encoding="utf-8")
needle = " # -- Scanner settings -----------------------------------------------\n SCANNER_BATCH_SIZE: int = Field(default=500, description=\"Rows per scanner batch\")\n"
insert = " # -- Scanner settings -----------------------------------------------\n SCANNER_BATCH_SIZE: int = Field(default=500, description=\"Rows per scanner batch\")\n\n # -- Job queue settings ----------------------------------------------\n JOB_QUEUE_MAX_BACKLOG: int = Field(\n default=2000, description=\"Soft cap for queued background jobs\"\n )\n JOB_QUEUE_RETAIN_COMPLETED: int = Field(\n default=3000, description=\"Maximum completed/failed jobs to retain in memory\"\n )\n JOB_QUEUE_CLEANUP_INTERVAL_SECONDS: int = Field(\n default=60, description=\"How often to run in-memory job cleanup\"\n )\n JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS: int = Field(\n default=3600, description=\"Age threshold for in-memory completed job cleanup\"\n )\n"
# `insert` starts with `needle`, so `needle in text` stays true after the
# patch has been applied.  Also require that the new settings are absent,
# otherwise a second run would append the job-queue block again.
if needle in text and "JOB_QUEUE_MAX_BACKLOG" not in text:
    text = text.replace(needle, insert)
    cfg.write_text(text, encoding="utf-8")
|
||||
|
||||
# 2) scanner.py default scope = dataset-only
# Flip the three non-dataset scan flags from default-on to default-off.
scanner = root / "backend/app/services/scanner.py"
text = scanner.read_text(encoding="utf-8")
for flag in ("scan_hunts", "scan_annotations", "scan_messages"):
    enabled = f" {flag}: bool = True,"
    disabled = f" {flag}: bool = False,"
    text = text.replace(enabled, disabled)
scanner.write_text(text, encoding="utf-8")
|
||||
|
||||
# 3) keywords.py defaults = dataset-only
# Same flag flip as in scanner.py, but these declarations carry no
# trailing comma.
kw = root / "backend/app/api/routes/keywords.py"
text = kw.read_text(encoding="utf-8")
kw_defaults = {
    f" {name}: bool = True": f" {name}: bool = False"
    for name in ("scan_hunts", "scan_annotations", "scan_messages")
}
for old_default, new_default in kw_defaults.items():
    text = text.replace(old_default, new_default)
kw.write_text(text, encoding="utf-8")
|
||||
|
||||
# 4) job_queue.py dedupe + periodic cleanup
jq = root / "backend/app/services/job_queue.py"
text = jq.read_text(encoding="utf-8")

# Make `settings` importable in job_queue.py.  The replacement embeds the
# search text, so without the guard every re-run of this script would add
# another copy of the import line.
typing_import = "from typing import Any, Callable, Coroutine, Optional\n"
settings_import = "from app.config import settings\n"
if settings_import not in text:
    text = text.replace(typing_import, typing_import + "\n" + settings_import)
|
||||
|
||||
# Declare the `_cleanup_task` attribute directly after `_completion_callbacks`.
# The replacement embeds the search text, so skip it when the attribute is
# already declared; otherwise a re-run would insert a duplicate line.
callbacks_line = " self._completion_callbacks: list[Callable[[Job], Coroutine]] = []\n"
cleanup_task_line = " self._cleanup_task: asyncio.Task | None = None\n"
if "self._cleanup_task: asyncio.Task | None = None" not in text:
    text = text.replace(callbacks_line, callbacks_line + cleanup_task_line)
|
||||
|
||||
# Patch the `start` method in job_queue.py.  `start_old` is the method text
# expected in the file; `start_new` is the replacement, which additionally
# creates `self._cleanup_task` (running `self._cleanup_loop()`) when no
# cleanup task exists yet or the previous one is done.
# NOTE(review): str.replace() is a silent no-op unless `start_old` matches
# the target file character-for-character, whitespace included.
start_old = ''' async def start(self):
|
||||
if self._started:
|
||||
return
|
||||
self._started = True
|
||||
for i in range(self._max_workers):
|
||||
task = asyncio.create_task(self._worker(i))
|
||||
self._workers.append(task)
|
||||
logger.info(f"Job queue started with {self._max_workers} workers")
|
||||
'''
|
||||
start_new = ''' async def start(self):
|
||||
if self._started:
|
||||
return
|
||||
self._started = True
|
||||
for i in range(self._max_workers):
|
||||
task = asyncio.create_task(self._worker(i))
|
||||
self._workers.append(task)
|
||||
if not self._cleanup_task or self._cleanup_task.done():
|
||||
self._cleanup_task = asyncio.create_task(self._cleanup_loop())
|
||||
logger.info(f"Job queue started with {self._max_workers} workers")
|
||||
'''
|
||||
text = text.replace(start_old, start_new)
|
||||
|
||||
# Patch the `stop` method in job_queue.py.  `stop_new` keeps the original
# worker cancellation and, when a cleanup task is set, also cancels it,
# awaits it via asyncio.gather(..., return_exceptions=True) and resets
# `self._cleanup_task` to None before logging.
# NOTE(review): str.replace() is a silent no-op unless `stop_old` matches
# the target file character-for-character, whitespace included.
stop_old = ''' async def stop(self):
|
||||
self._started = False
|
||||
for w in self._workers:
|
||||
w.cancel()
|
||||
await asyncio.gather(*self._workers, return_exceptions=True)
|
||||
self._workers.clear()
|
||||
logger.info("Job queue stopped")
|
||||
'''
|
||||
stop_new = ''' async def stop(self):
|
||||
self._started = False
|
||||
for w in self._workers:
|
||||
w.cancel()
|
||||
await asyncio.gather(*self._workers, return_exceptions=True)
|
||||
self._workers.clear()
|
||||
if self._cleanup_task:
|
||||
self._cleanup_task.cancel()
|
||||
await asyncio.gather(self._cleanup_task, return_exceptions=True)
|
||||
self._cleanup_task = None
|
||||
logger.info("Job queue stopped")
|
||||
'''
|
||||
text = text.replace(stop_old, stop_new)
|
||||
|
||||
# Patch the `submit` method in job_queue.py.  `submit_new` first asks
# `self._find_active_duplicate(job_type, params)` for an existing job and
# returns it instead of enqueueing a new one.  It also logs a warning when
# the queue size has reached `settings.JOB_QUEUE_MAX_BACKLOG`; the job is
# still accepted in that case.
# NOTE(review): str.replace() is a silent no-op unless `submit_old` matches
# the target file character-for-character, whitespace included.
submit_old = ''' def submit(self, job_type: JobType, **params) -> Job:
|
||||
job = Job(id=str(uuid.uuid4()), job_type=job_type, params=params)
|
||||
self._jobs[job.id] = job
|
||||
self._queue.put_nowait(job.id)
|
||||
logger.info(f"Job submitted: {job.id} ({job_type.value}) params={params}")
|
||||
return job
|
||||
'''
|
||||
submit_new = ''' def submit(self, job_type: JobType, **params) -> Job:
|
||||
# Soft backpressure: prefer dedupe over queue amplification
|
||||
dedupe_job = self._find_active_duplicate(job_type, params)
|
||||
if dedupe_job is not None:
|
||||
logger.info(
|
||||
f"Job deduped: reusing {dedupe_job.id} ({job_type.value}) params={params}"
|
||||
)
|
||||
return dedupe_job
|
||||
|
||||
if self._queue.qsize() >= settings.JOB_QUEUE_MAX_BACKLOG:
|
||||
logger.warning(
|
||||
"Job queue backlog high (%d >= %d). Accepting job but system may be degraded.",
|
||||
self._queue.qsize(), settings.JOB_QUEUE_MAX_BACKLOG,
|
||||
)
|
||||
|
||||
job = Job(id=str(uuid.uuid4()), job_type=job_type, params=params)
|
||||
self._jobs[job.id] = job
|
||||
self._queue.put_nowait(job.id)
|
||||
logger.info(f"Job submitted: {job.id} ({job_type.value}) params={params}")
|
||||
return job
|
||||
'''
|
||||
text = text.replace(submit_old, submit_new)
|
||||
|
||||
# Insert `_find_active_duplicate` after `get_job` in job_queue.py.
# `new_methods` repeats the `get_job` definition and then adds the helper,
# which builds a signature tuple from the non-None values of `key_fields`
# and returns the first job of the same type in QUEUED/RUNNING status with
# an equal signature (None when the signature is empty or nothing matches).
# NOTE(review): the replacement appears to embed the search text; if so, a
# second run of this script would insert the helper again -- confirm and
# guard if re-runs are expected.
insert_methods_after = " def get_job(self, job_id: str) -> Job | None:\n return self._jobs.get(job_id)\n"
|
||||
new_methods = ''' def get_job(self, job_id: str) -> Job | None:
|
||||
return self._jobs.get(job_id)
|
||||
|
||||
def _find_active_duplicate(self, job_type: JobType, params: dict) -> Job | None:
|
||||
"""Return queued/running job with same key workload to prevent duplicate storms."""
|
||||
key_fields = ["dataset_id", "hunt_id", "hostname", "question", "mode"]
|
||||
sig = tuple((k, params.get(k)) for k in key_fields if params.get(k) is not None)
|
||||
if not sig:
|
||||
return None
|
||||
for j in self._jobs.values():
|
||||
if j.job_type != job_type:
|
||||
continue
|
||||
if j.status not in (JobStatus.QUEUED, JobStatus.RUNNING):
|
||||
continue
|
||||
other_sig = tuple((k, j.params.get(k)) for k in key_fields if j.params.get(k) is not None)
|
||||
if sig == other_sig:
|
||||
return j
|
||||
return None
|
||||
'''
|
||||
text = text.replace(insert_methods_after, new_methods)
|
||||
|
||||
# Patch the `cleanup` method in job_queue.py and add `_cleanup_loop`.
# `cleanup_new` keeps the age-based removal of COMPLETED/FAILED/CANCELLED
# jobs and additionally drops terminal jobs beyond the newest
# `settings.JOB_QUEUE_RETAIN_COMPLETED` (ordered by `created_at`, newest
# first).  `_cleanup_loop` calls `cleanup()` while `self._started` is true,
# sleeping max(10, JOB_QUEUE_CLEANUP_INTERVAL_SECONDS) seconds between runs
# and logging any exception as a warning instead of propagating it.
# NOTE(review): str.replace() is a silent no-op unless `cleanup_old` matches
# the target file character-for-character, whitespace included.
cleanup_old = ''' def cleanup(self, max_age_seconds: float = 3600):
|
||||
now = time.time()
|
||||
to_remove = [
|
||||
jid for jid, j in self._jobs.items()
|
||||
if j.status in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
|
||||
and (now - j.created_at) > max_age_seconds
|
||||
]
|
||||
for jid in to_remove:
|
||||
del self._jobs[jid]
|
||||
if to_remove:
|
||||
logger.info(f"Cleaned up {len(to_remove)} old jobs")
|
||||
'''
|
||||
cleanup_new = ''' def cleanup(self, max_age_seconds: float = 3600):
|
||||
now = time.time()
|
||||
terminal_states = (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
|
||||
to_remove = [
|
||||
jid for jid, j in self._jobs.items()
|
||||
if j.status in terminal_states and (now - j.created_at) > max_age_seconds
|
||||
]
|
||||
|
||||
# Also cap retained terminal jobs to avoid unbounded memory growth
|
||||
terminal_jobs = sorted(
|
||||
[j for j in self._jobs.values() if j.status in terminal_states],
|
||||
key=lambda j: j.created_at,
|
||||
reverse=True,
|
||||
)
|
||||
overflow = terminal_jobs[settings.JOB_QUEUE_RETAIN_COMPLETED :]
|
||||
to_remove.extend([j.id for j in overflow])
|
||||
|
||||
removed = 0
|
||||
for jid in set(to_remove):
|
||||
if jid in self._jobs:
|
||||
del self._jobs[jid]
|
||||
removed += 1
|
||||
if removed:
|
||||
logger.info(f"Cleaned up {removed} old jobs")
|
||||
|
||||
async def _cleanup_loop(self):
|
||||
interval = max(10, settings.JOB_QUEUE_CLEANUP_INTERVAL_SECONDS)
|
||||
while self._started:
|
||||
try:
|
||||
self.cleanup(max_age_seconds=settings.JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS)
|
||||
except Exception as e:
|
||||
logger.warning(f"Job queue cleanup loop error: {e}")
|
||||
await asyncio.sleep(interval)
|
||||
'''
|
||||
text = text.replace(cleanup_old, cleanup_new)
|
||||
|
||||
# Write the accumulated job_queue.py text back to disk.
jq.write_text(text, encoding="utf-8")
|
||||
|
||||
# 5) NetworkMap polling backoff/jitter max wait
# Swap both fixed 2-second polling loops in NetworkMap.tsx for versions that
# back off exponentially (capped at 10s), add jitter, and stop after 5 minutes.
nm = root / "frontend/src/components/NetworkMap.tsx"
text = nm.read_text(encoding="utf-8")

polling_rewrites = (
    # Inline "poll until ready, then re-fetch" loop.
    (
        " // Poll until ready, then re-fetch\n for (;;) {\n await new Promise(r => setTimeout(r, 2000));\n const st = await network.inventoryStatus(huntId);\n if (st.status === 'ready') break;\n }\n",
        " // Poll until ready (exponential backoff), then re-fetch\n let delayMs = 1500;\n const startedAt = Date.now();\n for (;;) {\n const jitter = Math.floor(Math.random() * 250);\n await new Promise(r => setTimeout(r, delayMs + jitter));\n const st = await network.inventoryStatus(huntId);\n if (st.status === 'ready') break;\n if (Date.now() - startedAt > 5 * 60 * 1000) {\n throw new Error('Host inventory build timed out after 5 minutes');\n }\n delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n }\n",
    ),
    # The waitUntilReady helper.
    (
        " const waitUntilReady = async (): Promise<boolean> => {\n // Poll inventory-status every 2s until 'ready' (or cancelled)\n setProgress('Host inventory is being prepared in the background');\n setLoading(true);\n for (;;) {\n await new Promise(r => setTimeout(r, 2000));\n if (cancelled) return false;\n try {\n const st = await network.inventoryStatus(selectedHuntId);\n if (cancelled) return false;\n if (st.status === 'ready') return true;\n // still building or none (job may not have started yet) - keep polling\n } catch { if (cancelled) return false; }\n }\n };\n",
        " const waitUntilReady = async (): Promise<boolean> => {\n // Poll inventory-status with exponential backoff until 'ready' (or cancelled)\n setProgress('Host inventory is being prepared in the background');\n setLoading(true);\n let delayMs = 1500;\n const startedAt = Date.now();\n for (;;) {\n const jitter = Math.floor(Math.random() * 250);\n await new Promise(r => setTimeout(r, delayMs + jitter));\n if (cancelled) return false;\n try {\n const st = await network.inventoryStatus(selectedHuntId);\n if (cancelled) return false;\n if (st.status === 'ready') return true;\n if (Date.now() - startedAt > 5 * 60 * 1000) {\n setError('Host inventory build timed out. Please retry.');\n return false;\n }\n delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n // still building or none (job may not have started yet) - keep polling\n } catch {\n if (cancelled) return false;\n delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n }\n }\n };\n",
    ),
)
for old_snippet, new_snippet in polling_rewrites:
    text = text.replace(old_snippet, new_snippet)

nm.write_text(text, encoding="utf-8")

print("Patched: config.py, scanner.py, keywords.py, job_queue.py, NetworkMap.tsx")
|
||||
Reference in New Issue
Block a user