Mirror of https://github.com/mblanke/ThreatHunt.git, synced 2026-03-01 14:00:20 -05:00
- Implemented PlaybookManager for creating and managing investigation playbooks with templates.
- Added SavedSearches component for managing bookmarked queries and recurring scans.
- Introduced TimelineView for visualizing forensic event timelines with zoomable charts.
- Enhanced backend processing with auto-queued jobs for dataset uploads and improved database concurrency (see the sketch after this list).
- Updated frontend components for better user experience and performance optimizations.
- Documented changes in the update log for future reference.
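The commit message only names the auto-queued upload flow; its implementation is not part of the file below. As a rough, self-contained illustration of the pattern (every name here is hypothetical, not ThreatHunt's actual job runner), a single asyncio.Queue with one consumer is enough to decouple an upload endpoint from the scan work:

import asyncio

# Hypothetical sketch only: these names do not come from the ThreatHunt codebase.
scan_queue: asyncio.Queue[str] = asyncio.Queue()

async def enqueue_upload(dataset_id: str) -> None:
    """Called by the upload endpoint; returns as soon as the job is queued."""
    await scan_queue.put(dataset_id)

async def scan_worker() -> None:
    """Background consumer; a single worker serializes scans, which also
    limits write contention on a shared database connection."""
    while True:
        dataset_id = await scan_queue.get()
        try:
            print(f"scanning dataset {dataset_id}")  # stand-in for the real scan
        finally:
            scan_queue.task_done()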
62 lines · 2.2 KiB · Python
"""One-off patch script: swap ThreatHunt's scanner._scan_datasets for a
keyset-paginated version that avoids OFFSET-based batching."""
from pathlib import Path

p = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
t = p.read_text(encoding='utf-8')

# Locate the current method; the four-space indent anchors the match to the
# definition inside the class, and the slice ends where the next method
# (_scan_hunts) begins, so the entire old body is replaced.
start = t.index('    async def _scan_datasets(')
end = t.index('    async def _scan_hunts', start)

new_func = '''    async def _scan_datasets(
        self, patterns: dict, result: ScanResult, dataset_ids: list[str] | None
    ) -> None:
        """Scan dataset rows in batches using keyset pagination (no OFFSET)."""
        ds_q = select(Dataset.id, Dataset.name)
        if dataset_ids:
            ds_q = ds_q.where(Dataset.id.in_(dataset_ids))
        ds_result = await self.db.execute(ds_q)
        ds_map = {r[0]: r[1] for r in ds_result.fetchall()}

        if not ds_map:
            return

        import asyncio

        for ds_id, ds_name in ds_map.items():
            last_id = 0
            while True:
                # Keyset pagination: resume from the last seen primary key
                # instead of OFFSET, so each batch is an index range scan.
                rows_result = await self.db.execute(
                    select(DatasetRow)
                    .where(DatasetRow.dataset_id == ds_id)
                    .where(DatasetRow.id > last_id)
                    .order_by(DatasetRow.id)
                    .limit(BATCH_SIZE)
                )
                rows = rows_result.scalars().all()
                if not rows:
                    break

                for row in rows:
                    result.rows_scanned += 1
                    data = row.data or {}
                    for col_name, cell_value in data.items():
                        if cell_value is None:
                            continue
                        text = str(cell_value)
                        self._match_text(
                            text,
                            patterns,
                            "dataset_row",
                            row.id,
                            col_name,
                            result.hits,
                            row_index=row.row_index,
                            dataset_name=ds_name,
                        )

                last_id = rows[-1].id
                # Yield to the event loop between batches so a long scan
                # does not starve other coroutines.
                await asyncio.sleep(0)
                if len(rows) < BATCH_SIZE:
                    break

'''

# Splice the new method in place of the old one and write the file back.
out = t[:start] + new_func + t[end:]
p.write_text(out, encoding='utf-8')
print('optimized scanner _scan_datasets to keyset pagination')
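For context on what the patch buys: OFFSET-based batching makes the database walk past and discard every row before the offset on each iteration, so a full scan of N rows visits O(N^2) rows in total, while seeking past the last seen primary key turns every batch into an index range scan of roughly constant cost. A minimal sketch of the pattern in isolation, assuming the DatasetRow model and a SQLAlchemy 2.x AsyncSession from the patched module are importable:

from sqlalchemy import select

async def iter_dataset_rows(db, batch_size: int):
    """Yield batches of DatasetRow using keyset (seek) pagination."""
    last_id = 0
    while True:
        result = await db.execute(
            select(DatasetRow)
            .where(DatasetRow.id > last_id)
            .order_by(DatasetRow.id)  # ordering key must match the seek key
            .limit(batch_size)
        )
        rows = result.scalars().all()
        if not rows:
            return
        yield rows
        last_id = rows[-1].id  # resume point for the next batch

The only requirement is a unique, monotonically ordered key to seek on; an auto-increment primary key like DatasetRow.id qualifies, which is why the patched method can drop OFFSET entirely.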