Compare commits
14 Commits
Claude-Ite
...
483176c06b
| Author | SHA1 | Date | |
|---|---|---|---|
| 483176c06b | |||
| 13bd9ec9e0 | |||
| 5a2ad8ec1c | |||
| 37a9584d0c | |||
| 7c454036c7 | |||
| 365cf87c90 | |||
| bb562a91ca | |||
| 04a9946891 | |||
| ab8038867a | |||
| 9b98ab9614 | |||
| d0c9f88268 | |||
| dc2dcd02c1 | |||
| 73a2efcde3 | |||
| 77509b08f5 |
3
.env
@@ -1,3 +0,0 @@
|
|||||||
DATABASE_URL=postgresql://admin:secure_password_123@database:5432/threat_hunter
|
|
||||||
SECRET_KEY=your-very-secret-key-change-in-production
|
|
||||||
FLASK_ENV=production
|
|
||||||
53
.env.example
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# ── ThreatHunt Configuration ──────────────────────────────────────────
|
||||||
|
# All backend env vars are prefixed with TH_ and match AppConfig field names.
|
||||||
|
# Copy this file to .env and adjust values.
|
||||||
|
|
||||||
|
# ── General ───────────────────────────────────────────────────────────
|
||||||
|
TH_DEBUG=false
|
||||||
|
|
||||||
|
# ── Database ──────────────────────────────────────────────────────────
|
||||||
|
# SQLite for local dev (zero-config):
|
||||||
|
TH_DATABASE_URL=sqlite+aiosqlite:///./threathunt.db
|
||||||
|
# PostgreSQL for production:
|
||||||
|
# TH_DATABASE_URL=postgresql+asyncpg://threathunt:password@localhost:5432/threathunt
|
||||||
|
|
||||||
|
# ── CORS ──────────────────────────────────────────────────────────────
|
||||||
|
TH_ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000
|
||||||
|
|
||||||
|
# ── File uploads ──────────────────────────────────────────────────────
|
||||||
|
TH_MAX_UPLOAD_SIZE_MB=500
|
||||||
|
|
||||||
|
# ── LLM Cluster (Wile & Roadrunner) ──────────────────────────────────
|
||||||
|
TH_OPENWEBUI_URL=https://ai.guapo613.beer
|
||||||
|
TH_OPENWEBUI_API_KEY=
|
||||||
|
TH_WILE_HOST=100.110.190.12
|
||||||
|
TH_WILE_OLLAMA_PORT=11434
|
||||||
|
TH_ROADRUNNER_HOST=100.110.190.11
|
||||||
|
TH_ROADRUNNER_OLLAMA_PORT=11434
|
||||||
|
|
||||||
|
# ── Default models (auto-selected by TaskRouter) ─────────────────────
|
||||||
|
TH_DEFAULT_FAST_MODEL=llama3.1:latest
|
||||||
|
TH_DEFAULT_HEAVY_MODEL=llama3.1:70b-instruct-q4_K_M
|
||||||
|
TH_DEFAULT_CODE_MODEL=qwen2.5-coder:32b
|
||||||
|
TH_DEFAULT_VISION_MODEL=llama3.2-vision:11b
|
||||||
|
TH_DEFAULT_EMBEDDING_MODEL=bge-m3:latest
|
||||||
|
|
||||||
|
# ── Agent behaviour ──────────────────────────────────────────────────
|
||||||
|
TH_AGENT_MAX_TOKENS=2048
|
||||||
|
TH_AGENT_TEMPERATURE=0.3
|
||||||
|
TH_AGENT_HISTORY_LENGTH=10
|
||||||
|
TH_FILTER_SENSITIVE_DATA=true
|
||||||
|
|
||||||
|
# ── Enrichment API keys (optional) ───────────────────────────────────
|
||||||
|
TH_VIRUSTOTAL_API_KEY=
|
||||||
|
TH_ABUSEIPDB_API_KEY=
|
||||||
|
TH_SHODAN_API_KEY=
|
||||||
|
|
||||||
|
# ── Auth ─────────────────────────────────────────────────────────────
|
||||||
|
TH_JWT_SECRET=CHANGE-ME-IN-PRODUCTION-USE-A-REAL-SECRET
|
||||||
|
TH_JWT_ACCESS_TOKEN_MINUTES=60
|
||||||
|
TH_JWT_REFRESH_TOKEN_DAYS=7
|
||||||
|
|
||||||
|
# ── Frontend ─────────────────────────────────────────────────────────
|
||||||
|
REACT_APP_API_URL=http://localhost:8000
|
||||||
|
|
||||||
60
.gitignore
vendored
@@ -0,0 +1,60 @@
|
|||||||
|
# ── Python ────────────────────────────────────
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
*.egg
|
||||||
|
.eggs/
|
||||||
|
|
||||||
|
# ── Virtual environments ─────────────────────
|
||||||
|
venv/
|
||||||
|
.venv/
|
||||||
|
env/
|
||||||
|
|
||||||
|
# ── IDE / Editor ─────────────────────────────
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# ── OS ────────────────────────────────────────
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# ── Environment / Secrets ────────────────────
|
||||||
|
.env
|
||||||
|
*.env.local
|
||||||
|
|
||||||
|
# ── Database ─────────────────────────────────
|
||||||
|
*.db
|
||||||
|
*.sqlite3
|
||||||
|
|
||||||
|
# ── Uploads ──────────────────────────────────
|
||||||
|
uploads/
|
||||||
|
|
||||||
|
# ── Node / Frontend ──────────────────────────
|
||||||
|
node_modules/
|
||||||
|
frontend/build/
|
||||||
|
frontend/.env.local
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
|
||||||
|
# ── Docker ───────────────────────────────────
|
||||||
|
docker-compose.override.yml
|
||||||
|
|
||||||
|
# ── Test / Coverage ──────────────────────────
|
||||||
|
.coverage
|
||||||
|
htmlcov/
|
||||||
|
.pytest_cache/
|
||||||
|
.mypy_cache/
|
||||||
|
|
||||||
|
# ── Alembic ──────────────────────────────────
|
||||||
|
alembic/versions/*.pyc
|
||||||
|
|
||||||
|
*.db-wal
|
||||||
|
*.db-shm
|
||||||
|
|
||||||
|
|||||||
1
.playwright-mcp/console-2026-02-20T16-32-53-248Z.log
Normal file
@@ -0,0 +1 @@
|
|||||||
|
[ 656ms] [WARNING] No routes matched location "/network-map" @ http://localhost:3000/static/js/main.c0a7ab6d.js:1
|
||||||
1
.playwright-mcp/console-2026-02-20T18-16-44-089Z.log
Normal file
@@ -0,0 +1 @@
|
|||||||
|
[ 4269ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.6d916bcf.js:1
|
||||||
1
.playwright-mcp/console-2026-02-20T18-26-05-692Z.log
Normal file
@@ -0,0 +1 @@
|
|||||||
|
[ 496ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.28ae077d.js:1
|
||||||
76
.playwright-mcp/console-2026-02-20T18-30-45-724Z.log
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
[ 402ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 60389ms] [ERROR] Failed to load resource: the server responded with a status of 500 (Internal Server Error) @ http://localhost:3000/api/analysis/process-tree?hunt_id=4bb956a4225e45459a464da1146d3cf5:0
|
||||||
|
[ 114742ms] [ERROR] Failed to load resource: the server responded with a status of 500 (Internal Server Error) @ http://localhost:3000/api/analysis/process-tree?hunt_id=4bb956a4225e45459a464da1146d3cf5:0
|
||||||
|
[ 116603ms] [ERROR] Failed to load resource: the server responded with a status of 500 (Internal Server Error) @ http://localhost:3000/api/analysis/process-tree?hunt_id=4bb956a4225e45459a464da1146d3cf5:0
|
||||||
|
[ 362021ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 379006ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 379019ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785) @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 379021ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
[ 382647ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 386088ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 386343ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785) @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 386345ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
[ 397704ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 519009ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 519273ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785) @ http://localhost:3000/static/js/main.cb47c3a0.js:1
|
||||||
|
[ 519274ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
|
||||||
1
.playwright-mcp/console-2026-02-20T18-44-41-738Z.log
Normal file
@@ -0,0 +1 @@
|
|||||||
|
[ 1803ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.b2c21c5a.js:1
|
||||||
48
.playwright-mcp/console-2026-02-20T18-46-54-542Z.log
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
[ 2196ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
|
||||||
|
[ 46100ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
|
||||||
|
[ 46117ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785) @ http://localhost:3000/static/js/main.0e63bc98.js:1
|
||||||
|
[ 46118ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
|
||||||
|
[ 52506ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
|
||||||
|
[ 54912ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
|
||||||
|
[ 54928ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785) @ http://localhost:3000/static/js/main.0e63bc98.js:1
|
||||||
|
[ 54929ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
|
||||||
|
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
|
||||||
|
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
|
||||||
|
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
|
||||||
7
.playwright-mcp/console-2026-02-20T18-50-52-269Z.log
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
[ 2548ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
|
||||||
|
[ 32912ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
|
||||||
|
[ 55583ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
|
||||||
|
[ 58208ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
|
||||||
|
[ 1168933ms] [ERROR] Failed to load resource: the server responded with a status of 504 (Gateway Time-out) @ http://localhost:3000/api/analysis/llm-analyze:0
|
||||||
|
[ 1477343ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
|
||||||
|
[ 1482908ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
|
||||||
7
.playwright-mcp/console-2026-02-20T19-16-43-503Z.log
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
[ 9612ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/:0
|
||||||
|
[ 17464ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/enterprise:0
|
||||||
|
[ 20742ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/enterprise:0
|
||||||
|
[ 53258ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/pricing:0
|
||||||
|
[ 59240ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/features/copilot#pricing:0
|
||||||
|
[ 67668ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/features/spark?utm_source=web-copilot-ce-cta&utm_campaign=spark-launch-sep-2025:0
|
||||||
|
[ 72166ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/features/spark?utm_source=web-copilot-ce-cta&utm_campaign=spark-launch-sep-2025:0
|
||||||
3923
.playwright-mcp/console-2026-02-20T19-27-06-976Z.log
Normal file
BIN
.playwright-mcp/page-2026-02-20T16-33-40-311Z.png
Normal file
|
After Width: | Height: | Size: 41 KiB |
BIN
.playwright-mcp/page-2026-02-20T16-34-14-809Z.png
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
.playwright-mcp/page-2026-02-20T16-38-20-099Z.png
Normal file
|
After Width: | Height: | Size: 70 KiB |
BIN
.playwright-mcp/page-2026-02-20T16-42-11-611Z.png
Normal file
|
After Width: | Height: | Size: 103 KiB |
BIN
.playwright-mcp/page-2026-02-20T18-17-19-668Z.png
Normal file
|
After Width: | Height: | Size: 558 KiB |
BIN
.playwright-mcp/page-2026-02-20T18-26-49-357Z.png
Normal file
|
After Width: | Height: | Size: 607 KiB |
BIN
.playwright-mcp/page-2026-02-20T18-31-29-013Z.png
Normal file
|
After Width: | Height: | Size: 341 KiB |
BIN
.playwright-mcp/page-2026-02-20T18-44-58-287Z.png
Normal file
|
After Width: | Height: | Size: 53 KiB |
BIN
.playwright-mcp/page-2026-02-20T18-45-12-934Z.png
Normal file
|
After Width: | Height: | Size: 55 KiB |
BIN
.playwright-mcp/page-2026-02-20T18-47-14-660Z.png
Normal file
|
After Width: | Height: | Size: 193 KiB |
BIN
.playwright-mcp/page-2026-02-20T18-51-32-804Z.png
Normal file
|
After Width: | Height: | Size: 184 KiB |
5
.vscode/settings.json
vendored
@@ -1,5 +0,0 @@
|
|||||||
{
|
|
||||||
"cSpell.words": [
|
|
||||||
"jsonify"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
32
Dockerfile.backend
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# ThreatHunt Backend API - Python 3.13
|
||||||
|
FROM python:3.13-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
gcc curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Copy requirements
|
||||||
|
COPY backend/requirements.txt .
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy backend code
|
||||||
|
COPY backend/ .
|
||||||
|
|
||||||
|
# Create non-root user & data directory
|
||||||
|
RUN useradd -m -u 1000 appuser && mkdir -p /app/data && chown -R appuser:appuser /app
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
|
||||||
|
CMD curl -f http://localhost:8000/ || exit 1
|
||||||
|
|
||||||
|
# Run Alembic migrations then start Uvicorn
|
||||||
|
CMD ["sh", "-c", "python -m alembic upgrade head && python run.py"]
|
||||||
36
Dockerfile.frontend
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# ThreatHunt Frontend - Node.js React
|
||||||
|
FROM node:20-alpine AS builder
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy package files
|
||||||
|
COPY frontend/package.json frontend/package-lock.json* ./
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
|
# Copy source
|
||||||
|
COPY frontend/public ./public
|
||||||
|
COPY frontend/src ./src
|
||||||
|
COPY frontend/tsconfig.json ./
|
||||||
|
|
||||||
|
# Build application
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Production stage — nginx reverse-proxy + static files
|
||||||
|
FROM nginx:alpine
|
||||||
|
|
||||||
|
# Copy built React app
|
||||||
|
COPY --from=builder /app/build /usr/share/nginx/html
|
||||||
|
|
||||||
|
# Copy custom nginx config (proxies /api to backend)
|
||||||
|
COPY frontend/nginx.conf /etc/nginx/conf.d/default.conf
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 3000
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||||
|
CMD wget --quiet --tries=1 --spider http://localhost:3000/ || exit 1
|
||||||
|
|
||||||
|
CMD ["nginx", "-g", "daemon off;"]
|
||||||
509
README.md
@@ -1,69 +1,496 @@
|
|||||||
<<<<<<< Updated upstream
|
# ThreatHunt - Analyst-Assist Threat Hunting Platform
|
||||||
# ThreatHunt
|
|
||||||
=======
|
|
||||||
# Cyber Threat Hunter
|
|
||||||
|
|
||||||
A modern web application for threat hunting and security analysis, built with React frontend and Flask backend.
|
A modern threat hunting platform with integrated analyst-assist agent guidance. Analyze CSV artifact data exported from Velociraptor with AI-powered suggestions for investigation directions, analytical pivots, and hypothesis formation.
|
||||||
|
|
||||||
## Features
|
## Overview
|
||||||
|
|
||||||
- **Security Tools Detection**: Identify running security tools (AV, EDR, VPN)
|
ThreatHunt is a web application designed to help security analysts efficiently hunt for threats by:
|
||||||
- **CSV Processing**: Upload and analyze security logs
|
- Importing CSV artifacts from Velociraptor or other sources
|
||||||
- **Baseline Analysis**: System baseline comparison
|
- Displaying data in an organized, queryable interface
|
||||||
- **Network Analysis**: Network traffic and connection analysis
|
- Providing AI-powered guidance through an analyst-assist agent
|
||||||
- **VirusTotal Integration**: File and URL reputation checking
|
- Suggesting analytical directions, filters, and pivots
|
||||||
|
- Highlighting anomalies and patterns of interest
|
||||||
|
|
||||||
## Architecture
|
> **Agent Policy**: The analyst-assist agent provides read-only guidance only. It does not execute actions, escalate alerts, or modify data. All decisions remain with the analyst.
|
||||||
|
|
||||||
```
|
|
||||||
ThreatHunt/
|
|
||||||
├── frontend/ # React application
|
|
||||||
├── backend/ # Flask API server
|
|
||||||
├── uploaded/ # File upload storage
|
|
||||||
└── output/ # Analysis results
|
|
||||||
```
|
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
### Backend Setup
|
### Docker (Recommended)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd backend
|
# Clone and navigate
|
||||||
chmod +x setup_backend.sh
|
git clone https://github.com/mblanke/ThreatHunt.git
|
||||||
./setup_backend.sh
|
cd ThreatHunt
|
||||||
source venv/bin/activate
|
|
||||||
python app.py
|
# Configure provider (choose one)
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env and set your LLM provider:
|
||||||
|
# Option 1: Online (OpenAI, etc.)
|
||||||
|
# THREAT_HUNT_AGENT_PROVIDER=online
|
||||||
|
# THREAT_HUNT_ONLINE_API_KEY=sk-your-key
|
||||||
|
# Option 2: Local (Ollama, GGML, etc.)
|
||||||
|
# THREAT_HUNT_AGENT_PROVIDER=local
|
||||||
|
# THREAT_HUNT_LOCAL_MODEL_PATH=/path/to/model
|
||||||
|
# Option 3: Networked (Internal inference service)
|
||||||
|
# THREAT_HUNT_AGENT_PROVIDER=networked
|
||||||
|
# THREAT_HUNT_NETWORKED_ENDPOINT=http://service:5000
|
||||||
|
|
||||||
|
# Start services
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Verify
|
||||||
|
curl http://localhost:8000/api/agent/health
|
||||||
|
curl http://localhost:3000
|
||||||
```
|
```
|
||||||
|
|
||||||
### Frontend Setup
|
Access at http://localhost:3000
|
||||||
|
|
||||||
|
### Local Development
|
||||||
|
|
||||||
|
**Backend**:
|
||||||
|
```bash
|
||||||
|
cd backend
|
||||||
|
python -m venv venv
|
||||||
|
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Configure provider
|
||||||
|
export THREAT_HUNT_ONLINE_API_KEY=sk-your-key
|
||||||
|
# OR set another provider env var
|
||||||
|
|
||||||
|
# Run
|
||||||
|
python run.py
|
||||||
|
# API at http://localhost:8000/docs
|
||||||
|
```
|
||||||
|
|
||||||
|
**Frontend** (new terminal):
|
||||||
```bash
|
```bash
|
||||||
cd frontend
|
cd frontend
|
||||||
npm install
|
npm install
|
||||||
npm run dev
|
npm start
|
||||||
|
# App at http://localhost:3000
|
||||||
|
```
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
### Analyst-Assist Agent 🤖
|
||||||
|
- **Read-only guidance**: Explains data patterns and suggests investigation directions
|
||||||
|
- **Context-aware**: Understands current dataset, host, and artifact type
|
||||||
|
- **Pluggable providers**: Local, networked, or online LLM backends
|
||||||
|
- **Transparent reasoning**: Explains logic with caveats and confidence scores
|
||||||
|
- **Governance-compliant**: Strictly adheres to agent policy (no execution, no escalation)
|
||||||
|
|
||||||
|
### Chat Interface
|
||||||
|
- Analyst asks questions about artifact data
|
||||||
|
- Agent provides guidance with suggested pivots and filters
|
||||||
|
- Conversation history for context continuity
|
||||||
|
- Real-time typing and response indicators
|
||||||
|
|
||||||
|
### Data Management
|
||||||
|
- Import CSV artifacts from Velociraptor
|
||||||
|
- Browse and filter findings by severity, host, artifact type
|
||||||
|
- Annotate findings with analyst notes
|
||||||
|
- Track investigation progress
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Backend
|
||||||
|
- **Framework**: FastAPI (Python 3.11)
|
||||||
|
- **Agent Module**: Pluggable LLM provider interface
|
||||||
|
- **API**: RESTful endpoints with OpenAPI documentation
|
||||||
|
- **Structure**: Modular design with clear separation of concerns
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
- **Framework**: React 18 with TypeScript
|
||||||
|
- **Components**: Agent chat panel + analysis dashboard
|
||||||
|
- **Styling**: CSS with responsive design
|
||||||
|
- **State Management**: React hooks + Context API
|
||||||
|
|
||||||
|
### LLM Providers
|
||||||
|
Supports three provider architectures:
|
||||||
|
|
||||||
|
1. **Local**: On-device or on-prem models (GGML, Ollama, vLLM)
|
||||||
|
2. **Networked**: Shared internal inference services
|
||||||
|
3. **Online**: External hosted APIs (OpenAI, Anthropic, Google)
|
||||||
|
|
||||||
|
Auto-detection: Automatically uses the first available provider.
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
ThreatHunt/
|
||||||
|
├── backend/
|
||||||
|
│ ├── app/
|
||||||
|
│ │ ├── agents/ # Analyst-assist agent
|
||||||
|
│ │ │ ├── core.py # ThreatHuntAgent class
|
||||||
|
│ │ │ ├── providers.py # LLM provider interface
|
||||||
|
│ │ │ ├── config.py # Configuration
|
||||||
|
│ │ │ └── __init__.py
|
||||||
|
│ │ ├── api/routes/ # API endpoints
|
||||||
|
│ │ │ ├── agent.py # /api/agent/* routes
|
||||||
|
│ │ │ ├── __init__.py
|
||||||
|
│ │ ├── main.py # FastAPI app
|
||||||
|
│ │ └── __init__.py
|
||||||
|
│ ├── requirements.txt
|
||||||
|
│ ├── run.py
|
||||||
|
│ └── Dockerfile
|
||||||
|
├── frontend/
|
||||||
|
│ ├── src/
|
||||||
|
│ │ ├── components/
|
||||||
|
│ │ │ ├── AgentPanel.tsx # Chat interface
|
||||||
|
│ │ │ └── AgentPanel.css
|
||||||
|
│ │ ├── utils/
|
||||||
|
│ │ │ └── agentApi.ts # API communication
|
||||||
|
│ │ ├── App.tsx
|
||||||
|
│ │ ├── App.css
|
||||||
|
│ │ ├── index.tsx
|
||||||
|
│ │ └── index.css
|
||||||
|
│ ├── public/index.html
|
||||||
|
│ ├── package.json
|
||||||
|
│ ├── tsconfig.json
|
||||||
|
│ └── Dockerfile
|
||||||
|
├── docker-compose.yml
|
||||||
|
├── .env.example
|
||||||
|
├── .gitignore
|
||||||
|
├── AGENT_IMPLEMENTATION.md # Technical guide
|
||||||
|
├── INTEGRATION_GUIDE.md # Deployment guide
|
||||||
|
├── IMPLEMENTATION_SUMMARY.md # Overview
|
||||||
|
├── README.md # This file
|
||||||
|
├── ROADMAP.md
|
||||||
|
└── THREATHUNT_INTENT.md
|
||||||
```
|
```
|
||||||
|
|
||||||
## API Endpoints
|
## API Endpoints
|
||||||
|
|
||||||
- `GET /` - Serve React app
|
### Agent Assistance
|
||||||
- `GET /api/health` - Health check
|
- **POST /api/agent/assist** - Request guidance on artifact data
|
||||||
- `POST /api/upload` - File upload
|
- **GET /api/agent/health** - Check agent availability
|
||||||
- `GET /api/analysis/<id>` - Get analysis results
|
|
||||||
|
|
||||||
## Security Considerations
|
See full API documentation at http://localhost:8000/docs
|
||||||
|
|
||||||
- File upload validation
|
## Configuration
|
||||||
- Input sanitization
|
|
||||||
- Rate limiting
|
### LLM Provider Selection
|
||||||
- CORS configuration
|
|
||||||
|
Set via `THREAT_HUNT_AGENT_PROVIDER` environment variable:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Auto-detect (tries local → networked → online)
|
||||||
|
THREAT_HUNT_AGENT_PROVIDER=auto
|
||||||
|
|
||||||
|
# Local (on-device/on-prem)
|
||||||
|
THREAT_HUNT_AGENT_PROVIDER=local
|
||||||
|
THREAT_HUNT_LOCAL_MODEL_PATH=/models/model.gguf
|
||||||
|
|
||||||
|
# Networked (internal service)
|
||||||
|
THREAT_HUNT_AGENT_PROVIDER=networked
|
||||||
|
THREAT_HUNT_NETWORKED_ENDPOINT=http://inference:5000
|
||||||
|
THREAT_HUNT_NETWORKED_KEY=api-key
|
||||||
|
|
||||||
|
# Online (hosted API)
|
||||||
|
THREAT_HUNT_AGENT_PROVIDER=online
|
||||||
|
THREAT_HUNT_ONLINE_API_KEY=sk-your-key
|
||||||
|
THREAT_HUNT_ONLINE_PROVIDER=openai
|
||||||
|
THREAT_HUNT_ONLINE_MODEL=gpt-3.5-turbo
|
||||||
|
```
|
||||||
|
|
||||||
|
### Agent Behavior
|
||||||
|
|
||||||
|
```bash
|
||||||
|
THREAT_HUNT_AGENT_MAX_TOKENS=1024
|
||||||
|
THREAT_HUNT_AGENT_REASONING=true
|
||||||
|
THREAT_HUNT_AGENT_HISTORY_LENGTH=10
|
||||||
|
THREAT_HUNT_AGENT_FILTER_SENSITIVE=true
|
||||||
|
```
|
||||||
|
|
||||||
|
See `.env.example` for all configuration options.
|
||||||
|
|
||||||
|
## Governance & Compliance
|
||||||
|
|
||||||
|
This implementation strictly follows governance principles:
|
||||||
|
|
||||||
|
- ✅ **Agents assist analysts** - No autonomous execution
|
||||||
|
- ✅ **No tool execution** - Agent provides guidance only
|
||||||
|
- ✅ **No alert escalation** - Analyst controls alerts
|
||||||
|
- ✅ **No data modification** - Read-only analysis
|
||||||
|
- ✅ **Transparent reasoning** - Explains guidance with caveats
|
||||||
|
- ✅ **Analyst authority** - All decisions remain with analyst
|
||||||
|
|
||||||
|
**References**:
|
||||||
|
- `goose-core/governance/AGENT_POLICY.md`
|
||||||
|
- `goose-core/governance/AI_RULES.md`
|
||||||
|
- `THREATHUNT_INTENT.md`
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
- **[AGENT_IMPLEMENTATION.md](AGENT_IMPLEMENTATION.md)** - Detailed technical architecture
|
||||||
|
- **[INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md)** - Deployment and configuration
|
||||||
|
- **[IMPLEMENTATION_SUMMARY.md](IMPLEMENTATION_SUMMARY.md)** - Feature overview
|
||||||
|
|
||||||
|
## Testing the Agent
|
||||||
|
|
||||||
|
### Check Health
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/api/agent/health
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test API
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8000/api/agent/assist \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"query": "What patterns suggest suspicious activity?",
|
||||||
|
"dataset_name": "FileList",
|
||||||
|
"artifact_type": "FileList",
|
||||||
|
"host_identifier": "DESKTOP-ABC123"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use UI
|
||||||
|
1. Open http://localhost:3000
|
||||||
|
2. Enter a question in the agent panel
|
||||||
|
3. View guidance with suggested pivots and filters
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Agent Unavailable (503)
|
||||||
|
- Check environment variables for provider configuration
|
||||||
|
- Verify LLM provider is accessible
|
||||||
|
- See logs: `docker-compose logs backend`
|
||||||
|
|
||||||
|
### No Frontend Response
|
||||||
|
- Verify backend health: `curl http://localhost:8000/api/agent/health`
|
||||||
|
- Check browser console for errors
|
||||||
|
- See logs: `docker-compose logs frontend`
|
||||||
|
|
||||||
|
See [INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md) for detailed troubleshooting.
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
### Running Tests
|
||||||
|
```bash
|
||||||
|
cd backend
|
||||||
|
pytest
|
||||||
|
|
||||||
|
cd ../frontend
|
||||||
|
npm test
|
||||||
|
```
|
||||||
|
|
||||||
|
### Building Images
|
||||||
|
```bash
|
||||||
|
docker-compose build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Logs
|
||||||
|
```bash
|
||||||
|
docker-compose logs -f backend
|
||||||
|
docker-compose logs -f frontend
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Notes
|
||||||
|
|
||||||
|
For production deployment:
|
||||||
|
1. Add authentication to API endpoints
|
||||||
|
2. Enable HTTPS/TLS
|
||||||
|
3. Implement rate limiting
|
||||||
|
4. Filter sensitive data before LLM
|
||||||
|
5. Add audit logging
|
||||||
|
6. Use secrets management for API keys
|
||||||
|
|
||||||
|
See [INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md#security-notes) for details.
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
- [ ] Integration with actual CVE databases
|
||||||
|
- [ ] Fine-tuned models for cybersecurity domain
|
||||||
|
- [ ] Structured output from LLMs (JSON mode)
|
||||||
|
- [ ] Feedback loop on guidance quality
|
||||||
|
- [ ] Multi-modal support (images, documents)
|
||||||
|
- [ ] Compliance reporting and audit trails
|
||||||
|
- [ ] Performance optimization and caching
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
Follow the architecture and governance principles in `goose-core`. All changes must:
|
||||||
|
- Adhere to agent policy (read-only, advisory only)
|
||||||
|
- Conform to shared terminology in goose-core
|
||||||
|
- Include appropriate documentation
|
||||||
|
- Pass tests and lint checks
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
See LICENSE file
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
For issues or questions:
|
||||||
|
1. Check [INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md)
|
||||||
|
2. Review [AGENT_IMPLEMENTATION.md](AGENT_IMPLEMENTATION.md)
|
||||||
|
3. See API docs at http://localhost:8000/docs
|
||||||
|
4. Check backend logs for errors
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Docker and Docker Compose
|
||||||
|
- Python 3.11+ (for local development)
|
||||||
|
- Node.js 18+ (for local development)
|
||||||
|
|
||||||
|
### Quick Start with Docker
|
||||||
|
|
||||||
|
1. Clone the repository:
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/mblanke/ThreatHunt.git
|
||||||
|
cd ThreatHunt
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start all services:
|
||||||
|
```bash
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Access the application:
|
||||||
|
- Frontend: http://localhost:3000
|
||||||
|
- Backend API: http://localhost:8000
|
||||||
|
- API Documentation: http://localhost:8000/docs
|
||||||
|
|
||||||
|
### Local Development
|
||||||
|
|
||||||
|
#### Backend
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd backend
|
||||||
|
python -m venv venv
|
||||||
|
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Set up environment variables
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your settings
|
||||||
|
|
||||||
|
# Run migrations
|
||||||
|
alembic upgrade head
|
||||||
|
|
||||||
|
# Start development server
|
||||||
|
uvicorn app.main:app --reload
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Frontend
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd frontend
|
||||||
|
npm install
|
||||||
|
npm start
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### Authentication
|
||||||
|
- `POST /api/auth/register` - Register a new user
|
||||||
|
- `POST /api/auth/login` - Login and receive JWT token
|
||||||
|
- `GET /api/auth/me` - Get current user profile
|
||||||
|
- `PUT /api/auth/me` - Update current user profile
|
||||||
|
|
||||||
|
### User Management (Admin only)
|
||||||
|
- `GET /api/users` - List all users in tenant
|
||||||
|
- `GET /api/users/{user_id}` - Get user by ID
|
||||||
|
- `PUT /api/users/{user_id}` - Update user
|
||||||
|
- `DELETE /api/users/{user_id}` - Deactivate user
|
||||||
|
|
||||||
|
### Tenants
|
||||||
|
- `GET /api/tenants` - List tenants
|
||||||
|
- `POST /api/tenants` - Create tenant (admin)
|
||||||
|
- `GET /api/tenants/{tenant_id}` - Get tenant by ID
|
||||||
|
|
||||||
|
### Hosts
|
||||||
|
- `GET /api/hosts` - List hosts (scoped to tenant)
|
||||||
|
- `POST /api/hosts` - Create host
|
||||||
|
- `GET /api/hosts/{host_id}` - Get host by ID
|
||||||
|
|
||||||
|
### Ingestion
|
||||||
|
- `POST /api/ingestion/ingest` - Upload and parse CSV files exported from Velociraptor
|
||||||
|
|
||||||
|
### VirusTotal
|
||||||
|
- `POST /api/vt/lookup` - Lookup hash in VirusTotal
|
||||||
|
|
||||||
|
## Authentication Flow
|
||||||
|
|
||||||
|
1. User registers or logs in via `/api/auth/login`
|
||||||
|
2. Backend returns JWT token with user_id, tenant_id, and role
|
||||||
|
3. Frontend stores token in localStorage
|
||||||
|
4. All subsequent API requests include token in Authorization header
|
||||||
|
5. Backend validates token and enforces tenant scoping
|
||||||
|
|
||||||
|
## Multi-Tenancy
|
||||||
|
|
||||||
|
- All data is scoped to tenant_id
|
||||||
|
- Users can only access data within their tenant
|
||||||
|
- Admin users have elevated permissions within their tenant
|
||||||
|
- Cross-tenant access requires explicit permissions
|
||||||
|
|
||||||
|
## Database Migrations
|
||||||
|
|
||||||
|
Create a new migration:
|
||||||
|
```bash
|
||||||
|
cd backend
|
||||||
|
alembic revision --autogenerate -m "Description of changes"
|
||||||
|
```
|
||||||
|
|
||||||
|
Apply migrations:
|
||||||
|
```bash
|
||||||
|
alembic upgrade head
|
||||||
|
```
|
||||||
|
|
||||||
|
Rollback migrations:
|
||||||
|
```bash
|
||||||
|
alembic downgrade -1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
### Backend
|
||||||
|
- `DATABASE_URL` - PostgreSQL connection string
|
||||||
|
- `SECRET_KEY` - Secret key for JWT signing (min 32 characters)
|
||||||
|
- `ACCESS_TOKEN_EXPIRE_MINUTES` - JWT token expiration time (default: 30)
|
||||||
|
- `VT_API_KEY` - VirusTotal API key for hash lookups
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
- `REACT_APP_API_URL` - Backend API URL (default: http://localhost:8000)
|
||||||
|
|
||||||
|
## Security
|
||||||
|
|
||||||
|
- Passwords are hashed using bcrypt
|
||||||
|
- JWT tokens include expiration time
|
||||||
|
- All API endpoints (except login/register) require authentication
|
||||||
|
- Role-based access control for admin operations
|
||||||
|
- Data isolation through tenant scoping
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
### Backend
|
||||||
|
```bash
|
||||||
|
cd backend
|
||||||
|
pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
```bash
|
||||||
|
cd frontend
|
||||||
|
npm test
|
||||||
|
```
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
1. Fork the repository
|
1. Fork the repository
|
||||||
2. Create feature branch
|
2. Create a feature branch
|
||||||
3. Submit pull request
|
3. Make your changes
|
||||||
|
4. Submit a pull request
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
For issues and questions, please open an issue on GitHub.
|
||||||
|
|||||||
21
SKILLS/00-operating-model.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
# Operating Model
|
||||||
|
|
||||||
|
## Default cadence
|
||||||
|
- Prefer iterative progress over big bangs.
|
||||||
|
- Keep diffs small: target ≤ 300 changed lines per PR unless justified.
|
||||||
|
- Update tests/docs as part of the same change when possible.
|
||||||
|
|
||||||
|
## Working agreement
|
||||||
|
- Start with a PLAN for non-trivial tasks.
|
||||||
|
- Implement the smallest slice that satisfies acceptance criteria.
|
||||||
|
- Verify via DoD.
|
||||||
|
- Write a crisp PR summary: what changed, why, and how verified.
|
||||||
|
|
||||||
|
## Stop conditions (plan first)
|
||||||
|
Stop and produce a PLAN (do not code yet) if:
|
||||||
|
- scope is unclear
|
||||||
|
- more than 3 files will change
|
||||||
|
- data model changes
|
||||||
|
- auth/security boundaries
|
||||||
|
- performance-critical paths
|
||||||
36
SKILLS/05-agent-taxonomy.md
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Agent Types & Roles (Practical Taxonomy)
|
||||||
|
|
||||||
|
Use this skill to choose the *right* kind of agent workflow for the job.
|
||||||
|
|
||||||
|
## Common agent "types" (in practice)
|
||||||
|
|
||||||
|
### 1) Chat assistant (no tools)
|
||||||
|
Best for: explanations, brainstorming, small edits.
|
||||||
|
Risk: can hallucinate; no grounding in repo state.
|
||||||
|
|
||||||
|
### 2) Tool-using single agent
|
||||||
|
Best for: well-scoped tasks where the agent can read/write files and run commands.
|
||||||
|
Key control: strict DoD gates + minimal permissions.
|
||||||
|
|
||||||
|
### 3) Planner + Executor (2-role pattern)
|
||||||
|
Best for: medium complexity work (multi-file changes, feature work).
|
||||||
|
Flow: Planner writes plan + acceptance criteria → Executor implements → Reviewer checks.
|
||||||
|
|
||||||
|
### 4) Multi-agent (specialists)
|
||||||
|
Best for: bigger features with separable workstreams (UI, backend, docs, tests).
|
||||||
|
Rule: isolate context per role; use separate branches/worktrees.
|
||||||
|
|
||||||
|
### 5) Supervisor / orchestrator
|
||||||
|
Best for: long-running workflows with checkpoints (pipelines, report generation, PAD docs).
|
||||||
|
Rule: supervisor delegates, enforces gates, and composes final output.
|
||||||
|
|
||||||
|
## Decision rules (fast)
|
||||||
|
- If you can describe it in ≤ 5 steps → single tool-using agent.
|
||||||
|
- If you need tradeoffs/design → Planner + Executor.
|
||||||
|
- If UI + backend + docs/tests all move → multi-agent specialists.
|
||||||
|
- If it's a pipeline that runs repeatedly → orchestrator.
|
||||||
|
|
||||||
|
## Guardrails (always)
|
||||||
|
- DoD is the truth gate.
|
||||||
|
- Separate branches/worktrees for parallel work.
|
||||||
|
- Log decisions + commands in AGENT_LOG.md.
|
||||||
24
SKILLS/10-definition-of-done.md
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
|
||||||
|
# Definition of Done (DoD)
|
||||||
|
|
||||||
|
A change is "done" only when:
|
||||||
|
|
||||||
|
## Code correctness
|
||||||
|
- Builds successfully (if applicable)
|
||||||
|
- Tests pass
|
||||||
|
- Linting/formatting passes
|
||||||
|
- Types/checks pass (if applicable)
|
||||||
|
|
||||||
|
## Quality
|
||||||
|
- No new warnings introduced
|
||||||
|
- Edge cases handled (inputs validated, errors meaningful)
|
||||||
|
- Hot paths not regressed (if applicable)
|
||||||
|
|
||||||
|
## Hygiene
|
||||||
|
- No secrets committed
|
||||||
|
- Docs updated if behavior or usage changed
|
||||||
|
- PR summary includes verification steps
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
- macOS/Linux: `./scripts/dod.sh`
|
||||||
|
- Windows: `.\scripts\dod.ps1`
|
||||||
16
SKILLS/20-repo-map.md
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
# Repo Mapping Skill
|
||||||
|
|
||||||
|
When entering a repo:
|
||||||
|
1) Read README.md
|
||||||
|
2) Identify entrypoints (app main / server startup / CLI)
|
||||||
|
3) Identify config (env vars, .env.example, config files)
|
||||||
|
4) Identify test/lint scripts (package.json, pyproject.toml, Makefile, etc.)
|
||||||
|
5) Write a 10-line "repo map" in the PLAN before changing code
|
||||||
|
|
||||||
|
Output format:
|
||||||
|
- Purpose:
|
||||||
|
- Key modules:
|
||||||
|
- Data flow:
|
||||||
|
- Commands:
|
||||||
|
- Risks:
|
||||||
20
SKILLS/25-algorithms-performance.md
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Algorithms & Performance
|
||||||
|
|
||||||
|
Use this skill when performance matters (large inputs, hot paths, or repeated calls).
|
||||||
|
|
||||||
|
## Checklist
|
||||||
|
- Identify the **state** you're recomputing.
|
||||||
|
- Add **memoization / caching** when the same subproblem repeats.
|
||||||
|
- Prefer **linear scans** + caches over nested loops when possible.
|
||||||
|
- If you can write it as a **recurrence**, you can test it.
|
||||||
|
|
||||||
|
## Practical heuristics
|
||||||
|
- Measure first when possible (timing + input sizes).
|
||||||
|
- Optimize the biggest wins: avoid repeated I/O, repeated parsing, repeated network calls.
|
||||||
|
- Keep caches bounded (size/TTL) and invalidate safely.
|
||||||
|
- Choose data structures intentionally: dict/set for membership, heap for top-k, deque for queues.
|
||||||
|
|
||||||
|
## Review notes (for PRs)
|
||||||
|
- Call out accidental O(n²) patterns.
|
||||||
|
- Suggest table/DP or memoization when repeated work is obvious.
|
||||||
|
- Add tests that cover base cases + typical cases + worst-case size.
|
||||||
31
SKILLS/26-vibe-coding-fundamentals.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Vibe Coding With Fundamentals (Safety Rails)
|
||||||
|
|
||||||
|
Use this skill when you're using "vibe coding" (fast, conversational building) but want production-grade outcomes.
|
||||||
|
|
||||||
|
## The good
|
||||||
|
- Rapid scaffolding and iteration
|
||||||
|
- Fast UI prototypes
|
||||||
|
- Quick exploration of architectures and options
|
||||||
|
|
||||||
|
## The failure mode
|
||||||
|
- "It works on my machine" code with weak tests
|
||||||
|
- Security foot-guns (auth, input validation, secrets)
|
||||||
|
- Performance cliffs (accidental O(n²), repeated I/O)
|
||||||
|
- Unmaintainable abstractions
|
||||||
|
|
||||||
|
## Safety rails (apply every time)
|
||||||
|
- Always start with acceptance criteria (what "done" means).
|
||||||
|
- Prefer small PRs; never dump a huge AI diff.
|
||||||
|
- Require DoD gates (lint/test/build) before merge.
|
||||||
|
- Write tests for behavior changes.
|
||||||
|
- For anything security/data related: do a Reviewer pass.
|
||||||
|
|
||||||
|
## When to slow down
|
||||||
|
- Auth/session/token work
|
||||||
|
- Anything touching payments, PII, secrets
|
||||||
|
- Data migrations/schema changes
|
||||||
|
- Performance-critical paths
|
||||||
|
- "It's flaky" or "it only fails in CI"
|
||||||
|
|
||||||
|
## Practical prompt pattern (use in PLAN)
|
||||||
|
- "State assumptions, list files to touch, propose tests, and include rollback steps."
|
||||||
31
SKILLS/27-performance-profiling.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Performance Profiling (Bun/Node)
|
||||||
|
|
||||||
|
Use this skill when:
|
||||||
|
- a hot path feels slow
|
||||||
|
- CPU usage is high
|
||||||
|
- you suspect accidental O(n²) or repeated work
|
||||||
|
- you need evidence before optimizing
|
||||||
|
|
||||||
|
## Bun CPU profiling
|
||||||
|
Bun supports CPU profiling via `--cpu-prof` (generates a `.cpuprofile` you can open in Chrome DevTools).
|
||||||
|
|
||||||
|
Upcoming: `bun --cpu-prof-md <script>` outputs a CPU profile as **Markdown** so LLMs can read/grep it easily.
|
||||||
|
|
||||||
|
### Workflow (Bun)
|
||||||
|
1) Run the workload with profiling enabled
|
||||||
|
- Today: `bun --cpu-prof ./path/to/script.ts`
|
||||||
|
- Upcoming: `bun --cpu-prof-md ./path/to/script.ts`
|
||||||
|
2) Save the output (or `.cpuprofile`) into `./profiles/` with a timestamp.
|
||||||
|
3) Ask the Reviewer agent to:
|
||||||
|
- identify the top 5 hottest functions
|
||||||
|
- propose the smallest fix
|
||||||
|
- add a regression test or benchmark
|
||||||
|
|
||||||
|
## Node CPU profiling (fallback)
|
||||||
|
- `node --cpu-prof ./script.js` writes a `.cpuprofile` file.
|
||||||
|
- Open in Chrome DevTools → Performance → Load profile.
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
- Optimize based on measured hotspots, not vibes.
|
||||||
|
- Prefer algorithmic wins (remove repeated work) over micro-optimizations.
|
||||||
|
- Keep profiling artifacts out of git unless explicitly needed (use `.gitignore`).
|
||||||
16
SKILLS/30-implementation-rules.md
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
# Implementation Rules
|
||||||
|
|
||||||
|
## Change policy
|
||||||
|
- Prefer edits over rewrites.
|
||||||
|
- Keep changes localized.
|
||||||
|
- One change = one purpose.
|
||||||
|
- Avoid unnecessary abstraction.
|
||||||
|
|
||||||
|
## Dependency policy
|
||||||
|
- Default: do not add dependencies.
|
||||||
|
- If adding: explain why, alternatives considered, and impact.
|
||||||
|
|
||||||
|
## Error handling
|
||||||
|
- Validate inputs at boundaries.
|
||||||
|
- Error messages must be actionable: what failed + what to do next.
|
||||||
14
SKILLS/40-testing-quality.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
# Testing & Quality
|
||||||
|
|
||||||
|
## Strategy
|
||||||
|
- If behavior changes: add/update tests.
|
||||||
|
- Unit tests for logic; integration tests for boundaries; E2E only where needed.
|
||||||
|
|
||||||
|
## Minimum for every PR
|
||||||
|
- A test plan in the PR summary (even if "existing tests cover this").
|
||||||
|
- Run DoD.
|
||||||
|
|
||||||
|
## Flaky tests
|
||||||
|
- Capture repro steps.
|
||||||
|
- Quarantine only with justification + follow-up issue.
|
||||||
16
SKILLS/50-pr-review.md
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
# PR Review Skill
|
||||||
|
|
||||||
|
Reviewer must check:
|
||||||
|
- Correctness: does it do what it claims?
|
||||||
|
- Safety: secrets, injection, auth boundaries
|
||||||
|
- Maintainability: readability, naming, duplication
|
||||||
|
- Tests: added/updated appropriately
|
||||||
|
- DoD: did it pass?
|
||||||
|
|
||||||
|
Reviewer output format:
|
||||||
|
1) Summary
|
||||||
|
2) Must-fix
|
||||||
|
3) Nice-to-have
|
||||||
|
4) Risks
|
||||||
|
5) Verification suggestions
|
||||||
41
SKILLS/56-ui-material-ui.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# Material UI (MUI) Design System
|
||||||
|
|
||||||
|
Use this skill for any React/Next "portal/admin/dashboard" UI so you stay consistent and avoid random component soup.
|
||||||
|
|
||||||
|
## Standard choice
|
||||||
|
- Preferred UI library: **MUI (Material UI)**.
|
||||||
|
- Prefer MUI components over ad-hoc HTML/CSS unless there's a good reason.
|
||||||
|
- One design system per repo (do not mix Chakra/Ant/Bootstrap/etc.).
|
||||||
|
|
||||||
|
## Setup (Next.js/React)
|
||||||
|
- Install: `@mui/material @emotion/react @emotion/styled`
|
||||||
|
- If using icons: `@mui/icons-material`
|
||||||
|
- If using data grid: `@mui/x-data-grid` (or pro if licensed)
|
||||||
|
|
||||||
|
## Theming rules
|
||||||
|
- Define a single theme (typography, spacing, palette) and reuse everywhere.
|
||||||
|
- Use semantic colors (primary/secondary/error/warning/success/info), not hard-coded hex everywhere.
|
||||||
|
- Prefer MUI's `sx` for small styling; use `styled()` for reusable components.
|
||||||
|
|
||||||
|
## "Portal" patterns (modals, popovers, menus)
|
||||||
|
- Use MUI Dialog/Modal/Popover/Menu components instead of DIY portals.
|
||||||
|
- Accessibility requirements:
|
||||||
|
- Focus is trapped in Dialog/Modal.
|
||||||
|
- Escape closes modal unless explicitly prevented.
|
||||||
|
- All inputs have labels; buttons have clear text/aria-labels.
|
||||||
|
- Keyboard navigation works end-to-end.
|
||||||
|
|
||||||
|
## Layout conventions (for portals)
|
||||||
|
- Use: AppBar + Drawer (or NavigationRail equivalent) + main content.
|
||||||
|
- Keep pages as composition of small components: Page → Sections → Widgets.
|
||||||
|
- Keep forms consistent: FormControl + helper text + validation messages.
|
||||||
|
|
||||||
|
## Performance hygiene
|
||||||
|
- Avoid re-render storms: memoize heavy lists; use virtualization for large tables (DataGrid).
|
||||||
|
- Prefer server pagination for huge datasets.
|
||||||
|
|
||||||
|
## PR review checklist
|
||||||
|
- Theme is used (no random styling).
|
||||||
|
- Components are MUI where reasonable.
|
||||||
|
- Modal/popover accessibility is correct.
|
||||||
|
- No mixed UI libraries.
|
||||||
15
SKILLS/60-security-safety.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
|
||||||
|
# Security & Safety
|
||||||
|
|
||||||
|
## Secrets
|
||||||
|
- Never output secrets or tokens.
|
||||||
|
- Never log sensitive inputs.
|
||||||
|
- Never commit credentials.
|
||||||
|
|
||||||
|
## Inputs
|
||||||
|
- Validate external inputs at boundaries.
|
||||||
|
- Fail closed for auth/security decisions.
|
||||||
|
|
||||||
|
## Tooling
|
||||||
|
- No destructive commands unless requested and scoped.
|
||||||
|
- Prefer read-only operations first.
|
||||||
13
SKILLS/70-docs-artifacts.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
# Docs & Artifacts
|
||||||
|
|
||||||
|
Update documentation when:
|
||||||
|
- setup steps change
|
||||||
|
- env vars change
|
||||||
|
- endpoints/CLI behavior changes
|
||||||
|
- data formats change
|
||||||
|
|
||||||
|
Docs standards:
|
||||||
|
- Provide copy/paste commands
|
||||||
|
- Provide expected outputs where helpful
|
||||||
|
- Keep it short and accurate
|
||||||
11
SKILLS/80-mcp-tools.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
# MCP Tools Skill (Optional)
|
||||||
|
|
||||||
|
If this repo defines MCP servers/tools:
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Tool calls must be explicit and logged.
|
||||||
|
- Maintain an allowlist of tools; deny by default.
|
||||||
|
- Every tool must have: purpose, inputs/outputs schema, examples, and tests.
|
||||||
|
- Prefer idempotent tool operations.
|
||||||
|
- Never add tools that can exfiltrate secrets without strict guards.
|
||||||
51
SKILLS/82-mcp-server-design.md
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# MCP Server Design (Agent-First)
|
||||||
|
|
||||||
|
Build MCP servers like you're designing a UI for a non-human user.
|
||||||
|
|
||||||
|
This skill distills Phil Schmid's MCP server best practices into concrete repo rules.
|
||||||
|
Source: "MCP is Not the Problem, It's your Server" (Jan 21, 2026).
|
||||||
|
|
||||||
|
## 1) Outcomes, not operations
|
||||||
|
- Do **not** wrap REST endpoints 1:1 as tools.
|
||||||
|
- Expose high-level, outcome-oriented tools.
|
||||||
|
- Bad: `get_user`, `list_orders`, `get_order_status`
|
||||||
|
- Good: `track_latest_order(email)` (server orchestrates internally)
|
||||||
|
|
||||||
|
## 2) Flatten arguments
|
||||||
|
- Prefer top-level primitives + constrained enums.
|
||||||
|
- Avoid nested `dict`/config objects (agents hallucinate keys).
|
||||||
|
- Defaults reduce decision load.
|
||||||
|
|
||||||
|
## 3) Instructions are context
|
||||||
|
- Tool docstrings are *instructions*:
|
||||||
|
- when to use the tool
|
||||||
|
- argument formatting rules
|
||||||
|
- what the return means
|
||||||
|
- Error strings are also context:
|
||||||
|
- return actionable, self-correcting messages (not raw stack traces)
|
||||||
|
|
||||||
|
## 4) Curate ruthlessly
|
||||||
|
- Aim for **5–15 tools** per server.
|
||||||
|
- One server, one job. Split by persona if needed.
|
||||||
|
- Delete unused tools. Don't dump raw data into context.
|
||||||
|
|
||||||
|
## 5) Name tools for discovery
|
||||||
|
- Avoid generic names (`create_issue`).
|
||||||
|
- Prefer `{service}_{action}_{resource}`:
|
||||||
|
- `velociraptor_run_hunt`
|
||||||
|
- `github_list_prs`
|
||||||
|
- `slack_send_message`
|
||||||
|
|
||||||
|
## 6) Paginate large results
|
||||||
|
- Always support `limit` (default ~20–50).
|
||||||
|
- Return metadata: `has_more`, `next_offset`, `total_count`.
|
||||||
|
- Never return hundreds of rows unbounded.
|
||||||
|
|
||||||
|
## Repo conventions
|
||||||
|
- Put MCP tool specs in `mcp/` (schemas, examples, fixtures).
|
||||||
|
- Provide at least 1 "golden path" example call per tool.
|
||||||
|
- Add an eval that checks:
|
||||||
|
- tool names follow discovery convention
|
||||||
|
- args are flat + typed
|
||||||
|
- responses are concise + stable
|
||||||
|
- pagination works
|
||||||
40
SKILLS/83-fastmcp-3-patterns.md
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# FastMCP 3 Patterns (Providers + Transforms)
|
||||||
|
|
||||||
|
Use this skill when you are building MCP servers in Python and want:
|
||||||
|
- composable tool sets
|
||||||
|
- per-user/per-session behavior
|
||||||
|
- auth, versioning, observability, and long-running tasks
|
||||||
|
|
||||||
|
## Mental model (FastMCP 3)
|
||||||
|
FastMCP 3 treats everything as three composable primitives:
|
||||||
|
- **Components**: what you expose (tools, resources, prompts)
|
||||||
|
- **Providers**: where components come from (decorators, files, OpenAPI, remote MCP, etc.)
|
||||||
|
- **Transforms**: how you reshape what clients see (namespace, filters, auth, versioning, visibility)
|
||||||
|
|
||||||
|
## Recommended architecture for Marc's platform
|
||||||
|
Build a **single "Cyber MCP Gateway"** that composes providers:
|
||||||
|
- LocalProvider: core cyber tools (run hunt, parse triage, generate report)
|
||||||
|
- OpenAPIProvider: wrap stable internal APIs (ticketing, asset DB) without 1:1 endpoint exposure
|
||||||
|
- ProxyProvider/FastMCPProvider: mount sub-servers (e.g., Velociraptor tools, Intel feeds)
|
||||||
|
|
||||||
|
Then apply transforms:
|
||||||
|
- Namespace per domain: `hunt.*`, `intel.*`, `pad.*`
|
||||||
|
- Visibility per session: hide dangerous tools unless user/role allows
|
||||||
|
- VersionFilter: keep old clients working while you evolve tools
|
||||||
|
|
||||||
|
## Production must-haves
|
||||||
|
- **Tool timeouts**: never let a tool hang forever
|
||||||
|
- **Pagination**: all list tools must be bounded
|
||||||
|
- **Background tasks**: use for long hunts / ingest jobs
|
||||||
|
- **Tracing**: emit OpenTelemetry traces so you can debug agent/tool behavior
|
||||||
|
|
||||||
|
## Auth rules
|
||||||
|
- Prefer component-level auth for "dangerous" tools.
|
||||||
|
- Default stance: read-only tools visible; write/execute tools gated.
|
||||||
|
|
||||||
|
## Versioning rules
|
||||||
|
- Version your components when you change schemas or semantics.
|
||||||
|
- Keep 1 previous version callable during migrations.
|
||||||
|
|
||||||
|
## Upgrade guidance
|
||||||
|
FastMCP 3 is in beta; pin to v2 for production stability until you have validated v3 against your own workloads.
|
||||||
148
_add_label_filter_networkmap.py
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
from pathlib import Path

# One-off codemod: add a user-selectable "label mode" ('all' | 'highlight' |
# 'none') to the NetworkMap canvas renderer and expose it in the toolbar.
# Every step is an anchored string replacement guarded by an `in` test, so the
# script is safe to re-run on an already-patched file (each replace is a no-op
# once its anchor no longer matches).
#
# NOTE(review): this script was recovered from a whitespace-mangled source;
# leading indentation inside the TS/TSX anchor strings below was reconstructed
# using the file's apparent 2-space convention -- verify each anchor against
# the actual NetworkMap.tsx before running, since an indent mismatch silently
# skips that step.

# Target component to patch (absolute Windows path; raw string avoids escapes).
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')

# 1) Add label mode type near graph types
# Anchor on the existing GEdge/Graph interface pair; the second condition
# keeps the insert idempotent.
marker="interface GEdge { source: string; target: string; weight: number }\ninterface Graph { nodes: GNode[]; edges: GEdge[] }\n"
if marker in t and "type LabelMode" not in t:
    t=t.replace(marker, marker+"\ntype LabelMode = 'all' | 'highlight' | 'none';\n")

# 2) extend drawLabels signature
# Appends a trailing `labelMode: LabelMode` parameter to drawLabels.
old_sig="""function drawLabels(
  ctx: CanvasRenderingContext2D, graph: Graph,
  hovered: string | null, selected: string | null,
  search: string, matchSet: Set<string>, vp: Viewport,
  simplify: boolean,
) {
"""
new_sig="""function drawLabels(
  ctx: CanvasRenderingContext2D, graph: Graph,
  hovered: string | null, selected: string | null,
  search: string, matchSet: Set<string>, vp: Viewport,
  simplify: boolean, labelMode: LabelMode,
) {
"""
if old_sig in t:
    t=t.replace(old_sig,new_sig)

# 3) label mode guards inside drawLabels
# 'none' short-circuits all label drawing; 'highlight' draws labels only when
# there is a search/hover/selection; 'all' bypasses the simplify early-return.
old_guard="""  const dimmed = search.length > 0;
  if (simplify && !search && !hovered && !selected) {
    return;
  }
"""
new_guard="""  if (labelMode === 'none') return;
  const dimmed = search.length > 0;
  if (labelMode === 'highlight' && !search && !hovered && !selected) return;
  if (simplify && labelMode !== 'all' && !search && !hovered && !selected) {
    return;
  }
"""
if old_guard in t:
    t=t.replace(old_guard,new_guard)

# In 'all' mode keep the original per-node visibility heuristic; in the other
# modes only highlighted nodes get labels.
old_show="""    const isHighlight = hovered === n.id || selected === n.id || matchSet.has(n.id);
    const show = isHighlight || n.meta.type === 'host' || n.count >= 2;
    if (!show) continue;
"""
new_show="""    const isHighlight = hovered === n.id || selected === n.id || matchSet.has(n.id);
    const show = labelMode === 'all'
      ? (isHighlight || n.meta.type === 'host' || n.count >= 2)
      : isHighlight;
    if (!show) continue;
"""
if old_show in t:
    t=t.replace(old_show,new_show)

# 4) drawGraph signature and call site
old_graph_sig="""function drawGraph(
  ctx: CanvasRenderingContext2D, graph: Graph,
  hovered: string | null, selected: string | null, search: string,
  vp: Viewport, animTime: number, dpr: number,
) {
"""
new_graph_sig="""function drawGraph(
  ctx: CanvasRenderingContext2D, graph: Graph,
  hovered: string | null, selected: string | null, search: string,
  vp: Viewport, animTime: number, dpr: number, labelMode: LabelMode,
) {
"""
if old_graph_sig in t:
    t=t.replace(old_graph_sig,new_graph_sig)

# Thread labelMode through drawGraph's internal drawLabels call.
old_drawlabels_call="drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify);"
new_drawlabels_call="drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify, labelMode);"
if old_drawlabels_call in t:
    t=t.replace(old_drawlabels_call,new_drawlabels_call)

# 5) state for label mode
# New React state next to the existing selection/search state; default is
# 'highlight' (labels only for hovered/selected/search matches).
state_anchor="  const [selectedNode, setSelectedNode] = useState<GNode | null>(null);\n  const [search, setSearch] = useState('');\n"
state_new="  const [selectedNode, setSelectedNode] = useState<GNode | null>(null);\n  const [search, setSearch] = useState('');\n  const [labelMode, setLabelMode] = useState<LabelMode>('highlight');\n"
if state_anchor in t:
    t=t.replace(state_anchor,state_new)

# 6) pass labelMode in draw calls
# Animation-tick call site.
old_tick_draw="drawGraph(ctx, g, hoveredRef.current, selectedNodeRef.current?.id ?? null, searchRef.current, vpRef.current, ts, dpr);"
new_tick_draw="drawGraph(ctx, g, hoveredRef.current, selectedNodeRef.current?.id ?? null, searchRef.current, vpRef.current, ts, dpr, labelMode);"
if old_tick_draw in t:
    t=t.replace(old_tick_draw,new_tick_draw)

# Static redraw call site.
old_redraw_draw="if (ctx) drawGraph(ctx, graph, hovered, selectedNode?.id ?? null, search, vpRef.current, animTimeRef.current, dpr);"
new_redraw_draw="if (ctx) drawGraph(ctx, graph, hovered, selectedNode?.id ?? null, search, vpRef.current, animTimeRef.current, dpr, labelMode);"
if old_redraw_draw in t:
    t=t.replace(old_redraw_draw,new_redraw_draw)

# 7) include labelMode in redraw deps
# Two dependency-array spellings are tolerated; fall back to the second form
# when the first anchor is absent.
old_redraw_dep="] , [graph, hovered, selectedNode, search]);"
if old_redraw_dep in t:
    t=t.replace(old_redraw_dep, "] , [graph, hovered, selectedNode, search, labelMode]);")
else:
    t=t.replace("  }, [graph, hovered, selectedNode, search]);","  }, [graph, hovered, selectedNode, search, labelMode]);")

# 8) Add toolbar selector after search field
# Replaces the search TextField block with itself plus a "Labels" <Select>.
# (This step adds no TSX imports, so FormControl/InputLabel/Select/MenuItem
# must already be imported by the component -- verify.)
# `\u2026` is a Python escape for the ellipsis character, matching the
# placeholder text already in the file.
search_block="""        <TextField
          size="small"
          placeholder="Search hosts, IPs, users\u2026"
          value={search}
          onChange={e => setSearch(e.target.value)}
          sx={{ width: 220, '& .MuiInputBase-input': { py: 0.8 } }}
          slotProps={{
            input: {
              startAdornment: <SearchIcon sx={{ mr: 0.5, fontSize: 18, color: 'text.secondary' }} />,
            },
          }}
        />
"""
label_block="""        <TextField
          size="small"
          placeholder="Search hosts, IPs, users\u2026"
          value={search}
          onChange={e => setSearch(e.target.value)}
          sx={{ width: 220, '& .MuiInputBase-input': { py: 0.8 } }}
          slotProps={{
            input: {
              startAdornment: <SearchIcon sx={{ mr: 0.5, fontSize: 18, color: 'text.secondary' }} />,
            },
          }}
        />

        <FormControl size="small" sx={{ minWidth: 140 }}>
          <InputLabel id="label-mode-selector">Labels</InputLabel>
          <Select
            labelId="label-mode-selector"
            value={labelMode}
            label="Labels"
            onChange={e => setLabelMode(e.target.value as LabelMode)}
            sx={{ '& .MuiSelect-select': { py: 0.8 } }}
          >
            <MenuItem value="none">None</MenuItem>
            <MenuItem value="highlight">Selected/Search</MenuItem>
            <MenuItem value="all">All</MenuItem>
          </Select>
        </FormControl>
"""
if search_block in t:
    t=t.replace(search_block,label_block)

p.write_text(t,encoding='utf-8')
print('added network map label filter control and renderer modes')
|
||||||
18
_add_scanner_budget_config.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
"""Idempotent codemod: add SCANNER_MAX_ROWS_PER_SCAN to backend/app/config.py.

Inserts a global per-scan row-budget ``Field`` directly after the existing
``SCANNER_BATCH_SIZE`` field in the "Scanner settings" section.

Fix over the original: the anchor block ``OLD`` is a prefix of ``NEW``, so the
original script would re-match after a successful run and insert a duplicate
field every time it was executed.  ``apply_patch`` now checks for the new
field first and becomes a no-op once the patch is applied.
"""
from pathlib import Path

# Settings module to patch.
CONFIG_PATH = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')

# Name of the inserted field; doubles as the idempotency sentinel.
NEW_FIELD = 'SCANNER_MAX_ROWS_PER_SCAN'

# Anchor block searched for in config.py.
# NOTE(review): leading indentation was reconstructed from a whitespace-mangled
# source -- assumed to be the 4-space class-body indent of the settings class;
# verify against the real config.py before running.
OLD = '''    # -- Scanner settings -----------------------------------------------
    SCANNER_BATCH_SIZE: int = Field(default=500, description="Rows per scanner batch")
'''
NEW = '''    # -- Scanner settings -----------------------------------------------
    SCANNER_BATCH_SIZE: int = Field(default=500, description="Rows per scanner batch")
    SCANNER_MAX_ROWS_PER_SCAN: int = Field(
        default=300000,
        description="Global row budget for a single AUP scan request (0 = unlimited)",
    )
'''


def apply_patch(text: str) -> str:
    """Return *text* with the row-budget field inserted after the anchor.

    Idempotent: returns *text* unchanged when the field is already present.
    Raises SystemExit('scanner settings block not found') when the anchor
    block cannot be located (same failure behavior as the original script).
    """
    if NEW_FIELD in text:
        # Already patched -- do not insert a duplicate field.
        return text
    if OLD not in text:
        raise SystemExit('scanner settings block not found')
    return text.replace(OLD, NEW)


def main() -> None:
    """Read config.py, apply the patch, and write the file back."""
    original = CONFIG_PATH.read_text(encoding='utf-8')
    patched = apply_patch(original)
    # Skip the write when nothing changed (second run) to avoid touching mtime.
    if patched != original:
        CONFIG_PATH.write_text(patched, encoding='utf-8')
    print('added SCANNER_MAX_ROWS_PER_SCAN config')


if __name__ == '__main__':
    main()
|
||||||
46
_apply_frontend_scale_patch.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from pathlib import Path

# Codemod: make the network map "scale-aware".
#   - client.ts: add NetworkSummary types plus summary()/subgraph() API calls
#   - NetworkMap.tsx: add size thresholds, inject a loadScaleAwareGraph helper,
#     and rewrite the old loadGraph callback into a thin delegator
# Each edit is an anchored replace guarded by a containment check, so re-runs
# do not duplicate changes.
#
# NOTE(review): recovered from a whitespace-mangled source; indentation inside
# the TS/TSX anchor strings was reconstructed (2-space convention), and the
# long setError(...) message below was re-joined from a line-wrapped original.
# Verify the anchors against the real files before running.

root = Path(r"d:\Projects\Dev\ThreatHunt")

# -------- client.ts --------
client = root / "frontend/src/api/client.ts"
text = client.read_text(encoding="utf-8")

# Add NetworkSummaryHost/NetworkSummary interfaces right after the existing
# InventoryStatus interface (idempotent via the outer containment check).
if "export interface NetworkSummary" not in text:
    insert_after = "export interface InventoryStatus {\n  hunt_id: string;\n  status: 'ready' | 'building' | 'none';\n}\n"
    addition = insert_after + "\nexport interface NetworkSummaryHost {\n  id: string;\n  hostname: string;\n  row_count: number;\n  ip_count: number;\n  user_count: number;\n}\n\nexport interface NetworkSummary {\n  stats: InventoryStats;\n  top_hosts: NetworkSummaryHost[];\n  top_edges: InventoryConnection[];\n  status?: 'building' | 'deferred';\n  message?: string;\n}\n"
    text = text.replace(insert_after, addition)

# Rewrite the `network` API object: widen hostInventory's return type to also
# allow a building/deferred status payload, and add summary()/subgraph().
net_old = """export const network = {\n  hostInventory: (huntId: string, force = false) =>\n    api<HostInventory>(`/api/network/host-inventory?hunt_id=${encodeURIComponent(huntId)}${force ? '&force=true' : ''}`),\n  inventoryStatus: (huntId: string) =>\n    api<InventoryStatus>(`/api/network/inventory-status?hunt_id=${encodeURIComponent(huntId)}`),\n  rebuildInventory: (huntId: string) =>\n    api<{ job_id: string; status: string }>(`/api/network/rebuild-inventory?hunt_id=${encodeURIComponent(huntId)}`, { method: 'POST' }),\n};"""
net_new = """export const network = {\n  hostInventory: (huntId: string, force = false) =>\n    api<HostInventory | { status: 'building' | 'deferred'; message?: string }>(`/api/network/host-inventory?hunt_id=${encodeURIComponent(huntId)}${force ? '&force=true' : ''}`),\n  summary: (huntId: string, topN = 20) =>\n    api<NetworkSummary | { status: 'building' | 'deferred'; message?: string }>(`/api/network/summary?hunt_id=${encodeURIComponent(huntId)}&top_n=${topN}`),\n  subgraph: (huntId: string, maxHosts = 250, maxEdges = 1500, nodeId?: string) => {\n    let qs = `/api/network/subgraph?hunt_id=${encodeURIComponent(huntId)}&max_hosts=${maxHosts}&max_edges=${maxEdges}`;\n    if (nodeId) qs += `&node_id=${encodeURIComponent(nodeId)}`;\n    return api<HostInventory | { status: 'building' | 'deferred'; message?: string }>(qs);\n  },\n  inventoryStatus: (huntId: string) =>\n    api<InventoryStatus>(`/api/network/inventory-status?hunt_id=${encodeURIComponent(huntId)}`),\n  rebuildInventory: (huntId: string) =>\n    api<{ job_id: string; status: string }>(`/api/network/rebuild-inventory?hunt_id=${encodeURIComponent(huntId)}`, { method: 'POST' }),\n};"""
if net_old in text:
    text = text.replace(net_old, net_new)

client.write_text(text, encoding="utf-8")

# -------- NetworkMap.tsx --------
nm = root / "frontend/src/components/NetworkMap.tsx"
text = nm.read_text(encoding="utf-8")

# add constants
# Module-level thresholds: when a hunt is "large" and how much subgraph to pull.
if "LARGE_HUNT_HOST_THRESHOLD" not in text:
    text = text.replace("let lastSelectedHuntId = '';\n", "let lastSelectedHuntId = '';\nconst LARGE_HUNT_HOST_THRESHOLD = 400;\nconst LARGE_HUNT_SUBGRAPH_HOSTS = 350;\nconst LARGE_HUNT_SUBGRAPH_EDGES = 2500;\n")

# inject helper in component after sleep
# The helper fetches the summary first; large hunts get a bounded subgraph,
# smaller hunts get the full inventory.  waitReadyThen polls inventory-status
# with exponential backoff (1.5s..10s, +jitter) and a 5-minute timeout.
marker = "  const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));\n"
if "loadScaleAwareGraph" not in text:
    helper = marker + "\n  const loadScaleAwareGraph = useCallback(async (huntId: string, forceRefresh = false) => {\n    setLoading(true); setError(''); setGraph(null); setStats(null);\n    setSelectedNode(null); setPopoverAnchor(null);\n\n    const waitReadyThen = async <T,>(fn: () => Promise<T>): Promise<T> => {\n      let delayMs = 1500;\n      const startedAt = Date.now();\n      for (;;) {\n        const out: any = await fn();\n        if (out && !out.status) return out as T;\n        const st = await network.inventoryStatus(huntId);\n        if (st.status === 'ready') {\n          const out2: any = await fn();\n          if (out2 && !out2.status) return out2 as T;\n        }\n        if (Date.now() - startedAt > 5 * 60 * 1000) throw new Error('Network data build timed out after 5 minutes');\n        const jitter = Math.floor(Math.random() * 250);\n        await sleep(delayMs + jitter);\n        delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n      }\n    };\n\n    try {\n      setProgress('Loading network summary');\n      const summary: any = await waitReadyThen(() => network.summary(huntId, 20));\n      const totalHosts = summary?.stats?.total_hosts || 0;\n\n      if (totalHosts > LARGE_HUNT_HOST_THRESHOLD) {\n        setProgress(`Large hunt detected (${totalHosts} hosts). Loading focused subgraph`);\n        const sub: any = await waitReadyThen(() => network.subgraph(huntId, LARGE_HUNT_SUBGRAPH_HOSTS, LARGE_HUNT_SUBGRAPH_EDGES));\n        if (!sub?.hosts || sub.hosts.length === 0) {\n          setError('No hosts found for subgraph.');\n          return;\n        }\n        const { w, h } = canvasSizeRef.current;\n        const g = buildGraphFromInventory(sub.hosts, sub.connections || [], w, h);\n        simulate(g, w / 2, h / 2, 60);\n        simAlphaRef.current = 0.3;\n        setStats(summary.stats);\n        graphCache.set(huntId, { graph: g, stats: summary.stats, ts: Date.now() });\n        setGraph(g);\n        return;\n      }\n\n      // Small/medium hunts: load full inventory\n      setProgress('Loading host inventory');\n      const inv: any = await waitReadyThen(() => network.hostInventory(huntId, forceRefresh));\n      if (!inv?.hosts || inv.hosts.length === 0) {\n        setError('No hosts found. Upload CSV files with host-identifying columns (ClientId, Fqdn, Hostname) to this hunt.');\n        return;\n      }\n      const { w, h } = canvasSizeRef.current;\n      const g = buildGraphFromInventory(inv.hosts, inv.connections || [], w, h);\n      simulate(g, w / 2, h / 2, 60);\n      simAlphaRef.current = 0.3;\n      setStats(summary.stats || inv.stats);\n      graphCache.set(huntId, { graph: g, stats: summary.stats || inv.stats, ts: Date.now() });\n      setGraph(g);\n    } catch (e: any) {\n      console.error('[NetworkMap] scale-aware load error:', e);\n      setError(e.message || 'Failed to load network data');\n    } finally {\n      setLoading(false);\n      setProgress('');\n    }\n  }, []);\n"
    text = text.replace(marker, helper)

# simplify existing loadGraph function body to delegate
pattern_start = text.find("  // Load host inventory for selected hunt (with cache).")
if pattern_start != -1:
    # replace the whole loadGraph useCallback block by simple delegator
    # (non-greedy [\s\S]*? spans the old body up to its closing deps comment)
    import re
    block_re = re.compile(r"  // Load host inventory for selected hunt \(with cache\)\.[\s\S]*?\n  \}, \[\]\); // Stable - reads canvasSizeRef, no state deps\n", re.M)
    repl = "  // Load graph data for selected hunt (delegates to scale-aware loader).\n  const loadGraph = useCallback(async (huntId: string, forceRefresh = false) => {\n    if (!huntId) return;\n\n    // Check module-level cache first (5 min TTL)\n    if (!forceRefresh) {\n      const cached = graphCache.get(huntId);\n      if (cached && Date.now() - cached.ts < 5 * 60 * 1000) {\n        setGraph(cached.graph);\n        setStats(cached.stats);\n        setError('');\n        simAlphaRef.current = 0;\n        return;\n      }\n    }\n\n    await loadScaleAwareGraph(huntId, forceRefresh);\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, []); // Stable - reads canvasSizeRef, no state deps\n"
    text = block_re.sub(repl, text, count=1)

nm.write_text(text, encoding="utf-8")

print("Patched frontend client + NetworkMap for scale-aware loading")
|
||||||
206
_apply_phase1_patch.py
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
from pathlib import Path

# Phase 1 codemod for ThreatHunt: safer defaults + job-queue hardening.
#   1. config.py      -- add job-queue tuning Fields
#   2. scanner.py     -- default scan scope to dataset-only
#   3. keywords.py    -- matching route-parameter defaults
#   4. job_queue.py   -- dedupe submissions, bound retention, periodic cleanup
#   5. NetworkMap.tsx -- polling backoff/jitter + 5-minute timeout
# Every edit is an anchored string replacement; an unmatched anchor leaves the
# target file unchanged (str.replace is then a no-op).
#
# NOTE(review): recovered from a whitespace-mangled source; indentation inside
# the anchor strings below was reconstructed (4-space Python, 2-space TSX) --
# verify against the target files before running, since a mismatched anchor
# silently skips that edit.

root = Path(r"d:\Projects\Dev\ThreatHunt")

# 1) config.py additions
cfg = root / "backend/app/config.py"
text = cfg.read_text(encoding="utf-8")
# Anchor: the existing Scanner settings block inside the settings class.
needle = "    # -- Scanner settings -----------------------------------------------\n    SCANNER_BATCH_SIZE: int = Field(default=500, description=\"Rows per scanner batch\")\n"
insert = "    # -- Scanner settings -----------------------------------------------\n    SCANNER_BATCH_SIZE: int = Field(default=500, description=\"Rows per scanner batch\")\n\n    # -- Job queue settings ----------------------------------------------\n    JOB_QUEUE_MAX_BACKLOG: int = Field(\n        default=2000, description=\"Soft cap for queued background jobs\"\n    )\n    JOB_QUEUE_RETAIN_COMPLETED: int = Field(\n        default=3000, description=\"Maximum completed/failed jobs to retain in memory\"\n    )\n    JOB_QUEUE_CLEANUP_INTERVAL_SECONDS: int = Field(\n        default=60, description=\"How often to run in-memory job cleanup\"\n    )\n    JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS: int = Field(\n        default=3600, description=\"Age threshold for in-memory completed job cleanup\"\n    )\n"
if needle in text:
    text = text.replace(needle, insert)
cfg.write_text(text, encoding="utf-8")

# 2) scanner.py default scope = dataset-only
# Flip the keyword-scan scope flags from opt-out (True) to opt-in (False).
scanner = root / "backend/app/services/scanner.py"
text = scanner.read_text(encoding="utf-8")
text = text.replace("    scan_hunts: bool = True,", "    scan_hunts: bool = False,")
text = text.replace("    scan_annotations: bool = True,", "    scan_annotations: bool = False,")
text = text.replace("    scan_messages: bool = True,", "    scan_messages: bool = False,")
scanner.write_text(text, encoding="utf-8")

# 3) keywords.py defaults = dataset-only
# Same flag flip for the API route defaults (anchors here have no trailing comma).
kw = root / "backend/app/api/routes/keywords.py"
text = kw.read_text(encoding="utf-8")
text = text.replace("    scan_hunts: bool = True", "    scan_hunts: bool = False")
text = text.replace("    scan_annotations: bool = True", "    scan_annotations: bool = False")
text = text.replace("    scan_messages: bool = True", "    scan_messages: bool = False")
kw.write_text(text, encoding="utf-8")

# 4) job_queue.py dedupe + periodic cleanup
jq = root / "backend/app/services/job_queue.py"
text = jq.read_text(encoding="utf-8")

# Import settings so the queue can read the new tuning Fields added in step 1.
text = text.replace(
    "from typing import Any, Callable, Coroutine, Optional\n",
    "from typing import Any, Callable, Coroutine, Optional\n\nfrom app.config import settings\n"
)

# Track the periodic-cleanup task on the queue instance.
text = text.replace(
    "        self._completion_callbacks: list[Callable[[Job], Coroutine]] = []\n",
    "        self._completion_callbacks: list[Callable[[Job], Coroutine]] = []\n        self._cleanup_task: asyncio.Task | None = None\n"
)

# start(): also launch the periodic cleanup loop (guarded for restarts).
start_old = '''    async def start(self):
        if self._started:
            return
        self._started = True
        for i in range(self._max_workers):
            task = asyncio.create_task(self._worker(i))
            self._workers.append(task)
        logger.info(f"Job queue started with {self._max_workers} workers")
'''
start_new = '''    async def start(self):
        if self._started:
            return
        self._started = True
        for i in range(self._max_workers):
            task = asyncio.create_task(self._worker(i))
            self._workers.append(task)
        if not self._cleanup_task or self._cleanup_task.done():
            self._cleanup_task = asyncio.create_task(self._cleanup_loop())
        logger.info(f"Job queue started with {self._max_workers} workers")
'''
text = text.replace(start_old, start_new)

# stop(): also cancel and await the cleanup task.
stop_old = '''    async def stop(self):
        self._started = False
        for w in self._workers:
            w.cancel()
        await asyncio.gather(*self._workers, return_exceptions=True)
        self._workers.clear()
        logger.info("Job queue stopped")
'''
stop_new = '''    async def stop(self):
        self._started = False
        for w in self._workers:
            w.cancel()
        await asyncio.gather(*self._workers, return_exceptions=True)
        self._workers.clear()
        if self._cleanup_task:
            self._cleanup_task.cancel()
            await asyncio.gather(self._cleanup_task, return_exceptions=True)
            self._cleanup_task = None
        logger.info("Job queue stopped")
'''
text = text.replace(stop_old, stop_new)

# submit(): dedupe identical queued/running work and warn on deep backlog.
submit_old = '''    def submit(self, job_type: JobType, **params) -> Job:
        job = Job(id=str(uuid.uuid4()), job_type=job_type, params=params)
        self._jobs[job.id] = job
        self._queue.put_nowait(job.id)
        logger.info(f"Job submitted: {job.id} ({job_type.value}) params={params}")
        return job
'''
submit_new = '''    def submit(self, job_type: JobType, **params) -> Job:
        # Soft backpressure: prefer dedupe over queue amplification
        dedupe_job = self._find_active_duplicate(job_type, params)
        if dedupe_job is not None:
            logger.info(
                f"Job deduped: reusing {dedupe_job.id} ({job_type.value}) params={params}"
            )
            return dedupe_job

        if self._queue.qsize() >= settings.JOB_QUEUE_MAX_BACKLOG:
            logger.warning(
                "Job queue backlog high (%d >= %d). Accepting job but system may be degraded.",
                self._queue.qsize(), settings.JOB_QUEUE_MAX_BACKLOG,
            )

        job = Job(id=str(uuid.uuid4()), job_type=job_type, params=params)
        self._jobs[job.id] = job
        self._queue.put_nowait(job.id)
        logger.info(f"Job submitted: {job.id} ({job_type.value}) params={params}")
        return job
'''
text = text.replace(submit_old, submit_new)

# get_job(): anchor for appending the _find_active_duplicate helper after it.
insert_methods_after = "    def get_job(self, job_id: str) -> Job | None:\n        return self._jobs.get(job_id)\n"
new_methods = '''    def get_job(self, job_id: str) -> Job | None:
        return self._jobs.get(job_id)

    def _find_active_duplicate(self, job_type: JobType, params: dict) -> Job | None:
        """Return queued/running job with same key workload to prevent duplicate storms."""
        key_fields = ["dataset_id", "hunt_id", "hostname", "question", "mode"]
        sig = tuple((k, params.get(k)) for k in key_fields if params.get(k) is not None)
        if not sig:
            return None
        for j in self._jobs.values():
            if j.job_type != job_type:
                continue
            if j.status not in (JobStatus.QUEUED, JobStatus.RUNNING):
                continue
            other_sig = tuple((k, j.params.get(k)) for k in key_fields if j.params.get(k) is not None)
            if sig == other_sig:
                return j
        return None
'''
text = text.replace(insert_methods_after, new_methods)

# cleanup(): cap retained terminal jobs and add the async periodic loop.
cleanup_old = '''    def cleanup(self, max_age_seconds: float = 3600):
        now = time.time()
        to_remove = [
            jid for jid, j in self._jobs.items()
            if j.status in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
            and (now - j.created_at) > max_age_seconds
        ]
        for jid in to_remove:
            del self._jobs[jid]
        if to_remove:
            logger.info(f"Cleaned up {len(to_remove)} old jobs")
'''
cleanup_new = '''    def cleanup(self, max_age_seconds: float = 3600):
        now = time.time()
        terminal_states = (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
        to_remove = [
            jid for jid, j in self._jobs.items()
            if j.status in terminal_states and (now - j.created_at) > max_age_seconds
        ]

        # Also cap retained terminal jobs to avoid unbounded memory growth
        terminal_jobs = sorted(
            [j for j in self._jobs.values() if j.status in terminal_states],
            key=lambda j: j.created_at,
            reverse=True,
        )
        overflow = terminal_jobs[settings.JOB_QUEUE_RETAIN_COMPLETED :]
        to_remove.extend([j.id for j in overflow])

        removed = 0
        for jid in set(to_remove):
            if jid in self._jobs:
                del self._jobs[jid]
                removed += 1
        if removed:
            logger.info(f"Cleaned up {removed} old jobs")

    async def _cleanup_loop(self):
        interval = max(10, settings.JOB_QUEUE_CLEANUP_INTERVAL_SECONDS)
        while self._started:
            try:
                self.cleanup(max_age_seconds=settings.JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS)
            except Exception as e:
                logger.warning(f"Job queue cleanup loop error: {e}")
            await asyncio.sleep(interval)
'''
text = text.replace(cleanup_old, cleanup_new)

jq.write_text(text, encoding="utf-8")

# 5) NetworkMap polling backoff/jitter max wait
nm = root / "frontend/src/components/NetworkMap.tsx"
text = nm.read_text(encoding="utf-8")

# Fixed 2s poll -> exponential backoff (1.5s..10s, +jitter) with 5-min timeout.
text = text.replace(
    "      // Poll until ready, then re-fetch\n      for (;;) {\n        await new Promise(r => setTimeout(r, 2000));\n        const st = await network.inventoryStatus(huntId);\n        if (st.status === 'ready') break;\n      }\n",
    "      // Poll until ready (exponential backoff), then re-fetch\n      let delayMs = 1500;\n      const startedAt = Date.now();\n      for (;;) {\n        const jitter = Math.floor(Math.random() * 250);\n        await new Promise(r => setTimeout(r, delayMs + jitter));\n        const st = await network.inventoryStatus(huntId);\n        if (st.status === 'ready') break;\n        if (Date.now() - startedAt > 5 * 60 * 1000) {\n          throw new Error('Host inventory build timed out after 5 minutes');\n        }\n        delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n      }\n"
)

# Same backoff/timeout treatment for the waitUntilReady polling helper.
text = text.replace(
    "    const waitUntilReady = async (): Promise<boolean> => {\n      // Poll inventory-status every 2s until 'ready' (or cancelled)\n      setProgress('Host inventory is being prepared in the background');\n      setLoading(true);\n      for (;;) {\n        await new Promise(r => setTimeout(r, 2000));\n        if (cancelled) return false;\n        try {\n          const st = await network.inventoryStatus(selectedHuntId);\n          if (cancelled) return false;\n          if (st.status === 'ready') return true;\n          // still building or none (job may not have started yet) - keep polling\n        } catch { if (cancelled) return false; }\n      }\n    };\n",
    "    const waitUntilReady = async (): Promise<boolean> => {\n      // Poll inventory-status with exponential backoff until 'ready' (or cancelled)\n      setProgress('Host inventory is being prepared in the background');\n      setLoading(true);\n      let delayMs = 1500;\n      const startedAt = Date.now();\n      for (;;) {\n        const jitter = Math.floor(Math.random() * 250);\n        await new Promise(r => setTimeout(r, delayMs + jitter));\n        if (cancelled) return false;\n        try {\n          const st = await network.inventoryStatus(selectedHuntId);\n          if (cancelled) return false;\n          if (st.status === 'ready') return true;\n          if (Date.now() - startedAt > 5 * 60 * 1000) {\n            setError('Host inventory build timed out. Please retry.');\n            return false;\n          }\n          delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n          // still building or none (job may not have started yet) - keep polling\n        } catch {\n          if (cancelled) return false;\n          delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n        }\n      }\n    };\n"
)

nm.write_text(text, encoding="utf-8")

print("Patched: config.py, scanner.py, keywords.py, job_queue.py, NetworkMap.tsx")
|
||||||
207
_apply_phase2_patch.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
|
||||||
|
root = Path(r"d:\Projects\Dev\ThreatHunt")
|
||||||
|
|
||||||
|
# ---------- config.py ----------
|
||||||
|
cfg = root / "backend/app/config.py"
|
||||||
|
text = cfg.read_text(encoding="utf-8")
|
||||||
|
marker = " JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS: int = Field(\n default=3600, description=\"Age threshold for in-memory completed job cleanup\"\n )\n"
|
||||||
|
add = marker + "\n # -- Startup throttling ------------------------------------------------\n STARTUP_WARMUP_MAX_HUNTS: int = Field(\n default=5, description=\"Max hunts to warm inventory cache for at startup\"\n )\n STARTUP_REPROCESS_MAX_DATASETS: int = Field(\n default=25, description=\"Max unprocessed datasets to enqueue at startup\"\n )\n\n # -- Network API scale guards -----------------------------------------\n NETWORK_SUBGRAPH_MAX_HOSTS: int = Field(\n default=400, description=\"Hard cap for hosts returned by network subgraph endpoint\"\n )\n NETWORK_SUBGRAPH_MAX_EDGES: int = Field(\n default=3000, description=\"Hard cap for edges returned by network subgraph endpoint\"\n )\n"
|
||||||
|
if marker in text and "STARTUP_WARMUP_MAX_HUNTS" not in text:
|
||||||
|
text = text.replace(marker, add)
|
||||||
|
cfg.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
# ---------- job_queue.py ----------
|
||||||
|
jq = root / "backend/app/services/job_queue.py"
|
||||||
|
text = jq.read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
# add helper methods after get_stats
|
||||||
|
anchor = " def get_stats(self) -> dict:\n by_status = {}\n for j in self._jobs.values():\n by_status[j.status.value] = by_status.get(j.status.value, 0) + 1\n return {\n \"total\": len(self._jobs),\n \"queued\": self._queue.qsize(),\n \"by_status\": by_status,\n \"workers\": self._max_workers,\n \"active_workers\": sum(1 for j in self._jobs.values() if j.status == JobStatus.RUNNING),\n }\n"
|
||||||
|
if "def is_backlogged(" not in text:
|
||||||
|
insert = anchor + "\n def is_backlogged(self) -> bool:\n return self._queue.qsize() >= settings.JOB_QUEUE_MAX_BACKLOG\n\n def can_accept(self, reserve: int = 0) -> bool:\n return (self._queue.qsize() + max(0, reserve)) < settings.JOB_QUEUE_MAX_BACKLOG\n"
|
||||||
|
text = text.replace(anchor, insert)
|
||||||
|
|
||||||
|
jq.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
# ---------- host_inventory.py keyset pagination ----------
|
||||||
|
hi = root / "backend/app/services/host_inventory.py"
|
||||||
|
text = hi.read_text(encoding="utf-8")
|
||||||
|
old = ''' batch_size = 5000
|
||||||
|
offset = 0
|
||||||
|
while True:
|
||||||
|
rr = await db.execute(
|
||||||
|
select(DatasetRow)
|
||||||
|
.where(DatasetRow.dataset_id == ds.id)
|
||||||
|
.order_by(DatasetRow.row_index)
|
||||||
|
.offset(offset).limit(batch_size)
|
||||||
|
)
|
||||||
|
rows = rr.scalars().all()
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
new = ''' batch_size = 5000
|
||||||
|
last_row_index = -1
|
||||||
|
while True:
|
||||||
|
rr = await db.execute(
|
||||||
|
select(DatasetRow)
|
||||||
|
.where(DatasetRow.dataset_id == ds.id)
|
||||||
|
.where(DatasetRow.row_index > last_row_index)
|
||||||
|
.order_by(DatasetRow.row_index)
|
||||||
|
.limit(batch_size)
|
||||||
|
)
|
||||||
|
rows = rr.scalars().all()
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
if old in text:
|
||||||
|
text = text.replace(old, new)
|
||||||
|
text = text.replace(" offset += batch_size\n if len(rows) < batch_size:\n break\n", " last_row_index = rows[-1].row_index\n if len(rows) < batch_size:\n break\n")
|
||||||
|
hi.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
# ---------- network.py add summary/subgraph + backpressure ----------
|
||||||
|
net = root / "backend/app/api/routes/network.py"
|
||||||
|
text = net.read_text(encoding="utf-8")
|
||||||
|
text = text.replace("from fastapi import APIRouter, Depends, HTTPException, Query", "from fastapi import APIRouter, Depends, HTTPException, Query")
|
||||||
|
if "from app.config import settings" not in text:
|
||||||
|
text = text.replace("from app.db import get_db\n", "from app.config import settings\nfrom app.db import get_db\n")
|
||||||
|
|
||||||
|
# add helpers and endpoints before inventory-status endpoint
|
||||||
|
if "def _build_summary" not in text:
|
||||||
|
helper_block = '''
|
||||||
|
|
||||||
|
def _build_summary(inv: dict, top_n: int = 20) -> dict:
|
||||||
|
hosts = inv.get("hosts", [])
|
||||||
|
conns = inv.get("connections", [])
|
||||||
|
top_hosts = sorted(hosts, key=lambda h: h.get("row_count", 0), reverse=True)[:top_n]
|
||||||
|
top_edges = sorted(conns, key=lambda c: c.get("count", 0), reverse=True)[:top_n]
|
||||||
|
return {
|
||||||
|
"stats": inv.get("stats", {}),
|
||||||
|
"top_hosts": [
|
||||||
|
{
|
||||||
|
"id": h.get("id"),
|
||||||
|
"hostname": h.get("hostname"),
|
||||||
|
"row_count": h.get("row_count", 0),
|
||||||
|
"ip_count": len(h.get("ips", [])),
|
||||||
|
"user_count": len(h.get("users", [])),
|
||||||
|
}
|
||||||
|
for h in top_hosts
|
||||||
|
],
|
||||||
|
"top_edges": top_edges,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_subgraph(inv: dict, node_id: str | None, max_hosts: int, max_edges: int) -> dict:
|
||||||
|
hosts = inv.get("hosts", [])
|
||||||
|
conns = inv.get("connections", [])
|
||||||
|
|
||||||
|
max_hosts = max(1, min(max_hosts, settings.NETWORK_SUBGRAPH_MAX_HOSTS))
|
||||||
|
max_edges = max(1, min(max_edges, settings.NETWORK_SUBGRAPH_MAX_EDGES))
|
||||||
|
|
||||||
|
if node_id:
|
||||||
|
rel_edges = [c for c in conns if c.get("source") == node_id or c.get("target") == node_id]
|
||||||
|
rel_edges = sorted(rel_edges, key=lambda c: c.get("count", 0), reverse=True)[:max_edges]
|
||||||
|
ids = {node_id}
|
||||||
|
for c in rel_edges:
|
||||||
|
ids.add(c.get("source"))
|
||||||
|
ids.add(c.get("target"))
|
||||||
|
rel_hosts = [h for h in hosts if h.get("id") in ids][:max_hosts]
|
||||||
|
else:
|
||||||
|
rel_hosts = sorted(hosts, key=lambda h: h.get("row_count", 0), reverse=True)[:max_hosts]
|
||||||
|
allowed = {h.get("id") for h in rel_hosts}
|
||||||
|
rel_edges = [
|
||||||
|
c for c in sorted(conns, key=lambda c: c.get("count", 0), reverse=True)
|
||||||
|
if c.get("source") in allowed and c.get("target") in allowed
|
||||||
|
][:max_edges]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"hosts": rel_hosts,
|
||||||
|
"connections": rel_edges,
|
||||||
|
"stats": {
|
||||||
|
**inv.get("stats", {}),
|
||||||
|
"subgraph_hosts": len(rel_hosts),
|
||||||
|
"subgraph_connections": len(rel_edges),
|
||||||
|
"truncated": len(rel_hosts) < len(hosts) or len(rel_edges) < len(conns),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/summary")
|
||||||
|
async def get_inventory_summary(
|
||||||
|
hunt_id: str = Query(..., description="Hunt ID"),
|
||||||
|
top_n: int = Query(20, ge=1, le=200),
|
||||||
|
):
|
||||||
|
"""Return a lightweight summary view for large hunts."""
|
||||||
|
cached = inventory_cache.get(hunt_id)
|
||||||
|
if cached is None:
|
||||||
|
if not inventory_cache.is_building(hunt_id):
|
||||||
|
if job_queue.is_backlogged():
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=202,
|
||||||
|
content={"status": "deferred", "message": "Queue busy, retry shortly"},
|
||||||
|
)
|
||||||
|
job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
|
||||||
|
return JSONResponse(status_code=202, content={"status": "building"})
|
||||||
|
return _build_summary(cached, top_n=top_n)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/subgraph")
|
||||||
|
async def get_inventory_subgraph(
|
||||||
|
hunt_id: str = Query(..., description="Hunt ID"),
|
||||||
|
node_id: str | None = Query(None, description="Optional focal node"),
|
||||||
|
max_hosts: int = Query(200, ge=1, le=5000),
|
||||||
|
max_edges: int = Query(1500, ge=1, le=20000),
|
||||||
|
):
|
||||||
|
"""Return a bounded subgraph for scale-safe rendering."""
|
||||||
|
cached = inventory_cache.get(hunt_id)
|
||||||
|
if cached is None:
|
||||||
|
if not inventory_cache.is_building(hunt_id):
|
||||||
|
if job_queue.is_backlogged():
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=202,
|
||||||
|
content={"status": "deferred", "message": "Queue busy, retry shortly"},
|
||||||
|
)
|
||||||
|
job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
|
||||||
|
return JSONResponse(status_code=202, content={"status": "building"})
|
||||||
|
return _build_subgraph(cached, node_id=node_id, max_hosts=max_hosts, max_edges=max_edges)
|
||||||
|
'''
|
||||||
|
text = text.replace("\n\n@router.get(\"/inventory-status\")", helper_block + "\n\n@router.get(\"/inventory-status\")")
|
||||||
|
|
||||||
|
# add backpressure in host-inventory enqueue points
|
||||||
|
text = text.replace(
|
||||||
|
" if not inventory_cache.is_building(hunt_id):\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)",
|
||||||
|
" if not inventory_cache.is_building(hunt_id):\n if job_queue.is_backlogged():\n return JSONResponse(status_code=202, content={\"status\": \"deferred\", \"message\": \"Queue busy, retry shortly\"})\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)"
|
||||||
|
)
|
||||||
|
text = text.replace(
|
||||||
|
" if not inventory_cache.is_building(hunt_id):\n logger.info(f\"Cache miss for {hunt_id}, triggering background build\")\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)",
|
||||||
|
" if not inventory_cache.is_building(hunt_id):\n logger.info(f\"Cache miss for {hunt_id}, triggering background build\")\n if job_queue.is_backlogged():\n return JSONResponse(status_code=202, content={\"status\": \"deferred\", \"message\": \"Queue busy, retry shortly\"})\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)"
|
||||||
|
)
|
||||||
|
|
||||||
|
net.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
# ---------- analysis.py backpressure on manual submit ----------
|
||||||
|
analysis = root / "backend/app/api/routes/analysis.py"
|
||||||
|
text = analysis.read_text(encoding="utf-8")
|
||||||
|
text = text.replace(
|
||||||
|
" job = job_queue.submit(jt, **params)\n return {\"job_id\": job.id, \"status\": job.status.value, \"job_type\": job_type}",
|
||||||
|
" if not job_queue.can_accept():\n raise HTTPException(status_code=429, detail=\"Job queue is busy. Retry shortly.\")\n job = job_queue.submit(jt, **params)\n return {\"job_id\": job.id, \"status\": job.status.value, \"job_type\": job_type}"
|
||||||
|
)
|
||||||
|
analysis.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
# ---------- main.py startup throttles ----------
|
||||||
|
main = root / "backend/app/main.py"
|
||||||
|
text = main.read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
text = text.replace(
|
||||||
|
" for hid in hunt_ids:\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hid)\n if hunt_ids:\n logger.info(f\"Queued host inventory warm-up for {len(hunt_ids)} hunts\")",
|
||||||
|
" warm_hunts = hunt_ids[: settings.STARTUP_WARMUP_MAX_HUNTS]\n for hid in warm_hunts:\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hid)\n if warm_hunts:\n logger.info(f\"Queued host inventory warm-up for {len(warm_hunts)} hunts (total hunts with data: {len(hunt_ids)})\")"
|
||||||
|
)
|
||||||
|
|
||||||
|
text = text.replace(
|
||||||
|
" if unprocessed_ids:\n for ds_id in unprocessed_ids:\n job_queue.submit(JobType.TRIAGE, dataset_id=ds_id)\n job_queue.submit(JobType.ANOMALY, dataset_id=ds_id)\n job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=ds_id)\n job_queue.submit(JobType.IOC_EXTRACT, dataset_id=ds_id)\n logger.info(f\"Queued processing pipeline for {len(unprocessed_ids)} unprocessed datasets\")\n async with async_session_factory() as update_db:\n from sqlalchemy import update\n from app.db.models import Dataset\n await update_db.execute(\n update(Dataset)\n .where(Dataset.id.in_(unprocessed_ids))\n .values(processing_status=\"processing\")\n )\n await update_db.commit()",
|
||||||
|
" if unprocessed_ids:\n to_reprocess = unprocessed_ids[: settings.STARTUP_REPROCESS_MAX_DATASETS]\n for ds_id in to_reprocess:\n job_queue.submit(JobType.TRIAGE, dataset_id=ds_id)\n job_queue.submit(JobType.ANOMALY, dataset_id=ds_id)\n job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=ds_id)\n job_queue.submit(JobType.IOC_EXTRACT, dataset_id=ds_id)\n logger.info(f\"Queued processing pipeline for {len(to_reprocess)} datasets at startup (unprocessed total: {len(unprocessed_ids)})\")\n async with async_session_factory() as update_db:\n from sqlalchemy import update\n from app.db.models import Dataset\n await update_db.execute(\n update(Dataset)\n .where(Dataset.id.in_(to_reprocess))\n .values(processing_status=\"processing\")\n )\n await update_db.commit()"
|
||||||
|
)
|
||||||
|
|
||||||
|
main.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
print("Patched Phase 2 files")
|
||||||
75
_aup_add_dataset_scope_ui.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# default selection when hunt changes: first 3 datasets instead of all
|
||||||
|
old=''' datasets.list(0, 500, selectedHuntId).then(res => {
|
||||||
|
if (cancelled) return;
|
||||||
|
setDsList(res.datasets);
|
||||||
|
setSelectedDs(new Set(res.datasets.map(d => d.id)));
|
||||||
|
}).catch(() => {});
|
||||||
|
'''
|
||||||
|
new=''' datasets.list(0, 500, selectedHuntId).then(res => {
|
||||||
|
if (cancelled) return;
|
||||||
|
setDsList(res.datasets);
|
||||||
|
setSelectedDs(new Set(res.datasets.slice(0, 3).map(d => d.id)));
|
||||||
|
}).catch(() => {});
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('hunt-change dataset init block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
# insert dataset scope multi-select under hunt info
|
||||||
|
anchor=''' {!selectedHuntId && (
|
||||||
|
<Typography variant="caption" color="text.secondary" sx={{ mt: 0.5, display: 'block' }}>
|
||||||
|
All datasets will be scanned if no hunt is selected
|
||||||
|
</Typography>
|
||||||
|
)}
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{/* Theme selector */}
|
||||||
|
'''
|
||||||
|
insert=''' {!selectedHuntId && (
|
||||||
|
<Typography variant="caption" color="text.secondary" sx={{ mt: 0.5, display: 'block' }}>
|
||||||
|
Select a hunt to enable scoped scanning
|
||||||
|
</Typography>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<FormControl size="small" fullWidth sx={{ mt: 1.2 }} disabled={!selectedHuntId || dsList.length === 0}>
|
||||||
|
<InputLabel id="aup-dataset-label">Datasets</InputLabel>
|
||||||
|
<Select
|
||||||
|
labelId="aup-dataset-label"
|
||||||
|
multiple
|
||||||
|
value={Array.from(selectedDs)}
|
||||||
|
label="Datasets"
|
||||||
|
renderValue={(selected) => `${(selected as string[]).length} selected`}
|
||||||
|
onChange={(e) => setSelectedDs(new Set(e.target.value as string[]))}
|
||||||
|
>
|
||||||
|
{dsList.map(d => (
|
||||||
|
<MenuItem key={d.id} value={d.id}>
|
||||||
|
<Checkbox size="small" checked={selectedDs.has(d.id)} />
|
||||||
|
<Typography variant="body2" sx={{ ml: 0.5 }}>
|
||||||
|
{d.name} ({d.row_count.toLocaleString()} rows)
|
||||||
|
</Typography>
|
||||||
|
</MenuItem>
|
||||||
|
))}
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
|
||||||
|
{selectedHuntId && dsList.length > 0 && (
|
||||||
|
<Stack direction="row" spacing={1} sx={{ mt: 1 }}>
|
||||||
|
<Button size="small" onClick={() => setSelectedDs(new Set(dsList.slice(0, 3).map(d => d.id)))}>Top 3</Button>
|
||||||
|
<Button size="small" onClick={() => setSelectedDs(new Set(dsList.map(d => d.id)))}>All</Button>
|
||||||
|
<Button size="small" onClick={() => setSelectedDs(new Set())}>Clear</Button>
|
||||||
|
</Stack>
|
||||||
|
)}
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{/* Theme selector */}
|
||||||
|
'''
|
||||||
|
if anchor not in t:
|
||||||
|
raise SystemExit('dataset scope anchor not found')
|
||||||
|
t=t.replace(anchor,insert)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('added AUP dataset multi-select scoping and safer defaults')
|
||||||
182
_aup_add_host_user_to_hits.py
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# 1) Extend ScanHit dataclass
|
||||||
|
old='''@dataclass
|
||||||
|
class ScanHit:
|
||||||
|
theme_name: str
|
||||||
|
theme_color: str
|
||||||
|
keyword: str
|
||||||
|
source_type: str # dataset_row | hunt | annotation | message
|
||||||
|
source_id: str | int
|
||||||
|
field: str
|
||||||
|
matched_value: str
|
||||||
|
row_index: int | None = None
|
||||||
|
dataset_name: str | None = None
|
||||||
|
'''
|
||||||
|
new='''@dataclass
|
||||||
|
class ScanHit:
|
||||||
|
theme_name: str
|
||||||
|
theme_color: str
|
||||||
|
keyword: str
|
||||||
|
source_type: str # dataset_row | hunt | annotation | message
|
||||||
|
source_id: str | int
|
||||||
|
field: str
|
||||||
|
matched_value: str
|
||||||
|
row_index: int | None = None
|
||||||
|
dataset_name: str | None = None
|
||||||
|
hostname: str | None = None
|
||||||
|
username: str | None = None
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('ScanHit dataclass block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
# 2) Add helper to infer hostname/user from a row
|
||||||
|
insert_after='''BATCH_SIZE = 200
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ScanHit:
|
||||||
|
'''
|
||||||
|
helper='''BATCH_SIZE = 200
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_hostname_and_user(data: dict) -> tuple[str | None, str | None]:
|
||||||
|
"""Best-effort extraction of hostname and user from a dataset row."""
|
||||||
|
if not data:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
host_keys = (
|
||||||
|
'hostname', 'host_name', 'host', 'computer_name', 'computer',
|
||||||
|
'fqdn', 'client_id', 'agent_id', 'endpoint_id',
|
||||||
|
)
|
||||||
|
user_keys = (
|
||||||
|
'username', 'user_name', 'user', 'account_name',
|
||||||
|
'logged_in_user', 'samaccountname', 'sam_account_name',
|
||||||
|
)
|
||||||
|
|
||||||
|
def pick(keys):
|
||||||
|
for k in keys:
|
||||||
|
for actual_key, v in data.items():
|
||||||
|
if actual_key.lower() == k and v not in (None, ''):
|
||||||
|
return str(v)
|
||||||
|
return None
|
||||||
|
|
||||||
|
return pick(host_keys), pick(user_keys)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ScanHit:
|
||||||
|
'''
|
||||||
|
if insert_after in t and '_infer_hostname_and_user' not in t:
|
||||||
|
t=t.replace(insert_after,helper)
|
||||||
|
|
||||||
|
# 3) Extend _match_text signature and ScanHit construction
|
||||||
|
old_sig=''' def _match_text(
|
||||||
|
self,
|
||||||
|
text: str,
|
||||||
|
patterns: dict,
|
||||||
|
source_type: str,
|
||||||
|
source_id: str | int,
|
||||||
|
field_name: str,
|
||||||
|
hits: list[ScanHit],
|
||||||
|
row_index: int | None = None,
|
||||||
|
dataset_name: str | None = None,
|
||||||
|
) -> None:
|
||||||
|
'''
|
||||||
|
new_sig=''' def _match_text(
|
||||||
|
self,
|
||||||
|
text: str,
|
||||||
|
patterns: dict,
|
||||||
|
source_type: str,
|
||||||
|
source_id: str | int,
|
||||||
|
field_name: str,
|
||||||
|
hits: list[ScanHit],
|
||||||
|
row_index: int | None = None,
|
||||||
|
dataset_name: str | None = None,
|
||||||
|
hostname: str | None = None,
|
||||||
|
username: str | None = None,
|
||||||
|
) -> None:
|
||||||
|
'''
|
||||||
|
if old_sig not in t:
|
||||||
|
raise SystemExit('_match_text signature not found')
|
||||||
|
t=t.replace(old_sig,new_sig)
|
||||||
|
|
||||||
|
old_hit=''' hits.append(ScanHit(
|
||||||
|
theme_name=theme_name,
|
||||||
|
theme_color=theme_color,
|
||||||
|
keyword=kw_value,
|
||||||
|
source_type=source_type,
|
||||||
|
source_id=source_id,
|
||||||
|
field=field_name,
|
||||||
|
matched_value=matched_preview,
|
||||||
|
row_index=row_index,
|
||||||
|
dataset_name=dataset_name,
|
||||||
|
))
|
||||||
|
'''
|
||||||
|
new_hit=''' hits.append(ScanHit(
|
||||||
|
theme_name=theme_name,
|
||||||
|
theme_color=theme_color,
|
||||||
|
keyword=kw_value,
|
||||||
|
source_type=source_type,
|
||||||
|
source_id=source_id,
|
||||||
|
field=field_name,
|
||||||
|
matched_value=matched_preview,
|
||||||
|
row_index=row_index,
|
||||||
|
dataset_name=dataset_name,
|
||||||
|
hostname=hostname,
|
||||||
|
username=username,
|
||||||
|
))
|
||||||
|
'''
|
||||||
|
if old_hit not in t:
|
||||||
|
raise SystemExit('ScanHit append block not found')
|
||||||
|
t=t.replace(old_hit,new_hit)
|
||||||
|
|
||||||
|
# 4) Pass inferred hostname/username in dataset scan path
|
||||||
|
old_call=''' for row in rows:
|
||||||
|
result.rows_scanned += 1
|
||||||
|
data = row.data or {}
|
||||||
|
for col_name, cell_value in data.items():
|
||||||
|
if cell_value is None:
|
||||||
|
continue
|
||||||
|
text = str(cell_value)
|
||||||
|
self._match_text(
|
||||||
|
text,
|
||||||
|
patterns,
|
||||||
|
"dataset_row",
|
||||||
|
row.id,
|
||||||
|
col_name,
|
||||||
|
result.hits,
|
||||||
|
row_index=row.row_index,
|
||||||
|
dataset_name=ds_name,
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
new_call=''' for row in rows:
|
||||||
|
result.rows_scanned += 1
|
||||||
|
data = row.data or {}
|
||||||
|
hostname, username = _infer_hostname_and_user(data)
|
||||||
|
for col_name, cell_value in data.items():
|
||||||
|
if cell_value is None:
|
||||||
|
continue
|
||||||
|
text = str(cell_value)
|
||||||
|
self._match_text(
|
||||||
|
text,
|
||||||
|
patterns,
|
||||||
|
"dataset_row",
|
||||||
|
row.id,
|
||||||
|
col_name,
|
||||||
|
result.hits,
|
||||||
|
row_index=row.row_index,
|
||||||
|
dataset_name=ds_name,
|
||||||
|
hostname=hostname,
|
||||||
|
username=username,
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
if old_call not in t:
|
||||||
|
raise SystemExit('dataset _match_text call block not found')
|
||||||
|
t=t.replace(old_call,new_call)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated scanner hits with hostname+username context')
|
||||||
32
_aup_extend_scanhit_api.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old='''class ScanHit(BaseModel):
|
||||||
|
theme_name: str
|
||||||
|
theme_color: str
|
||||||
|
keyword: str
|
||||||
|
source_type: str
|
||||||
|
source_id: str | int
|
||||||
|
field: str
|
||||||
|
matched_value: str
|
||||||
|
row_index: int | None = None
|
||||||
|
dataset_name: str | None = None
|
||||||
|
'''
|
||||||
|
new='''class ScanHit(BaseModel):
|
||||||
|
theme_name: str
|
||||||
|
theme_color: str
|
||||||
|
keyword: str
|
||||||
|
source_type: str
|
||||||
|
source_id: str | int
|
||||||
|
field: str
|
||||||
|
matched_value: str
|
||||||
|
row_index: int | None = None
|
||||||
|
dataset_name: str | None = None
|
||||||
|
hostname: str | None = None
|
||||||
|
username: str | None = None
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('ScanHit pydantic model block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('extended API ScanHit model with hostname+username')
|
||||||
21
_aup_extend_scanhit_frontend_type.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/api/client.ts')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old='''export interface ScanHit {
|
||||||
|
theme_name: string; theme_color: string; keyword: string;
|
||||||
|
source_type: string; source_id: string | number; field: string;
|
||||||
|
matched_value: string; row_index: number | null; dataset_name: string | null;
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
new='''export interface ScanHit {
|
||||||
|
theme_name: string; theme_color: string; keyword: string;
|
||||||
|
source_type: string; source_id: string | number; field: string;
|
||||||
|
matched_value: string; row_index: number | null; dataset_name: string | null;
|
||||||
|
hostname?: string | null; username?: string | null;
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('frontend ScanHit interface block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('extended frontend ScanHit type with hostname+username')
|
||||||
57
_aup_keywords_scope_and_missing.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# add fast guard against unscoped global dataset scans
|
||||||
|
insert_after='''async def run_scan(body: ScanRequest, db: AsyncSession = Depends(get_db)):\n scanner = KeywordScanner(db)\n\n'''
|
||||||
|
if insert_after not in t:
|
||||||
|
raise SystemExit('run_scan header block not found')
|
||||||
|
if 'Select at least one dataset' not in t:
|
||||||
|
guard=''' if not body.dataset_ids and not body.scan_hunts and not body.scan_annotations and not body.scan_messages:\n raise HTTPException(400, "Select at least one dataset or enable additional sources (hunts/annotations/messages)")\n\n'''
|
||||||
|
t=t.replace(insert_after, insert_after+guard)
|
||||||
|
|
||||||
|
old=''' if missing:
|
||||||
|
missing_entries: list[dict] = []
|
||||||
|
for dataset_id in missing:
|
||||||
|
partial = await scanner.scan(dataset_ids=[dataset_id], theme_ids=body.theme_ids)
|
||||||
|
keyword_scan_cache.put(dataset_id, partial)
|
||||||
|
missing_entries.append({"result": partial, "built_at": None})
|
||||||
|
|
||||||
|
merged = _merge_cached_results(
|
||||||
|
cached_entries + missing_entries,
|
||||||
|
allowed_theme_names if body.theme_ids else None,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"total_hits": merged["total_hits"],
|
||||||
|
"hits": merged["hits"],
|
||||||
|
"themes_scanned": len(themes),
|
||||||
|
"keywords_scanned": keywords_scanned,
|
||||||
|
"rows_scanned": merged["rows_scanned"],
|
||||||
|
"cache_used": len(cached_entries) > 0,
|
||||||
|
"cache_status": "partial" if cached_entries else "miss",
|
||||||
|
"cached_at": merged["cached_at"],
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
new=''' if missing:
|
||||||
|
partial = await scanner.scan(dataset_ids=missing, theme_ids=body.theme_ids)
|
||||||
|
merged = _merge_cached_results(
|
||||||
|
cached_entries + [{"result": partial, "built_at": None}],
|
||||||
|
allowed_theme_names if body.theme_ids else None,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"total_hits": merged["total_hits"],
|
||||||
|
"hits": merged["hits"],
|
||||||
|
"themes_scanned": len(themes),
|
||||||
|
"keywords_scanned": keywords_scanned,
|
||||||
|
"rows_scanned": merged["rows_scanned"],
|
||||||
|
"cache_used": len(cached_entries) > 0,
|
||||||
|
"cache_status": "partial" if cached_entries else "miss",
|
||||||
|
"cached_at": merged["cached_at"],
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('partial-cache missing block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('hardened keywords scan scope + optimized missing-cache path')
|
||||||
18
_aup_reduce_budget.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old=''' SCANNER_MAX_ROWS_PER_SCAN: int = Field(
|
||||||
|
default=300000,
|
||||||
|
description="Global row budget for a single AUP scan request (0 = unlimited)",
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
new=''' SCANNER_MAX_ROWS_PER_SCAN: int = Field(
|
||||||
|
default=120000,
|
||||||
|
description="Global row budget for a single AUP scan request (0 = unlimited)",
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('SCANNER_MAX_ROWS_PER_SCAN block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('reduced SCANNER_MAX_ROWS_PER_SCAN default to 120000')
|
||||||
42
_aup_update_grid_columns.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old='''const RESULT_COLUMNS: GridColDef[] = [
|
||||||
|
{
|
||||||
|
field: 'theme_name', headerName: 'Theme', width: 140,
|
||||||
|
renderCell: (params) => (
|
||||||
|
<Chip label={params.value} size="small"
|
||||||
|
sx={{ bgcolor: params.row.theme_color, color: '#fff', fontWeight: 600 }} />
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{ field: 'keyword', headerName: 'Keyword', width: 140 },
|
||||||
|
{ field: 'source_type', headerName: 'Source', width: 120 },
|
||||||
|
{ field: 'dataset_name', headerName: 'Dataset', width: 150 },
|
||||||
|
{ field: 'field', headerName: 'Field', width: 130 },
|
||||||
|
{ field: 'matched_value', headerName: 'Matched Value', flex: 1, minWidth: 200 },
|
||||||
|
{ field: 'row_index', headerName: 'Row #', width: 80, type: 'number' },
|
||||||
|
];
|
||||||
|
'''
|
||||||
|
new='''const RESULT_COLUMNS: GridColDef[] = [
|
||||||
|
{
|
||||||
|
field: 'theme_name', headerName: 'Theme', width: 140,
|
||||||
|
renderCell: (params) => (
|
||||||
|
<Chip label={params.value} size="small"
|
||||||
|
sx={{ bgcolor: params.row.theme_color, color: '#fff', fontWeight: 600 }} />
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{ field: 'keyword', headerName: 'Keyword', width: 140 },
|
||||||
|
{ field: 'dataset_name', headerName: 'Dataset', width: 170 },
|
||||||
|
{ field: 'hostname', headerName: 'Hostname', width: 170, valueGetter: (v, row) => row.hostname || '' },
|
||||||
|
{ field: 'username', headerName: 'User', width: 160, valueGetter: (v, row) => row.username || '' },
|
||||||
|
{ field: 'matched_value', headerName: 'Matched Value', flex: 1, minWidth: 220 },
|
||||||
|
{ field: 'field', headerName: 'Field', width: 130 },
|
||||||
|
{ field: 'source_type', headerName: 'Source', width: 120 },
|
||||||
|
{ field: 'row_index', headerName: 'Row #', width: 90, type: 'number' },
|
||||||
|
];
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('RESULT_COLUMNS block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated AUP results grid columns with dataset/hostname/user/matched value focus')
|
||||||
40
_edit_aup.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
"""One-off patch script: rework frontend/src/components/AUPScanner.tsx.

Flips the three scan-source toggles to default off, adds prefer_cache to the
scan request payload, and inserts a cache-status Chip into the scan summary
Alert -- all via exact-string replacement.

NOTE(review): recovered from a rendered diff; leading whitespace inside the
TSX string literals (and runs of spaces) could not be recovered exactly, so
the `old`/`new` literals below may not be byte-exact against the real
AUPScanner.tsx -- verify before running.
"""
from pathlib import Path

# Hard-coded absolute path: this script only works on the original dev machine.
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
t=p.read_text(encoding='utf-8')
# Default all three scan-source toggles to off (were on).
# NOTE(review): these four replaces are silent no-ops if the anchor text has
# drifted -- unlike the guarded summary-block replace further down.
t=t.replace(' const [scanHunts, setScanHunts] = useState(true);',' const [scanHunts, setScanHunts] = useState(false);')
t=t.replace(' const [scanAnnotations, setScanAnnotations] = useState(true);',' const [scanAnnotations, setScanAnnotations] = useState(false);')
t=t.replace(' const [scanMessages, setScanMessages] = useState(true);',' const [scanMessages, setScanMessages] = useState(false);')
# Add prefer_cache: true to the scan request body.
t=t.replace(' scan_messages: scanMessages,\n });',' scan_messages: scanMessages,\n prefer_cache: true,\n });')
# add cache chip in summary alert
old=''' {scanResult && (
<Alert severity={scanResult.total_hits > 0 ? 'warning' : 'success'} sx={{ py: 0.5 }}>
<strong>{scanResult.total_hits}</strong> hits across{' '}
<strong>{scanResult.rows_scanned}</strong> rows |{' '}
{scanResult.themes_scanned} themes, {scanResult.keywords_scanned} keywords scanned
</Alert>
)}
'''
new=''' {scanResult && (
<Alert severity={scanResult.total_hits > 0 ? 'warning' : 'success'} sx={{ py: 0.5 }}>
<strong>{scanResult.total_hits}</strong> hits across{' '}
<strong>{scanResult.rows_scanned}</strong> rows |{' '}
{scanResult.themes_scanned} themes, {scanResult.keywords_scanned} keywords scanned
{scanResult.cache_status && (
<Chip
size="small"
label={scanResult.cache_status === 'hit' ? 'Cached' : 'Live'}
sx={{ ml: 1, height: 20 }}
color={scanResult.cache_status === 'hit' ? 'success' : 'default'}
variant="outlined"
/>
)}
</Alert>
)}
'''
# Guarded replace: warn (rather than crash) if the summary block has drifted.
if old in t:
    t=t.replace(old,new)
else:
    print('warning: summary block not replaced')

p.write_text(t,encoding='utf-8')
print('updated AUPScanner.tsx')
|
||||||
36
_edit_client.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
"""One-off patch script: extend frontend/src/api/client.ts.

Adds the HuntProgress interface (after the Hunt interface), a
hunts.progress() API method, cache fields on ScanResponse, and
prefer_cache/force_rescan options on keywords.scan -- all via exact-string
replacement, each step (except the last) guarded for idempotency.

NOTE(review): recovered from a rendered diff; leading/run whitespace inside
the TypeScript string literals was lost, so the anchors below may not be
byte-exact against the real client.ts -- verify before running.
"""
from pathlib import Path
import re  # NOTE(review): unused in this script

p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/api/client.ts')
t=p.read_text(encoding='utf-8')
# Add HuntProgress interface after Hunt interface
if 'export interface HuntProgress' not in t:  # idempotency guard
    insert = '''export interface HuntProgress {
hunt_id: string;
status: 'idle' | 'processing' | 'ready';
progress_percent: number;
dataset_total: number;
dataset_completed: number;
dataset_processing: number;
dataset_errors: number;
active_jobs: number;
queued_jobs: number;
network_status: 'none' | 'building' | 'ready';
stages: Record<string, any>;
}

'''
    # Anchor on the full Hunt interface text and re-emit it followed by the insert.
    t=t.replace('export interface Hunt {\n id: string; name: string; description: string | null; status: string;\n owner_id: string | null; created_at: string; updated_at: string;\n dataset_count: number; hypothesis_count: number;\n}\n\n', 'export interface Hunt {\n id: string; name: string; description: string | null; status: string;\n owner_id: string | null; created_at: string; updated_at: string;\n dataset_count: number; hypothesis_count: number;\n}\n\n'+insert)

# Add hunts.progress method
if 'progress: (id: string)' not in t:  # idempotency guard
    t=t.replace(" delete: (id: string) => api(`/api/hunts/${id}`, { method: 'DELETE' }),\n};", " delete: (id: string) => api(`/api/hunts/${id}`, { method: 'DELETE' }),\n progress: (id: string) => api<HuntProgress>(`/api/hunts/${id}/progress`),\n};")

# Extend ScanResponse
if 'cache_used?: boolean' not in t:  # idempotency guard
    t=t.replace('export interface ScanResponse {\n total_hits: number; hits: ScanHit[]; themes_scanned: number;\n keywords_scanned: number; rows_scanned: number;\n}\n', 'export interface ScanResponse {\n total_hits: number; hits: ScanHit[]; themes_scanned: number;\n keywords_scanned: number; rows_scanned: number;\n cache_used?: boolean; cache_status?: string; cached_at?: string | null;\n}\n')

# Extend keywords.scan opts
# NOTE(review): unguarded -- silently no-ops if the anchor drifted, and would
# duplicate the options on a re-run if the guard strings above changed.
t=t.replace(' scan_hunts?: boolean; scan_annotations?: boolean; scan_messages?: boolean;\n }) =>', ' scan_hunts?: boolean; scan_annotations?: boolean; scan_messages?: boolean;\n prefer_cache?: boolean; force_rescan?: boolean;\n }) =>')

p.write_text(t,encoding='utf-8')
print('updated client.ts')
|
||||||
20
_edit_config_reconcile.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
"""One-off patch script: add the STARTUP_RECONCILE_STALE_TASKS setting to
backend/app/config.py by splicing it in after STARTUP_REPROCESS_MAX_DATASETS.

Fix over the original: the original was not idempotent. Because `insert`
contains `anchor`, a second run would find the anchor again and splice a
duplicate STARTUP_RECONCILE_STALE_TASKS field into config.py. We now skip
the edit when the field is already present (same guard style as the sibling
_edit_* scripts) and only rewrite the file when something changed.

NOTE(review): recovered from a rendered diff; leading whitespace inside the
string literals was lost and has been reconstructed as standard 4-space
class-body indentation -- verify against config.py before running.
"""
from pathlib import Path

# Hard-coded absolute path: this script only works on the original dev machine.
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
t=p.read_text(encoding='utf-8')
# Exact text of the existing setting we anchor on.
anchor='''    STARTUP_REPROCESS_MAX_DATASETS: int = Field(
        default=25, description="Max unprocessed datasets to enqueue at startup"
    )
'''
# Replacement = the anchor re-emitted, followed by the new setting.
insert='''    STARTUP_REPROCESS_MAX_DATASETS: int = Field(
        default=25, description="Max unprocessed datasets to enqueue at startup"
    )
    STARTUP_RECONCILE_STALE_TASKS: bool = Field(
        default=True,
        description="Mark stale queued/running processing tasks as failed on startup",
    )
'''
if 'STARTUP_RECONCILE_STALE_TASKS' in t:
    # Idempotency guard: field already spliced in by a previous run.
    print('config already has STARTUP_RECONCILE_STALE_TASKS; nothing to do')
else:
    if anchor not in t:
        # Fail loudly rather than silently writing an unchanged file.
        raise SystemExit('startup anchor not found')
    t=t.replace(anchor,insert)
    p.write_text(t,encoding='utf-8')
    print('updated config with STARTUP_RECONCILE_STALE_TASKS')
|
||||||
39
_edit_datasets.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""One-off patch script: make dataset deletion invalidate the keyword-scan
cache in backend/app/api/routes/datasets.py.

Adds the keyword_scan_cache import (guarded for idempotency) and rewrites the
delete_dataset endpoint to call invalidate_dataset() before returning -- via
exact-string replacement of the whole endpoint.

NOTE(review): recovered from a rendered diff; leading whitespace inside the
string literals was lost and has been reconstructed as standard 4-space
indentation -- verify against datasets.py before running.
"""
from pathlib import Path

p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/datasets.py')
t=p.read_text(encoding='utf-8')
# Idempotency guard: only add the import once, piggybacking on the existing
# inventory_cache import line.
if 'from app.services.scanner import keyword_scan_cache' not in t:
    t=t.replace('from app.services.host_inventory import inventory_cache','from app.services.host_inventory import inventory_cache\nfrom app.services.scanner import keyword_scan_cache')
# Exact text of the current delete_dataset endpoint.
old='''@router.delete(
    "/{dataset_id}",
    summary="Delete a dataset",
)
async def delete_dataset(
    dataset_id: str,
    db: AsyncSession = Depends(get_db),
):
    repo = DatasetRepository(db)
    deleted = await repo.delete_dataset(dataset_id)
    if not deleted:
        raise HTTPException(status_code=404, detail="Dataset not found")
    return {"message": "Dataset deleted", "id": dataset_id}
'''
# Same endpoint plus cache invalidation before the response.
new='''@router.delete(
    "/{dataset_id}",
    summary="Delete a dataset",
)
async def delete_dataset(
    dataset_id: str,
    db: AsyncSession = Depends(get_db),
):
    repo = DatasetRepository(db)
    deleted = await repo.delete_dataset(dataset_id)
    if not deleted:
        raise HTTPException(status_code=404, detail="Dataset not found")
    keyword_scan_cache.invalidate_dataset(dataset_id)
    return {"message": "Dataset deleted", "id": dataset_id}
'''
# Fail loudly if the endpoint text has drifted (also makes re-runs fail fast,
# since `old` no longer exists after a successful patch).
if old not in t:
    raise SystemExit('delete block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated datasets.py')
|
||||||
110
_edit_datasets_tasks.py
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
"""One-off patch script: persist ProcessingTask rows when dataset-processing
jobs are queued at upload time in backend/app/api/routes/datasets.py.

Captures each job_queue.submit() handle, records one queued ProcessingTask
row per stage (triage, anomaly, keyword_scan, ioc_extract, host_inventory),
and flushes them so the hunt progress endpoint can report per-stage state.

Fixes over the original:
1. Removed a dead statement -- t.replace(x, x) with identical old/new text,
   which could never change anything.
2. Replaced the broken two-step import splice. The original first rewrote
   'from app.db.models import' into
   'from app.db.models import ProcessingTask\\n# from app.db.models import'
   and then collapsed that prefix back to
   'from app.db.models import ProcessingTask', which glued 'ProcessingTask'
   and the original imported names onto one line with no comma -- an invalid
   import statement. We now do a single well-formed splice (count=1 so only
   the first occurrence is touched).

NOTE(review): recovered from a rendered diff; leading whitespace inside the
string literals was lost and has been reconstructed as 4-space endpoint-body
indentation -- verify against datasets.py before running.
"""
from pathlib import Path

p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/datasets.py')
t=p.read_text(encoding='utf-8')
# Idempotency guard: only splice ProcessingTask into the import once.
if 'ProcessingTask' not in t:
    t=t.replace('from app.db.models import ', 'from app.db.models import ProcessingTask, ', 1)

# Exact text of the current job-queueing section of the upload endpoint.
old='''    # 1. AI Triage (chains to HOST_PROFILE automatically on completion)
    job_queue.submit(JobType.TRIAGE, dataset_id=dataset.id)
    jobs_queued.append("triage")

    # 2. Anomaly detection (embedding-based outlier detection)
    job_queue.submit(JobType.ANOMALY, dataset_id=dataset.id)
    jobs_queued.append("anomaly")

    # 3. AUP keyword scan
    job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=dataset.id)
    jobs_queued.append("keyword_scan")

    # 4. IOC extraction
    job_queue.submit(JobType.IOC_EXTRACT, dataset_id=dataset.id)
    jobs_queued.append("ioc_extract")

    # 5. Host inventory (network map) - requires hunt_id
    if hunt_id:
        inventory_cache.invalidate(hunt_id)
        job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
        jobs_queued.append("host_inventory")
'''
# Same section, now keeping each submitted job's handle and recording a
# queued ProcessingTask row per stage, flushed at the end.
new='''    task_rows: list[ProcessingTask] = []

    # 1. AI Triage (chains to HOST_PROFILE automatically on completion)
    triage_job = job_queue.submit(JobType.TRIAGE, dataset_id=dataset.id)
    jobs_queued.append("triage")
    task_rows.append(ProcessingTask(
        hunt_id=hunt_id,
        dataset_id=dataset.id,
        job_id=triage_job.id,
        stage="triage",
        status="queued",
        progress=0.0,
        message="Queued",
    ))

    # 2. Anomaly detection (embedding-based outlier detection)
    anomaly_job = job_queue.submit(JobType.ANOMALY, dataset_id=dataset.id)
    jobs_queued.append("anomaly")
    task_rows.append(ProcessingTask(
        hunt_id=hunt_id,
        dataset_id=dataset.id,
        job_id=anomaly_job.id,
        stage="anomaly",
        status="queued",
        progress=0.0,
        message="Queued",
    ))

    # 3. AUP keyword scan
    kw_job = job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=dataset.id)
    jobs_queued.append("keyword_scan")
    task_rows.append(ProcessingTask(
        hunt_id=hunt_id,
        dataset_id=dataset.id,
        job_id=kw_job.id,
        stage="keyword_scan",
        status="queued",
        progress=0.0,
        message="Queued",
    ))

    # 4. IOC extraction
    ioc_job = job_queue.submit(JobType.IOC_EXTRACT, dataset_id=dataset.id)
    jobs_queued.append("ioc_extract")
    task_rows.append(ProcessingTask(
        hunt_id=hunt_id,
        dataset_id=dataset.id,
        job_id=ioc_job.id,
        stage="ioc_extract",
        status="queued",
        progress=0.0,
        message="Queued",
    ))

    # 5. Host inventory (network map) - requires hunt_id
    if hunt_id:
        inventory_cache.invalidate(hunt_id)
        inv_job = job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
        jobs_queued.append("host_inventory")
        task_rows.append(ProcessingTask(
            hunt_id=hunt_id,
            dataset_id=dataset.id,
            job_id=inv_job.id,
            stage="host_inventory",
            status="queued",
            progress=0.0,
            message="Queued",
        ))

    if task_rows:
        db.add_all(task_rows)
        await db.flush()
'''
# Fail loudly if the queue block has drifted (also makes re-runs fail fast,
# since `old` no longer exists after a successful patch).
if old not in t:
    raise SystemExit('queue block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated datasets upload queue + processing tasks')
|
||||||
254
_edit_hunts.py
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
"""One-off patch script: overwrite backend/app/api/routes/hunts.py wholesale.

Unlike the sibling scripts, this one does not splice anchors -- it writes a
complete replacement module (CRUD routes plus a new /progress endpoint that
combines dataset processing status, in-memory job-queue state, and the host
inventory cache into a single progress percentage).

NOTE(review): recovered from a rendered diff; all leading whitespace inside
the module-source literal was lost and has been reconstructed as standard
4-space indentation -- diff against the repo's hunts.py before running, since
any reconstruction error here replaces the entire file.
"""
from pathlib import Path

p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/hunts.py')
# Full replacement source for hunts.py.
new='''"""API routes for hunt management."""

import logging

from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import get_db
from app.db.models import Hunt, Dataset
from app.services.job_queue import job_queue
from app.services.host_inventory import inventory_cache

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/hunts", tags=["hunts"])


class HuntCreate(BaseModel):
    name: str = Field(..., max_length=256)
    description: str | None = None


class HuntUpdate(BaseModel):
    name: str | None = None
    description: str | None = None
    status: str | None = None


class HuntResponse(BaseModel):
    id: str
    name: str
    description: str | None
    status: str
    owner_id: str | None
    created_at: str
    updated_at: str
    dataset_count: int = 0
    hypothesis_count: int = 0


class HuntListResponse(BaseModel):
    hunts: list[HuntResponse]
    total: int


class HuntProgressResponse(BaseModel):
    hunt_id: str
    status: str
    progress_percent: float
    dataset_total: int
    dataset_completed: int
    dataset_processing: int
    dataset_errors: int
    active_jobs: int
    queued_jobs: int
    network_status: str
    stages: dict


@router.post("", response_model=HuntResponse, summary="Create a new hunt")
async def create_hunt(body: HuntCreate, db: AsyncSession = Depends(get_db)):
    hunt = Hunt(name=body.name, description=body.description)
    db.add(hunt)
    await db.flush()
    return HuntResponse(
        id=hunt.id,
        name=hunt.name,
        description=hunt.description,
        status=hunt.status,
        owner_id=hunt.owner_id,
        created_at=hunt.created_at.isoformat(),
        updated_at=hunt.updated_at.isoformat(),
    )


@router.get("", response_model=HuntListResponse, summary="List hunts")
async def list_hunts(
    status: str | None = Query(None),
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    db: AsyncSession = Depends(get_db),
):
    stmt = select(Hunt).order_by(Hunt.updated_at.desc())
    if status:
        stmt = stmt.where(Hunt.status == status)
    stmt = stmt.limit(limit).offset(offset)
    result = await db.execute(stmt)
    hunts = result.scalars().all()

    count_stmt = select(func.count(Hunt.id))
    if status:
        count_stmt = count_stmt.where(Hunt.status == status)
    total = (await db.execute(count_stmt)).scalar_one()

    return HuntListResponse(
        hunts=[
            HuntResponse(
                id=h.id,
                name=h.name,
                description=h.description,
                status=h.status,
                owner_id=h.owner_id,
                created_at=h.created_at.isoformat(),
                updated_at=h.updated_at.isoformat(),
                dataset_count=len(h.datasets) if h.datasets else 0,
                hypothesis_count=len(h.hypotheses) if h.hypotheses else 0,
            )
            for h in hunts
        ],
        total=total,
    )


@router.get("/{hunt_id}", response_model=HuntResponse, summary="Get hunt details")
async def get_hunt(hunt_id: str, db: AsyncSession = Depends(get_db)):
    result = await db.execute(select(Hunt).where(Hunt.id == hunt_id))
    hunt = result.scalar_one_or_none()
    if not hunt:
        raise HTTPException(status_code=404, detail="Hunt not found")
    return HuntResponse(
        id=hunt.id,
        name=hunt.name,
        description=hunt.description,
        status=hunt.status,
        owner_id=hunt.owner_id,
        created_at=hunt.created_at.isoformat(),
        updated_at=hunt.updated_at.isoformat(),
        dataset_count=len(hunt.datasets) if hunt.datasets else 0,
        hypothesis_count=len(hunt.hypotheses) if hunt.hypotheses else 0,
    )


@router.get("/{hunt_id}/progress", response_model=HuntProgressResponse, summary="Get hunt processing progress")
async def get_hunt_progress(hunt_id: str, db: AsyncSession = Depends(get_db)):
    hunt = await db.get(Hunt, hunt_id)
    if not hunt:
        raise HTTPException(status_code=404, detail="Hunt not found")

    ds_rows = await db.execute(
        select(Dataset.id, Dataset.processing_status)
        .where(Dataset.hunt_id == hunt_id)
    )
    datasets = ds_rows.all()
    dataset_ids = {row[0] for row in datasets}

    dataset_total = len(datasets)
    dataset_completed = sum(1 for _, st in datasets if st == "completed")
    dataset_errors = sum(1 for _, st in datasets if st == "completed_with_errors")
    dataset_processing = max(0, dataset_total - dataset_completed - dataset_errors)

    jobs = job_queue.list_jobs(limit=5000)
    relevant_jobs = [
        j for j in jobs
        if j.get("params", {}).get("hunt_id") == hunt_id
        or j.get("params", {}).get("dataset_id") in dataset_ids
    ]
    active_jobs = sum(1 for j in relevant_jobs if j.get("status") == "running")
    queued_jobs = sum(1 for j in relevant_jobs if j.get("status") == "queued")

    if inventory_cache.get(hunt_id) is not None:
        network_status = "ready"
        network_ratio = 1.0
    elif inventory_cache.is_building(hunt_id):
        network_status = "building"
        network_ratio = 0.5
    else:
        network_status = "none"
        network_ratio = 0.0

    dataset_ratio = ((dataset_completed + dataset_errors) / dataset_total) if dataset_total > 0 else 1.0
    overall_ratio = min(1.0, (dataset_ratio * 0.85) + (network_ratio * 0.15))
    progress_percent = round(overall_ratio * 100.0, 1)

    status = "ready"
    if dataset_total == 0:
        status = "idle"
    elif progress_percent < 100:
        status = "processing"

    stages = {
        "datasets": {
            "total": dataset_total,
            "completed": dataset_completed,
            "processing": dataset_processing,
            "errors": dataset_errors,
            "percent": round(dataset_ratio * 100.0, 1),
        },
        "network": {
            "status": network_status,
            "percent": round(network_ratio * 100.0, 1),
        },
        "jobs": {
            "active": active_jobs,
            "queued": queued_jobs,
            "total_seen": len(relevant_jobs),
        },
    }

    return HuntProgressResponse(
        hunt_id=hunt_id,
        status=status,
        progress_percent=progress_percent,
        dataset_total=dataset_total,
        dataset_completed=dataset_completed,
        dataset_processing=dataset_processing,
        dataset_errors=dataset_errors,
        active_jobs=active_jobs,
        queued_jobs=queued_jobs,
        network_status=network_status,
        stages=stages,
    )


@router.put("/{hunt_id}", response_model=HuntResponse, summary="Update a hunt")
async def update_hunt(
    hunt_id: str, body: HuntUpdate, db: AsyncSession = Depends(get_db)
):
    result = await db.execute(select(Hunt).where(Hunt.id == hunt_id))
    hunt = result.scalar_one_or_none()
    if not hunt:
        raise HTTPException(status_code=404, detail="Hunt not found")
    if body.name is not None:
        hunt.name = body.name
    if body.description is not None:
        hunt.description = body.description
    if body.status is not None:
        hunt.status = body.status
    await db.flush()
    return HuntResponse(
        id=hunt.id,
        name=hunt.name,
        description=hunt.description,
        status=hunt.status,
        owner_id=hunt.owner_id,
        created_at=hunt.created_at.isoformat(),
        updated_at=hunt.updated_at.isoformat(),
    )


@router.delete("/{hunt_id}", summary="Delete a hunt")
async def delete_hunt(hunt_id: str, db: AsyncSession = Depends(get_db)):
    result = await db.execute(select(Hunt).where(Hunt.id == hunt_id))
    hunt = result.scalar_one_or_none()
    if not hunt:
        raise HTTPException(status_code=404, detail="Hunt not found")
    await db.delete(hunt)
    return {"message": "Hunt deleted", "id": hunt_id}
'''
# Unconditional overwrite -- no anchor check, no backup.
p.write_text(new,encoding='utf-8')
print('updated hunts.py')
|
||||||
102
_edit_hunts_progress_tasks.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
"""One-off patch script: merge persistent ProcessingTask state into the hunt
progress endpoint in backend/app/api/routes/hunts.py.

Three anchored replacements inside get_hunt_progress:
1. After the in-memory job-queue counts, also query processing_tasks rows,
   roll them up per stage, and take the max of in-memory vs DB counts.
2. Blend the task completion ratio into the overall progress formula when
   task rows exist.
3. Surface task totals and the per-stage rollup in the `stages` payload.

NOTE(review): recovered from a rendered diff; leading whitespace inside the
string literals was lost and has been reconstructed as standard 4-space
indentation -- verify against hunts.py before running.
"""
from pathlib import Path

p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/hunts.py')
t=p.read_text(encoding='utf-8')
# Idempotency guard: only add ProcessingTask to the models import once.
if 'ProcessingTask' not in t:
    t=t.replace('from app.db.models import Hunt, Dataset','from app.db.models import Hunt, Dataset, ProcessingTask')

# Anchor 1: the in-memory job counting block up to the inventory check.
old='''    jobs = job_queue.list_jobs(limit=5000)
    relevant_jobs = [
        j for j in jobs
        if j.get("params", {}).get("hunt_id") == hunt_id
        or j.get("params", {}).get("dataset_id") in dataset_ids
    ]
    active_jobs = sum(1 for j in relevant_jobs if j.get("status") == "running")
    queued_jobs = sum(1 for j in relevant_jobs if j.get("status") == "queued")

    if inventory_cache.get(hunt_id) is not None:
'''
new='''    jobs = job_queue.list_jobs(limit=5000)
    relevant_jobs = [
        j for j in jobs
        if j.get("params", {}).get("hunt_id") == hunt_id
        or j.get("params", {}).get("dataset_id") in dataset_ids
    ]
    active_jobs_mem = sum(1 for j in relevant_jobs if j.get("status") == "running")
    queued_jobs_mem = sum(1 for j in relevant_jobs if j.get("status") == "queued")

    task_rows = await db.execute(
        select(ProcessingTask.stage, ProcessingTask.status, ProcessingTask.progress)
        .where(ProcessingTask.hunt_id == hunt_id)
    )
    tasks = task_rows.all()

    task_total = len(tasks)
    task_done = sum(1 for _, st, _ in tasks if st in ("completed", "failed", "cancelled"))
    task_running = sum(1 for _, st, _ in tasks if st == "running")
    task_queued = sum(1 for _, st, _ in tasks if st == "queued")
    task_ratio = (task_done / task_total) if task_total > 0 else None

    active_jobs = max(active_jobs_mem, task_running)
    queued_jobs = max(queued_jobs_mem, task_queued)

    stage_rollup: dict[str, dict] = {}
    for stage, status, progress in tasks:
        bucket = stage_rollup.setdefault(stage, {"total": 0, "done": 0, "running": 0, "queued": 0, "progress_sum": 0.0})
        bucket["total"] += 1
        if status in ("completed", "failed", "cancelled"):
            bucket["done"] += 1
        elif status == "running":
            bucket["running"] += 1
        elif status == "queued":
            bucket["queued"] += 1
        bucket["progress_sum"] += float(progress or 0.0)

    for stage_name, bucket in stage_rollup.items():
        total = max(1, bucket["total"])
        bucket["percent"] = round(bucket["progress_sum"] / total, 1)

    if inventory_cache.get(hunt_id) is not None:
'''
if old not in t:
    raise SystemExit('job block not found')
t=t.replace(old,new)

# Anchor 2: the overall progress formula.
old2='''    dataset_ratio = ((dataset_completed + dataset_errors) / dataset_total) if dataset_total > 0 else 1.0
    overall_ratio = min(1.0, (dataset_ratio * 0.85) + (network_ratio * 0.15))
    progress_percent = round(overall_ratio * 100.0, 1)
'''
new2='''    dataset_ratio = ((dataset_completed + dataset_errors) / dataset_total) if dataset_total > 0 else 1.0
    if task_ratio is None:
        overall_ratio = min(1.0, (dataset_ratio * 0.85) + (network_ratio * 0.15))
    else:
        overall_ratio = min(1.0, (dataset_ratio * 0.50) + (task_ratio * 0.35) + (network_ratio * 0.15))
    progress_percent = round(overall_ratio * 100.0, 1)
'''
if old2 not in t:
    raise SystemExit('ratio block not found')
t=t.replace(old2,new2)

# Anchor 3: the "jobs" entry of the stages payload.
old3='''        "jobs": {
            "active": active_jobs,
            "queued": queued_jobs,
            "total_seen": len(relevant_jobs),
        },
    }
'''
new3='''        "jobs": {
            "active": active_jobs,
            "queued": queued_jobs,
            "total_seen": len(relevant_jobs),
            "task_total": task_total,
            "task_done": task_done,
            "task_percent": round((task_ratio or 0.0) * 100.0, 1) if task_total else None,
        },
        "task_stages": stage_rollup,
    }
'''
if old3 not in t:
    raise SystemExit('stages jobs block not found')
t=t.replace(old3,new3)

p.write_text(t,encoding='utf-8')
print('updated hunt progress to merge persistent processing tasks')
|
||||||
46
_edit_job_queue.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
"""One-off patch script: have the keyword-scan job handler publish its result
into keyword_scan_cache in backend/app/services/job_queue.py, so the API can
serve cached scan results without rescanning.

NOTE(review): recovered from a rendered diff; leading whitespace inside the
string literals was lost and has been reconstructed as standard 4-space
indentation -- verify against job_queue.py before running.
"""
from pathlib import Path

p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
t=p.read_text(encoding='utf-8')
# Exact text of the current handler.
old='''async def _handle_keyword_scan(job: Job):
    """AUP keyword scan handler."""
    from app.db import async_session_factory
    from app.services.scanner import KeywordScanner

    dataset_id = job.params.get("dataset_id")
    job.message = f"Running AUP keyword scan on dataset {dataset_id}"

    async with async_session_factory() as db:
        scanner = KeywordScanner(db)
        result = await scanner.scan(dataset_ids=[dataset_id])

    hits = result.get("total_hits", 0)
    job.message = f"Keyword scan complete: {hits} hits"
    logger.info(f"Keyword scan for {dataset_id}: {hits} hits across {result.get('rows_scanned', 0)} rows")
    return {"dataset_id": dataset_id, "total_hits": hits, "rows_scanned": result.get("rows_scanned", 0)}
'''
# Same handler plus the keyword_scan_cache import and a cache put after the scan.
new='''async def _handle_keyword_scan(job: Job):
    """AUP keyword scan handler."""
    from app.db import async_session_factory
    from app.services.scanner import KeywordScanner, keyword_scan_cache

    dataset_id = job.params.get("dataset_id")
    job.message = f"Running AUP keyword scan on dataset {dataset_id}"

    async with async_session_factory() as db:
        scanner = KeywordScanner(db)
        result = await scanner.scan(dataset_ids=[dataset_id])

    # Cache dataset-only result for fast API reuse
    if dataset_id:
        keyword_scan_cache.put(dataset_id, result)

    hits = result.get("total_hits", 0)
    job.message = f"Keyword scan complete: {hits} hits"
    logger.info(f"Keyword scan for {dataset_id}: {hits} hits across {result.get('rows_scanned', 0)} rows")
    return {"dataset_id": dataset_id, "total_hits": hits, "rows_scanned": result.get("rows_scanned", 0)}
'''
# Fail loudly if the handler text has drifted (also makes re-runs fail fast,
# since `old` no longer exists after a successful patch).
if old not in t:
    raise SystemExit('target block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated job_queue keyword scan handler')
|
||||||
13
_edit_jobqueue_reconcile.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
"""One-off patch script: add reconcile_stale_processing_tasks() to
backend/app/services/job_queue.py, spliced in directly before
register_all_handlers(). The new helper marks queued/running ProcessingTask
rows left over from a previous process as failed at startup.

NOTE(review): recovered from a rendered diff; the whitespace encoded in the
escaped-\\n `ins` literal was lost and has been reconstructed as standard
4-space indentation -- verify against job_queue.py before running.
"""
from pathlib import Path

p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
t=p.read_text(encoding='utf-8')
# Insertion anchor: the function the new helper is spliced in front of.
marker='''def register_all_handlers():
    """Register all job handlers and completion callbacks."""
'''
# Source of the new helper, expressed as a single literal with \n escapes.
ins='''\n\nasync def reconcile_stale_processing_tasks() -> int:\n    """Mark queued/running processing tasks from prior runs as failed."""\n    from datetime import datetime, timezone\n    from sqlalchemy import update\n\n    try:\n        from app.db import async_session_factory\n        from app.db.models import ProcessingTask\n\n        now = datetime.now(timezone.utc)\n        async with async_session_factory() as db:\n            result = await db.execute(\n                update(ProcessingTask)\n                .where(ProcessingTask.status.in_([\"queued\", \"running\"]))\n                .values(\n                    status=\"failed\",\n                    error=\"Recovered after service restart before task completion\",\n                    message=\"Recovered stale task after restart\",\n                    completed_at=now,\n                )\n            )\n            await db.commit()\n            updated = int(result.rowcount or 0)\n\n        if updated:\n            logger.warning(\n                \"Reconciled %d stale processing tasks (queued/running -> failed) during startup\",\n                updated,\n            )\n        return updated\n    except Exception as e:\n        logger.warning(f\"Failed to reconcile stale processing tasks: {e}\")\n        return 0\n\n\n'''
# Idempotency guard: skip when the helper text is already present.
if ins.strip() not in t:
    if marker not in t:
        raise SystemExit('register marker not found')
    t=t.replace(marker,ins+marker)
p.write_text(t,encoding='utf-8')
print('added reconcile_stale_processing_tasks to job_queue')
|
||||||
64
_edit_jobqueue_sync.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
"""One-shot patch script: wire persistent processing-task syncing into
backend/app/services/job_queue.py.

Three edits are applied:
  1. insert the _sync_processing_task() helper before the singleton section,
  2. sync task state when a worker marks a job RUNNING,
  3. sync task state (incl. cancellation) when a job completes or fails.
"""
from pathlib import Path

target = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
text = target.read_text(encoding='utf-8')

# FIX: idempotency guard -- the original raised "block not found" when re-run
# after a successful patch, because the old blocks had already been replaced.
if 'if job.is_cancelled and not job.completed_at:' in text:
    print('job_queue already patched for persistent task syncing')
    raise SystemExit(0)

# NOTE(review): indentation inside every literal below was reconstructed from
# a whitespace-mangled dump -- confirm against the actual target file.
helper = '''\n\nasync def _sync_processing_task(job: Job):\n    """Persist latest job state into processing_tasks (if linked by job_id)."""\n    from datetime import datetime, timezone\n    from sqlalchemy import update\n\n    try:\n        from app.db import async_session_factory\n        from app.db.models import ProcessingTask\n\n        values = {\n            "status": job.status.value,\n            "progress": float(job.progress),\n            "message": job.message,\n            "error": job.error,\n        }\n        if job.started_at:\n            values["started_at"] = datetime.fromtimestamp(job.started_at, tz=timezone.utc)\n        if job.completed_at:\n            values["completed_at"] = datetime.fromtimestamp(job.completed_at, tz=timezone.utc)\n\n        async with async_session_factory() as db:\n            await db.execute(\n                update(ProcessingTask)\n                .where(ProcessingTask.job_id == job.id)\n                .values(**values)\n            )\n            await db.commit()\n    except Exception as e:\n        logger.warning(f"Failed to sync processing task for job {job.id}: {e}")\n'''

section_marker = '\n\n# -- Singleton + job handlers --\n'

if helper.strip() not in text:
    # FIX: the original silently skipped this insertion when the marker was
    # missing; the later replacements would then splice in calls to a helper
    # that was never added, leaving the patched file broken at runtime.
    if section_marker not in text:
        raise SystemExit('singleton section marker not found')
    text = text.replace(section_marker, helper + section_marker)

# Edit 2: report early progress and sync state as soon as a job turns RUNNING.
old_running = '''            job.status = JobStatus.RUNNING
            job.started_at = time.time()
            job.message = "Running..."
            logger.info(f"Worker {worker_id}: executing {job.id} ({job.job_type.value})")

            try:
'''
new_running = '''            job.status = JobStatus.RUNNING
            job.started_at = time.time()
            if job.progress <= 0:
                job.progress = 5.0
            job.message = "Running..."
            await _sync_processing_task(job)
            logger.info(f"Worker {worker_id}: executing {job.id} ({job.job_type.value})")

            try:
'''
if old_running not in text:
    raise SystemExit('worker running block not found')
text = text.replace(old_running, new_running)

# Edit 3: stamp cancelled jobs and sync final state before callbacks fire.
old_done = '''                job.completed_at = time.time()
                logger.info(f"Worker {worker_id}: completed {job.id} in {job.elapsed_ms}ms")
            except Exception as e:
                if not job.is_cancelled:
                    job.status = JobStatus.FAILED
                    job.error = str(e)
                    job.message = f"Failed: {e}"
                    job.completed_at = time.time()
                    logger.error(f"Worker {worker_id}: failed {job.id}: {e}", exc_info=True)

            # Fire completion callbacks
'''
new_done = '''                job.completed_at = time.time()
                logger.info(f"Worker {worker_id}: completed {job.id} in {job.elapsed_ms}ms")
            except Exception as e:
                if not job.is_cancelled:
                    job.status = JobStatus.FAILED
                    job.error = str(e)
                    job.message = f"Failed: {e}"
                    job.completed_at = time.time()
                    logger.error(f"Worker {worker_id}: failed {job.id}: {e}", exc_info=True)

            if job.is_cancelled and not job.completed_at:
                job.completed_at = time.time()

            await _sync_processing_task(job)

            # Fire completion callbacks
'''
if old_done not in text:
    raise SystemExit('worker completion block not found')
text = text.replace(old_done, new_done)

target.write_text(text, encoding='utf-8')
print('updated job_queue persistent task syncing')
|
||||||
39
_edit_jobqueue_triage_task.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""One-shot patch script: after TRIAGE completes, persist the chained
HOST_PROFILE job as a ProcessingTask row (backend/app/services/job_queue.py).
"""
from pathlib import Path

target = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
text = target.read_text(encoding='utf-8')

# FIX: idempotency guard -- re-running after a successful patch used to die
# with "triage chain block not found" because `old` had already been replaced.
# Sibling patch scripts guard this; this one did not.
if 'hp_job = job_queue.submit(JobType.HOST_PROFILE' in text:
    print('triage chain already persists host_profile task row')
    raise SystemExit(0)

# NOTE(review): indentation inside these literals was reconstructed from a
# whitespace-mangled dump -- confirm against the actual target file.
old = '''        if hunt_id:
            job_queue.submit(JobType.HOST_PROFILE, hunt_id=hunt_id)
            logger.info(f"Triage done for {dataset_id} - chained HOST_PROFILE for hunt {hunt_id}")
    except Exception as e:
'''

new = '''        if hunt_id:
            hp_job = job_queue.submit(JobType.HOST_PROFILE, hunt_id=hunt_id)
            try:
                from sqlalchemy import select
                from app.db.models import ProcessingTask
                async with async_session_factory() as db:
                    existing = await db.execute(
                        select(ProcessingTask.id).where(ProcessingTask.job_id == hp_job.id)
                    )
                    if existing.first() is None:
                        db.add(ProcessingTask(
                            hunt_id=hunt_id,
                            dataset_id=dataset_id,
                            job_id=hp_job.id,
                            stage="host_profile",
                            status="queued",
                            progress=0.0,
                            message="Queued",
                        ))
                        await db.commit()
            except Exception as persist_err:
                logger.warning(f"Failed to persist chained HOST_PROFILE task: {persist_err}")

            logger.info(f"Triage done for {dataset_id} - chained HOST_PROFILE for hunt {hunt_id}")
    except Exception as e:
'''

if old not in text:
    raise SystemExit('triage chain block not found')
text = text.replace(old, new)
target.write_text(text, encoding='utf-8')
print('updated triage chain to persist host_profile task row')
|
||||||
321
_edit_keywords.py
Normal file
@@ -0,0 +1,321 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
|
||||||
|
new_text='''"""API routes for AUP keyword themes, keyword CRUD, and scanning."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db import get_db
|
||||||
|
from app.db.models import KeywordTheme, Keyword
|
||||||
|
from app.services.scanner import KeywordScanner, keyword_scan_cache
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/keywords", tags=["keywords"])
|
||||||
|
|
||||||
|
|
||||||
|
class ThemeCreate(BaseModel):
|
||||||
|
name: str = Field(..., min_length=1, max_length=128)
|
||||||
|
color: str = Field(default="#9e9e9e", max_length=16)
|
||||||
|
enabled: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
class ThemeUpdate(BaseModel):
|
||||||
|
name: str | None = None
|
||||||
|
color: str | None = None
|
||||||
|
enabled: bool | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordOut(BaseModel):
|
||||||
|
id: int
|
||||||
|
theme_id: str
|
||||||
|
value: str
|
||||||
|
is_regex: bool
|
||||||
|
created_at: str
|
||||||
|
|
||||||
|
|
||||||
|
class ThemeOut(BaseModel):
|
||||||
|
id: str
|
||||||
|
name: str
|
||||||
|
color: str
|
||||||
|
enabled: bool
|
||||||
|
is_builtin: bool
|
||||||
|
created_at: str
|
||||||
|
keyword_count: int
|
||||||
|
keywords: list[KeywordOut]
|
||||||
|
|
||||||
|
|
||||||
|
class ThemeListResponse(BaseModel):
|
||||||
|
themes: list[ThemeOut]
|
||||||
|
total: int
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordCreate(BaseModel):
|
||||||
|
value: str = Field(..., min_length=1, max_length=256)
|
||||||
|
is_regex: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordBulkCreate(BaseModel):
|
||||||
|
values: list[str] = Field(..., min_items=1)
|
||||||
|
is_regex: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class ScanRequest(BaseModel):
|
||||||
|
dataset_ids: list[str] | None = None
|
||||||
|
theme_ids: list[str] | None = None
|
||||||
|
scan_hunts: bool = False
|
||||||
|
scan_annotations: bool = False
|
||||||
|
scan_messages: bool = False
|
||||||
|
prefer_cache: bool = True
|
||||||
|
force_rescan: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class ScanHit(BaseModel):
|
||||||
|
theme_name: str
|
||||||
|
theme_color: str
|
||||||
|
keyword: str
|
||||||
|
source_type: str
|
||||||
|
source_id: str | int
|
||||||
|
field: str
|
||||||
|
matched_value: str
|
||||||
|
row_index: int | None = None
|
||||||
|
dataset_name: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class ScanResponse(BaseModel):
|
||||||
|
total_hits: int
|
||||||
|
hits: list[ScanHit]
|
||||||
|
themes_scanned: int
|
||||||
|
keywords_scanned: int
|
||||||
|
rows_scanned: int
|
||||||
|
cache_used: bool = False
|
||||||
|
cache_status: str = "miss"
|
||||||
|
cached_at: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _theme_to_out(t: KeywordTheme) -> ThemeOut:
|
||||||
|
return ThemeOut(
|
||||||
|
id=t.id,
|
||||||
|
name=t.name,
|
||||||
|
color=t.color,
|
||||||
|
enabled=t.enabled,
|
||||||
|
is_builtin=t.is_builtin,
|
||||||
|
created_at=t.created_at.isoformat(),
|
||||||
|
keyword_count=len(t.keywords),
|
||||||
|
keywords=[
|
||||||
|
KeywordOut(
|
||||||
|
id=k.id,
|
||||||
|
theme_id=k.theme_id,
|
||||||
|
value=k.value,
|
||||||
|
is_regex=k.is_regex,
|
||||||
|
created_at=k.created_at.isoformat(),
|
||||||
|
)
|
||||||
|
for k in t.keywords
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_cached_results(entries: list[dict], allowed_theme_names: set[str] | None = None) -> dict:
|
||||||
|
hits: list[dict] = []
|
||||||
|
total_rows = 0
|
||||||
|
cached_at: str | None = None
|
||||||
|
|
||||||
|
for entry in entries:
|
||||||
|
result = entry["result"]
|
||||||
|
total_rows += int(result.get("rows_scanned", 0) or 0)
|
||||||
|
if entry.get("built_at"):
|
||||||
|
if not cached_at or entry["built_at"] > cached_at:
|
||||||
|
cached_at = entry["built_at"]
|
||||||
|
for h in result.get("hits", []):
|
||||||
|
if allowed_theme_names is not None and h.get("theme_name") not in allowed_theme_names:
|
||||||
|
continue
|
||||||
|
hits.append(h)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total_hits": len(hits),
|
||||||
|
"hits": hits,
|
||||||
|
"rows_scanned": total_rows,
|
||||||
|
"cached_at": cached_at,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/themes", response_model=ThemeListResponse)
|
||||||
|
async def list_themes(db: AsyncSession = Depends(get_db)):
|
||||||
|
result = await db.execute(select(KeywordTheme).order_by(KeywordTheme.name))
|
||||||
|
themes = result.scalars().all()
|
||||||
|
return ThemeListResponse(themes=[_theme_to_out(t) for t in themes], total=len(themes))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/themes", response_model=ThemeOut, status_code=201)
|
||||||
|
async def create_theme(body: ThemeCreate, db: AsyncSession = Depends(get_db)):
|
||||||
|
exists = await db.scalar(select(KeywordTheme.id).where(KeywordTheme.name == body.name))
|
||||||
|
if exists:
|
||||||
|
raise HTTPException(409, f"Theme '{body.name}' already exists")
|
||||||
|
theme = KeywordTheme(name=body.name, color=body.color, enabled=body.enabled)
|
||||||
|
db.add(theme)
|
||||||
|
await db.flush()
|
||||||
|
await db.refresh(theme)
|
||||||
|
keyword_scan_cache.clear()
|
||||||
|
return _theme_to_out(theme)
|
||||||
|
|
||||||
|
|
||||||
|
@router.put("/themes/{theme_id}", response_model=ThemeOut)
|
||||||
|
async def update_theme(theme_id: str, body: ThemeUpdate, db: AsyncSession = Depends(get_db)):
|
||||||
|
theme = await db.get(KeywordTheme, theme_id)
|
||||||
|
if not theme:
|
||||||
|
raise HTTPException(404, "Theme not found")
|
||||||
|
if body.name is not None:
|
||||||
|
dup = await db.scalar(
|
||||||
|
select(KeywordTheme.id).where(KeywordTheme.name == body.name, KeywordTheme.id != theme_id)
|
||||||
|
)
|
||||||
|
if dup:
|
||||||
|
raise HTTPException(409, f"Theme '{body.name}' already exists")
|
||||||
|
theme.name = body.name
|
||||||
|
if body.color is not None:
|
||||||
|
theme.color = body.color
|
||||||
|
if body.enabled is not None:
|
||||||
|
theme.enabled = body.enabled
|
||||||
|
await db.flush()
|
||||||
|
await db.refresh(theme)
|
||||||
|
keyword_scan_cache.clear()
|
||||||
|
return _theme_to_out(theme)
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/themes/{theme_id}", status_code=204)
|
||||||
|
async def delete_theme(theme_id: str, db: AsyncSession = Depends(get_db)):
|
||||||
|
theme = await db.get(KeywordTheme, theme_id)
|
||||||
|
if not theme:
|
||||||
|
raise HTTPException(404, "Theme not found")
|
||||||
|
await db.delete(theme)
|
||||||
|
keyword_scan_cache.clear()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/themes/{theme_id}/keywords", response_model=KeywordOut, status_code=201)
|
||||||
|
async def add_keyword(theme_id: str, body: KeywordCreate, db: AsyncSession = Depends(get_db)):
|
||||||
|
theme = await db.get(KeywordTheme, theme_id)
|
||||||
|
if not theme:
|
||||||
|
raise HTTPException(404, "Theme not found")
|
||||||
|
kw = Keyword(theme_id=theme_id, value=body.value, is_regex=body.is_regex)
|
||||||
|
db.add(kw)
|
||||||
|
await db.flush()
|
||||||
|
await db.refresh(kw)
|
||||||
|
keyword_scan_cache.clear()
|
||||||
|
return KeywordOut(
|
||||||
|
id=kw.id, theme_id=kw.theme_id, value=kw.value,
|
||||||
|
is_regex=kw.is_regex, created_at=kw.created_at.isoformat(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/themes/{theme_id}/keywords/bulk", response_model=dict, status_code=201)
|
||||||
|
async def add_keywords_bulk(theme_id: str, body: KeywordBulkCreate, db: AsyncSession = Depends(get_db)):
|
||||||
|
theme = await db.get(KeywordTheme, theme_id)
|
||||||
|
if not theme:
|
||||||
|
raise HTTPException(404, "Theme not found")
|
||||||
|
added = 0
|
||||||
|
for val in body.values:
|
||||||
|
val = val.strip()
|
||||||
|
if not val:
|
||||||
|
continue
|
||||||
|
db.add(Keyword(theme_id=theme_id, value=val, is_regex=body.is_regex))
|
||||||
|
added += 1
|
||||||
|
await db.flush()
|
||||||
|
keyword_scan_cache.clear()
|
||||||
|
return {"added": added, "theme_id": theme_id}
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/keywords/{keyword_id}", status_code=204)
|
||||||
|
async def delete_keyword(keyword_id: int, db: AsyncSession = Depends(get_db)):
|
||||||
|
kw = await db.get(Keyword, keyword_id)
|
||||||
|
if not kw:
|
||||||
|
raise HTTPException(404, "Keyword not found")
|
||||||
|
await db.delete(kw)
|
||||||
|
keyword_scan_cache.clear()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/scan", response_model=ScanResponse)
|
||||||
|
async def run_scan(body: ScanRequest, db: AsyncSession = Depends(get_db)):
|
||||||
|
scanner = KeywordScanner(db)
|
||||||
|
|
||||||
|
can_use_cache = (
|
||||||
|
body.prefer_cache
|
||||||
|
and not body.force_rescan
|
||||||
|
and bool(body.dataset_ids)
|
||||||
|
and not body.scan_hunts
|
||||||
|
and not body.scan_annotations
|
||||||
|
and not body.scan_messages
|
||||||
|
)
|
||||||
|
|
||||||
|
if can_use_cache:
|
||||||
|
themes = await scanner._load_themes(body.theme_ids)
|
||||||
|
allowed_theme_names = {t.name for t in themes}
|
||||||
|
keywords_scanned = sum(len(theme.keywords) for theme in themes)
|
||||||
|
|
||||||
|
cached_entries: list[dict] = []
|
||||||
|
missing: list[str] = []
|
||||||
|
for dataset_id in (body.dataset_ids or []):
|
||||||
|
entry = keyword_scan_cache.get(dataset_id)
|
||||||
|
if not entry:
|
||||||
|
missing.append(dataset_id)
|
||||||
|
continue
|
||||||
|
cached_entries.append({"result": entry.result, "built_at": entry.built_at})
|
||||||
|
|
||||||
|
if not missing and cached_entries:
|
||||||
|
merged = _merge_cached_results(cached_entries, allowed_theme_names if body.theme_ids else None)
|
||||||
|
return {
|
||||||
|
"total_hits": merged["total_hits"],
|
||||||
|
"hits": merged["hits"],
|
||||||
|
"themes_scanned": len(themes),
|
||||||
|
"keywords_scanned": keywords_scanned,
|
||||||
|
"rows_scanned": merged["rows_scanned"],
|
||||||
|
"cache_used": True,
|
||||||
|
"cache_status": "hit",
|
||||||
|
"cached_at": merged["cached_at"],
|
||||||
|
}
|
||||||
|
|
||||||
|
result = await scanner.scan(
|
||||||
|
dataset_ids=body.dataset_ids,
|
||||||
|
theme_ids=body.theme_ids,
|
||||||
|
scan_hunts=body.scan_hunts,
|
||||||
|
scan_annotations=body.scan_annotations,
|
||||||
|
scan_messages=body.scan_messages,
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
**result,
|
||||||
|
"cache_used": False,
|
||||||
|
"cache_status": "miss",
|
||||||
|
"cached_at": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/scan/quick", response_model=ScanResponse)
|
||||||
|
async def quick_scan(
|
||||||
|
dataset_id: str = Query(..., description="Dataset to scan"),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
entry = keyword_scan_cache.get(dataset_id)
|
||||||
|
if entry is not None:
|
||||||
|
result = entry.result
|
||||||
|
return {
|
||||||
|
**result,
|
||||||
|
"cache_used": True,
|
||||||
|
"cache_status": "hit",
|
||||||
|
"cached_at": entry.built_at,
|
||||||
|
}
|
||||||
|
|
||||||
|
scanner = KeywordScanner(db)
|
||||||
|
result = await scanner.scan(dataset_ids=[dataset_id])
|
||||||
|
keyword_scan_cache.put(dataset_id, result)
|
||||||
|
return {
|
||||||
|
**result,
|
||||||
|
"cache_used": False,
|
||||||
|
"cache_status": "miss",
|
||||||
|
"cached_at": None,
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
p.write_text(new_text,encoding='utf-8')
|
||||||
|
print('updated keywords.py')
|
||||||
31
_edit_main_reconcile.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
"""One-shot patch script: call reconcile_stale_processing_tasks() during app
startup (lifespan) in backend/app/main.py, before the job queue starts."""
from pathlib import Path

target = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/main.py')
text = target.read_text(encoding='utf-8')

# FIX: idempotency guard -- the original raised "startup queue block not
# found" when re-run after a successful patch (the old block was gone).
if 'reconcile_stale_processing_tasks' in text:
    print('main lifespan already wired for startup reconciliation')
    raise SystemExit(0)

# NOTE(review): indentation inside these literals was reconstructed from a
# whitespace-mangled dump -- confirm against the actual main.py lifespan body.
old = '''    # Start job queue
    from app.services.job_queue import job_queue, register_all_handlers, JobType
    register_all_handlers()
    await job_queue.start()
    logger.info("Job queue started (%d workers)", job_queue._max_workers)
'''

new = '''    # Start job queue
    from app.services.job_queue import (
        job_queue,
        register_all_handlers,
        reconcile_stale_processing_tasks,
        JobType,
    )

    if settings.STARTUP_RECONCILE_STALE_TASKS:
        reconciled = await reconcile_stale_processing_tasks()
        if reconciled:
            logger.info("Startup reconciliation marked %d stale tasks", reconciled)

    register_all_handlers()
    await job_queue.start()
    logger.info("Job queue started (%d workers)", job_queue._max_workers)
'''

if old not in text:
    raise SystemExit('startup queue block not found')
text = text.replace(old, new)
target.write_text(text, encoding='utf-8')
print('wired startup reconciliation in main lifespan')
|
||||||
45
_edit_models_processing.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
"""One-shot patch script: add the ProcessingTask ORM model to
backend/app/db/models.py, spliced in just above the Playbook section."""
from pathlib import Path

models_path = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/db/models.py')
body = models_path.read_text(encoding='utf-8')

# Idempotency guard: nothing to do when the model already exists.
if 'class ProcessingTask(Base):' in body:
    print('processing task model already exists')
    raise SystemExit(0)

# NOTE(review): indentation inside this literal was reconstructed from a
# whitespace-mangled dump -- confirm against models.py conventions.
model_block = '''

# -- Persistent Processing Tasks (Phase 2) ---

class ProcessingTask(Base):
    __tablename__ = "processing_tasks"

    id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
    hunt_id: Mapped[Optional[str]] = mapped_column(
        String(32), ForeignKey("hunts.id", ondelete="CASCADE"), nullable=True, index=True
    )
    dataset_id: Mapped[Optional[str]] = mapped_column(
        String(32), ForeignKey("datasets.id", ondelete="CASCADE"), nullable=True, index=True
    )
    job_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, index=True)
    stage: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    status: Mapped[str] = mapped_column(String(20), default="queued", index=True)
    progress: Mapped[float] = mapped_column(Float, default=0.0)
    message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
    started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
    completed_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
    )

    __table_args__ = (
        Index("ix_processing_tasks_hunt_stage", "hunt_id", "stage"),
        Index("ix_processing_tasks_dataset_stage", "dataset_id", "stage"),
    )
'''

# Anchor: insert the new model immediately before the Playbook section header.
playbook_header = '\n\n# -- Playbook / Investigation Templates (Feature 3) ---\n'
if playbook_header not in body:
    raise SystemExit('marker not found for insertion')

patched = body.replace(playbook_header, model_block + playbook_header)
models_path.write_text(patched, encoding='utf-8')
print('added ProcessingTask model')
|
||||||
59
_edit_networkmap_hit.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
"""One-shot patch script: make NetworkMap.tsx hit-testing also register clicks
on a node's text label, not just the node circle."""
from pathlib import Path

target = Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
text = target.read_text(encoding='utf-8')

# FIX: idempotency guard -- the original raised "hitTest block not found" on
# re-run after a successful patch: the helper-insert step was guarded, but the
# hitTest rewrite was not, so a second run died even though the file was fine.
if 'isPointOnNodeLabel(n, wx, wy, vp)' in text:
    print('NetworkMap hit-test already handles labels')
    raise SystemExit(0)

# NOTE(review): the TypeScript below was reconstructed from a whitespace-
# mangled dump (2-space indent assumed) -- confirm against the actual file.
label_hit_fn = '''
function isPointOnNodeLabel(node: GNode, wx: number, wy: number, vp: Viewport): boolean {
  const fontSize = Math.max(9, Math.round(12 / vp.scale));
  const approxCharW = Math.max(5, fontSize * 0.58);
  const line1 = node.label || '';
  const line2 = node.meta.ips.length > 0 ? node.meta.ips[0] : '';
  const tw = Math.max(line1.length * approxCharW, line2 ? line2.length * approxCharW : 0);
  const px = 5, py = 2;
  const totalH = line2 ? fontSize * 2 + py * 2 : fontSize + py * 2;
  const lx = node.x, ly = node.y - node.radius - 6;
  const rx = lx - tw / 2 - px;
  const ry = ly - totalH;
  const rw = tw + px * 2;
  const rh = totalH;
  return wx >= rx && wx <= (rx + rw) && wy >= ry && wy <= (ry + rh);
}

'''

hit_test_header = '// == Hit-test =============================================================\n'
if 'function isPointOnNodeLabel' not in text:
    text = text.replace(hit_test_header, hit_test_header + label_hit_fn)

old = '''function hitTest(
  graph: Graph, canvas: HTMLCanvasElement, clientX: number, clientY: number, vp: Viewport,
): GNode | null {
  const { wx, wy } = screenToWorld(canvas, clientX, clientY, vp);
  for (const n of graph.nodes) {
    const dx = n.x - wx, dy = n.y - wy;
    if (dx * dx + dy * dy < (n.radius + 5) ** 2) return n;
  }
  return null;
}
'''

new = '''function hitTest(
  graph: Graph, canvas: HTMLCanvasElement, clientX: number, clientY: number, vp: Viewport,
): GNode | null {
  const { wx, wy } = screenToWorld(canvas, clientX, clientY, vp);

  // Node-circle hit has priority
  for (const n of graph.nodes) {
    const dx = n.x - wx, dy = n.y - wy;
    if (dx * dx + dy * dy < (n.radius + 5) ** 2) return n;
  }

  // Then label hit (so clicking text works too)
  for (const n of graph.nodes) {
    if (isPointOnNodeLabel(n, wx, wy, vp)) return n;
  }

  return null;
}
'''

if old not in text:
    raise SystemExit('hitTest block not found')
text = text.replace(old, new)
target.write_text(text, encoding='utf-8')
print('updated NetworkMap hit-test for labels')
|
||||||
272
_edit_scanner.py
Normal file
@@ -0,0 +1,272 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
p = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
|
||||||
|
text = p.read_text(encoding='utf-8')
|
||||||
|
new_text = '''"""AUP Keyword Scanner searches dataset rows, hunts, annotations, and
|
||||||
|
messages for keyword matches.
|
||||||
|
|
||||||
|
Scanning is done in Python (not SQL LIKE on JSON columns) for portability
|
||||||
|
across SQLite / PostgreSQL and to provide per-cell match context.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db.models import (
|
||||||
|
KeywordTheme,
|
||||||
|
DatasetRow,
|
||||||
|
Dataset,
|
||||||
|
Hunt,
|
||||||
|
Annotation,
|
||||||
|
Message,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
BATCH_SIZE = 200
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ScanHit:
|
||||||
|
theme_name: str
|
||||||
|
theme_color: str
|
||||||
|
keyword: str
|
||||||
|
source_type: str # dataset_row | hunt | annotation | message
|
||||||
|
source_id: str | int
|
||||||
|
field: str
|
||||||
|
matched_value: str
|
||||||
|
row_index: int | None = None
|
||||||
|
dataset_name: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ScanResult:
|
||||||
|
total_hits: int = 0
|
||||||
|
hits: list[ScanHit] = field(default_factory=list)
|
||||||
|
themes_scanned: int = 0
|
||||||
|
keywords_scanned: int = 0
|
||||||
|
rows_scanned: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class KeywordScanCacheEntry:
|
||||||
|
dataset_id: str
|
||||||
|
result: dict
|
||||||
|
built_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordScanCache:
|
||||||
|
"""In-memory per-dataset cache for dataset-only keyword scans.
|
||||||
|
|
||||||
|
This enables fast-path reads when users run AUP scans against datasets that
|
||||||
|
were already scanned during upload pipeline processing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._entries: dict[str, KeywordScanCacheEntry] = {}
|
||||||
|
|
||||||
|
def put(self, dataset_id: str, result: dict):
|
||||||
|
self._entries[dataset_id] = KeywordScanCacheEntry(dataset_id=dataset_id, result=result)
|
||||||
|
|
||||||
|
def get(self, dataset_id: str) -> KeywordScanCacheEntry | None:
|
||||||
|
return self._entries.get(dataset_id)
|
||||||
|
|
||||||
|
def invalidate_dataset(self, dataset_id: str):
|
||||||
|
self._entries.pop(dataset_id, None)
|
||||||
|
|
||||||
|
def clear(self):
|
||||||
|
self._entries.clear()
|
||||||
|
|
||||||
|
|
||||||
|
keyword_scan_cache = KeywordScanCache()
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordScanner:
|
||||||
|
"""Scans multiple data sources for keyword/regex matches."""
|
||||||
|
|
||||||
|
def __init__(self, db: AsyncSession):
|
||||||
|
self.db = db
|
||||||
|
|
||||||
|
# Public API
|
||||||
|
|
||||||
|
async def scan(
|
||||||
|
self,
|
||||||
|
dataset_ids: list[str] | None = None,
|
||||||
|
theme_ids: list[str] | None = None,
|
||||||
|
scan_hunts: bool = False,
|
||||||
|
scan_annotations: bool = False,
|
||||||
|
scan_messages: bool = False,
|
||||||
|
) -> dict:
|
||||||
|
"""Run a full AUP scan and return dict matching ScanResponse."""
|
||||||
|
# Load themes + keywords
|
||||||
|
themes = await self._load_themes(theme_ids)
|
||||||
|
if not themes:
|
||||||
|
return ScanResult().__dict__
|
||||||
|
|
||||||
|
# Pre-compile patterns per theme
|
||||||
|
patterns = self._compile_patterns(themes)
|
||||||
|
result = ScanResult(
|
||||||
|
themes_scanned=len(themes),
|
||||||
|
keywords_scanned=sum(len(kws) for kws in patterns.values()),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Scan dataset rows
|
||||||
|
await self._scan_datasets(patterns, result, dataset_ids)
|
||||||
|
|
||||||
|
# Scan hunts
|
||||||
|
if scan_hunts:
|
||||||
|
await self._scan_hunts(patterns, result)
|
||||||
|
|
||||||
|
# Scan annotations
|
||||||
|
if scan_annotations:
|
||||||
|
await self._scan_annotations(patterns, result)
|
||||||
|
|
||||||
|
# Scan messages
|
||||||
|
if scan_messages:
|
||||||
|
await self._scan_messages(patterns, result)
|
||||||
|
|
||||||
|
result.total_hits = len(result.hits)
|
||||||
|
return {
|
||||||
|
"total_hits": result.total_hits,
|
||||||
|
"hits": [h.__dict__ for h in result.hits],
|
||||||
|
"themes_scanned": result.themes_scanned,
|
||||||
|
"keywords_scanned": result.keywords_scanned,
|
||||||
|
"rows_scanned": result.rows_scanned,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Internal
|
||||||
|
|
||||||
|
async def _load_themes(self, theme_ids: list[str] | None) -> list[KeywordTheme]:
|
||||||
|
q = select(KeywordTheme).where(KeywordTheme.enabled == True) # noqa: E712
|
||||||
|
if theme_ids:
|
||||||
|
q = q.where(KeywordTheme.id.in_(theme_ids))
|
||||||
|
result = await self.db.execute(q)
|
||||||
|
return list(result.scalars().all())
|
||||||
|
|
||||||
|
def _compile_patterns(
|
||||||
|
self, themes: list[KeywordTheme]
|
||||||
|
) -> dict[tuple[str, str, str], list[tuple[str, re.Pattern]]]:
|
||||||
|
"""Returns {(theme_id, theme_name, theme_color): [(keyword_value, compiled_pattern), ...]}"""
|
||||||
|
patterns: dict[tuple[str, str, str], list[tuple[str, re.Pattern]]] = {}
|
||||||
|
for theme in themes:
|
||||||
|
key = (theme.id, theme.name, theme.color)
|
||||||
|
compiled = []
|
||||||
|
for kw in theme.keywords:
|
||||||
|
try:
|
||||||
|
if kw.is_regex:
|
||||||
|
pat = re.compile(kw.value, re.IGNORECASE)
|
||||||
|
else:
|
||||||
|
pat = re.compile(re.escape(kw.value), re.IGNORECASE)
|
||||||
|
compiled.append((kw.value, pat))
|
||||||
|
except re.error:
|
||||||
|
logger.warning("Invalid regex pattern '%s' in theme '%s', skipping",
|
||||||
|
kw.value, theme.name)
|
||||||
|
patterns[key] = compiled
|
||||||
|
return patterns
|
||||||
|
|
||||||
|
def _match_text(
|
||||||
|
self,
|
||||||
|
text: str,
|
||||||
|
patterns: dict,
|
||||||
|
source_type: str,
|
||||||
|
source_id: str | int,
|
||||||
|
field_name: str,
|
||||||
|
hits: list[ScanHit],
|
||||||
|
row_index: int | None = None,
|
||||||
|
dataset_name: str | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Check text against all compiled patterns, append hits."""
|
||||||
|
if not text:
|
||||||
|
return
|
||||||
|
for (theme_id, theme_name, theme_color), keyword_patterns in patterns.items():
|
||||||
|
for kw_value, pat in keyword_patterns:
|
||||||
|
if pat.search(text):
|
||||||
|
matched_preview = text[:200] + ("" if len(text) > 200 else "")
|
||||||
|
hits.append(ScanHit(
|
||||||
|
theme_name=theme_name,
|
||||||
|
theme_color=theme_color,
|
||||||
|
keyword=kw_value,
|
||||||
|
source_type=source_type,
|
||||||
|
source_id=source_id,
|
||||||
|
field=field_name,
|
||||||
|
matched_value=matched_preview,
|
||||||
|
row_index=row_index,
|
||||||
|
dataset_name=dataset_name,
|
||||||
|
))
|
||||||
|
|
||||||
|
async def _scan_datasets(
|
||||||
|
self, patterns: dict, result: ScanResult, dataset_ids: list[str] | None
|
||||||
|
) -> None:
|
||||||
|
"""Scan dataset rows in batches."""
|
||||||
|
ds_q = select(Dataset.id, Dataset.name)
|
||||||
|
if dataset_ids:
|
||||||
|
ds_q = ds_q.where(Dataset.id.in_(dataset_ids))
|
||||||
|
ds_result = await self.db.execute(ds_q)
|
||||||
|
ds_map = {r[0]: r[1] for r in ds_result.fetchall()}
|
||||||
|
|
||||||
|
if not ds_map:
|
||||||
|
return
|
||||||
|
|
||||||
|
offset = 0
|
||||||
|
row_q_base = select(DatasetRow).where(
|
||||||
|
DatasetRow.dataset_id.in_(list(ds_map.keys()))
|
||||||
|
).order_by(DatasetRow.id)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
rows_result = await self.db.execute(
|
||||||
|
row_q_base.offset(offset).limit(BATCH_SIZE)
|
||||||
|
)
|
||||||
|
rows = rows_result.scalars().all()
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
result.rows_scanned += 1
|
||||||
|
data = row.data or {}
|
||||||
|
for col_name, cell_value in data.items():
|
||||||
|
if cell_value is None:
|
||||||
|
continue
|
||||||
|
text = str(cell_value)
|
||||||
|
self._match_text(
|
||||||
|
text, patterns, "dataset_row", row.id,
|
||||||
|
col_name, result.hits,
|
||||||
|
row_index=row.row_index,
|
||||||
|
dataset_name=ds_map.get(row.dataset_id),
|
||||||
|
)
|
||||||
|
|
||||||
|
offset += BATCH_SIZE
|
||||||
|
import asyncio
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
if len(rows) < BATCH_SIZE:
|
||||||
|
break
|
||||||
|
|
||||||
|
async def _scan_hunts(self, patterns: dict, result: ScanResult) -> None:
|
||||||
|
"""Scan hunt names and descriptions."""
|
||||||
|
hunts_result = await self.db.execute(select(Hunt))
|
||||||
|
for hunt in hunts_result.scalars().all():
|
||||||
|
self._match_text(hunt.name, patterns, "hunt", hunt.id, "name", result.hits)
|
||||||
|
if hunt.description:
|
||||||
|
self._match_text(hunt.description, patterns, "hunt", hunt.id, "description", result.hits)
|
||||||
|
|
||||||
|
async def _scan_annotations(self, patterns: dict, result: ScanResult) -> None:
|
||||||
|
"""Scan annotation text."""
|
||||||
|
ann_result = await self.db.execute(select(Annotation))
|
||||||
|
for ann in ann_result.scalars().all():
|
||||||
|
self._match_text(ann.text, patterns, "annotation", ann.id, "text", result.hits)
|
||||||
|
|
||||||
|
async def _scan_messages(self, patterns: dict, result: ScanResult) -> None:
|
||||||
|
"""Scan conversation messages (user messages only)."""
|
||||||
|
msg_result = await self.db.execute(
|
||||||
|
select(Message).where(Message.role == "user")
|
||||||
|
)
|
||||||
|
for msg in msg_result.scalars().all():
|
||||||
|
self._match_text(msg.content, patterns, "message", msg.id, "content", result.hits)
|
||||||
|
'''
|
||||||
|
|
||||||
|
p.write_text(new_text, encoding='utf-8')
|
||||||
|
print('updated scanner.py')
|
||||||
31
_edit_test_api.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/tests/test_api.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
insert='''
|
||||||
|
async def test_hunt_progress(self, client):
|
||||||
|
create = await client.post("/api/hunts", json={"name": "Progress Hunt"})
|
||||||
|
hunt_id = create.json()["id"]
|
||||||
|
|
||||||
|
# attach one dataset so progress has scope
|
||||||
|
from tests.conftest import SAMPLE_CSV
|
||||||
|
import io
|
||||||
|
files = {"file": ("progress.csv", io.BytesIO(SAMPLE_CSV), "text/csv")}
|
||||||
|
up = await client.post(f"/api/datasets/upload?hunt_id={hunt_id}", files=files)
|
||||||
|
assert up.status_code == 200
|
||||||
|
|
||||||
|
res = await client.get(f"/api/hunts/{hunt_id}/progress")
|
||||||
|
assert res.status_code == 200
|
||||||
|
body = res.json()
|
||||||
|
assert body["hunt_id"] == hunt_id
|
||||||
|
assert "progress_percent" in body
|
||||||
|
assert "dataset_total" in body
|
||||||
|
assert "network_status" in body
|
||||||
|
'''
|
||||||
|
needle=''' async def test_get_nonexistent_hunt(self, client):
|
||||||
|
resp = await client.get("/api/hunts/nonexistent-id")
|
||||||
|
assert resp.status_code == 404
|
||||||
|
'''
|
||||||
|
if needle in t and 'test_hunt_progress' not in t:
|
||||||
|
t=t.replace(needle, needle+'\n'+insert)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated test_api.py')
|
||||||
32
_edit_test_keywords.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/tests/test_keywords.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
add='''
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_quick_scan_cache_hit(client: AsyncClient):
|
||||||
|
"""Second quick scan should return cache hit metadata."""
|
||||||
|
theme_res = await client.post("/api/keywords/themes", json={"name": "Quick Cache Theme", "color": "#00aa00"})
|
||||||
|
tid = theme_res.json()["id"]
|
||||||
|
await client.post(f"/api/keywords/themes/{tid}/keywords", json={"value": "chrome.exe"})
|
||||||
|
|
||||||
|
from tests.conftest import SAMPLE_CSV
|
||||||
|
import io
|
||||||
|
files = {"file": ("cache_quick.csv", io.BytesIO(SAMPLE_CSV), "text/csv")}
|
||||||
|
upload = await client.post("/api/datasets/upload", files=files)
|
||||||
|
ds_id = upload.json()["id"]
|
||||||
|
|
||||||
|
first = await client.get(f"/api/keywords/scan/quick?dataset_id={ds_id}")
|
||||||
|
assert first.status_code == 200
|
||||||
|
assert first.json().get("cache_status") in ("miss", "hit")
|
||||||
|
|
||||||
|
second = await client.get(f"/api/keywords/scan/quick?dataset_id={ds_id}")
|
||||||
|
assert second.status_code == 200
|
||||||
|
body = second.json()
|
||||||
|
assert body.get("cache_used") is True
|
||||||
|
assert body.get("cache_status") == "hit"
|
||||||
|
'''
|
||||||
|
if 'test_quick_scan_cache_hit' not in t:
|
||||||
|
t=t + add
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated test_keywords.py')
|
||||||
26
_edit_upload.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/FileUpload.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
# import useEffect
|
||||||
|
t=t.replace("import React, { useState, useCallback, useRef } from 'react';","import React, { useState, useCallback, useRef, useEffect } from 'react';")
|
||||||
|
# import HuntProgress type
|
||||||
|
t=t.replace("import { datasets, hunts, type UploadResult, type Hunt } from '../api/client';","import { datasets, hunts, type UploadResult, type Hunt, type HuntProgress } from '../api/client';")
|
||||||
|
# add state
|
||||||
|
if 'const [huntProgress, setHuntProgress]' not in t:
|
||||||
|
t=t.replace(" const [huntList, setHuntList] = useState<Hunt[]>([]);\n const [huntId, setHuntId] = useState('');"," const [huntList, setHuntList] = useState<Hunt[]>([]);\n const [huntId, setHuntId] = useState('');\n const [huntProgress, setHuntProgress] = useState<HuntProgress | null>(null);")
|
||||||
|
# add polling effect after hunts list effect
|
||||||
|
marker=" React.useEffect(() => {\n hunts.list(0, 100).then(r => setHuntList(r.hunts)).catch(() => {});\n }, []);\n"
|
||||||
|
if marker in t and 'setInterval' not in t.split(marker,1)[1][:500]:
|
||||||
|
add='''\n useEffect(() => {\n let timer: any = null;\n let cancelled = false;\n\n const pull = async () => {\n if (!huntId) {\n if (!cancelled) setHuntProgress(null);\n return;\n }\n try {\n const p = await hunts.progress(huntId);\n if (!cancelled) setHuntProgress(p);\n } catch {\n if (!cancelled) setHuntProgress(null);\n }\n };\n\n pull();\n if (huntId) timer = setInterval(pull, 2000);\n return () => { cancelled = true; if (timer) clearInterval(timer); };\n }, [huntId, jobs.length]);\n'''
|
||||||
|
t=t.replace(marker, marker+add)
|
||||||
|
|
||||||
|
# insert master progress UI after overall summary
|
||||||
|
insert_after=''' {overallTotal > 0 && (\n <Stack direction="row" alignItems="center" spacing={1} sx={{ mt: 2 }}>\n <Typography variant="body2" color="text.secondary">\n {overallDone + overallErr} / {overallTotal} files processed\n {overallErr > 0 && ` ({overallErr} failed)`}\n </Typography>\n <Box sx={{ flexGrow: 1 }} />\n {overallDone + overallErr === overallTotal && overallTotal > 0 && (\n <Tooltip title="Clear completed">\n <IconButton size="small" onClick={clearCompleted}><ClearIcon fontSize="small" /></IconButton>\n </Tooltip>\n )}\n </Stack>\n )}\n'''
|
||||||
|
add_block='''\n {huntId && huntProgress && (\n <Paper sx={{ p: 1.5, mt: 1.5 }}>\n <Stack direction="row" alignItems="center" spacing={1} sx={{ mb: 0.8 }}>\n <Typography variant="body2" sx={{ fontWeight: 600 }}>\n Master Processing Progress\n </Typography>\n <Chip\n size="small"\n label={huntProgress.status.toUpperCase()}\n color={huntProgress.status === 'ready' ? 'success' : huntProgress.status === 'processing' ? 'warning' : 'default'}\n variant="outlined"\n />\n <Box sx={{ flexGrow: 1 }} />\n <Typography variant="caption" color="text.secondary">\n {huntProgress.progress_percent.toFixed(1)}%\n </Typography>\n </Stack>\n <LinearProgress\n variant="determinate"\n value={Math.max(0, Math.min(100, huntProgress.progress_percent))}\n sx={{ height: 8, borderRadius: 4 }}\n />\n <Stack direction="row" spacing={1} sx={{ mt: 1 }} flexWrap="wrap" useFlexGap>\n <Chip size="small" label={`Datasets ${huntProgress.dataset_completed}/${huntProgress.dataset_total}`} variant="outlined" />\n <Chip size="small" label={`Active jobs ${huntProgress.active_jobs}`} variant="outlined" />\n <Chip size="small" label={`Queued jobs ${huntProgress.queued_jobs}`} variant="outlined" />\n <Chip size="small" label={`Network ${huntProgress.network_status}`} variant="outlined" />\n </Stack>\n </Paper>\n )}\n'''
|
||||||
|
if insert_after in t:
|
||||||
|
t=t.replace(insert_after, insert_after+add_block)
|
||||||
|
else:
|
||||||
|
print('warning: summary block not found')
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated FileUpload.tsx')
|
||||||
42
_edit_upload2.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/FileUpload.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
marker=''' {/* Per-file progress list */}
|
||||||
|
'''
|
||||||
|
add=''' {huntId && huntProgress && (
|
||||||
|
<Paper sx={{ p: 1.5, mt: 1.5 }}>
|
||||||
|
<Stack direction="row" alignItems="center" spacing={1} sx={{ mb: 0.8 }}>
|
||||||
|
<Typography variant="body2" sx={{ fontWeight: 600 }}>
|
||||||
|
Master Processing Progress
|
||||||
|
</Typography>
|
||||||
|
<Chip
|
||||||
|
size="small"
|
||||||
|
label={huntProgress.status.toUpperCase()}
|
||||||
|
color={huntProgress.status === 'ready' ? 'success' : huntProgress.status === 'processing' ? 'warning' : 'default'}
|
||||||
|
variant="outlined"
|
||||||
|
/>
|
||||||
|
<Box sx={{ flexGrow: 1 }} />
|
||||||
|
<Typography variant="caption" color="text.secondary">
|
||||||
|
{huntProgress.progress_percent.toFixed(1)}%
|
||||||
|
</Typography>
|
||||||
|
</Stack>
|
||||||
|
<LinearProgress
|
||||||
|
variant="determinate"
|
||||||
|
value={Math.max(0, Math.min(100, huntProgress.progress_percent))}
|
||||||
|
sx={{ height: 8, borderRadius: 4 }}
|
||||||
|
/>
|
||||||
|
<Stack direction="row" spacing={1} sx={{ mt: 1 }} flexWrap="wrap" useFlexGap>
|
||||||
|
<Chip size="small" label={`Datasets ${huntProgress.dataset_completed}/${huntProgress.dataset_total}`} variant="outlined" />
|
||||||
|
<Chip size="small" label={`Active jobs ${huntProgress.active_jobs}`} variant="outlined" />
|
||||||
|
<Chip size="small" label={`Queued jobs ${huntProgress.queued_jobs}`} variant="outlined" />
|
||||||
|
<Chip size="small" label={`Network ${huntProgress.network_status}`} variant="outlined" />
|
||||||
|
</Stack>
|
||||||
|
</Paper>
|
||||||
|
)}
|
||||||
|
|
||||||
|
'''
|
||||||
|
if marker not in t:
|
||||||
|
raise SystemExit('marker not found')
|
||||||
|
t=t.replace(marker, add+marker)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('inserted master progress block')
|
||||||
55
_enforce_scanner_budget.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
if 'from app.config import settings' not in t:
|
||||||
|
t=t.replace('from sqlalchemy.ext.asyncio import AsyncSession\n','from sqlalchemy.ext.asyncio import AsyncSession\n\nfrom app.config import settings\n')
|
||||||
|
|
||||||
|
old=''' import asyncio
|
||||||
|
|
||||||
|
for ds_id, ds_name in ds_map.items():
|
||||||
|
last_id = 0
|
||||||
|
while True:
|
||||||
|
'''
|
||||||
|
new=''' import asyncio
|
||||||
|
|
||||||
|
max_rows = max(0, int(settings.SCANNER_MAX_ROWS_PER_SCAN))
|
||||||
|
budget_reached = False
|
||||||
|
|
||||||
|
for ds_id, ds_name in ds_map.items():
|
||||||
|
if max_rows and result.rows_scanned >= max_rows:
|
||||||
|
budget_reached = True
|
||||||
|
break
|
||||||
|
|
||||||
|
last_id = 0
|
||||||
|
while True:
|
||||||
|
if max_rows and result.rows_scanned >= max_rows:
|
||||||
|
budget_reached = True
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('scanner loop block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
old2=''' if len(rows) < BATCH_SIZE:
|
||||||
|
break
|
||||||
|
|
||||||
|
'''
|
||||||
|
new2=''' if len(rows) < BATCH_SIZE:
|
||||||
|
break
|
||||||
|
|
||||||
|
if budget_reached:
|
||||||
|
break
|
||||||
|
|
||||||
|
if budget_reached:
|
||||||
|
logger.warning(
|
||||||
|
"AUP scan row budget reached (%d rows). Returning partial results.",
|
||||||
|
result.rows_scanned,
|
||||||
|
)
|
||||||
|
|
||||||
|
'''
|
||||||
|
if old2 not in t:
|
||||||
|
raise SystemExit('scanner break block not found')
|
||||||
|
t=t.replace(old2,new2,1)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('added scanner global row budget enforcement')
|
||||||
12
_fix_aup_dep.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old=''' }, [selectedDs, selectedThemes, scanHunts, scanAnnotations, scanMessages, enqueueSnackbar]);
|
||||||
|
'''
|
||||||
|
new=''' }, [selectedHuntId, selectedDs, selectedThemes, scanHunts, scanAnnotations, scanMessages, enqueueSnackbar]);
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('runScan deps block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('fixed AUPScanner runScan dependency list')
|
||||||
7
_fix_import_datasets.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/datasets.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
if 'from app.db.models import ProcessingTask' not in t:
|
||||||
|
t=t.replace('from app.db import get_db\n', 'from app.db import get_db\nfrom app.db.models import ProcessingTask\n')
|
||||||
|
p.write_text(t, encoding='utf-8')
|
||||||
|
print('added ProcessingTask import')
|
||||||
25
_fix_keywords_empty_guard.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old=''' if not body.dataset_ids and not body.scan_hunts and not body.scan_annotations and not body.scan_messages:
|
||||||
|
raise HTTPException(400, "Select at least one dataset or enable additional sources (hunts/annotations/messages)")
|
||||||
|
|
||||||
|
'''
|
||||||
|
new=''' if not body.dataset_ids and not body.scan_hunts and not body.scan_annotations and not body.scan_messages:
|
||||||
|
return {
|
||||||
|
"total_hits": 0,
|
||||||
|
"hits": [],
|
||||||
|
"themes_scanned": 0,
|
||||||
|
"keywords_scanned": 0,
|
||||||
|
"rows_scanned": 0,
|
||||||
|
"cache_used": False,
|
||||||
|
"cache_status": "miss",
|
||||||
|
"cached_at": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('scope guard block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('adjusted empty scan guard to return fast empty result (200)')
|
||||||
47
_fix_label_selector_networkmap.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# Add label selector in toolbar before refresh button
|
||||||
|
insert_after=""" <TextField
|
||||||
|
size=\"small\"
|
||||||
|
placeholder=\"Search hosts, IPs, users\\u2026\"
|
||||||
|
value={search}
|
||||||
|
onChange={e => setSearch(e.target.value)}
|
||||||
|
sx={{ width: 220, '& .MuiInputBase-input': { py: 0.8 } }}
|
||||||
|
slotProps={{
|
||||||
|
input: {
|
||||||
|
startAdornment: <SearchIcon sx={{ mr: 0.5, fontSize: 18, color: 'text.secondary' }} />,
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
"""
|
||||||
|
label_ctrl="""
|
||||||
|
<FormControl size=\"small\" sx={{ minWidth: 150 }}>
|
||||||
|
<InputLabel id=\"label-mode-selector\">Labels</InputLabel>
|
||||||
|
<Select
|
||||||
|
labelId=\"label-mode-selector\"
|
||||||
|
value={labelMode}
|
||||||
|
label=\"Labels\"
|
||||||
|
onChange={e => setLabelMode(e.target.value as LabelMode)}
|
||||||
|
sx={{ '& .MuiSelect-select': { py: 0.8 } }}
|
||||||
|
>
|
||||||
|
<MenuItem value=\"none\">None</MenuItem>
|
||||||
|
<MenuItem value=\"highlight\">Selected/Search</MenuItem>
|
||||||
|
<MenuItem value=\"all\">All</MenuItem>
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
"""
|
||||||
|
if 'label-mode-selector' not in t:
|
||||||
|
if insert_after not in t:
|
||||||
|
raise SystemExit('search block not found for label selector insertion')
|
||||||
|
t=t.replace(insert_after, insert_after+label_ctrl)
|
||||||
|
|
||||||
|
# Fix useCallback dependency for startAnimLoop
|
||||||
|
old=' }, [canvasSize]);'
|
||||||
|
new=' }, [canvasSize, labelMode]);'
|
||||||
|
if old in t:
|
||||||
|
t=t.replace(old,new,1)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('inserted label selector UI and fixed callback dependency')
|
||||||
10
_fix_last_dep_networkmap.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
count=t.count('}, [canvasSize]);')
|
||||||
|
if count:
|
||||||
|
t=t.replace('}, [canvasSize]);','}, [canvasSize, labelMode]);')
|
||||||
|
# In case formatter created spaced variant
|
||||||
|
t=t.replace('}, [canvasSize ]);','}, [canvasSize, labelMode]);')
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('patched remaining canvasSize callback deps:', count)
|
||||||
71
_harden_aup_scope_ui.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# Auto-select first hunt with datasets after load
|
||||||
|
old=''' const [tRes, hRes] = await Promise.all([
|
||||||
|
keywords.listThemes(),
|
||||||
|
hunts.list(0, 200),
|
||||||
|
]);
|
||||||
|
setThemes(tRes.themes);
|
||||||
|
setHuntList(hRes.hunts);
|
||||||
|
'''
|
||||||
|
new=''' const [tRes, hRes] = await Promise.all([
|
||||||
|
keywords.listThemes(),
|
||||||
|
hunts.list(0, 200),
|
||||||
|
]);
|
||||||
|
setThemes(tRes.themes);
|
||||||
|
setHuntList(hRes.hunts);
|
||||||
|
if (!selectedHuntId && hRes.hunts.length > 0) {
|
||||||
|
const best = hRes.hunts.find(h => h.dataset_count > 0) || hRes.hunts[0];
|
||||||
|
setSelectedHuntId(best.id);
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('loadData block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
# Guard runScan
|
||||||
|
old2=''' const runScan = useCallback(async () => {
|
||||||
|
setScanning(true);
|
||||||
|
setScanResult(null);
|
||||||
|
try {
|
||||||
|
'''
|
||||||
|
new2=''' const runScan = useCallback(async () => {
|
||||||
|
if (!selectedHuntId) {
|
||||||
|
enqueueSnackbar('Please select a hunt before running AUP scan', { variant: 'warning' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (selectedDs.size === 0) {
|
||||||
|
enqueueSnackbar('No datasets selected for this hunt', { variant: 'warning' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setScanning(true);
|
||||||
|
setScanResult(null);
|
||||||
|
try {
|
||||||
|
'''
|
||||||
|
if old2 not in t:
|
||||||
|
raise SystemExit('runScan header not found')
|
||||||
|
t=t.replace(old2,new2)
|
||||||
|
|
||||||
|
# update loadData deps
|
||||||
|
old3=''' }, [enqueueSnackbar]);
|
||||||
|
'''
|
||||||
|
new3=''' }, [enqueueSnackbar, selectedHuntId]);
|
||||||
|
'''
|
||||||
|
if old3 not in t:
|
||||||
|
raise SystemExit('loadData deps not found')
|
||||||
|
t=t.replace(old3,new3,1)
|
||||||
|
|
||||||
|
# disable button if no hunt or no datasets
|
||||||
|
old4=''' onClick={runScan} disabled={scanning}
|
||||||
|
'''
|
||||||
|
new4=''' onClick={runScan} disabled={scanning || !selectedHuntId || selectedDs.size === 0}
|
||||||
|
'''
|
||||||
|
if old4 not in t:
|
||||||
|
raise SystemExit('scan button props not found')
|
||||||
|
t=t.replace(old4,new4)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('hardened AUPScanner to require explicit hunt/dataset scope')
|
||||||
84
_optimize_keywords_partial_cache.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old=''' if can_use_cache:
|
||||||
|
themes = await scanner._load_themes(body.theme_ids)
|
||||||
|
allowed_theme_names = {t.name for t in themes}
|
||||||
|
keywords_scanned = sum(len(theme.keywords) for theme in themes)
|
||||||
|
|
||||||
|
cached_entries: list[dict] = []
|
||||||
|
missing: list[str] = []
|
||||||
|
for dataset_id in (body.dataset_ids or []):
|
||||||
|
entry = keyword_scan_cache.get(dataset_id)
|
||||||
|
if not entry:
|
||||||
|
missing.append(dataset_id)
|
||||||
|
continue
|
||||||
|
cached_entries.append({"result": entry.result, "built_at": entry.built_at})
|
||||||
|
|
||||||
|
if not missing and cached_entries:
|
||||||
|
merged = _merge_cached_results(cached_entries, allowed_theme_names if body.theme_ids else None)
|
||||||
|
return {
|
||||||
|
"total_hits": merged["total_hits"],
|
||||||
|
"hits": merged["hits"],
|
||||||
|
"themes_scanned": len(themes),
|
||||||
|
"keywords_scanned": keywords_scanned,
|
||||||
|
"rows_scanned": merged["rows_scanned"],
|
||||||
|
"cache_used": True,
|
||||||
|
"cache_status": "hit",
|
||||||
|
"cached_at": merged["cached_at"],
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
new=''' if can_use_cache:
|
||||||
|
themes = await scanner._load_themes(body.theme_ids)
|
||||||
|
allowed_theme_names = {t.name for t in themes}
|
||||||
|
keywords_scanned = sum(len(theme.keywords) for theme in themes)
|
||||||
|
|
||||||
|
cached_entries: list[dict] = []
|
||||||
|
missing: list[str] = []
|
||||||
|
for dataset_id in (body.dataset_ids or []):
|
||||||
|
entry = keyword_scan_cache.get(dataset_id)
|
||||||
|
if not entry:
|
||||||
|
missing.append(dataset_id)
|
||||||
|
continue
|
||||||
|
cached_entries.append({"result": entry.result, "built_at": entry.built_at})
|
||||||
|
|
||||||
|
if not missing and cached_entries:
|
||||||
|
merged = _merge_cached_results(cached_entries, allowed_theme_names if body.theme_ids else None)
|
||||||
|
return {
|
||||||
|
"total_hits": merged["total_hits"],
|
||||||
|
"hits": merged["hits"],
|
||||||
|
"themes_scanned": len(themes),
|
||||||
|
"keywords_scanned": keywords_scanned,
|
||||||
|
"rows_scanned": merged["rows_scanned"],
|
||||||
|
"cache_used": True,
|
||||||
|
"cache_status": "hit",
|
||||||
|
"cached_at": merged["cached_at"],
|
||||||
|
}
|
||||||
|
|
||||||
|
if missing:
|
||||||
|
missing_entries: list[dict] = []
|
||||||
|
for dataset_id in missing:
|
||||||
|
partial = await scanner.scan(dataset_ids=[dataset_id], theme_ids=body.theme_ids)
|
||||||
|
keyword_scan_cache.put(dataset_id, partial)
|
||||||
|
missing_entries.append({"result": partial, "built_at": None})
|
||||||
|
|
||||||
|
merged = _merge_cached_results(
|
||||||
|
cached_entries + missing_entries,
|
||||||
|
allowed_theme_names if body.theme_ids else None,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"total_hits": merged["total_hits"],
|
||||||
|
"hits": merged["hits"],
|
||||||
|
"themes_scanned": len(themes),
|
||||||
|
"keywords_scanned": keywords_scanned,
|
||||||
|
"rows_scanned": merged["rows_scanned"],
|
||||||
|
"cache_used": len(cached_entries) > 0,
|
||||||
|
"cache_status": "partial" if cached_entries else "miss",
|
||||||
|
"cached_at": merged["cached_at"],
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('cache block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated keyword /scan to use partial cache + scan missing datasets only')
|
||||||
61
_optimize_scanner_keyset.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
start=t.index(' async def _scan_datasets(')
|
||||||
|
end=t.index(' async def _scan_hunts', start)
|
||||||
|
new_func=''' async def _scan_datasets(
|
||||||
|
self, patterns: dict, result: ScanResult, dataset_ids: list[str] | None
|
||||||
|
) -> None:
|
||||||
|
"""Scan dataset rows in batches using keyset pagination (no OFFSET)."""
|
||||||
|
ds_q = select(Dataset.id, Dataset.name)
|
||||||
|
if dataset_ids:
|
||||||
|
ds_q = ds_q.where(Dataset.id.in_(dataset_ids))
|
||||||
|
ds_result = await self.db.execute(ds_q)
|
||||||
|
ds_map = {r[0]: r[1] for r in ds_result.fetchall()}
|
||||||
|
|
||||||
|
if not ds_map:
|
||||||
|
return
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
for ds_id, ds_name in ds_map.items():
|
||||||
|
last_id = 0
|
||||||
|
while True:
|
||||||
|
rows_result = await self.db.execute(
|
||||||
|
select(DatasetRow)
|
||||||
|
.where(DatasetRow.dataset_id == ds_id)
|
||||||
|
.where(DatasetRow.id > last_id)
|
||||||
|
.order_by(DatasetRow.id)
|
||||||
|
.limit(BATCH_SIZE)
|
||||||
|
)
|
||||||
|
rows = rows_result.scalars().all()
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
result.rows_scanned += 1
|
||||||
|
data = row.data or {}
|
||||||
|
for col_name, cell_value in data.items():
|
||||||
|
if cell_value is None:
|
||||||
|
continue
|
||||||
|
text = str(cell_value)
|
||||||
|
self._match_text(
|
||||||
|
text,
|
||||||
|
patterns,
|
||||||
|
"dataset_row",
|
||||||
|
row.id,
|
||||||
|
col_name,
|
||||||
|
result.hits,
|
||||||
|
row_index=row.row_index,
|
||||||
|
dataset_name=ds_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
last_id = rows[-1].id
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
if len(rows) < BATCH_SIZE:
|
||||||
|
break
|
||||||
|
|
||||||
|
'''
|
||||||
|
out=t[:start]+new_func+t[end:]
|
||||||
|
p.write_text(out,encoding='utf-8')
|
||||||
|
print('optimized scanner _scan_datasets to keyset pagination')
|
||||||
36
_patch_inventory_stats.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old=''' return {
|
||||||
|
"hosts": host_list,
|
||||||
|
"connections": conn_list,
|
||||||
|
"stats": {
|
||||||
|
"total_hosts": len(host_list),
|
||||||
|
"total_datasets_scanned": len(all_datasets),
|
||||||
|
"datasets_with_hosts": ds_with_hosts,
|
||||||
|
"total_rows_scanned": total_rows,
|
||||||
|
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
|
||||||
|
"hosts_with_users": sum(1 for h in host_list if h['users']),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
new=''' return {
|
||||||
|
"hosts": host_list,
|
||||||
|
"connections": conn_list,
|
||||||
|
"stats": {
|
||||||
|
"total_hosts": len(host_list),
|
||||||
|
"total_datasets_scanned": len(all_datasets),
|
||||||
|
"datasets_with_hosts": ds_with_hosts,
|
||||||
|
"total_rows_scanned": total_rows,
|
||||||
|
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
|
||||||
|
"hosts_with_users": sum(1 for h in host_list if h['users']),
|
||||||
|
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
|
||||||
|
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('return block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('patched inventory stats metadata')
|
||||||
10
_patch_inventory_stats2.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
needle=' "hosts_with_users": sum(1 for h in host_list if h[\'users\']),\n'
|
||||||
|
if '"row_budget_per_dataset"' not in t:
|
||||||
|
if needle not in t:
|
||||||
|
raise SystemExit('needle not found')
|
||||||
|
t=t.replace(needle, needle + ' "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,\n "sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0,\n')
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('inserted inventory budget stats lines')
|
||||||
14
_patch_network_sleep.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
p = Path(r"d:\Projects\Dev\ThreatHunt\frontend\src\components\NetworkMap.tsx")
|
||||||
|
text = p.read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
anchor = " useEffect(() => { canvasSizeRef.current = canvasSize; }, [canvasSize]);\n"
|
||||||
|
insert = anchor + "\n const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));\n"
|
||||||
|
if "const sleep = (ms: number)" not in text and anchor in text:
|
||||||
|
text = text.replace(anchor, insert)
|
||||||
|
|
||||||
|
text = text.replace("await new Promise(r => setTimeout(r, delayMs + jitter));", "await sleep(delayMs + jitter);")
|
||||||
|
|
||||||
|
p.write_text(text, encoding="utf-8")
|
||||||
|
print("Patched sleep helper + polling awaits")
|
||||||
37
_patch_network_wait.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
|
||||||
|
p = Path(r"d:\Projects\Dev\ThreatHunt\frontend\src\components\NetworkMap.tsx")
|
||||||
|
text = p.read_text(encoding="utf-8")
|
||||||
|
pattern = re.compile(r"const waitUntilReady = async \(\): Promise<boolean> => \{[\s\S]*?\n\s*\};", re.M)
|
||||||
|
replacement = '''const waitUntilReady = async (): Promise<boolean> => {
|
||||||
|
// Poll inventory-status with exponential backoff until 'ready' (or cancelled)
|
||||||
|
setProgress('Host inventory is being prepared in the background');
|
||||||
|
setLoading(true);
|
||||||
|
let delayMs = 1500;
|
||||||
|
const startedAt = Date.now();
|
||||||
|
for (;;) {
|
||||||
|
const jitter = Math.floor(Math.random() * 250);
|
||||||
|
await new Promise(r => setTimeout(r, delayMs + jitter));
|
||||||
|
if (cancelled) return false;
|
||||||
|
try {
|
||||||
|
const st = await network.inventoryStatus(selectedHuntId);
|
||||||
|
if (cancelled) return false;
|
||||||
|
if (st.status === 'ready') return true;
|
||||||
|
if (Date.now() - startedAt > 5 * 60 * 1000) {
|
||||||
|
setError('Host inventory build timed out. Please retry.');
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
delayMs = Math.min(10000, Math.floor(delayMs * 1.5));
|
||||||
|
// still building or none (job may not have started yet) - keep polling
|
||||||
|
} catch {
|
||||||
|
if (cancelled) return false;
|
||||||
|
delayMs = Math.min(10000, Math.floor(delayMs * 1.5));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};'''
|
||||||
|
new_text, n = pattern.subn(replacement, text, count=1)
|
||||||
|
if n != 1:
|
||||||
|
raise SystemExit(f"Failed to patch waitUntilReady, matches={n}")
|
||||||
|
p.write_text(new_text, encoding="utf-8")
|
||||||
|
print("Patched waitUntilReady")
|
||||||
26
_perf_edit_config_inventory.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
old=''' NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
|
||||||
|
default=25000,
|
||||||
|
description="Row budget per dataset when building host inventory (0 = unlimited)",
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
new=''' NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
|
||||||
|
default=5000,
|
||||||
|
description="Row budget per dataset when building host inventory (0 = unlimited)",
|
||||||
|
)
|
||||||
|
NETWORK_INVENTORY_MAX_TOTAL_ROWS: int = Field(
|
||||||
|
default=120000,
|
||||||
|
description="Global row budget across all datasets for host inventory build (0 = unlimited)",
|
||||||
|
)
|
||||||
|
NETWORK_INVENTORY_MAX_CONNECTIONS: int = Field(
|
||||||
|
default=120000,
|
||||||
|
description="Max unique connection tuples retained during host inventory build",
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('network inventory block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated network inventory budgets in config')
|
||||||
164
_perf_edit_host_inventory_budgets.py
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# insert budget vars near existing counters
|
||||||
|
old=''' connections: dict[tuple, int] = defaultdict(int)
|
||||||
|
total_rows = 0
|
||||||
|
ds_with_hosts = 0
|
||||||
|
'''
|
||||||
|
new=''' connections: dict[tuple, int] = defaultdict(int)
|
||||||
|
total_rows = 0
|
||||||
|
ds_with_hosts = 0
|
||||||
|
sampled_dataset_count = 0
|
||||||
|
total_row_budget = max(0, int(settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS))
|
||||||
|
max_connections = max(0, int(settings.NETWORK_INVENTORY_MAX_CONNECTIONS))
|
||||||
|
global_budget_reached = False
|
||||||
|
dropped_connections = 0
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('counter block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
# update batch size and sampled count increments + global budget checks
|
||||||
|
old2=''' batch_size = 10000
|
||||||
|
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
|
||||||
|
rows_scanned_this_dataset = 0
|
||||||
|
sampled_dataset = False
|
||||||
|
last_row_index = -1
|
||||||
|
while True:
|
||||||
|
'''
|
||||||
|
new2=''' batch_size = 5000
|
||||||
|
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
|
||||||
|
rows_scanned_this_dataset = 0
|
||||||
|
sampled_dataset = False
|
||||||
|
last_row_index = -1
|
||||||
|
while True:
|
||||||
|
if total_row_budget and total_rows >= total_row_budget:
|
||||||
|
global_budget_reached = True
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
if old2 not in t:
|
||||||
|
raise SystemExit('batch block not found')
|
||||||
|
t=t.replace(old2,new2)
|
||||||
|
|
||||||
|
old3=''' if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
|
||||||
|
sampled_dataset = True
|
||||||
|
break
|
||||||
|
|
||||||
|
data = ro.data or {}
|
||||||
|
total_rows += 1
|
||||||
|
rows_scanned_this_dataset += 1
|
||||||
|
'''
|
||||||
|
new3=''' if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
|
||||||
|
sampled_dataset = True
|
||||||
|
break
|
||||||
|
if total_row_budget and total_rows >= total_row_budget:
|
||||||
|
sampled_dataset = True
|
||||||
|
global_budget_reached = True
|
||||||
|
break
|
||||||
|
|
||||||
|
data = ro.data or {}
|
||||||
|
total_rows += 1
|
||||||
|
rows_scanned_this_dataset += 1
|
||||||
|
'''
|
||||||
|
if old3 not in t:
|
||||||
|
raise SystemExit('row scan block not found')
|
||||||
|
t=t.replace(old3,new3)
|
||||||
|
|
||||||
|
# cap connection map growth
|
||||||
|
old4=''' for c in cols['remote_ip']:
|
||||||
|
rip = _clean(data.get(c))
|
||||||
|
if _is_valid_ip(rip):
|
||||||
|
rport = ''
|
||||||
|
for pc in cols['remote_port']:
|
||||||
|
rport = _clean(data.get(pc))
|
||||||
|
if rport:
|
||||||
|
break
|
||||||
|
connections[(host_key, rip, rport)] += 1
|
||||||
|
'''
|
||||||
|
new4=''' for c in cols['remote_ip']:
|
||||||
|
rip = _clean(data.get(c))
|
||||||
|
if _is_valid_ip(rip):
|
||||||
|
rport = ''
|
||||||
|
for pc in cols['remote_port']:
|
||||||
|
rport = _clean(data.get(pc))
|
||||||
|
if rport:
|
||||||
|
break
|
||||||
|
conn_key = (host_key, rip, rport)
|
||||||
|
if max_connections and len(connections) >= max_connections and conn_key not in connections:
|
||||||
|
dropped_connections += 1
|
||||||
|
continue
|
||||||
|
connections[conn_key] += 1
|
||||||
|
'''
|
||||||
|
if old4 not in t:
|
||||||
|
raise SystemExit('connection block not found')
|
||||||
|
t=t.replace(old4,new4)
|
||||||
|
|
||||||
|
# sampled_dataset counter
|
||||||
|
old5=''' if sampled_dataset:
|
||||||
|
logger.info(
|
||||||
|
"Host inventory row budget reached for dataset %s (%d rows)",
|
||||||
|
ds.id,
|
||||||
|
rows_scanned_this_dataset,
|
||||||
|
)
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
new5=''' if sampled_dataset:
|
||||||
|
sampled_dataset_count += 1
|
||||||
|
logger.info(
|
||||||
|
"Host inventory row budget reached for dataset %s (%d rows)",
|
||||||
|
ds.id,
|
||||||
|
rows_scanned_this_dataset,
|
||||||
|
)
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
if old5 not in t:
|
||||||
|
raise SystemExit('sampled block not found')
|
||||||
|
t=t.replace(old5,new5)
|
||||||
|
|
||||||
|
# break dataset loop if global budget reached
|
||||||
|
old6=''' if len(rows) < batch_size:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Post-process hosts
|
||||||
|
'''
|
||||||
|
new6=''' if len(rows) < batch_size:
|
||||||
|
break
|
||||||
|
|
||||||
|
if global_budget_reached:
|
||||||
|
logger.info(
|
||||||
|
"Host inventory global row budget reached for hunt %s at %d rows",
|
||||||
|
hunt_id,
|
||||||
|
total_rows,
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
# Post-process hosts
|
||||||
|
'''
|
||||||
|
if old6 not in t:
|
||||||
|
raise SystemExit('post-process boundary block not found')
|
||||||
|
t=t.replace(old6,new6)
|
||||||
|
|
||||||
|
# add stats
|
||||||
|
old7=''' "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
|
||||||
|
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
new7=''' "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
|
||||||
|
"row_budget_total": settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS,
|
||||||
|
"connection_budget": settings.NETWORK_INVENTORY_MAX_CONNECTIONS,
|
||||||
|
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0 or settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS > 0,
|
||||||
|
"sampled_datasets": sampled_dataset_count,
|
||||||
|
"global_budget_reached": global_budget_reached,
|
||||||
|
"dropped_connections": dropped_connections,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old7 not in t:
|
||||||
|
raise SystemExit('stats block not found')
|
||||||
|
t=t.replace(old7,new7)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('updated host inventory with global row and connection budgets')
|
||||||
39
_perf_edit_networkmap_render.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
repls={
|
||||||
|
"const LARGE_HUNT_SUBGRAPH_HOSTS = 350;":"const LARGE_HUNT_SUBGRAPH_HOSTS = 220;",
|
||||||
|
"const LARGE_HUNT_SUBGRAPH_EDGES = 2500;":"const LARGE_HUNT_SUBGRAPH_EDGES = 1200;",
|
||||||
|
"const RENDER_SIMPLIFY_NODE_THRESHOLD = 220;":"const RENDER_SIMPLIFY_NODE_THRESHOLD = 120;",
|
||||||
|
"const RENDER_SIMPLIFY_EDGE_THRESHOLD = 1200;":"const RENDER_SIMPLIFY_EDGE_THRESHOLD = 500;",
|
||||||
|
"const EDGE_DRAW_TARGET = 1000;":"const EDGE_DRAW_TARGET = 600;"
|
||||||
|
}
|
||||||
|
for a,b in repls.items():
|
||||||
|
if a not in t:
|
||||||
|
raise SystemExit(f'missing constant: {a}')
|
||||||
|
t=t.replace(a,b)
|
||||||
|
|
||||||
|
old=''' // Then label hit (so clicking text works too)
|
||||||
|
for (const n of graph.nodes) {
|
||||||
|
if (isPointOnNodeLabel(n, wx, wy, vp)) return n;
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
new=''' // Then label hit (so clicking text works too on manageable graph sizes)
|
||||||
|
if (graph.nodes.length <= 220) {
|
||||||
|
for (const n of graph.nodes) {
|
||||||
|
if (isPointOnNodeLabel(n, wx, wy, vp)) return n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('label hit block not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
old2='simulate(g, w / 2, h / 2, 60);'
|
||||||
|
if t.count(old2) < 2:
|
||||||
|
raise SystemExit('expected two simulate calls')
|
||||||
|
t=t.replace(old2,'simulate(g, w / 2, h / 2, 20);',1)
|
||||||
|
t=t.replace(old2,'simulate(g, w / 2, h / 2, 30);',1)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('tightened network map rendering + load limits')
|
||||||
107
_perf_patch_backend.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
# config updates
|
||||||
|
cfg=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
|
||||||
|
t=cfg.read_text(encoding='utf-8')
|
||||||
|
anchor=''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
|
||||||
|
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
ins=''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
|
||||||
|
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
|
||||||
|
)
|
||||||
|
NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
|
||||||
|
default=200000,
|
||||||
|
description="Row budget per dataset when building host inventory (0 = unlimited)",
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
if 'NETWORK_INVENTORY_MAX_ROWS_PER_DATASET' not in t:
|
||||||
|
if anchor not in t:
|
||||||
|
raise SystemExit('config network anchor not found')
|
||||||
|
t=t.replace(anchor,ins)
|
||||||
|
cfg.write_text(t,encoding='utf-8')
|
||||||
|
|
||||||
|
# host inventory updates
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
if 'from app.config import settings' not in t:
|
||||||
|
t=t.replace('from app.db.models import Dataset, DatasetRow\n', 'from app.db.models import Dataset, DatasetRow\nfrom app.config import settings\n')
|
||||||
|
|
||||||
|
t=t.replace(' batch_size = 5000\n last_row_index = -1\n while True:\n', ' batch_size = 10000\n max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))\n rows_scanned_this_dataset = 0\n sampled_dataset = False\n last_row_index = -1\n while True:\n')
|
||||||
|
|
||||||
|
old=''' for ro in rows:
|
||||||
|
data = ro.data or {}
|
||||||
|
total_rows += 1
|
||||||
|
|
||||||
|
fqdn = ''
|
||||||
|
'''
|
||||||
|
new=''' for ro in rows:
|
||||||
|
if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
|
||||||
|
sampled_dataset = True
|
||||||
|
break
|
||||||
|
|
||||||
|
data = ro.data or {}
|
||||||
|
total_rows += 1
|
||||||
|
rows_scanned_this_dataset += 1
|
||||||
|
|
||||||
|
fqdn = ''
|
||||||
|
'''
|
||||||
|
if old not in t:
|
||||||
|
raise SystemExit('row loop anchor not found')
|
||||||
|
t=t.replace(old,new)
|
||||||
|
|
||||||
|
old2=''' last_row_index = rows[-1].row_index
|
||||||
|
if len(rows) < batch_size:
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
new2=''' if sampled_dataset:
|
||||||
|
logger.info(
|
||||||
|
"Host inventory row budget reached for dataset %s (%d rows)",
|
||||||
|
ds.id,
|
||||||
|
rows_scanned_this_dataset,
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
last_row_index = rows[-1].row_index
|
||||||
|
if len(rows) < batch_size:
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
if old2 not in t:
|
||||||
|
raise SystemExit('batch loop end anchor not found')
|
||||||
|
t=t.replace(old2,new2)
|
||||||
|
|
||||||
|
old3=''' return {
|
||||||
|
"hosts": host_list,
|
||||||
|
"connections": conn_list,
|
||||||
|
"stats": {
|
||||||
|
"total_hosts": len(host_list),
|
||||||
|
"total_datasets_scanned": len(all_datasets),
|
||||||
|
"datasets_with_hosts": ds_with_hosts,
|
||||||
|
"total_rows_scanned": total_rows,
|
||||||
|
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
|
||||||
|
"hosts_with_users": sum(1 for h in host_list if h['users']),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
new3=''' sampled = settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
"hosts": host_list,
|
||||||
|
"connections": conn_list,
|
||||||
|
"stats": {
|
||||||
|
"total_hosts": len(host_list),
|
||||||
|
"total_datasets_scanned": len(all_datasets),
|
||||||
|
"datasets_with_hosts": ds_with_hosts,
|
||||||
|
"total_rows_scanned": total_rows,
|
||||||
|
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
|
||||||
|
"hosts_with_users": sum(1 for h in host_list if h['users']),
|
||||||
|
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
|
||||||
|
"sampled_mode": sampled,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
if old3 not in t:
|
||||||
|
raise SystemExit('return stats anchor not found')
|
||||||
|
t=t.replace(old3,new3)
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('patched config + host inventory row budget')
|
||||||
38
_perf_patch_backend2.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
cfg=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
|
||||||
|
t=cfg.read_text(encoding='utf-8')
|
||||||
|
if 'NETWORK_INVENTORY_MAX_ROWS_PER_DATASET' not in t:
|
||||||
|
t=t.replace(
|
||||||
|
''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
|
||||||
|
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
|
||||||
|
)
|
||||||
|
''',
|
||||||
|
''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
|
||||||
|
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
|
||||||
|
)
|
||||||
|
NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
|
||||||
|
default=200000,
|
||||||
|
description="Row budget per dataset when building host inventory (0 = unlimited)",
|
||||||
|
)
|
||||||
|
''')
|
||||||
|
cfg.write_text(t,encoding='utf-8')
|
||||||
|
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
if 'from app.config import settings' not in t:
|
||||||
|
t=t.replace('from app.db.models import Dataset, DatasetRow\n','from app.db.models import Dataset, DatasetRow\nfrom app.config import settings\n')
|
||||||
|
|
||||||
|
t=t.replace(' batch_size = 5000\n last_row_index = -1\n while True:\n',
|
||||||
|
' batch_size = 10000\n max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))\n rows_scanned_this_dataset = 0\n sampled_dataset = False\n last_row_index = -1\n while True:\n')
|
||||||
|
|
||||||
|
t=t.replace(' for ro in rows:\n data = ro.data or {}\n total_rows += 1\n\n',
|
||||||
|
' for ro in rows:\n if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:\n sampled_dataset = True\n break\n\n data = ro.data or {}\n total_rows += 1\n rows_scanned_this_dataset += 1\n\n')
|
||||||
|
|
||||||
|
t=t.replace(' last_row_index = rows[-1].row_index\n if len(rows) < batch_size:\n break\n',
|
||||||
|
' if sampled_dataset:\n logger.info(\n "Host inventory row budget reached for dataset %s (%d rows)",\n ds.id,\n rows_scanned_this_dataset,\n )\n break\n\n last_row_index = rows[-1].row_index\n if len(rows) < batch_size:\n break\n')
|
||||||
|
|
||||||
|
t=t.replace(' return {\n "hosts": host_list,\n "connections": conn_list,\n "stats": {\n "total_hosts": len(host_list),\n "total_datasets_scanned": len(all_datasets),\n "datasets_with_hosts": ds_with_hosts,\n "total_rows_scanned": total_rows,\n "hosts_with_ips": sum(1 for h in host_list if h[\'ips\']),\n "hosts_with_users": sum(1 for h in host_list if h[\'users\']),\n },\n }\n',
|
||||||
|
' sampled = settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0\n\n return {\n "hosts": host_list,\n "connections": conn_list,\n "stats": {\n "total_hosts": len(host_list),\n "total_datasets_scanned": len(all_datasets),\n "datasets_with_hosts": ds_with_hosts,\n "total_rows_scanned": total_rows,\n "hosts_with_ips": sum(1 for h in host_list if h[\'ips\']),\n "hosts_with_users": sum(1 for h in host_list if h[\'users\']),\n "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,\n "sampled_mode": sampled,\n },\n }\n')
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('patched backend inventory performance settings')
|
||||||
220
_perf_patch_networkmap.py
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# constants
|
||||||
|
if 'RENDER_SIMPLIFY_NODE_THRESHOLD' not in t:
|
||||||
|
t=t.replace(
|
||||||
|
"const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\n",
|
||||||
|
"const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\nconst RENDER_SIMPLIFY_NODE_THRESHOLD = 220;\nconst RENDER_SIMPLIFY_EDGE_THRESHOLD = 1200;\nconst EDGE_DRAW_TARGET = 1000;\n")
|
||||||
|
|
||||||
|
# drawBackground signature
|
||||||
|
t_old='''function drawBackground(
|
||||||
|
ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,
|
||||||
|
) {
|
||||||
|
'''
|
||||||
|
if t_old in t:
|
||||||
|
t=t.replace(t_old,
|
||||||
|
'''function drawBackground(
|
||||||
|
ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,
|
||||||
|
simplify: boolean,
|
||||||
|
) {
|
||||||
|
''')
|
||||||
|
|
||||||
|
# skip grid when simplify
|
||||||
|
if 'if (!simplify) {' not in t:
|
||||||
|
t=t.replace(
|
||||||
|
''' ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
const startX = -vp.x / vp.scale - GRID_SPACING;
|
||||||
|
const startY = -vp.y / vp.scale - GRID_SPACING;
|
||||||
|
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
ctx.fillStyle = GRID_DOT_COLOR;
|
||||||
|
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
|
||||||
|
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
|
||||||
|
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ctx.restore();
|
||||||
|
''',
|
||||||
|
''' if (!simplify) {
|
||||||
|
ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
const startX = -vp.x / vp.scale - GRID_SPACING;
|
||||||
|
const startY = -vp.y / vp.scale - GRID_SPACING;
|
||||||
|
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
ctx.fillStyle = GRID_DOT_COLOR;
|
||||||
|
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
|
||||||
|
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
|
||||||
|
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ctx.restore();
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
|
||||||
|
# drawEdges signature
|
||||||
|
t=t.replace('''function drawEdges(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
nodeMap: Map<string, GNode>, animTime: number,
|
||||||
|
) {
|
||||||
|
for (const e of graph.edges) {
|
||||||
|
''',
|
||||||
|
'''function drawEdges(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
nodeMap: Map<string, GNode>, animTime: number,
|
||||||
|
simplify: boolean,
|
||||||
|
) {
|
||||||
|
const edgeStep = simplify ? Math.max(1, Math.ceil(graph.edges.length / EDGE_DRAW_TARGET)) : 1;
|
||||||
|
for (let ei = 0; ei < graph.edges.length; ei += edgeStep) {
|
||||||
|
const e = graph.edges[ei];
|
||||||
|
''')
|
||||||
|
|
||||||
|
# simplify edge path
|
||||||
|
t=t.replace('ctx.beginPath(); ctx.moveTo(a.x, a.y); ctx.quadraticCurveTo(cpx, cpy, b.x, b.y);',
|
||||||
|
'ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }')
|
||||||
|
|
||||||
|
t=t.replace('ctx.beginPath(); ctx.moveTo(a.x, a.y); ctx.quadraticCurveTo(cpx, cpy, b.x, b.y);',
|
||||||
|
'ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }')
|
||||||
|
|
||||||
|
# reduce glow when simplify
|
||||||
|
t=t.replace(''' ctx.save();
|
||||||
|
ctx.shadowColor = 'rgba(96,165,250,0.5)'; ctx.shadowBlur = 8;
|
||||||
|
ctx.strokeStyle = 'rgba(96,165,250,0.3)';
|
||||||
|
ctx.lineWidth = Math.min(5, 2 + e.weight * 0.2);
|
||||||
|
ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }
|
||||||
|
ctx.stroke(); ctx.restore();
|
||||||
|
''',
|
||||||
|
''' if (!simplify) {
|
||||||
|
ctx.save();
|
||||||
|
ctx.shadowColor = 'rgba(96,165,250,0.5)'; ctx.shadowBlur = 8;
|
||||||
|
ctx.strokeStyle = 'rgba(96,165,250,0.3)';
|
||||||
|
ctx.lineWidth = Math.min(5, 2 + e.weight * 0.2);
|
||||||
|
ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }
|
||||||
|
ctx.stroke(); ctx.restore();
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
|
||||||
|
# drawLabels signature and early return
|
||||||
|
t=t.replace('''function drawLabels(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
search: string, matchSet: Set<string>, vp: Viewport,
|
||||||
|
) {
|
||||||
|
''',
|
||||||
|
'''function drawLabels(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
search: string, matchSet: Set<string>, vp: Viewport,
|
||||||
|
simplify: boolean,
|
||||||
|
) {
|
||||||
|
''')
|
||||||
|
|
||||||
|
if 'if (simplify && !search && !hovered && !selected) {' not in t:
|
||||||
|
t=t.replace(' const dimmed = search.length > 0;\n',
|
||||||
|
' const dimmed = search.length > 0;\n if (simplify && !search && !hovered && !selected) {\n return;\n }\n')
|
||||||
|
|
||||||
|
# drawGraph adapt
|
||||||
|
t=t.replace(''' drawBackground(ctx, w, h, vp, dpr);
|
||||||
|
ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime);
|
||||||
|
drawNodes(ctx, graph, hovered, selected, search, matchSet);
|
||||||
|
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp);
|
||||||
|
ctx.restore();
|
||||||
|
''',
|
||||||
|
''' const simplify = graph.nodes.length > RENDER_SIMPLIFY_NODE_THRESHOLD || graph.edges.length > RENDER_SIMPLIFY_EDGE_THRESHOLD;
|
||||||
|
drawBackground(ctx, w, h, vp, dpr, simplify);
|
||||||
|
ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime, simplify);
|
||||||
|
drawNodes(ctx, graph, hovered, selected, search, matchSet);
|
||||||
|
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify);
|
||||||
|
ctx.restore();
|
||||||
|
''')
|
||||||
|
|
||||||
|
# hover RAF ref
|
||||||
|
if 'const hoverRafRef = useRef<number>(0);' not in t:
|
||||||
|
t=t.replace(' const graphRef = useRef<Graph | null>(null);\n', ' const graphRef = useRef<Graph | null>(null);\n const hoverRafRef = useRef<number>(0);\n')
|
||||||
|
|
||||||
|
# throttle hover hit test on mousemove
|
||||||
|
old_mm=''' const node = hitTest(graph, canvasRef.current, e.clientX, e.clientY, vpRef.current);
|
||||||
|
setHovered(node?.id ?? null);
|
||||||
|
}, [graph, redraw, startAnimLoop]);
|
||||||
|
'''
|
||||||
|
new_mm=''' cancelAnimationFrame(hoverRafRef.current);
|
||||||
|
const clientX = e.clientX;
|
||||||
|
const clientY = e.clientY;
|
||||||
|
hoverRafRef.current = requestAnimationFrame(() => {
|
||||||
|
const node = hitTest(graph, canvasRef.current as HTMLCanvasElement, clientX, clientY, vpRef.current);
|
||||||
|
setHovered(prev => (prev === (node?.id ?? null) ? prev : (node?.id ?? null)));
|
||||||
|
});
|
||||||
|
}, [graph, redraw, startAnimLoop]);
|
||||||
|
'''
|
||||||
|
if old_mm in t:
|
||||||
|
t=t.replace(old_mm,new_mm)
|
||||||
|
|
||||||
|
# cleanup hover raf on unmount in existing animation cleanup effect
|
||||||
|
if 'cancelAnimationFrame(hoverRafRef.current);' not in t:
|
||||||
|
t=t.replace(''' useEffect(() => {
|
||||||
|
if (graph) startAnimLoop();
|
||||||
|
return () => { cancelAnimationFrame(animFrameRef.current); isAnimatingRef.current = false; };
|
||||||
|
}, [graph, startAnimLoop]);
|
||||||
|
''',
|
||||||
|
''' useEffect(() => {
|
||||||
|
if (graph) startAnimLoop();
|
||||||
|
return () => {
|
||||||
|
cancelAnimationFrame(animFrameRef.current);
|
||||||
|
cancelAnimationFrame(hoverRafRef.current);
|
||||||
|
isAnimatingRef.current = false;
|
||||||
|
};
|
||||||
|
}, [graph, startAnimLoop]);
|
||||||
|
''')
|
||||||
|
|
||||||
|
# connectedNodes optimization map
|
||||||
|
if 'const nodeById = useMemo(() => {' not in t:
|
||||||
|
t=t.replace(''' const connectionCount = selectedNode && graph
|
||||||
|
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
const connectedNodes = useMemo(() => {
|
||||||
|
''',
|
||||||
|
''' const connectionCount = selectedNode && graph
|
||||||
|
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
const nodeById = useMemo(() => {
|
||||||
|
const m = new Map<string, GNode>();
|
||||||
|
if (!graph) return m;
|
||||||
|
for (const n of graph.nodes) m.set(n.id, n);
|
||||||
|
return m;
|
||||||
|
}, [graph]);
|
||||||
|
|
||||||
|
const connectedNodes = useMemo(() => {
|
||||||
|
''')
|
||||||
|
|
||||||
|
t=t.replace(''' const n = graph.nodes.find(x => x.id === e.target);
|
||||||
|
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
|
||||||
|
} else if (e.target === selectedNode.id) {
|
||||||
|
const n = graph.nodes.find(x => x.id === e.source);
|
||||||
|
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
|
||||||
|
''',
|
||||||
|
''' const n = nodeById.get(e.target);
|
||||||
|
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
|
||||||
|
} else if (e.target === selectedNode.id) {
|
||||||
|
const n = nodeById.get(e.source);
|
||||||
|
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
|
||||||
|
''')
|
||||||
|
|
||||||
|
t=t.replace(' }, [selectedNode, graph]);\n', ' }, [selectedNode, graph, nodeById]);\n')
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('patched NetworkMap adaptive render + hover throttle')
|
||||||
153
_perf_patch_networkmap2.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
if 'RENDER_SIMPLIFY_NODE_THRESHOLD' not in t:
|
||||||
|
t=t.replace('const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\n', 'const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\nconst RENDER_SIMPLIFY_NODE_THRESHOLD = 220;\nconst RENDER_SIMPLIFY_EDGE_THRESHOLD = 1200;\nconst EDGE_DRAW_TARGET = 1000;\n')
|
||||||
|
|
||||||
|
t=t.replace('function drawBackground(\n ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,\n) {', 'function drawBackground(\n ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,\n simplify: boolean,\n) {')
|
||||||
|
|
||||||
|
t=t.replace(''' ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
const startX = -vp.x / vp.scale - GRID_SPACING;
|
||||||
|
const startY = -vp.y / vp.scale - GRID_SPACING;
|
||||||
|
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
ctx.fillStyle = GRID_DOT_COLOR;
|
||||||
|
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
|
||||||
|
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
|
||||||
|
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ctx.restore();
|
||||||
|
''',''' if (!simplify) {
|
||||||
|
ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
const startX = -vp.x / vp.scale - GRID_SPACING;
|
||||||
|
const startY = -vp.y / vp.scale - GRID_SPACING;
|
||||||
|
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
|
||||||
|
ctx.fillStyle = GRID_DOT_COLOR;
|
||||||
|
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
|
||||||
|
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
|
||||||
|
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ctx.restore();
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
|
||||||
|
t=t.replace('''function drawEdges(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
nodeMap: Map<string, GNode>, animTime: number,
|
||||||
|
) {
|
||||||
|
for (const e of graph.edges) {
|
||||||
|
''','''function drawEdges(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
nodeMap: Map<string, GNode>, animTime: number,
|
||||||
|
simplify: boolean,
|
||||||
|
) {
|
||||||
|
const edgeStep = simplify ? Math.max(1, Math.ceil(graph.edges.length / EDGE_DRAW_TARGET)) : 1;
|
||||||
|
for (let ei = 0; ei < graph.edges.length; ei += edgeStep) {
|
||||||
|
const e = graph.edges[ei];
|
||||||
|
''')
|
||||||
|
|
||||||
|
t=t.replace('ctx.beginPath(); ctx.moveTo(a.x, a.y); ctx.quadraticCurveTo(cpx, cpy, b.x, b.y);', 'ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }')
|
||||||
|
|
||||||
|
t=t.replace('''function drawLabels(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
search: string, matchSet: Set<string>, vp: Viewport,
|
||||||
|
) {
|
||||||
|
const dimmed = search.length > 0;
|
||||||
|
''','''function drawLabels(
|
||||||
|
ctx: CanvasRenderingContext2D, graph: Graph,
|
||||||
|
hovered: string | null, selected: string | null,
|
||||||
|
search: string, matchSet: Set<string>, vp: Viewport,
|
||||||
|
simplify: boolean,
|
||||||
|
) {
|
||||||
|
const dimmed = search.length > 0;
|
||||||
|
if (simplify && !search && !hovered && !selected) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
|
||||||
|
t=t.replace(''' drawBackground(ctx, w, h, vp, dpr);
|
||||||
|
ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime);
|
||||||
|
drawNodes(ctx, graph, hovered, selected, search, matchSet);
|
||||||
|
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp);
|
||||||
|
ctx.restore();
|
||||||
|
''',''' const simplify = graph.nodes.length > RENDER_SIMPLIFY_NODE_THRESHOLD || graph.edges.length > RENDER_SIMPLIFY_EDGE_THRESHOLD;
|
||||||
|
drawBackground(ctx, w, h, vp, dpr, simplify);
|
||||||
|
ctx.save();
|
||||||
|
ctx.translate(vp.x * dpr, vp.y * dpr);
|
||||||
|
ctx.scale(vp.scale * dpr, vp.scale * dpr);
|
||||||
|
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime, simplify);
|
||||||
|
drawNodes(ctx, graph, hovered, selected, search, matchSet);
|
||||||
|
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify);
|
||||||
|
ctx.restore();
|
||||||
|
''')
|
||||||
|
|
||||||
|
if 'const hoverRafRef = useRef<number>(0);' not in t:
|
||||||
|
t=t.replace('const graphRef = useRef<Graph | null>(null);\n', 'const graphRef = useRef<Graph | null>(null);\n const hoverRafRef = useRef<number>(0);\n')
|
||||||
|
|
||||||
|
t=t.replace(''' const node = hitTest(graph, canvasRef.current, e.clientX, e.clientY, vpRef.current);
|
||||||
|
setHovered(node?.id ?? null);
|
||||||
|
}, [graph, redraw, startAnimLoop]);
|
||||||
|
''',''' cancelAnimationFrame(hoverRafRef.current);
|
||||||
|
const clientX = e.clientX;
|
||||||
|
const clientY = e.clientY;
|
||||||
|
hoverRafRef.current = requestAnimationFrame(() => {
|
||||||
|
const node = hitTest(graph, canvasRef.current as HTMLCanvasElement, clientX, clientY, vpRef.current);
|
||||||
|
setHovered(prev => (prev === (node?.id ?? null) ? prev : (node?.id ?? null)));
|
||||||
|
});
|
||||||
|
}, [graph, redraw, startAnimLoop]);
|
||||||
|
''')
|
||||||
|
|
||||||
|
t=t.replace(''' useEffect(() => {
|
||||||
|
if (graph) startAnimLoop();
|
||||||
|
return () => { cancelAnimationFrame(animFrameRef.current); isAnimatingRef.current = false; };
|
||||||
|
}, [graph, startAnimLoop]);
|
||||||
|
''',''' useEffect(() => {
|
||||||
|
if (graph) startAnimLoop();
|
||||||
|
return () => {
|
||||||
|
cancelAnimationFrame(animFrameRef.current);
|
||||||
|
cancelAnimationFrame(hoverRafRef.current);
|
||||||
|
isAnimatingRef.current = false;
|
||||||
|
};
|
||||||
|
}, [graph, startAnimLoop]);
|
||||||
|
''')
|
||||||
|
|
||||||
|
if 'const nodeById = useMemo(() => {' not in t:
|
||||||
|
t=t.replace(''' const connectionCount = selectedNode && graph
|
||||||
|
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
const connectedNodes = useMemo(() => {
|
||||||
|
''',''' const connectionCount = selectedNode && graph
|
||||||
|
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
const nodeById = useMemo(() => {
|
||||||
|
const m = new Map<string, GNode>();
|
||||||
|
if (!graph) return m;
|
||||||
|
for (const n of graph.nodes) m.set(n.id, n);
|
||||||
|
return m;
|
||||||
|
}, [graph]);
|
||||||
|
|
||||||
|
const connectedNodes = useMemo(() => {
|
||||||
|
''')
|
||||||
|
|
||||||
|
t=t.replace('const n = graph.nodes.find(x => x.id === e.target);','const n = nodeById.get(e.target);')
|
||||||
|
t=t.replace('const n = graph.nodes.find(x => x.id === e.source);','const n = nodeById.get(e.source);')
|
||||||
|
t=t.replace(' }, [selectedNode, graph]);',' }, [selectedNode, graph, nodeById]);')
|
||||||
|
|
||||||
|
p.write_text(t,encoding='utf-8')
|
||||||
|
print('patched NetworkMap performance')
|
||||||
227
_perf_replace_build_host_inventory.py
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
|
||||||
|
t=p.read_text(encoding='utf-8')
|
||||||
|
start=t.index('async def build_host_inventory(')
|
||||||
|
# find end of function by locating '\n\n' before EOF after ' }\n'
|
||||||
|
end=t.index('\n\n', start)
|
||||||
|
# need proper end: first double newline after function may occur in docstring? compute by searching for '\n\n' after ' }\n' near end
|
||||||
|
ret_idx=t.rfind(' }')
|
||||||
|
# safer locate end as last occurrence of '\n }\n' after start, then function ends next newline
|
||||||
|
end=t.find('\n\n', ret_idx)
|
||||||
|
if end==-1:
|
||||||
|
end=len(t)
|
||||||
|
new_func='''async def build_host_inventory(hunt_id: str, db: AsyncSession) -> dict:
|
||||||
|
"""Build a deduplicated host inventory from all datasets in a hunt.
|
||||||
|
|
||||||
|
Returns dict with 'hosts', 'connections', and 'stats'.
|
||||||
|
Each host has: id, hostname, fqdn, client_id, ips, os, users, datasets, row_count.
|
||||||
|
"""
|
||||||
|
ds_result = await db.execute(
|
||||||
|
select(Dataset).where(Dataset.hunt_id == hunt_id)
|
||||||
|
)
|
||||||
|
all_datasets = ds_result.scalars().all()
|
||||||
|
|
||||||
|
if not all_datasets:
|
||||||
|
return {"hosts": [], "connections": [], "stats": {
|
||||||
|
"total_hosts": 0, "total_datasets_scanned": 0,
|
||||||
|
"total_rows_scanned": 0,
|
||||||
|
}}
|
||||||
|
|
||||||
|
hosts: dict[str, dict] = {} # fqdn -> host record
|
||||||
|
ip_to_host: dict[str, str] = {} # local-ip -> fqdn
|
||||||
|
connections: dict[tuple, int] = defaultdict(int)
|
||||||
|
total_rows = 0
|
||||||
|
ds_with_hosts = 0
|
||||||
|
sampled_dataset_count = 0
|
||||||
|
total_row_budget = max(0, int(settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS))
|
||||||
|
max_connections = max(0, int(settings.NETWORK_INVENTORY_MAX_CONNECTIONS))
|
||||||
|
global_budget_reached = False
|
||||||
|
dropped_connections = 0
|
||||||
|
|
||||||
|
for ds in all_datasets:
|
||||||
|
if total_row_budget and total_rows >= total_row_budget:
|
||||||
|
global_budget_reached = True
|
||||||
|
break
|
||||||
|
|
||||||
|
cols = _identify_columns(ds)
|
||||||
|
if not cols['fqdn'] and not cols['host_id']:
|
||||||
|
continue
|
||||||
|
ds_with_hosts += 1
|
||||||
|
|
||||||
|
batch_size = 5000
|
||||||
|
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
|
||||||
|
rows_scanned_this_dataset = 0
|
||||||
|
sampled_dataset = False
|
||||||
|
last_row_index = -1
|
||||||
|
|
||||||
|
while True:
|
||||||
|
if total_row_budget and total_rows >= total_row_budget:
|
||||||
|
sampled_dataset = True
|
||||||
|
global_budget_reached = True
|
||||||
|
break
|
||||||
|
|
||||||
|
rr = await db.execute(
|
||||||
|
select(DatasetRow)
|
||||||
|
.where(DatasetRow.dataset_id == ds.id)
|
||||||
|
.where(DatasetRow.row_index > last_row_index)
|
||||||
|
.order_by(DatasetRow.row_index)
|
||||||
|
.limit(batch_size)
|
||||||
|
)
|
||||||
|
rows = rr.scalars().all()
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
|
||||||
|
for ro in rows:
|
||||||
|
if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
|
||||||
|
sampled_dataset = True
|
||||||
|
break
|
||||||
|
if total_row_budget and total_rows >= total_row_budget:
|
||||||
|
sampled_dataset = True
|
||||||
|
global_budget_reached = True
|
||||||
|
break
|
||||||
|
|
||||||
|
data = ro.data or {}
|
||||||
|
total_rows += 1
|
||||||
|
rows_scanned_this_dataset += 1
|
||||||
|
|
||||||
|
fqdn = ''
|
||||||
|
for c in cols['fqdn']:
|
||||||
|
fqdn = _clean(data.get(c))
|
||||||
|
if fqdn:
|
||||||
|
break
|
||||||
|
client_id = ''
|
||||||
|
for c in cols['host_id']:
|
||||||
|
client_id = _clean(data.get(c))
|
||||||
|
if client_id:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not fqdn and not client_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
host_key = fqdn or client_id
|
||||||
|
|
||||||
|
if host_key not in hosts:
|
||||||
|
short = fqdn.split('.')[0] if fqdn and '.' in fqdn else fqdn
|
||||||
|
hosts[host_key] = {
|
||||||
|
'id': host_key,
|
||||||
|
'hostname': short or client_id,
|
||||||
|
'fqdn': fqdn,
|
||||||
|
'client_id': client_id,
|
||||||
|
'ips': set(),
|
||||||
|
'os': '',
|
||||||
|
'users': set(),
|
||||||
|
'datasets': set(),
|
||||||
|
'row_count': 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
h = hosts[host_key]
|
||||||
|
h['datasets'].add(ds.name)
|
||||||
|
h['row_count'] += 1
|
||||||
|
if client_id and not h['client_id']:
|
||||||
|
h['client_id'] = client_id
|
||||||
|
|
||||||
|
for c in cols['username']:
|
||||||
|
u = _extract_username(_clean(data.get(c)))
|
||||||
|
if u:
|
||||||
|
h['users'].add(u)
|
||||||
|
|
||||||
|
for c in cols['local_ip']:
|
||||||
|
ip = _clean(data.get(c))
|
||||||
|
if _is_valid_ip(ip):
|
||||||
|
h['ips'].add(ip)
|
||||||
|
ip_to_host[ip] = host_key
|
||||||
|
|
||||||
|
for c in cols['os']:
|
||||||
|
ov = _clean(data.get(c))
|
||||||
|
if ov and not h['os']:
|
||||||
|
h['os'] = ov
|
||||||
|
|
||||||
|
for c in cols['remote_ip']:
|
||||||
|
rip = _clean(data.get(c))
|
||||||
|
if _is_valid_ip(rip):
|
||||||
|
rport = ''
|
||||||
|
for pc in cols['remote_port']:
|
||||||
|
rport = _clean(data.get(pc))
|
||||||
|
if rport:
|
||||||
|
break
|
||||||
|
conn_key = (host_key, rip, rport)
|
||||||
|
if max_connections and len(connections) >= max_connections and conn_key not in connections:
|
||||||
|
dropped_connections += 1
|
||||||
|
continue
|
||||||
|
connections[conn_key] += 1
|
||||||
|
|
||||||
|
if sampled_dataset:
|
||||||
|
sampled_dataset_count += 1
|
||||||
|
logger.info(
|
||||||
|
"Host inventory sampling for dataset %s (%d rows scanned)",
|
||||||
|
ds.id,
|
||||||
|
rows_scanned_this_dataset,
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
last_row_index = rows[-1].row_index
|
||||||
|
if len(rows) < batch_size:
|
||||||
|
break
|
||||||
|
|
||||||
|
if global_budget_reached:
|
||||||
|
logger.info(
|
||||||
|
"Host inventory global row budget reached for hunt %s at %d rows",
|
||||||
|
hunt_id,
|
||||||
|
total_rows,
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
# Post-process hosts
|
||||||
|
for h in hosts.values():
|
||||||
|
if not h['os'] and h['fqdn']:
|
||||||
|
h['os'] = _infer_os(h['fqdn'])
|
||||||
|
h['ips'] = sorted(h['ips'])
|
||||||
|
h['users'] = sorted(h['users'])
|
||||||
|
h['datasets'] = sorted(h['datasets'])
|
||||||
|
|
||||||
|
# Build connections, resolving IPs to host keys
|
||||||
|
conn_list = []
|
||||||
|
seen = set()
|
||||||
|
for (src, dst_ip, dst_port), cnt in connections.items():
|
||||||
|
if dst_ip in _IGNORE_IPS:
|
||||||
|
continue
|
||||||
|
dst_host = ip_to_host.get(dst_ip, '')
|
||||||
|
if dst_host == src:
|
||||||
|
continue
|
||||||
|
key = tuple(sorted([src, dst_host or dst_ip]))
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
conn_list.append({
|
||||||
|
'source': src,
|
||||||
|
'target': dst_host or dst_ip,
|
||||||
|
'target_ip': dst_ip,
|
||||||
|
'port': dst_port,
|
||||||
|
'count': cnt,
|
||||||
|
})
|
||||||
|
|
||||||
|
host_list = sorted(hosts.values(), key=lambda x: x['row_count'], reverse=True)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"hosts": host_list,
|
||||||
|
"connections": conn_list,
|
||||||
|
"stats": {
|
||||||
|
"total_hosts": len(host_list),
|
||||||
|
"total_datasets_scanned": len(all_datasets),
|
||||||
|
"datasets_with_hosts": ds_with_hosts,
|
||||||
|
"total_rows_scanned": total_rows,
|
||||||
|
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
|
||||||
|
"hosts_with_users": sum(1 for h in host_list if h['users']),
|
||||||
|
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
|
||||||
|
"row_budget_total": settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS,
|
||||||
|
"connection_budget": settings.NETWORK_INVENTORY_MAX_CONNECTIONS,
|
||||||
|
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0 or settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS > 0,
|
||||||
|
"sampled_datasets": sampled_dataset_count,
|
||||||
|
"global_budget_reached": global_budget_reached,
|
||||||
|
"dropped_connections": dropped_connections,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
out=t[:start]+new_func+t[end:]
|
||||||
|
p.write_text(out,encoding='utf-8')
|
||||||
|
print('replaced build_host_inventory with hard-budget fast mode')
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
FLASK_ENV=development
|
|
||||||
FLASK_DEBUG=True
|
|
||||||
SECRET_KEY=development-secret-key-change-in-production
|
|
||||||
MAX_CONTENT_LENGTH=104857600
|
|
||||||
UPLOAD_FOLDER=uploaded
|
|
||||||
OUTPUT_FOLDER=output
|
|
||||||
VIRUSTOTAL_API_KEY=
|
|
||||||
DATABASE_URL=sqlite:///threat_hunter.db
|
|
||||||
REDIS_URL=redis://localhost:6379/0
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
FROM python:3.11-slim
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Install system dependencies
|
|
||||||
RUN apt-get update && apt-get install -y \
|
|
||||||
gcc \
|
|
||||||
postgresql-client \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Copy requirements first for better caching
|
|
||||||
COPY requirements.txt .
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
|
||||||
|
|
||||||
# Copy application code
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
# Create directories
|
|
||||||
RUN mkdir -p uploads output
|
|
||||||
|
|
||||||
EXPOSE 5000
|
|
||||||
|
|
||||||
CMD ["python", "app.py"]
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
FROM python:3.11-slim
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Install system dependencies
|
|
||||||
RUN apt-get update && apt-get install -y \
|
|
||||||
gcc \
|
|
||||||
postgresql-client \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Create non-root user
|
|
||||||
RUN useradd --create-home --shell /bin/bash app
|
|
||||||
|
|
||||||
COPY requirements.txt .
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
|
||||||
|
|
||||||
COPY . .
|
|
||||||
RUN chown -R app:app /app
|
|
||||||
|
|
||||||
USER app
|
|
||||||
|
|
||||||
EXPOSE 5000
|
|
||||||
|
|
||||||
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "app:app"]
|
|
||||||