3 Commits

Author SHA1 Message Date
037191f981 This is the first commit for the Claude Iteration project. 2025-06-18 02:30:36 -04:00
3c7e9b9eee CLAUDE branch
I may have screwed the pooch with this
2025-06-17 07:43:33 -04:00
b398f6624c CLAUDE branch
I may have screwed the pooch with this
2025-06-17 06:55:38 -04:00
14210 changed files with 1548679 additions and 77119 deletions

3
.env Normal file
View File

@@ -0,0 +1,3 @@
DATABASE_URL=postgresql://admin:secure_password_123@database:5432/threat_hunter
SECRET_KEY=your-very-secret-key-change-in-production
FLASK_ENV=production

View File

@@ -1,53 +0,0 @@
# ── ThreatHunt Configuration ──────────────────────────────────────────
# All backend env vars are prefixed with TH_ and match AppConfig field names.
# Copy this file to .env and adjust values.
# ── General ───────────────────────────────────────────────────────────
TH_DEBUG=false
# ── Database ──────────────────────────────────────────────────────────
# SQLite for local dev (zero-config):
TH_DATABASE_URL=sqlite+aiosqlite:///./threathunt.db
# PostgreSQL for production:
# TH_DATABASE_URL=postgresql+asyncpg://threathunt:password@localhost:5432/threathunt
# ── CORS ──────────────────────────────────────────────────────────────
TH_ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000
# ── File uploads ──────────────────────────────────────────────────────
TH_MAX_UPLOAD_SIZE_MB=500
# ── LLM Cluster (Wile & Roadrunner) ──────────────────────────────────
TH_OPENWEBUI_URL=https://ai.guapo613.beer
TH_OPENWEBUI_API_KEY=
TH_WILE_HOST=100.110.190.12
TH_WILE_OLLAMA_PORT=11434
TH_ROADRUNNER_HOST=100.110.190.11
TH_ROADRUNNER_OLLAMA_PORT=11434
# ── Default models (auto-selected by TaskRouter) ─────────────────────
TH_DEFAULT_FAST_MODEL=llama3.1:latest
TH_DEFAULT_HEAVY_MODEL=llama3.1:70b-instruct-q4_K_M
TH_DEFAULT_CODE_MODEL=qwen2.5-coder:32b
TH_DEFAULT_VISION_MODEL=llama3.2-vision:11b
TH_DEFAULT_EMBEDDING_MODEL=bge-m3:latest
# ── Agent behaviour ──────────────────────────────────────────────────
TH_AGENT_MAX_TOKENS=2048
TH_AGENT_TEMPERATURE=0.3
TH_AGENT_HISTORY_LENGTH=10
TH_FILTER_SENSITIVE_DATA=true
# ── Enrichment API keys (optional) ───────────────────────────────────
TH_VIRUSTOTAL_API_KEY=
TH_ABUSEIPDB_API_KEY=
TH_SHODAN_API_KEY=
# ── Auth ─────────────────────────────────────────────────────────────
TH_JWT_SECRET=CHANGE-ME-IN-PRODUCTION-USE-A-REAL-SECRET
TH_JWT_ACCESS_TOKEN_MINUTES=60
TH_JWT_REFRESH_TOKEN_DAYS=7
# ── Frontend ─────────────────────────────────────────────────────────
REACT_APP_API_URL=http://localhost:8000
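The header comment says each `TH_`-prefixed variable maps onto an `AppConfig` field. A minimal sketch of how that mapping could work, assuming a pydantic-settings based config (field names and defaults below are illustrative, not the project's actual config module):

```python
# Hypothetical sketch: TH_-prefixed env vars loaded into an AppConfig via
# pydantic-settings. Field names/defaults are assumptions for illustration.
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class AppConfig(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="TH_", env_file=".env")

    DEBUG: bool = False
    DATABASE_URL: str = "sqlite+aiosqlite:///./threathunt.db"
    ALLOWED_ORIGINS: str = "http://localhost:3000,http://localhost:8000"
    MAX_UPLOAD_SIZE_MB: int = 500
    AGENT_MAX_TOKENS: int = Field(default=2048)
    AGENT_TEMPERATURE: float = 0.3
    JWT_SECRET: str = "CHANGE-ME-IN-PRODUCTION-USE-A-REAL-SECRET"


config = AppConfig()  # e.g. TH_DEBUG=true in .env sets config.DEBUG to True
```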

60
.gitignore vendored
View File

@@ -1,60 +0,0 @@
# ── Python ────────────────────────────────────
__pycache__/
*.py[cod]
*$py.class
*.egg-info/
dist/
build/
*.egg
.eggs/
# ── Virtual environments ─────────────────────
venv/
.venv/
env/
# ── IDE / Editor ─────────────────────────────
.vscode/
.idea/
*.swp
*.swo
*~
# ── OS ────────────────────────────────────────
.DS_Store
Thumbs.db
# ── Environment / Secrets ────────────────────
.env
*.env.local
# ── Database ─────────────────────────────────
*.db
*.sqlite3
# ── Uploads ──────────────────────────────────
uploads/
# ── Node / Frontend ──────────────────────────
node_modules/
frontend/build/
frontend/.env.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# ── Docker ───────────────────────────────────
docker-compose.override.yml
# ── Test / Coverage ──────────────────────────
.coverage
htmlcov/
.pytest_cache/
.mypy_cache/
# ── Alembic ──────────────────────────────────
alembic/versions/*.pyc
*.db-wal
*.db-shm

View File

@@ -1 +0,0 @@
[ 656ms] [WARNING] No routes matched location "/network-map" @ http://localhost:3000/static/js/main.c0a7ab6d.js:1

View File

@@ -1 +0,0 @@
[ 4269ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.6d916bcf.js:1

View File

@@ -1 +0,0 @@
[ 496ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.28ae077d.js:1

View File

@@ -1,76 +0,0 @@
[ 402ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 60389ms] [ERROR] Failed to load resource: the server responded with a status of 500 (Internal Server Error) @ http://localhost:3000/api/analysis/process-tree?hunt_id=4bb956a4225e45459a464da1146d3cf5:0
[ 114742ms] [ERROR] Failed to load resource: the server responded with a status of 500 (Internal Server Error) @ http://localhost:3000/api/analysis/process-tree?hunt_id=4bb956a4225e45459a464da1146d3cf5:0
[ 116603ms] [ERROR] Failed to load resource: the server responded with a status of 500 (Internal Server Error) @ http://localhost:3000/api/analysis/process-tree?hunt_id=4bb956a4225e45459a464da1146d3cf5:0
[ 362021ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 379006ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 379019ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785) @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 379021ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
[ 382647ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 386088ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 386343ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785) @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 386345ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
[ 397704ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 519009ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 519273ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785) @ http://localhost:3000/static/js/main.cb47c3a0.js:1
[ 519274ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227378)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at ds (http://localhost:3000/static/js/main.cb47c3a0.js:2:227062)
at ps (http://localhost:3000/static/js/main.cb47c3a0.js:2:227824)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228635)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:229095)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)
at vs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228898)
at hs (http://localhost:3000/static/js/main.cb47c3a0.js:2:228785)

View File

@@ -1 +0,0 @@
[ 1803ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.b2c21c5a.js:1

View File

@@ -1,48 +0,0 @@
[ 2196ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
[ 46100ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
[ 46117ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785) @ http://localhost:3000/static/js/main.0e63bc98.js:1
[ 46118ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
[ 52506ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
[ 54912ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.0e63bc98.js:1
[ 54928ms] [ERROR] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785) @ http://localhost:3000/static/js/main.0e63bc98.js:1
[ 54929ms] NotFoundError: Failed to execute 'removeChild' on 'Node': The node to be removed is not a child of this node.
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227378)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at ds (http://localhost:3000/static/js/main.0e63bc98.js:2:227062)
at ps (http://localhost:3000/static/js/main.0e63bc98.js:2:227824)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228635)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:229095)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)
at vs (http://localhost:3000/static/js/main.0e63bc98.js:2:228898)
at hs (http://localhost:3000/static/js/main.0e63bc98.js:2:228785)

View File

@@ -1,7 +0,0 @@
[ 2548ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
[ 32912ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
[ 55583ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
[ 58208ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
[ 1168933ms] [ERROR] Failed to load resource: the server responded with a status of 504 (Gateway Time-out) @ http://localhost:3000/api/analysis/llm-analyze:0
[ 1477343ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1
[ 1482908ms] [WARNING] You have set a custom wheel sensitivity. This will make your app zoom unnaturally when using mainstream mice. You should change this value from the default only if you can guarantee that all your users will use the same hardware and OS configuration as your current machine. @ http://localhost:3000/static/js/main.c311038e.js:1

View File

@@ -1,7 +0,0 @@
[ 9612ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/:0
[ 17464ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/enterprise:0
[ 20742ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/enterprise:0
[ 53258ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/pricing:0
[ 59240ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/features/copilot#pricing:0
[ 67668ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/features/spark?utm_source=web-copilot-ce-cta&utm_campaign=spark-launch-sep-2025:0
[ 72166ms] [WARNING] The resource https://github.githubassets.com/assets/mona-sans-14595085164a.woff2 was preloaded using link preload but not used within a few seconds from the window's load event. Please make sure it has an appropriate `as` value and it is preloaded intentionally. @ https://github.com/features/spark?utm_source=web-copilot-ce-cta&utm_campaign=spark-launch-sep-2025:0

File diff suppressed because it is too large Load Diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 70 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 558 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 607 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 341 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 193 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 184 KiB

5
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"cSpell.words": [
"jsonify"
]
}

View File

@@ -1,32 +0,0 @@
# ThreatHunt Backend API - Python 3.13
FROM python:3.13-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements
COPY backend/requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend code
COPY backend/ .
# Create non-root user & data directory
RUN useradd -m -u 1000 appuser && mkdir -p /app/data && chown -R appuser:appuser /app
USER appuser
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
CMD curl -f http://localhost:8000/ || exit 1
# Run Alembic migrations then start Uvicorn
CMD ["sh", "-c", "python -m alembic upgrade head && python run.py"]

View File

@@ -1,36 +0,0 @@
# ThreatHunt Frontend - Node.js React
FROM node:20-alpine AS builder
WORKDIR /app
# Copy package files
COPY frontend/package.json frontend/package-lock.json* ./
# Install dependencies
RUN npm ci
# Copy source
COPY frontend/public ./public
COPY frontend/src ./src
COPY frontend/tsconfig.json ./
# Build application
RUN npm run build
# Production stage — nginx reverse-proxy + static files
FROM nginx:alpine
# Copy built React app
COPY --from=builder /app/build /usr/share/nginx/html
# Copy custom nginx config (proxies /api to backend)
COPY frontend/nginx.conf /etc/nginx/conf.d/default.conf
# Expose port
EXPOSE 3000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD wget --quiet --tries=1 --spider http://localhost:3000/ || exit 1
CMD ["nginx", "-g", "daemon off;"]

497
README.md
View File

@@ -1,496 +1,69 @@
# ThreatHunt - Analyst-Assist Threat Hunting Platform
<<<<<<< Updated upstream
# ThreatHunt
=======
# Cyber Threat Hunter
A modern threat hunting platform with integrated analyst-assist agent guidance. Analyze CSV artifact data exported from Velociraptor with AI-powered suggestions for investigation directions, analytical pivots, and hypothesis formation.
A modern web application for threat hunting and security analysis, built with React frontend and Flask backend.
## Overview
ThreatHunt is a web application designed to help security analysts efficiently hunt for threats by:
- Importing CSV artifacts from Velociraptor or other sources
- Displaying data in an organized, queryable interface
- Providing AI-powered guidance through an analyst-assist agent
- Suggesting analytical directions, filters, and pivots
- Highlighting anomalies and patterns of interest
> **Agent Policy**: The analyst-assist agent provides read-only guidance only. It does not execute actions, escalate alerts, or modify data. All decisions remain with the analyst.
## Quick Start
### Docker (Recommended)
```bash
# Clone and navigate
git clone https://github.com/mblanke/ThreatHunt.git
cd ThreatHunt
# Configure provider (choose one)
cp .env.example .env
# Edit .env and set your LLM provider:
# Option 1: Online (OpenAI, etc.)
# THREAT_HUNT_AGENT_PROVIDER=online
# THREAT_HUNT_ONLINE_API_KEY=sk-your-key
# Option 2: Local (Ollama, GGML, etc.)
# THREAT_HUNT_AGENT_PROVIDER=local
# THREAT_HUNT_LOCAL_MODEL_PATH=/path/to/model
# Option 3: Networked (Internal inference service)
# THREAT_HUNT_AGENT_PROVIDER=networked
# THREAT_HUNT_NETWORKED_ENDPOINT=http://service:5000
# Start services
docker-compose up -d
# Verify
curl http://localhost:8000/api/agent/health
curl http://localhost:3000
```
Access at http://localhost:3000
### Local Development
**Backend**:
```bash
cd backend
python -m venv venv
source venv/bin/activate # Windows: venv\Scripts\activate
pip install -r requirements.txt
# Configure provider
export THREAT_HUNT_ONLINE_API_KEY=sk-your-key
# OR set another provider env var
# Run
python run.py
# API at http://localhost:8000/docs
```
**Frontend** (new terminal):
```bash
cd frontend
npm install
npm start
# App at http://localhost:3000
```
## Features
- **Security Tools Detection**: Identify running security tools (AV, EDR, VPN)
- **CSV Processing**: Upload and analyze security logs
- **Baseline Analysis**: System baseline comparison
- **Network Analysis**: Network traffic and connection analysis
- **VirusTotal Integration**: File and URL reputation checking
### Analyst-Assist Agent 🤖
- **Read-only guidance**: Explains data patterns and suggests investigation directions
- **Context-aware**: Understands current dataset, host, and artifact type
- **Pluggable providers**: Local, networked, or online LLM backends
- **Transparent reasoning**: Explains logic with caveats and confidence scores
- **Governance-compliant**: Strictly adheres to agent policy (no execution, no escalation)
### Chat Interface
- Analyst asks questions about artifact data
- Agent provides guidance with suggested pivots and filters
- Conversation history for context continuity
- Real-time typing and response indicators
### Data Management
- Import CSV artifacts from Velociraptor
- Browse and filter findings by severity, host, artifact type
- Annotate findings with analyst notes
- Track investigation progress
## Architecture
### Backend
- **Framework**: FastAPI (Python 3.11)
- **Agent Module**: Pluggable LLM provider interface
- **API**: RESTful endpoints with OpenAPI documentation
- **Structure**: Modular design with clear separation of concerns
### Frontend
- **Framework**: React 18 with TypeScript
- **Components**: Agent chat panel + analysis dashboard
- **Styling**: CSS with responsive design
- **State Management**: React hooks + Context API
### LLM Providers
Supports three provider architectures:
1. **Local**: On-device or on-prem models (GGML, Ollama, vLLM)
2. **Networked**: Shared internal inference services
3. **Online**: External hosted APIs (OpenAI, Anthropic, Google)
Auto-detection: Automatically uses the first available provider.
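A rough sketch of what "first available provider" auto-detection might look like; the class and function names below are hypothetical and not taken from the project's code:

```python
# Hypothetical sketch of provider auto-detection ("local -> networked -> online").
# Provider names and availability checks are illustrative only.
from typing import Optional, Protocol


class LLMProvider(Protocol):
    name: str
    def is_available(self) -> bool: ...
    def complete(self, prompt: str) -> str: ...


def select_provider(providers: list[LLMProvider], preferred: str = "auto") -> Optional[LLMProvider]:
    """Return the explicitly requested provider, or the first one that responds."""
    if preferred != "auto":
        return next((p for p in providers if p.name == preferred and p.is_available()), None)
    for provider in providers:  # list order encodes priority: local, networked, online
        if provider.is_available():
            return provider
    return None
```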
## Project Structure
```
ThreatHunt/
├── frontend/ # React application
├── backend/ # Flask API server
├── uploaded/ # File upload storage
└── output/ # Analysis results
```
```
ThreatHunt/
├── backend/
│ ├── app/
│ │ ├── agents/ # Analyst-assist agent
│ │ │ ├── core.py # ThreatHuntAgent class
│ │ │ ├── providers.py # LLM provider interface
│ │ │ ├── config.py # Configuration
│ │ │ └── __init__.py
│ │ ├── api/routes/ # API endpoints
│ │ │ ├── agent.py # /api/agent/* routes
│ │ │ ├── __init__.py
│ │ ├── main.py # FastAPI app
│ │ └── __init__.py
│ ├── requirements.txt
│ ├── run.py
│ └── Dockerfile
├── frontend/
│ ├── src/
│ │ ├── components/
│ │ │ ├── AgentPanel.tsx # Chat interface
│ │ │ └── AgentPanel.css
│ │ ├── utils/
│ │ │ └── agentApi.ts # API communication
│ │ ├── App.tsx
│ │ ├── App.css
│ │ ├── index.tsx
│ │ └── index.css
│ ├── public/index.html
│ ├── package.json
│ ├── tsconfig.json
│ └── Dockerfile
├── docker-compose.yml
├── .env.example
├── .gitignore
├── AGENT_IMPLEMENTATION.md # Technical guide
├── INTEGRATION_GUIDE.md # Deployment guide
├── IMPLEMENTATION_SUMMARY.md # Overview
├── README.md # This file
├── ROADMAP.md
└── THREATHUNT_INTENT.md
```
## Quick Start
### Backend Setup
```bash
cd backend
chmod +x setup_backend.sh
./setup_backend.sh
source venv/bin/activate
python app.py
```
## API Endpoints
### Agent Assistance
- **POST /api/agent/assist** - Request guidance on artifact data
- **GET /api/agent/health** - Check agent availability
See full API documentation at http://localhost:8000/docs
## Configuration
### LLM Provider Selection
Set via `THREAT_HUNT_AGENT_PROVIDER` environment variable:
```bash
# Auto-detect (tries local → networked → online)
THREAT_HUNT_AGENT_PROVIDER=auto
# Local (on-device/on-prem)
THREAT_HUNT_AGENT_PROVIDER=local
THREAT_HUNT_LOCAL_MODEL_PATH=/models/model.gguf
# Networked (internal service)
THREAT_HUNT_AGENT_PROVIDER=networked
THREAT_HUNT_NETWORKED_ENDPOINT=http://inference:5000
THREAT_HUNT_NETWORKED_KEY=api-key
# Online (hosted API)
THREAT_HUNT_AGENT_PROVIDER=online
THREAT_HUNT_ONLINE_API_KEY=sk-your-key
THREAT_HUNT_ONLINE_PROVIDER=openai
THREAT_HUNT_ONLINE_MODEL=gpt-3.5-turbo
```
### Agent Behavior
```bash
THREAT_HUNT_AGENT_MAX_TOKENS=1024
THREAT_HUNT_AGENT_REASONING=true
THREAT_HUNT_AGENT_HISTORY_LENGTH=10
THREAT_HUNT_AGENT_FILTER_SENSITIVE=true
```
See `.env.example` for all configuration options.
## Governance & Compliance
This implementation strictly follows governance principles:
- **Agents assist analysts** - No autonomous execution
- **No tool execution** - Agent provides guidance only
- **No alert escalation** - Analyst controls alerts
- **No data modification** - Read-only analysis
- **Transparent reasoning** - Explains guidance with caveats
- **Analyst authority** - All decisions remain with analyst
**References**:
- `goose-core/governance/AGENT_POLICY.md`
- `goose-core/governance/AI_RULES.md`
- `THREATHUNT_INTENT.md`
## Documentation
- **[AGENT_IMPLEMENTATION.md](AGENT_IMPLEMENTATION.md)** - Detailed technical architecture
- **[INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md)** - Deployment and configuration
- **[IMPLEMENTATION_SUMMARY.md](IMPLEMENTATION_SUMMARY.md)** - Feature overview
## Testing the Agent
### Check Health
```bash
curl http://localhost:8000/api/agent/health
```
### Test API
```bash
curl -X POST http://localhost:8000/api/agent/assist \
-H "Content-Type: application/json" \
-d '{
"query": "What patterns suggest suspicious activity?",
"dataset_name": "FileList",
"artifact_type": "FileList",
"host_identifier": "DESKTOP-ABC123"
}'
```
### Use UI
1. Open http://localhost:3000
2. Enter a question in the agent panel
3. View guidance with suggested pivots and filters
## Troubleshooting
### Agent Unavailable (503)
- Check environment variables for provider configuration
- Verify LLM provider is accessible
- See logs: `docker-compose logs backend`
### No Frontend Response
- Verify backend health: `curl http://localhost:8000/api/agent/health`
- Check browser console for errors
- See logs: `docker-compose logs frontend`
See [INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md) for detailed troubleshooting.
## Development
### Running Tests
```bash
cd backend
pytest
cd ../frontend
npm test
```
### Building Images
```bash
docker-compose build
```
### Logs
```bash
docker-compose logs -f backend
docker-compose logs -f frontend
```
## Security Notes
For production deployment:
1. Add authentication to API endpoints
2. Enable HTTPS/TLS
3. Implement rate limiting
4. Filter sensitive data before LLM
5. Add audit logging
6. Use secrets management for API keys
See [INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md#security-notes) for details.
## Future Enhancements
- [ ] Integration with actual CVE databases
- [ ] Fine-tuned models for cybersecurity domain
- [ ] Structured output from LLMs (JSON mode)
- [ ] Feedback loop on guidance quality
- [ ] Multi-modal support (images, documents)
- [ ] Compliance reporting and audit trails
- [ ] Performance optimization and caching
## Contributing
Follow the architecture and governance principles in `goose-core`. All changes must:
- Adhere to agent policy (read-only, advisory only)
- Conform to shared terminology in goose-core
- Include appropriate documentation
- Pass tests and lint checks
## License
See LICENSE file
## Support
For issues or questions:
1. Check [INTEGRATION_GUIDE.md](INTEGRATION_GUIDE.md)
2. Review [AGENT_IMPLEMENTATION.md](AGENT_IMPLEMENTATION.md)
3. See API docs at http://localhost:8000/docs
4. Check backend logs for errors
## Getting Started
### Prerequisites
- Docker and Docker Compose
- Python 3.11+ (for local development)
- Node.js 18+ (for local development)
### Quick Start with Docker
1. Clone the repository:
```bash
git clone https://github.com/mblanke/ThreatHunt.git
cd ThreatHunt
```
2. Start all services:
```bash
docker-compose up -d
```
3. Access the application:
- Frontend: http://localhost:3000
- Backend API: http://localhost:8000
- API Documentation: http://localhost:8000/docs
### Local Development
#### Backend
```bash
cd backend
python -m venv venv
source venv/bin/activate  # On Windows: venv\Scripts\activate
pip install -r requirements.txt
# Set up environment variables
cp .env.example .env
# Edit .env with your settings
# Run migrations
alembic upgrade head
# Start development server
uvicorn app.main:app --reload
```
#### Frontend
```bash
cd frontend
npm install
npm start
```
### Frontend Setup
```bash
cd frontend
npm install
npm run dev
```
## API Endpoints
- `GET /` - Serve React app
- `GET /api/health` - Health check
- `POST /api/upload` - File upload
- `GET /api/analysis/<id>` - Get analysis results
## Security Considerations
- File upload validation
- Input sanitization
- Rate limiting
- CORS configuration
### Authentication
- `POST /api/auth/register` - Register a new user
- `POST /api/auth/login` - Login and receive JWT token
- `GET /api/auth/me` - Get current user profile
- `PUT /api/auth/me` - Update current user profile
### User Management (Admin only)
- `GET /api/users` - List all users in tenant
- `GET /api/users/{user_id}` - Get user by ID
- `PUT /api/users/{user_id}` - Update user
- `DELETE /api/users/{user_id}` - Deactivate user
### Tenants
- `GET /api/tenants` - List tenants
- `POST /api/tenants` - Create tenant (admin)
- `GET /api/tenants/{tenant_id}` - Get tenant by ID
### Hosts
- `GET /api/hosts` - List hosts (scoped to tenant)
- `POST /api/hosts` - Create host
- `GET /api/hosts/{host_id}` - Get host by ID
### Ingestion
- `POST /api/ingestion/ingest` - Upload and parse CSV files exported from Velociraptor
### VirusTotal
- `POST /api/vt/lookup` - Lookup hash in VirusTotal
## Authentication Flow
1. User registers or logs in via `/api/auth/login`
2. Backend returns JWT token with user_id, tenant_id, and role
3. Frontend stores token in localStorage
4. All subsequent API requests include token in Authorization header
5. Backend validates token and enforces tenant scoping
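A minimal sketch of steps 1–5 above using PyJWT; the claim names (user_id, tenant_id, role) and the `SECRET_KEY` / `ACCESS_TOKEN_EXPIRE_MINUTES` variables follow this README, while the helper functions themselves are illustrative rather than the project's actual code:

```python
# Illustrative token issue/validate helpers for the flow described above (PyJWT).
import os
from datetime import datetime, timedelta, timezone

import jwt

SECRET_KEY = os.environ["SECRET_KEY"]  # min 32 characters, per the env var docs
EXPIRE_MINUTES = int(os.environ.get("ACCESS_TOKEN_EXPIRE_MINUTES", "30"))


def issue_token(user_id: str, tenant_id: str, role: str) -> str:
    payload = {
        "user_id": user_id,
        "tenant_id": tenant_id,
        "role": role,
        "exp": datetime.now(timezone.utc) + timedelta(minutes=EXPIRE_MINUTES),
    }
    return jwt.encode(payload, SECRET_KEY, algorithm="HS256")


def validate_token(token: str) -> dict:
    # Raises jwt.ExpiredSignatureError / jwt.InvalidTokenError on bad tokens.
    return jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
```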
## Multi-Tenancy
- All data is scoped to tenant_id
- Users can only access data within their tenant
- Admin users have elevated permissions within their tenant
- Cross-tenant access requires explicit permissions
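A rough sketch of what tenant scoping can look like at the query layer: every read is filtered by the tenant_id carried in the validated token claims. The `Host` model and session handling below are hypothetical stand-ins, not the project's actual models:

```python
# Hypothetical tenant-scoped query: rows outside the caller's tenant are never returned.
from sqlalchemy import String, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Host(Base):
    __tablename__ = "hosts"
    id: Mapped[str] = mapped_column(String, primary_key=True)
    tenant_id: Mapped[str] = mapped_column(String, index=True)
    hostname: Mapped[str] = mapped_column(String)


def list_hosts(db: Session, claims: dict) -> list[Host]:
    """Return only hosts belonging to the caller's tenant (claims from the JWT)."""
    stmt = select(Host).where(Host.tenant_id == claims["tenant_id"])
    return list(db.scalars(stmt))
```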
## Database Migrations
Create a new migration:
```bash
cd backend
alembic revision --autogenerate -m "Description of changes"
```
Apply migrations:
```bash
alembic upgrade head
```
Rollback migrations:
```bash
alembic downgrade -1
```
## Environment Variables
### Backend
- `DATABASE_URL` - PostgreSQL connection string
- `SECRET_KEY` - Secret key for JWT signing (min 32 characters)
- `ACCESS_TOKEN_EXPIRE_MINUTES` - JWT token expiration time (default: 30)
- `VT_API_KEY` - VirusTotal API key for hash lookups
### Frontend
- `REACT_APP_API_URL` - Backend API URL (default: http://localhost:8000)
## Security
- Passwords are hashed using bcrypt
- JWT tokens include expiration time
- All API endpoints (except login/register) require authentication
- Role-based access control for admin operations
- Data isolation through tenant scoping
## Testing
### Backend
```bash
cd backend
pytest
```
### Frontend
```bash
cd frontend
npm test
```
## Contributing
1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Submit a pull request
## License
[Your License Here]
MIT License
>>>>>>> Stashed changes
## Support
For issues and questions, please open an issue on GitHub.

View File

@@ -1,21 +0,0 @@
# Operating Model
## Default cadence
- Prefer iterative progress over big bangs.
- Keep diffs small: target ≤ 300 changed lines per PR unless justified.
- Update tests/docs as part of the same change when possible.
## Working agreement
- Start with a PLAN for non-trivial tasks.
- Implement the smallest slice that satisfies acceptance criteria.
- Verify via DoD.
- Write a crisp PR summary: what changed, why, and how verified.
## Stop conditions (plan first)
Stop and produce a PLAN (do not code yet) if:
- scope is unclear
- more than 3 files will change
- data model changes
- auth/security boundaries
- performance-critical paths

View File

@@ -1,36 +0,0 @@
# Agent Types & Roles (Practical Taxonomy)
Use this skill to choose the *right* kind of agent workflow for the job.
## Common agent "types" (in practice)
### 1) Chat assistant (no tools)
Best for: explanations, brainstorming, small edits.
Risk: can hallucinate; no grounding in repo state.
### 2) Tool-using single agent
Best for: well-scoped tasks where the agent can read/write files and run commands.
Key control: strict DoD gates + minimal permissions.
### 3) Planner + Executor (2-role pattern)
Best for: medium complexity work (multi-file changes, feature work).
Flow: Planner writes plan + acceptance criteria → Executor implements → Reviewer checks.
### 4) Multi-agent (specialists)
Best for: bigger features with separable workstreams (UI, backend, docs, tests).
Rule: isolate context per role; use separate branches/worktrees.
### 5) Supervisor / orchestrator
Best for: long-running workflows with checkpoints (pipelines, report generation, PAD docs).
Rule: supervisor delegates, enforces gates, and composes final output.
## Decision rules (fast)
- If you can describe it in ≤ 5 steps → single tool-using agent.
- If you need tradeoffs/design → Planner + Executor.
- If UI + backend + docs/tests all move → multi-agent specialists.
- If it's a pipeline that runs repeatedly → orchestrator.
## Guardrails (always)
- DoD is the truth gate.
- Separate branches/worktrees for parallel work.
- Log decisions + commands in AGENT_LOG.md.

View File

@@ -1,24 +0,0 @@
# Definition of Done (DoD)
A change is "done" only when:
## Code correctness
- Builds successfully (if applicable)
- Tests pass
- Linting/formatting passes
- Types/checks pass (if applicable)
## Quality
- No new warnings introduced
- Edge cases handled (inputs validated, errors meaningful)
- Hot paths not regressed (if applicable)
## Hygiene
- No secrets committed
- Docs updated if behavior or usage changed
- PR summary includes verification steps
## Commands
- macOS/Linux: `./scripts/dod.sh`
- Windows: `\scripts\dod.ps1`

View File

@@ -1,16 +0,0 @@
# Repo Mapping Skill
When entering a repo:
1) Read README.md
2) Identify entrypoints (app main / server startup / CLI)
3) Identify config (env vars, .env.example, config files)
4) Identify test/lint scripts (package.json, pyproject.toml, Makefile, etc.)
5) Write a 10-line "repo map" in the PLAN before changing code
Output format:
- Purpose:
- Key modules:
- Data flow:
- Commands:
- Risks:

View File

@@ -1,20 +0,0 @@
# Algorithms & Performance
Use this skill when performance matters (large inputs, hot paths, or repeated calls).
## Checklist
- Identify the **state** you're recomputing.
- Add **memoization / caching** when the same subproblem repeats.
- Prefer **linear scans** + caches over nested loops when possible.
- If you can write it as a **recurrence**, you can test it.
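A tiny sketch of the checklist in action; the climbing-stairs recurrence is an arbitrary example, chosen only because the repeated subproblem is obvious:

```python
# The same subproblem repeats, so cache it; the recurrence doubles as the test oracle.
from functools import lru_cache


@lru_cache(maxsize=1024)
def ways_to_climb(steps: int) -> int:
    """ways(n) = ways(n-1) + ways(n-2); naive recursion repeats work exponentially."""
    if steps <= 1:
        return 1
    return ways_to_climb(steps - 1) + ways_to_climb(steps - 2)


assert ways_to_climb(0) == 1 and ways_to_climb(5) == 8  # base case + typical case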
## Practical heuristics
- Measure first when possible (timing + input sizes).
- Optimize the biggest wins: avoid repeated I/O, repeated parsing, repeated network calls.
- Keep caches bounded (size/TTL) and invalidate safely.
- Choose data structures intentionally: dict/set for membership, heap for top-k, deque for queues.
## Review notes (for PRs)
- Call out accidental O(n²) patterns.
- Suggest table/DP or memoization when repeated work is obvious.
- Add tests that cover base cases + typical cases + worst-case size.

View File

@@ -1,31 +0,0 @@
# Vibe Coding With Fundamentals (Safety Rails)
Use this skill when you're using "vibe coding" (fast, conversational building) but want production-grade outcomes.
## The good
- Rapid scaffolding and iteration
- Fast UI prototypes
- Quick exploration of architectures and options
## The failure mode
- "It works on my machine" code with weak tests
- Security foot-guns (auth, input validation, secrets)
- Performance cliffs (accidental O(n²), repeated I/O)
- Unmaintainable abstractions
## Safety rails (apply every time)
- Always start with acceptance criteria (what "done" means).
- Prefer small PRs; never dump a huge AI diff.
- Require DoD gates (lint/test/build) before merge.
- Write tests for behavior changes.
- For anything security/data related: do a Reviewer pass.
## When to slow down
- Auth/session/token work
- Anything touching payments, PII, secrets
- Data migrations/schema changes
- Performance-critical paths
- "It's flaky" or "it only fails in CI"
## Practical prompt pattern (use in PLAN)
- "State assumptions, list files to touch, propose tests, and include rollback steps."

View File

@@ -1,31 +0,0 @@
# Performance Profiling (Bun/Node)
Use this skill when:
- a hot path feels slow
- CPU usage is high
- you suspect accidental O(n²) or repeated work
- you need evidence before optimizing
## Bun CPU profiling
Bun supports CPU profiling via `--cpu-prof` (generates a `.cpuprofile` you can open in Chrome DevTools).
Upcoming: `bun --cpu-prof-md <script>` outputs a CPU profile as **Markdown** so LLMs can read/grep it easily.
### Workflow (Bun)
1) Run the workload with profiling enabled
- Today: `bun --cpu-prof ./path/to/script.ts`
- Upcoming: `bun --cpu-prof-md ./path/to/script.ts`
2) Save the output (or `.cpuprofile`) into `./profiles/` with a timestamp.
3) Ask the Reviewer agent to:
- identify the top 5 hottest functions
- propose the smallest fix
- add a regression test or benchmark
## Node CPU profiling (fallback)
- `node --cpu-prof ./script.js` writes a `.cpuprofile` file.
- Open in Chrome DevTools → Performance → Load profile.
## Rules
- Optimize based on measured hotspots, not vibes.
- Prefer algorithmic wins (remove repeated work) over micro-optimizations.
- Keep profiling artifacts out of git unless explicitly needed (use `.gitignore`).

View File

@@ -1,16 +0,0 @@
# Implementation Rules
## Change policy
- Prefer edits over rewrites.
- Keep changes localized.
- One change = one purpose.
- Avoid unnecessary abstraction.
## Dependency policy
- Default: do not add dependencies.
- If adding: explain why, alternatives considered, and impact.
## Error handling
- Validate inputs at boundaries.
- Error messages must be actionable: what failed + what to do next.

View File

@@ -1,14 +0,0 @@
# Testing & Quality
## Strategy
- If behavior changes: add/update tests.
- Unit tests for logic; integration tests for boundaries; E2E only where needed.
## Minimum for every PR
- A test plan in the PR summary (even if "existing tests cover this").
- Run DoD.
## Flaky tests
- Capture repro steps.
- Quarantine only with justification + follow-up issue.

View File

@@ -1,16 +0,0 @@
# PR Review Skill
Reviewer must check:
- Correctness: does it do what it claims?
- Safety: secrets, injection, auth boundaries
- Maintainability: readability, naming, duplication
- Tests: added/updated appropriately
- DoD: did it pass?
Reviewer output format:
1) Summary
2) Must-fix
3) Nice-to-have
4) Risks
5) Verification suggestions

View File

@@ -1,41 +0,0 @@
# Material UI (MUI) Design System
Use this skill for any React/Next "portal/admin/dashboard" UI so you stay consistent and avoid random component soup.
## Standard choice
- Preferred UI library: **MUI (Material UI)**.
- Prefer MUI components over ad-hoc HTML/CSS unless there's a good reason.
- One design system per repo (do not mix Chakra/Ant/Bootstrap/etc.).
## Setup (Next.js/React)
- Install: `@mui/material @emotion/react @emotion/styled`
- If using icons: `@mui/icons-material`
- If using data grid: `@mui/x-data-grid` (or pro if licensed)
## Theming rules
- Define a single theme (typography, spacing, palette) and reuse everywhere.
- Use semantic colors (primary/secondary/error/warning/success/info), not hard-coded hex everywhere.
- Prefer MUI's `sx` for small styling; use `styled()` for reusable components.
## "Portal" patterns (modals, popovers, menus)
- Use MUI Dialog/Modal/Popover/Menu components instead of DIY portals.
- Accessibility requirements:
- Focus is trapped in Dialog/Modal.
- Escape closes modal unless explicitly prevented.
- All inputs have labels; buttons have clear text/aria-labels.
- Keyboard navigation works end-to-end.
## Layout conventions (for portals)
- Use: AppBar + Drawer (or NavigationRail equivalent) + main content.
- Keep pages as composition of small components: Page → Sections → Widgets.
- Keep forms consistent: FormControl + helper text + validation messages.
## Performance hygiene
- Avoid re-render storms: memoize heavy lists; use virtualization for large tables (DataGrid).
- Prefer server pagination for huge datasets.
## PR review checklist
- Theme is used (no random styling).
- Components are MUI where reasonable.
- Modal/popover accessibility is correct.
- No mixed UI libraries.

View File

@@ -1,15 +0,0 @@
# Security & Safety
## Secrets
- Never output secrets or tokens.
- Never log sensitive inputs.
- Never commit credentials.
## Inputs
- Validate external inputs at boundaries.
- Fail closed for auth/security decisions.
## Tooling
- No destructive commands unless requested and scoped.
- Prefer read-only operations first.

View File

@@ -1,13 +0,0 @@
# Docs & Artifacts
Update documentation when:
- setup steps change
- env vars change
- endpoints/CLI behavior changes
- data formats change
Docs standards:
- Provide copy/paste commands
- Provide expected outputs where helpful
- Keep it short and accurate

View File

@@ -1,11 +0,0 @@
# MCP Tools Skill (Optional)
If this repo defines MCP servers/tools:
Rules:
- Tool calls must be explicit and logged.
- Maintain an allowlist of tools; deny by default.
- Every tool must have: purpose, inputs/outputs schema, examples, and tests.
- Prefer idempotent tool operations.
- Never add tools that can exfiltrate secrets without strict guards.

View File

@@ -1,51 +0,0 @@
# MCP Server Design (Agent-First)
Build MCP servers like you're designing a UI for a non-human user.
This skill distills Phil Schmid's MCP server best practices into concrete repo rules.
Source: "MCP is Not the Problem, It's your Server" (Jan 21, 2026).
## 1) Outcomes, not operations
- Do **not** wrap REST endpoints 1:1 as tools.
- Expose high-level, outcome-oriented tools.
- Bad: `get_user`, `list_orders`, `get_order_status`
- Good: `track_latest_order(email)` (server orchestrates internally)
## 2) Flatten arguments
- Prefer top-level primitives + constrained enums.
- Avoid nested `dict`/config objects (agents hallucinate keys).
- Defaults reduce decision load.
## 3) Instructions are context
- Tool docstrings are *instructions*:
- when to use the tool
- argument formatting rules
- what the return means
- Error strings are also context:
- return actionable, self-correcting messages (not raw stack traces)
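A sketch combining points 1–3: one outcome-oriented tool, flat primitive arguments, a docstring written as instructions, and actionable error strings. The tool name and the order-lookup helper are assumptions for illustration, not a real API:

```python
# Hypothetical outcome-oriented tool (not a 1:1 REST wrapper).
def track_latest_order(email: str, carrier: str = "any") -> str:
    """Look up the newest order for `email` and return a one-line shipping status.

    Use this when the user asks where their order is. `email` must be a full
    address (e.g. "user@example.com"); `carrier` is one of "any", "ups", "fedex".
    """
    if "@" not in email:
        return "Error: `email` must be a full address like user@example.com. Retry with a valid address."
    order = _find_latest_order(email)  # hypothetical internal orchestration
    if order is None:
        return f"No orders found for {email}. Ask the user to confirm the address they ordered with."
    return f"Order {order['id']} shipped via {order['carrier']}: {order['status']}."


def _find_latest_order(email: str) -> dict | None:
    return {"id": "A-1001", "carrier": "ups", "status": "out for delivery"}
```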
## 4) Curate ruthlessly
- Aim for **5–15 tools** per server.
- One server, one job. Split by persona if needed.
- Delete unused tools. Don't dump raw data into context.
## 5) Name tools for discovery
- Avoid generic names (`create_issue`).
- Prefer `{service}_{action}_{resource}`:
- `velociraptor_run_hunt`
- `github_list_prs`
- `slack_send_message`
## 6) Paginate large results
- Always support `limit` (default ~20–50).
- Return metadata: `has_more`, `next_offset`, `total_count`.
- Never return hundreds of rows unbounded.
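An illustrative shape for a bounded, paginated tool result, following the metadata fields listed above; the tool name and data source are hypothetical:

```python
# Hypothetical paginated tool result: at most `limit` rows plus pagination metadata.
from typing import Any


def velociraptor_list_hunts(limit: int = 25, offset: int = 0) -> dict[str, Any]:
    """Return at most `limit` hunts plus pagination metadata, never an unbounded dump."""
    all_hunts = _load_hunts()  # hypothetical data source
    page = all_hunts[offset : offset + limit]
    more = offset + limit < len(all_hunts)
    return {
        "items": page,
        "total_count": len(all_hunts),
        "has_more": more,
        "next_offset": offset + limit if more else None,
    }


def _load_hunts() -> list[dict[str, Any]]:
    return [{"hunt_id": f"H-{i:04d}", "state": "finished"} for i in range(137)]
```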
## Repo conventions
- Put MCP tool specs in `mcp/` (schemas, examples, fixtures).
- Provide at least 1 "golden path" example call per tool.
- Add an eval that checks:
- tool names follow discovery convention
- args are flat + typed
- responses are concise + stable
- pagination works

View File

@@ -1,40 +0,0 @@
# FastMCP 3 Patterns (Providers + Transforms)
Use this skill when you are building MCP servers in Python and want:
- composable tool sets
- per-user/per-session behavior
- auth, versioning, observability, and long-running tasks
## Mental model (FastMCP 3)
FastMCP 3 treats everything as three composable primitives:
- **Components**: what you expose (tools, resources, prompts)
- **Providers**: where components come from (decorators, files, OpenAPI, remote MCP, etc.)
- **Transforms**: how you reshape what clients see (namespace, filters, auth, versioning, visibility)
## Recommended architecture for Marc's platform
Build a **single "Cyber MCP Gateway"** that composes providers:
- LocalProvider: core cyber tools (run hunt, parse triage, generate report)
- OpenAPIProvider: wrap stable internal APIs (ticketing, asset DB) without 1:1 endpoint exposure
- ProxyProvider/FastMCPProvider: mount sub-servers (e.g., Velociraptor tools, Intel feeds)
Then apply transforms:
- Namespace per domain: `hunt.*`, `intel.*`, `pad.*`
- Visibility per session: hide dangerous tools unless user/role allows
- VersionFilter: keep old clients working while you evolve tools
## Production must-haves
- **Tool timeouts**: never let a tool hang forever
- **Pagination**: all list tools must be bounded
- **Background tasks**: use for long hunts / ingest jobs
- **Tracing**: emit OpenTelemetry traces so you can debug agent/tool behavior
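A generic sketch of the tool-timeout rule (plain asyncio, deliberately not FastMCP-specific): wrap the tool body so a hung backend returns an actionable error instead of blocking the agent forever.

```python
# Generic timeout wrapper for a long-running tool coroutine (illustrative only).
import asyncio


async def run_with_timeout(coro, seconds: float = 30.0) -> str:
    try:
        return await asyncio.wait_for(coro, timeout=seconds)
    except asyncio.TimeoutError:
        return f"Error: tool timed out after {seconds:.0f}s. Narrow the query or retry later."
```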
## Auth rules
- Prefer component-level auth for "dangerous" tools.
- Default stance: read-only tools visible; write/execute tools gated.
## Versioning rules
- Version your components when you change schemas or semantics.
- Keep 1 previous version callable during migrations.
## Upgrade guidance
FastMCP 3 is in beta; pin to v2 for stability in production until you've tested.

View File

@@ -1,148 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')
# 1) Add label mode type near graph types
marker="interface GEdge { source: string; target: string; weight: number }\ninterface Graph { nodes: GNode[]; edges: GEdge[] }\n"
if marker in t and "type LabelMode" not in t:
t=t.replace(marker, marker+"\ntype LabelMode = 'all' | 'highlight' | 'none';\n")
# 2) extend drawLabels signature
old_sig="""function drawLabels(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
search: string, matchSet: Set<string>, vp: Viewport,
simplify: boolean,
) {
"""
new_sig="""function drawLabels(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
search: string, matchSet: Set<string>, vp: Viewport,
simplify: boolean, labelMode: LabelMode,
) {
"""
if old_sig in t:
t=t.replace(old_sig,new_sig)
# 3) label mode guards inside drawLabels
old_guard=""" const dimmed = search.length > 0;
if (simplify && !search && !hovered && !selected) {
return;
}
"""
new_guard=""" if (labelMode === 'none') return;
const dimmed = search.length > 0;
if (labelMode === 'highlight' && !search && !hovered && !selected) return;
if (simplify && labelMode !== 'all' && !search && !hovered && !selected) {
return;
}
"""
if old_guard in t:
t=t.replace(old_guard,new_guard)
old_show=""" const isHighlight = hovered === n.id || selected === n.id || matchSet.has(n.id);
const show = isHighlight || n.meta.type === 'host' || n.count >= 2;
if (!show) continue;
"""
new_show=""" const isHighlight = hovered === n.id || selected === n.id || matchSet.has(n.id);
const show = labelMode === 'all'
? (isHighlight || n.meta.type === 'host' || n.count >= 2)
: isHighlight;
if (!show) continue;
"""
if old_show in t:
t=t.replace(old_show,new_show)
# 4) drawGraph signature and call site
old_graph_sig="""function drawGraph(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null, search: string,
vp: Viewport, animTime: number, dpr: number,
) {
"""
new_graph_sig="""function drawGraph(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null, search: string,
vp: Viewport, animTime: number, dpr: number, labelMode: LabelMode,
) {
"""
if old_graph_sig in t:
t=t.replace(old_graph_sig,new_graph_sig)
old_drawlabels_call="drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify);"
new_drawlabels_call="drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify, labelMode);"
if old_drawlabels_call in t:
t=t.replace(old_drawlabels_call,new_drawlabels_call)
# 5) state for label mode
state_anchor=" const [selectedNode, setSelectedNode] = useState<GNode | null>(null);\n const [search, setSearch] = useState('');\n"
state_new=" const [selectedNode, setSelectedNode] = useState<GNode | null>(null);\n const [search, setSearch] = useState('');\n const [labelMode, setLabelMode] = useState<LabelMode>('highlight');\n"
if state_anchor in t:
t=t.replace(state_anchor,state_new)
# 6) pass labelMode in draw calls
old_tick_draw="drawGraph(ctx, g, hoveredRef.current, selectedNodeRef.current?.id ?? null, searchRef.current, vpRef.current, ts, dpr);"
new_tick_draw="drawGraph(ctx, g, hoveredRef.current, selectedNodeRef.current?.id ?? null, searchRef.current, vpRef.current, ts, dpr, labelMode);"
if old_tick_draw in t:
t=t.replace(old_tick_draw,new_tick_draw)
old_redraw_draw="if (ctx) drawGraph(ctx, graph, hovered, selectedNode?.id ?? null, search, vpRef.current, animTimeRef.current, dpr);"
new_redraw_draw="if (ctx) drawGraph(ctx, graph, hovered, selectedNode?.id ?? null, search, vpRef.current, animTimeRef.current, dpr, labelMode);"
if old_redraw_draw in t:
t=t.replace(old_redraw_draw,new_redraw_draw)
# 7) include labelMode in redraw deps
old_redraw_dep="] , [graph, hovered, selectedNode, search]);"
if old_redraw_dep in t:
t=t.replace(old_redraw_dep, "] , [graph, hovered, selectedNode, search, labelMode]);")
else:
t=t.replace(" }, [graph, hovered, selectedNode, search]);"," }, [graph, hovered, selectedNode, search, labelMode]);")
# 8) Add toolbar selector after search field
search_block=""" <TextField
size="small"
placeholder="Search hosts, IPs, users\u2026"
value={search}
onChange={e => setSearch(e.target.value)}
sx={{ width: 220, '& .MuiInputBase-input': { py: 0.8 } }}
slotProps={{
input: {
startAdornment: <SearchIcon sx={{ mr: 0.5, fontSize: 18, color: 'text.secondary' }} />,
},
}}
/>
"""
label_block=""" <TextField
size="small"
placeholder="Search hosts, IPs, users\u2026"
value={search}
onChange={e => setSearch(e.target.value)}
sx={{ width: 220, '& .MuiInputBase-input': { py: 0.8 } }}
slotProps={{
input: {
startAdornment: <SearchIcon sx={{ mr: 0.5, fontSize: 18, color: 'text.secondary' }} />,
},
}}
/>
<FormControl size="small" sx={{ minWidth: 140 }}>
<InputLabel id="label-mode-selector">Labels</InputLabel>
<Select
labelId="label-mode-selector"
value={labelMode}
label="Labels"
onChange={e => setLabelMode(e.target.value as LabelMode)}
sx={{ '& .MuiSelect-select': { py: 0.8 } }}
>
<MenuItem value="none">None</MenuItem>
<MenuItem value="highlight">Selected/Search</MenuItem>
<MenuItem value="all">All</MenuItem>
</Select>
</FormControl>
"""
if search_block in t:
t=t.replace(search_block,label_block)
p.write_text(t,encoding='utf-8')
print('added network map label filter control and renderer modes')
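# --- Illustrative summary (grounded in the guards patched in above) -----------
# The three label modes behave as a simple decision table:
#   'none'      -> never draw labels
#   'highlight' -> draw only hovered / selected / search-matched nodes
#   'all'       -> highlights plus host nodes and nodes with count >= 2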

View File

@@ -1,18 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
t=p.read_text(encoding='utf-8')
old=''' # -- Scanner settings -----------------------------------------------
SCANNER_BATCH_SIZE: int = Field(default=500, description="Rows per scanner batch")
'''
new=''' # -- Scanner settings -----------------------------------------------
SCANNER_BATCH_SIZE: int = Field(default=500, description="Rows per scanner batch")
SCANNER_MAX_ROWS_PER_SCAN: int = Field(
default=300000,
description="Global row budget for a single AUP scan request (0 = unlimited)",
)
'''
if old not in t:
raise SystemExit('scanner settings block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('added SCANNER_MAX_ROWS_PER_SCAN config')
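# --- Illustrative sketch (assumption, not part of this patch) -----------------
# One way a scan loop could honour the new SCANNER_MAX_ROWS_PER_SCAN budget;
# the real enforcement would live in backend/app/services/scanner.py and may differ.
def within_row_budget(rows_scanned: int, budget: int) -> bool:
    """Return True while scanning may continue (a budget of 0 means unlimited)."""
    return budget <= 0 or rows_scanned < budget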

View File

@@ -1,46 +0,0 @@
from pathlib import Path
root = Path(r"d:\Projects\Dev\ThreatHunt")
# -------- client.ts --------
client = root / "frontend/src/api/client.ts"
text = client.read_text(encoding="utf-8")
if "export interface NetworkSummary" not in text:
insert_after = "export interface InventoryStatus {\n hunt_id: string;\n status: 'ready' | 'building' | 'none';\n}\n"
addition = insert_after + "\nexport interface NetworkSummaryHost {\n id: string;\n hostname: string;\n row_count: number;\n ip_count: number;\n user_count: number;\n}\n\nexport interface NetworkSummary {\n stats: InventoryStats;\n top_hosts: NetworkSummaryHost[];\n top_edges: InventoryConnection[];\n status?: 'building' | 'deferred';\n message?: string;\n}\n"
text = text.replace(insert_after, addition)
net_old = """export const network = {\n hostInventory: (huntId: string, force = false) =>\n api<HostInventory>(`/api/network/host-inventory?hunt_id=${encodeURIComponent(huntId)}${force ? '&force=true' : ''}`),\n inventoryStatus: (huntId: string) =>\n api<InventoryStatus>(`/api/network/inventory-status?hunt_id=${encodeURIComponent(huntId)}`),\n rebuildInventory: (huntId: string) =>\n api<{ job_id: string; status: string }>(`/api/network/rebuild-inventory?hunt_id=${encodeURIComponent(huntId)}`, { method: 'POST' }),\n};"""
net_new = """export const network = {\n hostInventory: (huntId: string, force = false) =>\n api<HostInventory | { status: 'building' | 'deferred'; message?: string }>(`/api/network/host-inventory?hunt_id=${encodeURIComponent(huntId)}${force ? '&force=true' : ''}`),\n summary: (huntId: string, topN = 20) =>\n api<NetworkSummary | { status: 'building' | 'deferred'; message?: string }>(`/api/network/summary?hunt_id=${encodeURIComponent(huntId)}&top_n=${topN}`),\n subgraph: (huntId: string, maxHosts = 250, maxEdges = 1500, nodeId?: string) => {\n let qs = `/api/network/subgraph?hunt_id=${encodeURIComponent(huntId)}&max_hosts=${maxHosts}&max_edges=${maxEdges}`;\n if (nodeId) qs += `&node_id=${encodeURIComponent(nodeId)}`;\n return api<HostInventory | { status: 'building' | 'deferred'; message?: string }>(qs);\n },\n inventoryStatus: (huntId: string) =>\n api<InventoryStatus>(`/api/network/inventory-status?hunt_id=${encodeURIComponent(huntId)}`),\n rebuildInventory: (huntId: string) =>\n api<{ job_id: string; status: string }>(`/api/network/rebuild-inventory?hunt_id=${encodeURIComponent(huntId)}`, { method: 'POST' }),\n};"""
if net_old in text:
text = text.replace(net_old, net_new)
client.write_text(text, encoding="utf-8")
# -------- NetworkMap.tsx --------
nm = root / "frontend/src/components/NetworkMap.tsx"
text = nm.read_text(encoding="utf-8")
# add constants
if "LARGE_HUNT_HOST_THRESHOLD" not in text:
text = text.replace("let lastSelectedHuntId = '';\n", "let lastSelectedHuntId = '';\nconst LARGE_HUNT_HOST_THRESHOLD = 400;\nconst LARGE_HUNT_SUBGRAPH_HOSTS = 350;\nconst LARGE_HUNT_SUBGRAPH_EDGES = 2500;\n")
# inject helper in component after sleep
marker = " const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));\n"
if "loadScaleAwareGraph" not in text:
helper = marker + "\n const loadScaleAwareGraph = useCallback(async (huntId: string, forceRefresh = false) => {\n setLoading(true); setError(''); setGraph(null); setStats(null);\n setSelectedNode(null); setPopoverAnchor(null);\n\n const waitReadyThen = async <T,>(fn: () => Promise<T>): Promise<T> => {\n let delayMs = 1500;\n const startedAt = Date.now();\n for (;;) {\n const out: any = await fn();\n if (out && !out.status) return out as T;\n const st = await network.inventoryStatus(huntId);\n if (st.status === 'ready') {\n const out2: any = await fn();\n if (out2 && !out2.status) return out2 as T;\n }\n if (Date.now() - startedAt > 5 * 60 * 1000) throw new Error('Network data build timed out after 5 minutes');\n const jitter = Math.floor(Math.random() * 250);\n await sleep(delayMs + jitter);\n delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n }\n };\n\n try {\n setProgress('Loading network summary');\n const summary: any = await waitReadyThen(() => network.summary(huntId, 20));\n const totalHosts = summary?.stats?.total_hosts || 0;\n\n if (totalHosts > LARGE_HUNT_HOST_THRESHOLD) {\n setProgress(`Large hunt detected (${totalHosts} hosts). Loading focused subgraph`);\n const sub: any = await waitReadyThen(() => network.subgraph(huntId, LARGE_HUNT_SUBGRAPH_HOSTS, LARGE_HUNT_SUBGRAPH_EDGES));\n if (!sub?.hosts || sub.hosts.length === 0) {\n setError('No hosts found for subgraph.');\n return;\n }\n const { w, h } = canvasSizeRef.current;\n const g = buildGraphFromInventory(sub.hosts, sub.connections || [], w, h);\n simulate(g, w / 2, h / 2, 60);\n simAlphaRef.current = 0.3;\n setStats(summary.stats);\n graphCache.set(huntId, { graph: g, stats: summary.stats, ts: Date.now() });\n setGraph(g);\n return;\n }\n\n // Small/medium hunts: load full inventory\n setProgress('Loading host inventory');\n const inv: any = await waitReadyThen(() => network.hostInventory(huntId, forceRefresh));\n if (!inv?.hosts || inv.hosts.length === 0) {\n setError('No hosts found. Upload CSV files with host-identifying columns (ClientId, Fqdn, Hostname) to this hunt.');\n return;\n }\n const { w, h } = canvasSizeRef.current;\n const g = buildGraphFromInventory(inv.hosts, inv.connections || [], w, h);\n simulate(g, w / 2, h / 2, 60);\n simAlphaRef.current = 0.3;\n setStats(summary.stats || inv.stats);\n graphCache.set(huntId, { graph: g, stats: summary.stats || inv.stats, ts: Date.now() });\n setGraph(g);\n } catch (e: any) {\n console.error('[NetworkMap] scale-aware load error:', e);\n setError(e.message || 'Failed to load network data');\n } finally {\n setLoading(false);\n setProgress('');\n }\n }, []);\n"
text = text.replace(marker, helper)
# simplify existing loadGraph function body to delegate
pattern_start = text.find(" // Load host inventory for selected hunt (with cache).")
if pattern_start != -1:
# replace the whole loadGraph useCallback block by simple delegator
import re
block_re = re.compile(r" // Load host inventory for selected hunt \(with cache\)\.[\s\S]*?\n \}, \[\]\); // Stable - reads canvasSizeRef, no state deps\n", re.M)
repl = " // Load graph data for selected hunt (delegates to scale-aware loader).\n const loadGraph = useCallback(async (huntId: string, forceRefresh = false) => {\n if (!huntId) return;\n\n // Check module-level cache first (5 min TTL)\n if (!forceRefresh) {\n const cached = graphCache.get(huntId);\n if (cached && Date.now() - cached.ts < 5 * 60 * 1000) {\n setGraph(cached.graph);\n setStats(cached.stats);\n setError('');\n simAlphaRef.current = 0;\n return;\n }\n }\n\n await loadScaleAwareGraph(huntId, forceRefresh);\n // eslint-disable-next-line react-hooks/exhaustive-deps\n }, []); // Stable - reads canvasSizeRef, no state deps\n"
text = block_re.sub(repl, text, count=1)
nm.write_text(text, encoding="utf-8")
print("Patched frontend client + NetworkMap for scale-aware loading")

View File

@@ -1,206 +0,0 @@
from pathlib import Path
root = Path(r"d:\Projects\Dev\ThreatHunt")
# 1) config.py additions
cfg = root / "backend/app/config.py"
text = cfg.read_text(encoding="utf-8")
needle = " # -- Scanner settings -----------------------------------------------\n SCANNER_BATCH_SIZE: int = Field(default=500, description=\"Rows per scanner batch\")\n"
insert = " # -- Scanner settings -----------------------------------------------\n SCANNER_BATCH_SIZE: int = Field(default=500, description=\"Rows per scanner batch\")\n\n # -- Job queue settings ----------------------------------------------\n JOB_QUEUE_MAX_BACKLOG: int = Field(\n default=2000, description=\"Soft cap for queued background jobs\"\n )\n JOB_QUEUE_RETAIN_COMPLETED: int = Field(\n default=3000, description=\"Maximum completed/failed jobs to retain in memory\"\n )\n JOB_QUEUE_CLEANUP_INTERVAL_SECONDS: int = Field(\n default=60, description=\"How often to run in-memory job cleanup\"\n )\n JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS: int = Field(\n default=3600, description=\"Age threshold for in-memory completed job cleanup\"\n )\n"
if needle in text:
text = text.replace(needle, insert)
cfg.write_text(text, encoding="utf-8")
# 2) scanner.py default scope = dataset-only
scanner = root / "backend/app/services/scanner.py"
text = scanner.read_text(encoding="utf-8")
text = text.replace(" scan_hunts: bool = True,", " scan_hunts: bool = False,")
text = text.replace(" scan_annotations: bool = True,", " scan_annotations: bool = False,")
text = text.replace(" scan_messages: bool = True,", " scan_messages: bool = False,")
scanner.write_text(text, encoding="utf-8")
# 3) keywords.py defaults = dataset-only
kw = root / "backend/app/api/routes/keywords.py"
text = kw.read_text(encoding="utf-8")
text = text.replace(" scan_hunts: bool = True", " scan_hunts: bool = False")
text = text.replace(" scan_annotations: bool = True", " scan_annotations: bool = False")
text = text.replace(" scan_messages: bool = True", " scan_messages: bool = False")
kw.write_text(text, encoding="utf-8")
# 4) job_queue.py dedupe + periodic cleanup
jq = root / "backend/app/services/job_queue.py"
text = jq.read_text(encoding="utf-8")
text = text.replace(
"from typing import Any, Callable, Coroutine, Optional\n",
"from typing import Any, Callable, Coroutine, Optional\n\nfrom app.config import settings\n"
)
text = text.replace(
" self._completion_callbacks: list[Callable[[Job], Coroutine]] = []\n",
" self._completion_callbacks: list[Callable[[Job], Coroutine]] = []\n self._cleanup_task: asyncio.Task | None = None\n"
)
start_old = ''' async def start(self):
if self._started:
return
self._started = True
for i in range(self._max_workers):
task = asyncio.create_task(self._worker(i))
self._workers.append(task)
logger.info(f"Job queue started with {self._max_workers} workers")
'''
start_new = ''' async def start(self):
if self._started:
return
self._started = True
for i in range(self._max_workers):
task = asyncio.create_task(self._worker(i))
self._workers.append(task)
if not self._cleanup_task or self._cleanup_task.done():
self._cleanup_task = asyncio.create_task(self._cleanup_loop())
logger.info(f"Job queue started with {self._max_workers} workers")
'''
text = text.replace(start_old, start_new)
stop_old = ''' async def stop(self):
self._started = False
for w in self._workers:
w.cancel()
await asyncio.gather(*self._workers, return_exceptions=True)
self._workers.clear()
logger.info("Job queue stopped")
'''
stop_new = ''' async def stop(self):
self._started = False
for w in self._workers:
w.cancel()
await asyncio.gather(*self._workers, return_exceptions=True)
self._workers.clear()
if self._cleanup_task:
self._cleanup_task.cancel()
await asyncio.gather(self._cleanup_task, return_exceptions=True)
self._cleanup_task = None
logger.info("Job queue stopped")
'''
text = text.replace(stop_old, stop_new)
submit_old = ''' def submit(self, job_type: JobType, **params) -> Job:
job = Job(id=str(uuid.uuid4()), job_type=job_type, params=params)
self._jobs[job.id] = job
self._queue.put_nowait(job.id)
logger.info(f"Job submitted: {job.id} ({job_type.value}) params={params}")
return job
'''
submit_new = ''' def submit(self, job_type: JobType, **params) -> Job:
# Soft backpressure: prefer dedupe over queue amplification
dedupe_job = self._find_active_duplicate(job_type, params)
if dedupe_job is not None:
logger.info(
f"Job deduped: reusing {dedupe_job.id} ({job_type.value}) params={params}"
)
return dedupe_job
if self._queue.qsize() >= settings.JOB_QUEUE_MAX_BACKLOG:
logger.warning(
"Job queue backlog high (%d >= %d). Accepting job but system may be degraded.",
self._queue.qsize(), settings.JOB_QUEUE_MAX_BACKLOG,
)
job = Job(id=str(uuid.uuid4()), job_type=job_type, params=params)
self._jobs[job.id] = job
self._queue.put_nowait(job.id)
logger.info(f"Job submitted: {job.id} ({job_type.value}) params={params}")
return job
'''
text = text.replace(submit_old, submit_new)
insert_methods_after = " def get_job(self, job_id: str) -> Job | None:\n return self._jobs.get(job_id)\n"
new_methods = ''' def get_job(self, job_id: str) -> Job | None:
return self._jobs.get(job_id)
def _find_active_duplicate(self, job_type: JobType, params: dict) -> Job | None:
"""Return queued/running job with same key workload to prevent duplicate storms."""
key_fields = ["dataset_id", "hunt_id", "hostname", "question", "mode"]
sig = tuple((k, params.get(k)) for k in key_fields if params.get(k) is not None)
if not sig:
return None
for j in self._jobs.values():
if j.job_type != job_type:
continue
if j.status not in (JobStatus.QUEUED, JobStatus.RUNNING):
continue
other_sig = tuple((k, j.params.get(k)) for k in key_fields if j.params.get(k) is not None)
if sig == other_sig:
return j
return None
'''
text = text.replace(insert_methods_after, new_methods)
cleanup_old = ''' def cleanup(self, max_age_seconds: float = 3600):
now = time.time()
to_remove = [
jid for jid, j in self._jobs.items()
if j.status in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
and (now - j.created_at) > max_age_seconds
]
for jid in to_remove:
del self._jobs[jid]
if to_remove:
logger.info(f"Cleaned up {len(to_remove)} old jobs")
'''
cleanup_new = ''' def cleanup(self, max_age_seconds: float = 3600):
now = time.time()
terminal_states = (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
to_remove = [
jid for jid, j in self._jobs.items()
if j.status in terminal_states and (now - j.created_at) > max_age_seconds
]
# Also cap retained terminal jobs to avoid unbounded memory growth
terminal_jobs = sorted(
[j for j in self._jobs.values() if j.status in terminal_states],
key=lambda j: j.created_at,
reverse=True,
)
overflow = terminal_jobs[settings.JOB_QUEUE_RETAIN_COMPLETED :]
to_remove.extend([j.id for j in overflow])
removed = 0
for jid in set(to_remove):
if jid in self._jobs:
del self._jobs[jid]
removed += 1
if removed:
logger.info(f"Cleaned up {removed} old jobs")
async def _cleanup_loop(self):
interval = max(10, settings.JOB_QUEUE_CLEANUP_INTERVAL_SECONDS)
while self._started:
try:
self.cleanup(max_age_seconds=settings.JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS)
except Exception as e:
logger.warning(f"Job queue cleanup loop error: {e}")
await asyncio.sleep(interval)
'''
text = text.replace(cleanup_old, cleanup_new)
jq.write_text(text, encoding="utf-8")
# 5) NetworkMap polling backoff/jitter max wait
nm = root / "frontend/src/components/NetworkMap.tsx"
text = nm.read_text(encoding="utf-8")
text = text.replace(
" // Poll until ready, then re-fetch\n for (;;) {\n await new Promise(r => setTimeout(r, 2000));\n const st = await network.inventoryStatus(huntId);\n if (st.status === 'ready') break;\n }\n",
" // Poll until ready (exponential backoff), then re-fetch\n let delayMs = 1500;\n const startedAt = Date.now();\n for (;;) {\n const jitter = Math.floor(Math.random() * 250);\n await new Promise(r => setTimeout(r, delayMs + jitter));\n const st = await network.inventoryStatus(huntId);\n if (st.status === 'ready') break;\n if (Date.now() - startedAt > 5 * 60 * 1000) {\n throw new Error('Host inventory build timed out after 5 minutes');\n }\n delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n }\n"
)
text = text.replace(
" const waitUntilReady = async (): Promise<boolean> => {\n // Poll inventory-status every 2s until 'ready' (or cancelled)\n setProgress('Host inventory is being prepared in the background');\n setLoading(true);\n for (;;) {\n await new Promise(r => setTimeout(r, 2000));\n if (cancelled) return false;\n try {\n const st = await network.inventoryStatus(selectedHuntId);\n if (cancelled) return false;\n if (st.status === 'ready') return true;\n // still building or none (job may not have started yet) - keep polling\n } catch { if (cancelled) return false; }\n }\n };\n",
" const waitUntilReady = async (): Promise<boolean> => {\n // Poll inventory-status with exponential backoff until 'ready' (or cancelled)\n setProgress('Host inventory is being prepared in the background');\n setLoading(true);\n let delayMs = 1500;\n const startedAt = Date.now();\n for (;;) {\n const jitter = Math.floor(Math.random() * 250);\n await new Promise(r => setTimeout(r, delayMs + jitter));\n if (cancelled) return false;\n try {\n const st = await network.inventoryStatus(selectedHuntId);\n if (cancelled) return false;\n if (st.status === 'ready') return true;\n if (Date.now() - startedAt > 5 * 60 * 1000) {\n setError('Host inventory build timed out. Please retry.');\n return false;\n }\n delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n // still building or none (job may not have started yet) - keep polling\n } catch {\n if (cancelled) return false;\n delayMs = Math.min(10000, Math.floor(delayMs * 1.5));\n }\n }\n };\n"
)
nm.write_text(text, encoding="utf-8")
print("Patched: config.py, scanner.py, keywords.py, job_queue.py, NetworkMap.tsx")

View File

@@ -1,207 +0,0 @@
from pathlib import Path
import re
root = Path(r"d:\Projects\Dev\ThreatHunt")
# ---------- config.py ----------
cfg = root / "backend/app/config.py"
text = cfg.read_text(encoding="utf-8")
marker = " JOB_QUEUE_CLEANUP_MAX_AGE_SECONDS: int = Field(\n default=3600, description=\"Age threshold for in-memory completed job cleanup\"\n )\n"
add = marker + "\n # -- Startup throttling ------------------------------------------------\n STARTUP_WARMUP_MAX_HUNTS: int = Field(\n default=5, description=\"Max hunts to warm inventory cache for at startup\"\n )\n STARTUP_REPROCESS_MAX_DATASETS: int = Field(\n default=25, description=\"Max unprocessed datasets to enqueue at startup\"\n )\n\n # -- Network API scale guards -----------------------------------------\n NETWORK_SUBGRAPH_MAX_HOSTS: int = Field(\n default=400, description=\"Hard cap for hosts returned by network subgraph endpoint\"\n )\n NETWORK_SUBGRAPH_MAX_EDGES: int = Field(\n default=3000, description=\"Hard cap for edges returned by network subgraph endpoint\"\n )\n"
if marker in text and "STARTUP_WARMUP_MAX_HUNTS" not in text:
text = text.replace(marker, add)
cfg.write_text(text, encoding="utf-8")
# ---------- job_queue.py ----------
jq = root / "backend/app/services/job_queue.py"
text = jq.read_text(encoding="utf-8")
# add helper methods after get_stats
anchor = " def get_stats(self) -> dict:\n by_status = {}\n for j in self._jobs.values():\n by_status[j.status.value] = by_status.get(j.status.value, 0) + 1\n return {\n \"total\": len(self._jobs),\n \"queued\": self._queue.qsize(),\n \"by_status\": by_status,\n \"workers\": self._max_workers,\n \"active_workers\": sum(1 for j in self._jobs.values() if j.status == JobStatus.RUNNING),\n }\n"
if "def is_backlogged(" not in text:
insert = anchor + "\n def is_backlogged(self) -> bool:\n return self._queue.qsize() >= settings.JOB_QUEUE_MAX_BACKLOG\n\n def can_accept(self, reserve: int = 0) -> bool:\n return (self._queue.qsize() + max(0, reserve)) < settings.JOB_QUEUE_MAX_BACKLOG\n"
text = text.replace(anchor, insert)
jq.write_text(text, encoding="utf-8")
# ---------- host_inventory.py keyset pagination ----------
hi = root / "backend/app/services/host_inventory.py"
text = hi.read_text(encoding="utf-8")
old = ''' batch_size = 5000
offset = 0
while True:
rr = await db.execute(
select(DatasetRow)
.where(DatasetRow.dataset_id == ds.id)
.order_by(DatasetRow.row_index)
.offset(offset).limit(batch_size)
)
rows = rr.scalars().all()
if not rows:
break
'''
new = ''' batch_size = 5000
last_row_index = -1
while True:
rr = await db.execute(
select(DatasetRow)
.where(DatasetRow.dataset_id == ds.id)
.where(DatasetRow.row_index > last_row_index)
.order_by(DatasetRow.row_index)
.limit(batch_size)
)
rows = rr.scalars().all()
if not rows:
break
'''
if old in text:
text = text.replace(old, new)
text = text.replace(" offset += batch_size\n if len(rows) < batch_size:\n break\n", " last_row_index = rows[-1].row_index\n if len(rows) < batch_size:\n break\n")
hi.write_text(text, encoding="utf-8")
# ---------- network.py add summary/subgraph + backpressure ----------
net = root / "backend/app/api/routes/network.py"
text = net.read_text(encoding="utf-8")
text = text.replace("from fastapi import APIRouter, Depends, HTTPException, Query", "from fastapi import APIRouter, Depends, HTTPException, Query")
if "from app.config import settings" not in text:
text = text.replace("from app.db import get_db\n", "from app.config import settings\nfrom app.db import get_db\n")
# add helpers and endpoints before inventory-status endpoint
if "def _build_summary" not in text:
helper_block = '''
def _build_summary(inv: dict, top_n: int = 20) -> dict:
hosts = inv.get("hosts", [])
conns = inv.get("connections", [])
top_hosts = sorted(hosts, key=lambda h: h.get("row_count", 0), reverse=True)[:top_n]
top_edges = sorted(conns, key=lambda c: c.get("count", 0), reverse=True)[:top_n]
return {
"stats": inv.get("stats", {}),
"top_hosts": [
{
"id": h.get("id"),
"hostname": h.get("hostname"),
"row_count": h.get("row_count", 0),
"ip_count": len(h.get("ips", [])),
"user_count": len(h.get("users", [])),
}
for h in top_hosts
],
"top_edges": top_edges,
}
def _build_subgraph(inv: dict, node_id: str | None, max_hosts: int, max_edges: int) -> dict:
hosts = inv.get("hosts", [])
conns = inv.get("connections", [])
max_hosts = max(1, min(max_hosts, settings.NETWORK_SUBGRAPH_MAX_HOSTS))
max_edges = max(1, min(max_edges, settings.NETWORK_SUBGRAPH_MAX_EDGES))
if node_id:
rel_edges = [c for c in conns if c.get("source") == node_id or c.get("target") == node_id]
rel_edges = sorted(rel_edges, key=lambda c: c.get("count", 0), reverse=True)[:max_edges]
ids = {node_id}
for c in rel_edges:
ids.add(c.get("source"))
ids.add(c.get("target"))
rel_hosts = [h for h in hosts if h.get("id") in ids][:max_hosts]
else:
rel_hosts = sorted(hosts, key=lambda h: h.get("row_count", 0), reverse=True)[:max_hosts]
allowed = {h.get("id") for h in rel_hosts}
rel_edges = [
c for c in sorted(conns, key=lambda c: c.get("count", 0), reverse=True)
if c.get("source") in allowed and c.get("target") in allowed
][:max_edges]
return {
"hosts": rel_hosts,
"connections": rel_edges,
"stats": {
**inv.get("stats", {}),
"subgraph_hosts": len(rel_hosts),
"subgraph_connections": len(rel_edges),
"truncated": len(rel_hosts) < len(hosts) or len(rel_edges) < len(conns),
},
}
@router.get("/summary")
async def get_inventory_summary(
hunt_id: str = Query(..., description="Hunt ID"),
top_n: int = Query(20, ge=1, le=200),
):
"""Return a lightweight summary view for large hunts."""
cached = inventory_cache.get(hunt_id)
if cached is None:
if not inventory_cache.is_building(hunt_id):
if job_queue.is_backlogged():
return JSONResponse(
status_code=202,
content={"status": "deferred", "message": "Queue busy, retry shortly"},
)
job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
return JSONResponse(status_code=202, content={"status": "building"})
return _build_summary(cached, top_n=top_n)
@router.get("/subgraph")
async def get_inventory_subgraph(
hunt_id: str = Query(..., description="Hunt ID"),
node_id: str | None = Query(None, description="Optional focal node"),
max_hosts: int = Query(200, ge=1, le=5000),
max_edges: int = Query(1500, ge=1, le=20000),
):
"""Return a bounded subgraph for scale-safe rendering."""
cached = inventory_cache.get(hunt_id)
if cached is None:
if not inventory_cache.is_building(hunt_id):
if job_queue.is_backlogged():
return JSONResponse(
status_code=202,
content={"status": "deferred", "message": "Queue busy, retry shortly"},
)
job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
return JSONResponse(status_code=202, content={"status": "building"})
return _build_subgraph(cached, node_id=node_id, max_hosts=max_hosts, max_edges=max_edges)
'''
text = text.replace("\n\n@router.get(\"/inventory-status\")", helper_block + "\n\n@router.get(\"/inventory-status\")")
# add backpressure in host-inventory enqueue points
text = text.replace(
" if not inventory_cache.is_building(hunt_id):\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)",
" if not inventory_cache.is_building(hunt_id):\n if job_queue.is_backlogged():\n return JSONResponse(status_code=202, content={\"status\": \"deferred\", \"message\": \"Queue busy, retry shortly\"})\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)"
)
text = text.replace(
" if not inventory_cache.is_building(hunt_id):\n logger.info(f\"Cache miss for {hunt_id}, triggering background build\")\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)",
" if not inventory_cache.is_building(hunt_id):\n logger.info(f\"Cache miss for {hunt_id}, triggering background build\")\n if job_queue.is_backlogged():\n return JSONResponse(status_code=202, content={\"status\": \"deferred\", \"message\": \"Queue busy, retry shortly\"})\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)"
)
net.write_text(text, encoding="utf-8")
# ---------- analysis.py backpressure on manual submit ----------
analysis = root / "backend/app/api/routes/analysis.py"
text = analysis.read_text(encoding="utf-8")
text = text.replace(
" job = job_queue.submit(jt, **params)\n return {\"job_id\": job.id, \"status\": job.status.value, \"job_type\": job_type}",
" if not job_queue.can_accept():\n raise HTTPException(status_code=429, detail=\"Job queue is busy. Retry shortly.\")\n job = job_queue.submit(jt, **params)\n return {\"job_id\": job.id, \"status\": job.status.value, \"job_type\": job_type}"
)
analysis.write_text(text, encoding="utf-8")
# ---------- main.py startup throttles ----------
main = root / "backend/app/main.py"
text = main.read_text(encoding="utf-8")
text = text.replace(
" for hid in hunt_ids:\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hid)\n if hunt_ids:\n logger.info(f\"Queued host inventory warm-up for {len(hunt_ids)} hunts\")",
" warm_hunts = hunt_ids[: settings.STARTUP_WARMUP_MAX_HUNTS]\n for hid in warm_hunts:\n job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hid)\n if warm_hunts:\n logger.info(f\"Queued host inventory warm-up for {len(warm_hunts)} hunts (total hunts with data: {len(hunt_ids)})\")"
)
text = text.replace(
" if unprocessed_ids:\n for ds_id in unprocessed_ids:\n job_queue.submit(JobType.TRIAGE, dataset_id=ds_id)\n job_queue.submit(JobType.ANOMALY, dataset_id=ds_id)\n job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=ds_id)\n job_queue.submit(JobType.IOC_EXTRACT, dataset_id=ds_id)\n logger.info(f\"Queued processing pipeline for {len(unprocessed_ids)} unprocessed datasets\")\n async with async_session_factory() as update_db:\n from sqlalchemy import update\n from app.db.models import Dataset\n await update_db.execute(\n update(Dataset)\n .where(Dataset.id.in_(unprocessed_ids))\n .values(processing_status=\"processing\")\n )\n await update_db.commit()",
" if unprocessed_ids:\n to_reprocess = unprocessed_ids[: settings.STARTUP_REPROCESS_MAX_DATASETS]\n for ds_id in to_reprocess:\n job_queue.submit(JobType.TRIAGE, dataset_id=ds_id)\n job_queue.submit(JobType.ANOMALY, dataset_id=ds_id)\n job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=ds_id)\n job_queue.submit(JobType.IOC_EXTRACT, dataset_id=ds_id)\n logger.info(f\"Queued processing pipeline for {len(to_reprocess)} datasets at startup (unprocessed total: {len(unprocessed_ids)})\")\n async with async_session_factory() as update_db:\n from sqlalchemy import update\n from app.db.models import Dataset\n await update_db.execute(\n update(Dataset)\n .where(Dataset.id.in_(to_reprocess))\n .values(processing_status=\"processing\")\n )\n await update_db.commit()"
)
main.write_text(text, encoding="utf-8")
print("Patched Phase 2 files")

View File

@@ -1,75 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
t=p.read_text(encoding='utf-8')
# default selection when hunt changes: first 3 datasets instead of all
old=''' datasets.list(0, 500, selectedHuntId).then(res => {
if (cancelled) return;
setDsList(res.datasets);
setSelectedDs(new Set(res.datasets.map(d => d.id)));
}).catch(() => {});
'''
new=''' datasets.list(0, 500, selectedHuntId).then(res => {
if (cancelled) return;
setDsList(res.datasets);
setSelectedDs(new Set(res.datasets.slice(0, 3).map(d => d.id)));
}).catch(() => {});
'''
if old not in t:
raise SystemExit('hunt-change dataset init block not found')
t=t.replace(old,new)
# insert dataset scope multi-select under hunt info
anchor=''' {!selectedHuntId && (
<Typography variant="caption" color="text.secondary" sx={{ mt: 0.5, display: 'block' }}>
All datasets will be scanned if no hunt is selected
</Typography>
)}
</Box>
{/* Theme selector */}
'''
insert=''' {!selectedHuntId && (
<Typography variant="caption" color="text.secondary" sx={{ mt: 0.5, display: 'block' }}>
Select a hunt to enable scoped scanning
</Typography>
)}
<FormControl size="small" fullWidth sx={{ mt: 1.2 }} disabled={!selectedHuntId || dsList.length === 0}>
<InputLabel id="aup-dataset-label">Datasets</InputLabel>
<Select
labelId="aup-dataset-label"
multiple
value={Array.from(selectedDs)}
label="Datasets"
renderValue={(selected) => `${(selected as string[]).length} selected`}
onChange={(e) => setSelectedDs(new Set(e.target.value as string[]))}
>
{dsList.map(d => (
<MenuItem key={d.id} value={d.id}>
<Checkbox size="small" checked={selectedDs.has(d.id)} />
<Typography variant="body2" sx={{ ml: 0.5 }}>
{d.name} ({d.row_count.toLocaleString()} rows)
</Typography>
</MenuItem>
))}
</Select>
</FormControl>
{selectedHuntId && dsList.length > 0 && (
<Stack direction="row" spacing={1} sx={{ mt: 1 }}>
<Button size="small" onClick={() => setSelectedDs(new Set(dsList.slice(0, 3).map(d => d.id)))}>Top 3</Button>
<Button size="small" onClick={() => setSelectedDs(new Set(dsList.map(d => d.id)))}>All</Button>
<Button size="small" onClick={() => setSelectedDs(new Set())}>Clear</Button>
</Stack>
)}
</Box>
{/* Theme selector */}
'''
if anchor not in t:
raise SystemExit('dataset scope anchor not found')
t=t.replace(anchor,insert)
p.write_text(t,encoding='utf-8')
print('added AUP dataset multi-select scoping and safer defaults')

View File

@@ -1,182 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
t=p.read_text(encoding='utf-8')
# 1) Extend ScanHit dataclass
old='''@dataclass
class ScanHit:
theme_name: str
theme_color: str
keyword: str
source_type: str # dataset_row | hunt | annotation | message
source_id: str | int
field: str
matched_value: str
row_index: int | None = None
dataset_name: str | None = None
'''
new='''@dataclass
class ScanHit:
theme_name: str
theme_color: str
keyword: str
source_type: str # dataset_row | hunt | annotation | message
source_id: str | int
field: str
matched_value: str
row_index: int | None = None
dataset_name: str | None = None
hostname: str | None = None
username: str | None = None
'''
if old not in t:
raise SystemExit('ScanHit dataclass block not found')
t=t.replace(old,new)
# 2) Add helper to infer hostname/user from a row
insert_after='''BATCH_SIZE = 200
@dataclass
class ScanHit:
'''
helper='''BATCH_SIZE = 200
def _infer_hostname_and_user(data: dict) -> tuple[str | None, str | None]:
"""Best-effort extraction of hostname and user from a dataset row."""
if not data:
return None, None
host_keys = (
'hostname', 'host_name', 'host', 'computer_name', 'computer',
'fqdn', 'client_id', 'agent_id', 'endpoint_id',
)
user_keys = (
'username', 'user_name', 'user', 'account_name',
'logged_in_user', 'samaccountname', 'sam_account_name',
)
def pick(keys):
for k in keys:
for actual_key, v in data.items():
if actual_key.lower() == k and v not in (None, ''):
return str(v)
return None
return pick(host_keys), pick(user_keys)
@dataclass
class ScanHit:
'''
if insert_after in t and '_infer_hostname_and_user' not in t:
t=t.replace(insert_after,helper)
# 3) Extend _match_text signature and ScanHit construction
old_sig=''' def _match_text(
self,
text: str,
patterns: dict,
source_type: str,
source_id: str | int,
field_name: str,
hits: list[ScanHit],
row_index: int | None = None,
dataset_name: str | None = None,
) -> None:
'''
new_sig=''' def _match_text(
self,
text: str,
patterns: dict,
source_type: str,
source_id: str | int,
field_name: str,
hits: list[ScanHit],
row_index: int | None = None,
dataset_name: str | None = None,
hostname: str | None = None,
username: str | None = None,
) -> None:
'''
if old_sig not in t:
raise SystemExit('_match_text signature not found')
t=t.replace(old_sig,new_sig)
old_hit=''' hits.append(ScanHit(
theme_name=theme_name,
theme_color=theme_color,
keyword=kw_value,
source_type=source_type,
source_id=source_id,
field=field_name,
matched_value=matched_preview,
row_index=row_index,
dataset_name=dataset_name,
))
'''
new_hit=''' hits.append(ScanHit(
theme_name=theme_name,
theme_color=theme_color,
keyword=kw_value,
source_type=source_type,
source_id=source_id,
field=field_name,
matched_value=matched_preview,
row_index=row_index,
dataset_name=dataset_name,
hostname=hostname,
username=username,
))
'''
if old_hit not in t:
raise SystemExit('ScanHit append block not found')
t=t.replace(old_hit,new_hit)
# 4) Pass inferred hostname/username in dataset scan path
old_call=''' for row in rows:
result.rows_scanned += 1
data = row.data or {}
for col_name, cell_value in data.items():
if cell_value is None:
continue
text = str(cell_value)
self._match_text(
text,
patterns,
"dataset_row",
row.id,
col_name,
result.hits,
row_index=row.row_index,
dataset_name=ds_name,
)
'''
new_call=''' for row in rows:
result.rows_scanned += 1
data = row.data or {}
hostname, username = _infer_hostname_and_user(data)
for col_name, cell_value in data.items():
if cell_value is None:
continue
text = str(cell_value)
self._match_text(
text,
patterns,
"dataset_row",
row.id,
col_name,
result.hits,
row_index=row.row_index,
dataset_name=ds_name,
hostname=hostname,
username=username,
)
'''
if old_call not in t:
raise SystemExit('dataset _match_text call block not found')
t=t.replace(old_call,new_call)
p.write_text(t,encoding='utf-8')
print('updated scanner hits with hostname+username context')
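# --- Illustrative usage of the helper added above -------------------------------
# Key matching is case-insensitive and first-match-wins across the preferred
# key order, e.g.:
#   _infer_hostname_and_user({"Fqdn": "ws01.corp.local", "UserName": "alice"})
#   -> ("ws01.corp.local", "alice")
#   _infer_hostname_and_user({}) -> (None, None)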

View File

@@ -1,32 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
t=p.read_text(encoding='utf-8')
old='''class ScanHit(BaseModel):
theme_name: str
theme_color: str
keyword: str
source_type: str
source_id: str | int
field: str
matched_value: str
row_index: int | None = None
dataset_name: str | None = None
'''
new='''class ScanHit(BaseModel):
theme_name: str
theme_color: str
keyword: str
source_type: str
source_id: str | int
field: str
matched_value: str
row_index: int | None = None
dataset_name: str | None = None
hostname: str | None = None
username: str | None = None
'''
if old not in t:
raise SystemExit('ScanHit pydantic model block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('extended API ScanHit model with hostname+username')

View File

@@ -1,21 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/api/client.ts')
t=p.read_text(encoding='utf-8')
old='''export interface ScanHit {
theme_name: string; theme_color: string; keyword: string;
source_type: string; source_id: string | number; field: string;
matched_value: string; row_index: number | null; dataset_name: string | null;
}
'''
new='''export interface ScanHit {
theme_name: string; theme_color: string; keyword: string;
source_type: string; source_id: string | number; field: string;
matched_value: string; row_index: number | null; dataset_name: string | null;
hostname?: string | null; username?: string | null;
}
'''
if old not in t:
raise SystemExit('frontend ScanHit interface block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('extended frontend ScanHit type with hostname+username')

View File

@@ -1,57 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
t=p.read_text(encoding='utf-8')
# add fast guard against unscoped global dataset scans
insert_after='''async def run_scan(body: ScanRequest, db: AsyncSession = Depends(get_db)):\n scanner = KeywordScanner(db)\n\n'''
if insert_after not in t:
raise SystemExit('run_scan header block not found')
if 'Select at least one dataset' not in t:
guard=''' if not body.dataset_ids and not body.scan_hunts and not body.scan_annotations and not body.scan_messages:\n raise HTTPException(400, "Select at least one dataset or enable additional sources (hunts/annotations/messages)")\n\n'''
t=t.replace(insert_after, insert_after+guard)
old=''' if missing:
missing_entries: list[dict] = []
for dataset_id in missing:
partial = await scanner.scan(dataset_ids=[dataset_id], theme_ids=body.theme_ids)
keyword_scan_cache.put(dataset_id, partial)
missing_entries.append({"result": partial, "built_at": None})
merged = _merge_cached_results(
cached_entries + missing_entries,
allowed_theme_names if body.theme_ids else None,
)
return {
"total_hits": merged["total_hits"],
"hits": merged["hits"],
"themes_scanned": len(themes),
"keywords_scanned": keywords_scanned,
"rows_scanned": merged["rows_scanned"],
"cache_used": len(cached_entries) > 0,
"cache_status": "partial" if cached_entries else "miss",
"cached_at": merged["cached_at"],
}
'''
new=''' if missing:
partial = await scanner.scan(dataset_ids=missing, theme_ids=body.theme_ids)
merged = _merge_cached_results(
cached_entries + [{"result": partial, "built_at": None}],
allowed_theme_names if body.theme_ids else None,
)
return {
"total_hits": merged["total_hits"],
"hits": merged["hits"],
"themes_scanned": len(themes),
"keywords_scanned": keywords_scanned,
"rows_scanned": merged["rows_scanned"],
"cache_used": len(cached_entries) > 0,
"cache_status": "partial" if cached_entries else "miss",
"cached_at": merged["cached_at"],
}
'''
if old not in t:
raise SystemExit('partial-cache missing block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('hardened keywords scan scope + optimized missing-cache path')

View File

@@ -1,18 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
t=p.read_text(encoding='utf-8')
old=''' SCANNER_MAX_ROWS_PER_SCAN: int = Field(
default=300000,
description="Global row budget for a single AUP scan request (0 = unlimited)",
)
'''
new=''' SCANNER_MAX_ROWS_PER_SCAN: int = Field(
default=120000,
description="Global row budget for a single AUP scan request (0 = unlimited)",
)
'''
if old not in t:
raise SystemExit('SCANNER_MAX_ROWS_PER_SCAN block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('reduced SCANNER_MAX_ROWS_PER_SCAN default to 120000')

View File

@@ -1,42 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
t=p.read_text(encoding='utf-8')
old='''const RESULT_COLUMNS: GridColDef[] = [
{
field: 'theme_name', headerName: 'Theme', width: 140,
renderCell: (params) => (
<Chip label={params.value} size="small"
sx={{ bgcolor: params.row.theme_color, color: '#fff', fontWeight: 600 }} />
),
},
{ field: 'keyword', headerName: 'Keyword', width: 140 },
{ field: 'source_type', headerName: 'Source', width: 120 },
{ field: 'dataset_name', headerName: 'Dataset', width: 150 },
{ field: 'field', headerName: 'Field', width: 130 },
{ field: 'matched_value', headerName: 'Matched Value', flex: 1, minWidth: 200 },
{ field: 'row_index', headerName: 'Row #', width: 80, type: 'number' },
];
'''
new='''const RESULT_COLUMNS: GridColDef[] = [
{
field: 'theme_name', headerName: 'Theme', width: 140,
renderCell: (params) => (
<Chip label={params.value} size="small"
sx={{ bgcolor: params.row.theme_color, color: '#fff', fontWeight: 600 }} />
),
},
{ field: 'keyword', headerName: 'Keyword', width: 140 },
{ field: 'dataset_name', headerName: 'Dataset', width: 170 },
{ field: 'hostname', headerName: 'Hostname', width: 170, valueGetter: (v, row) => row.hostname || '' },
{ field: 'username', headerName: 'User', width: 160, valueGetter: (v, row) => row.username || '' },
{ field: 'matched_value', headerName: 'Matched Value', flex: 1, minWidth: 220 },
{ field: 'field', headerName: 'Field', width: 130 },
{ field: 'source_type', headerName: 'Source', width: 120 },
{ field: 'row_index', headerName: 'Row #', width: 90, type: 'number' },
];
'''
if old not in t:
raise SystemExit('RESULT_COLUMNS block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated AUP results grid columns with dataset/hostname/user/matched value focus')

View File

@@ -1,40 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
t=p.read_text(encoding='utf-8')
t=t.replace(' const [scanHunts, setScanHunts] = useState(true);',' const [scanHunts, setScanHunts] = useState(false);')
t=t.replace(' const [scanAnnotations, setScanAnnotations] = useState(true);',' const [scanAnnotations, setScanAnnotations] = useState(false);')
t=t.replace(' const [scanMessages, setScanMessages] = useState(true);',' const [scanMessages, setScanMessages] = useState(false);')
t=t.replace(' scan_messages: scanMessages,\n });',' scan_messages: scanMessages,\n prefer_cache: true,\n });')
# add cache chip in summary alert
old=''' {scanResult && (
<Alert severity={scanResult.total_hits > 0 ? 'warning' : 'success'} sx={{ py: 0.5 }}>
<strong>{scanResult.total_hits}</strong> hits across{' '}
<strong>{scanResult.rows_scanned}</strong> rows |{' '}
{scanResult.themes_scanned} themes, {scanResult.keywords_scanned} keywords scanned
</Alert>
)}
'''
new=''' {scanResult && (
<Alert severity={scanResult.total_hits > 0 ? 'warning' : 'success'} sx={{ py: 0.5 }}>
<strong>{scanResult.total_hits}</strong> hits across{' '}
<strong>{scanResult.rows_scanned}</strong> rows |{' '}
{scanResult.themes_scanned} themes, {scanResult.keywords_scanned} keywords scanned
{scanResult.cache_status && (
<Chip
size="small"
label={scanResult.cache_status === 'hit' ? 'Cached' : 'Live'}
sx={{ ml: 1, height: 20 }}
color={scanResult.cache_status === 'hit' ? 'success' : 'default'}
variant="outlined"
/>
)}
</Alert>
)}
'''
if old in t:
t=t.replace(old,new)
else:
print('warning: summary block not replaced')
p.write_text(t,encoding='utf-8')
print('updated AUPScanner.tsx')

View File

@@ -1,36 +0,0 @@
from pathlib import Path
import re
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/api/client.ts')
t=p.read_text(encoding='utf-8')
# Add HuntProgress interface after Hunt interface
if 'export interface HuntProgress' not in t:
insert = '''export interface HuntProgress {
hunt_id: string;
status: 'idle' | 'processing' | 'ready';
progress_percent: number;
dataset_total: number;
dataset_completed: number;
dataset_processing: number;
dataset_errors: number;
active_jobs: number;
queued_jobs: number;
network_status: 'none' | 'building' | 'ready';
stages: Record<string, any>;
}
'''
t=t.replace('export interface Hunt {\n id: string; name: string; description: string | null; status: string;\n owner_id: string | null; created_at: string; updated_at: string;\n dataset_count: number; hypothesis_count: number;\n}\n\n', 'export interface Hunt {\n id: string; name: string; description: string | null; status: string;\n owner_id: string | null; created_at: string; updated_at: string;\n dataset_count: number; hypothesis_count: number;\n}\n\n'+insert)
# Add hunts.progress method
if 'progress: (id: string)' not in t:
t=t.replace(" delete: (id: string) => api(`/api/hunts/${id}`, { method: 'DELETE' }),\n};", " delete: (id: string) => api(`/api/hunts/${id}`, { method: 'DELETE' }),\n progress: (id: string) => api<HuntProgress>(`/api/hunts/${id}/progress`),\n};")
# Extend ScanResponse
if 'cache_used?: boolean' not in t:
t=t.replace('export interface ScanResponse {\n total_hits: number; hits: ScanHit[]; themes_scanned: number;\n keywords_scanned: number; rows_scanned: number;\n}\n', 'export interface ScanResponse {\n total_hits: number; hits: ScanHit[]; themes_scanned: number;\n keywords_scanned: number; rows_scanned: number;\n cache_used?: boolean; cache_status?: string; cached_at?: string | null;\n}\n')
# Extend keywords.scan opts
t=t.replace(' scan_hunts?: boolean; scan_annotations?: boolean; scan_messages?: boolean;\n }) =>', ' scan_hunts?: boolean; scan_annotations?: boolean; scan_messages?: boolean;\n prefer_cache?: boolean; force_rescan?: boolean;\n }) =>')
p.write_text(t,encoding='utf-8')
print('updated client.ts')

View File

@@ -1,20 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
t=p.read_text(encoding='utf-8')
anchor=''' STARTUP_REPROCESS_MAX_DATASETS: int = Field(
default=25, description="Max unprocessed datasets to enqueue at startup"
)
'''
insert=''' STARTUP_REPROCESS_MAX_DATASETS: int = Field(
default=25, description="Max unprocessed datasets to enqueue at startup"
)
STARTUP_RECONCILE_STALE_TASKS: bool = Field(
default=True,
description="Mark stale queued/running processing tasks as failed on startup",
)
'''
if anchor not in t:
raise SystemExit('startup anchor not found')
t=t.replace(anchor,insert)
p.write_text(t,encoding='utf-8')
print('updated config with STARTUP_RECONCILE_STALE_TASKS')

View File

@@ -1,39 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/datasets.py')
t=p.read_text(encoding='utf-8')
if 'from app.services.scanner import keyword_scan_cache' not in t:
t=t.replace('from app.services.host_inventory import inventory_cache','from app.services.host_inventory import inventory_cache\nfrom app.services.scanner import keyword_scan_cache')
old='''@router.delete(
"/{dataset_id}",
summary="Delete a dataset",
)
async def delete_dataset(
dataset_id: str,
db: AsyncSession = Depends(get_db),
):
repo = DatasetRepository(db)
deleted = await repo.delete_dataset(dataset_id)
if not deleted:
raise HTTPException(status_code=404, detail="Dataset not found")
return {"message": "Dataset deleted", "id": dataset_id}
'''
new='''@router.delete(
"/{dataset_id}",
summary="Delete a dataset",
)
async def delete_dataset(
dataset_id: str,
db: AsyncSession = Depends(get_db),
):
repo = DatasetRepository(db)
deleted = await repo.delete_dataset(dataset_id)
if not deleted:
raise HTTPException(status_code=404, detail="Dataset not found")
keyword_scan_cache.invalidate_dataset(dataset_id)
return {"message": "Dataset deleted", "id": dataset_id}
'''
if old not in t:
raise SystemExit('delete block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated datasets.py')

View File

@@ -1,110 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/datasets.py')
t=p.read_text(encoding='utf-8')
if 'ProcessingTask' not in t:
    t=t.replace('from app.db.models import', 'from app.db.models import ProcessingTask\n# from app.db.models import', 1)
# collapse the temporary two-line import back into a proper single-line import
if '# from app.db.models import' in t:
    t=t.replace('from app.db.models import ProcessingTask\n# from app.db.models import', 'from app.db.models import ProcessingTask,', 1)
old=''' # 1. AI Triage (chains to HOST_PROFILE automatically on completion)
job_queue.submit(JobType.TRIAGE, dataset_id=dataset.id)
jobs_queued.append("triage")
# 2. Anomaly detection (embedding-based outlier detection)
job_queue.submit(JobType.ANOMALY, dataset_id=dataset.id)
jobs_queued.append("anomaly")
# 3. AUP keyword scan
job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=dataset.id)
jobs_queued.append("keyword_scan")
# 4. IOC extraction
job_queue.submit(JobType.IOC_EXTRACT, dataset_id=dataset.id)
jobs_queued.append("ioc_extract")
# 5. Host inventory (network map) - requires hunt_id
if hunt_id:
inventory_cache.invalidate(hunt_id)
job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
jobs_queued.append("host_inventory")
'''
new=''' task_rows: list[ProcessingTask] = []
# 1. AI Triage (chains to HOST_PROFILE automatically on completion)
triage_job = job_queue.submit(JobType.TRIAGE, dataset_id=dataset.id)
jobs_queued.append("triage")
task_rows.append(ProcessingTask(
hunt_id=hunt_id,
dataset_id=dataset.id,
job_id=triage_job.id,
stage="triage",
status="queued",
progress=0.0,
message="Queued",
))
# 2. Anomaly detection (embedding-based outlier detection)
anomaly_job = job_queue.submit(JobType.ANOMALY, dataset_id=dataset.id)
jobs_queued.append("anomaly")
task_rows.append(ProcessingTask(
hunt_id=hunt_id,
dataset_id=dataset.id,
job_id=anomaly_job.id,
stage="anomaly",
status="queued",
progress=0.0,
message="Queued",
))
# 3. AUP keyword scan
kw_job = job_queue.submit(JobType.KEYWORD_SCAN, dataset_id=dataset.id)
jobs_queued.append("keyword_scan")
task_rows.append(ProcessingTask(
hunt_id=hunt_id,
dataset_id=dataset.id,
job_id=kw_job.id,
stage="keyword_scan",
status="queued",
progress=0.0,
message="Queued",
))
# 4. IOC extraction
ioc_job = job_queue.submit(JobType.IOC_EXTRACT, dataset_id=dataset.id)
jobs_queued.append("ioc_extract")
task_rows.append(ProcessingTask(
hunt_id=hunt_id,
dataset_id=dataset.id,
job_id=ioc_job.id,
stage="ioc_extract",
status="queued",
progress=0.0,
message="Queued",
))
# 5. Host inventory (network map) - requires hunt_id
if hunt_id:
inventory_cache.invalidate(hunt_id)
inv_job = job_queue.submit(JobType.HOST_INVENTORY, hunt_id=hunt_id)
jobs_queued.append("host_inventory")
task_rows.append(ProcessingTask(
hunt_id=hunt_id,
dataset_id=dataset.id,
job_id=inv_job.id,
stage="host_inventory",
status="queued",
progress=0.0,
message="Queued",
))
if task_rows:
db.add_all(task_rows)
await db.flush()
'''
if old not in t:
raise SystemExit('queue block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated datasets upload queue + processing tasks')

View File

@@ -1,254 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/hunts.py')
new='''"""API routes for hunt management."""
import logging
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import get_db
from app.db.models import Hunt, Dataset
from app.services.job_queue import job_queue
from app.services.host_inventory import inventory_cache
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/hunts", tags=["hunts"])
class HuntCreate(BaseModel):
name: str = Field(..., max_length=256)
description: str | None = None
class HuntUpdate(BaseModel):
name: str | None = None
description: str | None = None
status: str | None = None
class HuntResponse(BaseModel):
id: str
name: str
description: str | None
status: str
owner_id: str | None
created_at: str
updated_at: str
dataset_count: int = 0
hypothesis_count: int = 0
class HuntListResponse(BaseModel):
hunts: list[HuntResponse]
total: int
class HuntProgressResponse(BaseModel):
hunt_id: str
status: str
progress_percent: float
dataset_total: int
dataset_completed: int
dataset_processing: int
dataset_errors: int
active_jobs: int
queued_jobs: int
network_status: str
stages: dict
@router.post("", response_model=HuntResponse, summary="Create a new hunt")
async def create_hunt(body: HuntCreate, db: AsyncSession = Depends(get_db)):
hunt = Hunt(name=body.name, description=body.description)
db.add(hunt)
await db.flush()
return HuntResponse(
id=hunt.id,
name=hunt.name,
description=hunt.description,
status=hunt.status,
owner_id=hunt.owner_id,
created_at=hunt.created_at.isoformat(),
updated_at=hunt.updated_at.isoformat(),
)
@router.get("", response_model=HuntListResponse, summary="List hunts")
async def list_hunts(
status: str | None = Query(None),
limit: int = Query(50, ge=1, le=500),
offset: int = Query(0, ge=0),
db: AsyncSession = Depends(get_db),
):
stmt = select(Hunt).order_by(Hunt.updated_at.desc())
if status:
stmt = stmt.where(Hunt.status == status)
stmt = stmt.limit(limit).offset(offset)
result = await db.execute(stmt)
hunts = result.scalars().all()
count_stmt = select(func.count(Hunt.id))
if status:
count_stmt = count_stmt.where(Hunt.status == status)
total = (await db.execute(count_stmt)).scalar_one()
return HuntListResponse(
hunts=[
HuntResponse(
id=h.id,
name=h.name,
description=h.description,
status=h.status,
owner_id=h.owner_id,
created_at=h.created_at.isoformat(),
updated_at=h.updated_at.isoformat(),
dataset_count=len(h.datasets) if h.datasets else 0,
hypothesis_count=len(h.hypotheses) if h.hypotheses else 0,
)
for h in hunts
],
total=total,
)
@router.get("/{hunt_id}", response_model=HuntResponse, summary="Get hunt details")
async def get_hunt(hunt_id: str, db: AsyncSession = Depends(get_db)):
result = await db.execute(select(Hunt).where(Hunt.id == hunt_id))
hunt = result.scalar_one_or_none()
if not hunt:
raise HTTPException(status_code=404, detail="Hunt not found")
return HuntResponse(
id=hunt.id,
name=hunt.name,
description=hunt.description,
status=hunt.status,
owner_id=hunt.owner_id,
created_at=hunt.created_at.isoformat(),
updated_at=hunt.updated_at.isoformat(),
dataset_count=len(hunt.datasets) if hunt.datasets else 0,
hypothesis_count=len(hunt.hypotheses) if hunt.hypotheses else 0,
)
@router.get("/{hunt_id}/progress", response_model=HuntProgressResponse, summary="Get hunt processing progress")
async def get_hunt_progress(hunt_id: str, db: AsyncSession = Depends(get_db)):
hunt = await db.get(Hunt, hunt_id)
if not hunt:
raise HTTPException(status_code=404, detail="Hunt not found")
ds_rows = await db.execute(
select(Dataset.id, Dataset.processing_status)
.where(Dataset.hunt_id == hunt_id)
)
datasets = ds_rows.all()
dataset_ids = {row[0] for row in datasets}
dataset_total = len(datasets)
dataset_completed = sum(1 for _, st in datasets if st == "completed")
dataset_errors = sum(1 for _, st in datasets if st == "completed_with_errors")
dataset_processing = max(0, dataset_total - dataset_completed - dataset_errors)
jobs = job_queue.list_jobs(limit=5000)
relevant_jobs = [
j for j in jobs
if j.get("params", {}).get("hunt_id") == hunt_id
or j.get("params", {}).get("dataset_id") in dataset_ids
]
active_jobs = sum(1 for j in relevant_jobs if j.get("status") == "running")
queued_jobs = sum(1 for j in relevant_jobs if j.get("status") == "queued")
if inventory_cache.get(hunt_id) is not None:
network_status = "ready"
network_ratio = 1.0
elif inventory_cache.is_building(hunt_id):
network_status = "building"
network_ratio = 0.5
else:
network_status = "none"
network_ratio = 0.0
dataset_ratio = ((dataset_completed + dataset_errors) / dataset_total) if dataset_total > 0 else 1.0
overall_ratio = min(1.0, (dataset_ratio * 0.85) + (network_ratio * 0.15))
progress_percent = round(overall_ratio * 100.0, 1)
status = "ready"
if dataset_total == 0:
status = "idle"
elif progress_percent < 100:
status = "processing"
stages = {
"datasets": {
"total": dataset_total,
"completed": dataset_completed,
"processing": dataset_processing,
"errors": dataset_errors,
"percent": round(dataset_ratio * 100.0, 1),
},
"network": {
"status": network_status,
"percent": round(network_ratio * 100.0, 1),
},
"jobs": {
"active": active_jobs,
"queued": queued_jobs,
"total_seen": len(relevant_jobs),
},
}
return HuntProgressResponse(
hunt_id=hunt_id,
status=status,
progress_percent=progress_percent,
dataset_total=dataset_total,
dataset_completed=dataset_completed,
dataset_processing=dataset_processing,
dataset_errors=dataset_errors,
active_jobs=active_jobs,
queued_jobs=queued_jobs,
network_status=network_status,
stages=stages,
)
@router.put("/{hunt_id}", response_model=HuntResponse, summary="Update a hunt")
async def update_hunt(
hunt_id: str, body: HuntUpdate, db: AsyncSession = Depends(get_db)
):
result = await db.execute(select(Hunt).where(Hunt.id == hunt_id))
hunt = result.scalar_one_or_none()
if not hunt:
raise HTTPException(status_code=404, detail="Hunt not found")
if body.name is not None:
hunt.name = body.name
if body.description is not None:
hunt.description = body.description
if body.status is not None:
hunt.status = body.status
await db.flush()
return HuntResponse(
id=hunt.id,
name=hunt.name,
description=hunt.description,
status=hunt.status,
owner_id=hunt.owner_id,
created_at=hunt.created_at.isoformat(),
updated_at=hunt.updated_at.isoformat(),
)
@router.delete("/{hunt_id}", summary="Delete a hunt")
async def delete_hunt(hunt_id: str, db: AsyncSession = Depends(get_db)):
result = await db.execute(select(Hunt).where(Hunt.id == hunt_id))
hunt = result.scalar_one_or_none()
if not hunt:
raise HTTPException(status_code=404, detail="Hunt not found")
await db.delete(hunt)
return {"message": "Hunt deleted", "id": hunt_id}
'''
p.write_text(new,encoding='utf-8')
print('updated hunts.py')

View File

@@ -1,102 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/hunts.py')
t=p.read_text(encoding='utf-8')
if 'ProcessingTask' not in t:
t=t.replace('from app.db.models import Hunt, Dataset','from app.db.models import Hunt, Dataset, ProcessingTask')
old=''' jobs = job_queue.list_jobs(limit=5000)
relevant_jobs = [
j for j in jobs
if j.get("params", {}).get("hunt_id") == hunt_id
or j.get("params", {}).get("dataset_id") in dataset_ids
]
active_jobs = sum(1 for j in relevant_jobs if j.get("status") == "running")
queued_jobs = sum(1 for j in relevant_jobs if j.get("status") == "queued")
if inventory_cache.get(hunt_id) is not None:
'''
new=''' jobs = job_queue.list_jobs(limit=5000)
relevant_jobs = [
j for j in jobs
if j.get("params", {}).get("hunt_id") == hunt_id
or j.get("params", {}).get("dataset_id") in dataset_ids
]
active_jobs_mem = sum(1 for j in relevant_jobs if j.get("status") == "running")
queued_jobs_mem = sum(1 for j in relevant_jobs if j.get("status") == "queued")
task_rows = await db.execute(
select(ProcessingTask.stage, ProcessingTask.status, ProcessingTask.progress)
.where(ProcessingTask.hunt_id == hunt_id)
)
tasks = task_rows.all()
task_total = len(tasks)
task_done = sum(1 for _, st, _ in tasks if st in ("completed", "failed", "cancelled"))
task_running = sum(1 for _, st, _ in tasks if st == "running")
task_queued = sum(1 for _, st, _ in tasks if st == "queued")
task_ratio = (task_done / task_total) if task_total > 0 else None
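# Prefer whichever source reports more activity: the in-memory queue is empty right after a restart, while persisted task rows survive it.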
active_jobs = max(active_jobs_mem, task_running)
queued_jobs = max(queued_jobs_mem, task_queued)
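# Roll persisted task rows up per pipeline stage so the API can expose per-stage totals and percent complete.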
stage_rollup: dict[str, dict] = {}
for stage, status, progress in tasks:
bucket = stage_rollup.setdefault(stage, {"total": 0, "done": 0, "running": 0, "queued": 0, "progress_sum": 0.0})
bucket["total"] += 1
if status in ("completed", "failed", "cancelled"):
bucket["done"] += 1
elif status == "running":
bucket["running"] += 1
elif status == "queued":
bucket["queued"] += 1
bucket["progress_sum"] += float(progress or 0.0)
for stage_name, bucket in stage_rollup.items():
total = max(1, bucket["total"])
bucket["percent"] = round(bucket["progress_sum"] / total, 1)
if inventory_cache.get(hunt_id) is not None:
'''
if old not in t:
raise SystemExit('job block not found')
t=t.replace(old,new)
old2=''' dataset_ratio = ((dataset_completed + dataset_errors) / dataset_total) if dataset_total > 0 else 1.0
overall_ratio = min(1.0, (dataset_ratio * 0.85) + (network_ratio * 0.15))
progress_percent = round(overall_ratio * 100.0, 1)
'''
new2=''' dataset_ratio = ((dataset_completed + dataset_errors) / dataset_total) if dataset_total > 0 else 1.0
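# When persistent task telemetry exists, re-weight the blend: datasets 50%, tasks 35%, network inventory 15%.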
if task_ratio is None:
overall_ratio = min(1.0, (dataset_ratio * 0.85) + (network_ratio * 0.15))
else:
overall_ratio = min(1.0, (dataset_ratio * 0.50) + (task_ratio * 0.35) + (network_ratio * 0.15))
progress_percent = round(overall_ratio * 100.0, 1)
'''
if old2 not in t:
raise SystemExit('ratio block not found')
t=t.replace(old2,new2)
old3=''' "jobs": {
"active": active_jobs,
"queued": queued_jobs,
"total_seen": len(relevant_jobs),
},
}
'''
new3=''' "jobs": {
"active": active_jobs,
"queued": queued_jobs,
"total_seen": len(relevant_jobs),
"task_total": task_total,
"task_done": task_done,
"task_percent": round((task_ratio or 0.0) * 100.0, 1) if task_total else None,
},
"task_stages": stage_rollup,
}
'''
if old3 not in t:
raise SystemExit('stages jobs block not found')
t=t.replace(old3,new3)
p.write_text(t,encoding='utf-8')
print('updated hunt progress to merge persistent processing tasks')

View File

@@ -1,46 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
t=p.read_text(encoding='utf-8')
old='''async def _handle_keyword_scan(job: Job):
"""AUP keyword scan handler."""
from app.db import async_session_factory
from app.services.scanner import KeywordScanner
dataset_id = job.params.get("dataset_id")
job.message = f"Running AUP keyword scan on dataset {dataset_id}"
async with async_session_factory() as db:
scanner = KeywordScanner(db)
result = await scanner.scan(dataset_ids=[dataset_id])
hits = result.get("total_hits", 0)
job.message = f"Keyword scan complete: {hits} hits"
logger.info(f"Keyword scan for {dataset_id}: {hits} hits across {result.get('rows_scanned', 0)} rows")
return {"dataset_id": dataset_id, "total_hits": hits, "rows_scanned": result.get("rows_scanned", 0)}
'''
new='''async def _handle_keyword_scan(job: Job):
"""AUP keyword scan handler."""
from app.db import async_session_factory
from app.services.scanner import KeywordScanner, keyword_scan_cache
dataset_id = job.params.get("dataset_id")
job.message = f"Running AUP keyword scan on dataset {dataset_id}"
async with async_session_factory() as db:
scanner = KeywordScanner(db)
result = await scanner.scan(dataset_ids=[dataset_id])
# Cache dataset-only result for fast API reuse
if dataset_id:
keyword_scan_cache.put(dataset_id, result)
hits = result.get("total_hits", 0)
job.message = f"Keyword scan complete: {hits} hits"
logger.info(f"Keyword scan for {dataset_id}: {hits} hits across {result.get('rows_scanned', 0)} rows")
return {"dataset_id": dataset_id, "total_hits": hits, "rows_scanned": result.get("rows_scanned", 0)}
'''
if old not in t:
raise SystemExit('target block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated job_queue keyword scan handler')

View File

@@ -1,13 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
t=p.read_text(encoding='utf-8')
marker='''def register_all_handlers():
"""Register all job handlers and completion callbacks."""
'''
ins='''\n\nasync def reconcile_stale_processing_tasks() -> int:\n """Mark queued/running processing tasks from prior runs as failed."""\n from datetime import datetime, timezone\n from sqlalchemy import update\n\n try:\n from app.db import async_session_factory\n from app.db.models import ProcessingTask\n\n now = datetime.now(timezone.utc)\n async with async_session_factory() as db:\n result = await db.execute(\n update(ProcessingTask)\n .where(ProcessingTask.status.in_([\"queued\", \"running\"]))\n .values(\n status=\"failed\",\n error=\"Recovered after service restart before task completion\",\n message=\"Recovered stale task after restart\",\n completed_at=now,\n )\n )\n await db.commit()\n updated = int(result.rowcount or 0)\n\n if updated:\n logger.warning(\n \"Reconciled %d stale processing tasks (queued/running -> failed) during startup\",\n updated,\n )\n return updated\n except Exception as e:\n logger.warning(f\"Failed to reconcile stale processing tasks: {e}\")\n return 0\n\n\n'''
if ins.strip() not in t:
if marker not in t:
raise SystemExit('register marker not found')
t=t.replace(marker,ins+marker)
p.write_text(t,encoding='utf-8')
print('added reconcile_stale_processing_tasks to job_queue')

View File

@@ -1,64 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
t=p.read_text(encoding='utf-8')
ins='''\n\nasync def _sync_processing_task(job: Job):\n """Persist latest job state into processing_tasks (if linked by job_id)."""\n from datetime import datetime, timezone\n from sqlalchemy import update\n\n try:\n from app.db import async_session_factory\n from app.db.models import ProcessingTask\n\n values = {\n "status": job.status.value,\n "progress": float(job.progress),\n "message": job.message,\n "error": job.error,\n }\n if job.started_at:\n values["started_at"] = datetime.fromtimestamp(job.started_at, tz=timezone.utc)\n if job.completed_at:\n values["completed_at"] = datetime.fromtimestamp(job.completed_at, tz=timezone.utc)\n\n async with async_session_factory() as db:\n await db.execute(\n update(ProcessingTask)\n .where(ProcessingTask.job_id == job.id)\n .values(**values)\n )\n await db.commit()\n except Exception as e:\n logger.warning(f"Failed to sync processing task for job {job.id}: {e}")\n'''
marker='\n\n# -- Singleton + job handlers --\n'
if ins.strip() not in t:
t=t.replace(marker, ins+marker)
old=''' job.status = JobStatus.RUNNING
job.started_at = time.time()
job.message = "Running..."
logger.info(f"Worker {worker_id}: executing {job.id} ({job.job_type.value})")
try:
'''
new=''' job.status = JobStatus.RUNNING
job.started_at = time.time()
if job.progress <= 0:
job.progress = 5.0
job.message = "Running..."
await _sync_processing_task(job)
logger.info(f"Worker {worker_id}: executing {job.id} ({job.job_type.value})")
try:
'''
if old not in t:
raise SystemExit('worker running block not found')
t=t.replace(old,new)
old2=''' job.completed_at = time.time()
logger.info(f"Worker {worker_id}: completed {job.id} in {job.elapsed_ms}ms")
except Exception as e:
if not job.is_cancelled:
job.status = JobStatus.FAILED
job.error = str(e)
job.message = f"Failed: {e}"
job.completed_at = time.time()
logger.error(f"Worker {worker_id}: failed {job.id}: {e}", exc_info=True)
# Fire completion callbacks
'''
new2=''' job.completed_at = time.time()
logger.info(f"Worker {worker_id}: completed {job.id} in {job.elapsed_ms}ms")
except Exception as e:
if not job.is_cancelled:
job.status = JobStatus.FAILED
job.error = str(e)
job.message = f"Failed: {e}"
job.completed_at = time.time()
logger.error(f"Worker {worker_id}: failed {job.id}: {e}", exc_info=True)
if job.is_cancelled and not job.completed_at:
job.completed_at = time.time()
await _sync_processing_task(job)
# Fire completion callbacks
'''
if old2 not in t:
raise SystemExit('worker completion block not found')
t=t.replace(old2,new2)
p.write_text(t, encoding='utf-8')
print('updated job_queue persistent task syncing')

View File

@@ -1,39 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/job_queue.py')
t=p.read_text(encoding='utf-8')
old=''' if hunt_id:
job_queue.submit(JobType.HOST_PROFILE, hunt_id=hunt_id)
logger.info(f"Triage done for {dataset_id} - chained HOST_PROFILE for hunt {hunt_id}")
except Exception as e:
'''
new=''' if hunt_id:
hp_job = job_queue.submit(JobType.HOST_PROFILE, hunt_id=hunt_id)
try:
from sqlalchemy import select
from app.db.models import ProcessingTask
async with async_session_factory() as db:
existing = await db.execute(
select(ProcessingTask.id).where(ProcessingTask.job_id == hp_job.id)
)
if existing.first() is None:
db.add(ProcessingTask(
hunt_id=hunt_id,
dataset_id=dataset_id,
job_id=hp_job.id,
stage="host_profile",
status="queued",
progress=0.0,
message="Queued",
))
await db.commit()
except Exception as persist_err:
logger.warning(f"Failed to persist chained HOST_PROFILE task: {persist_err}")
logger.info(f"Triage done for {dataset_id} - chained HOST_PROFILE for hunt {hunt_id}")
except Exception as e:
'''
if old not in t:
raise SystemExit('triage chain block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated triage chain to persist host_profile task row')

View File

@@ -1,321 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
new_text='''"""API routes for AUP keyword themes, keyword CRUD, and scanning."""
import logging
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import get_db
from app.db.models import KeywordTheme, Keyword
from app.services.scanner import KeywordScanner, keyword_scan_cache
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/keywords", tags=["keywords"])
class ThemeCreate(BaseModel):
name: str = Field(..., min_length=1, max_length=128)
color: str = Field(default="#9e9e9e", max_length=16)
enabled: bool = True
class ThemeUpdate(BaseModel):
name: str | None = None
color: str | None = None
enabled: bool | None = None
class KeywordOut(BaseModel):
id: int
theme_id: str
value: str
is_regex: bool
created_at: str
class ThemeOut(BaseModel):
id: str
name: str
color: str
enabled: bool
is_builtin: bool
created_at: str
keyword_count: int
keywords: list[KeywordOut]
class ThemeListResponse(BaseModel):
themes: list[ThemeOut]
total: int
class KeywordCreate(BaseModel):
value: str = Field(..., min_length=1, max_length=256)
is_regex: bool = False
class KeywordBulkCreate(BaseModel):
values: list[str] = Field(..., min_items=1)
is_regex: bool = False
class ScanRequest(BaseModel):
dataset_ids: list[str] | None = None
theme_ids: list[str] | None = None
scan_hunts: bool = False
scan_annotations: bool = False
scan_messages: bool = False
prefer_cache: bool = True
force_rescan: bool = False
class ScanHit(BaseModel):
theme_name: str
theme_color: str
keyword: str
source_type: str
source_id: str | int
field: str
matched_value: str
row_index: int | None = None
dataset_name: str | None = None
class ScanResponse(BaseModel):
total_hits: int
hits: list[ScanHit]
themes_scanned: int
keywords_scanned: int
rows_scanned: int
cache_used: bool = False
cache_status: str = "miss"
cached_at: str | None = None
def _theme_to_out(t: KeywordTheme) -> ThemeOut:
return ThemeOut(
id=t.id,
name=t.name,
color=t.color,
enabled=t.enabled,
is_builtin=t.is_builtin,
created_at=t.created_at.isoformat(),
keyword_count=len(t.keywords),
keywords=[
KeywordOut(
id=k.id,
theme_id=k.theme_id,
value=k.value,
is_regex=k.is_regex,
created_at=k.created_at.isoformat(),
)
for k in t.keywords
],
)
def _merge_cached_results(entries: list[dict], allowed_theme_names: set[str] | None = None) -> dict:
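"""Merge cached per-dataset scan results, summing row counts and optionally filtering hits to the selected themes."""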
hits: list[dict] = []
total_rows = 0
cached_at: str | None = None
for entry in entries:
result = entry["result"]
total_rows += int(result.get("rows_scanned", 0) or 0)
if entry.get("built_at"):
if not cached_at or entry["built_at"] > cached_at:
cached_at = entry["built_at"]
for h in result.get("hits", []):
if allowed_theme_names is not None and h.get("theme_name") not in allowed_theme_names:
continue
hits.append(h)
return {
"total_hits": len(hits),
"hits": hits,
"rows_scanned": total_rows,
"cached_at": cached_at,
}
@router.get("/themes", response_model=ThemeListResponse)
async def list_themes(db: AsyncSession = Depends(get_db)):
result = await db.execute(select(KeywordTheme).order_by(KeywordTheme.name))
themes = result.scalars().all()
return ThemeListResponse(themes=[_theme_to_out(t) for t in themes], total=len(themes))
@router.post("/themes", response_model=ThemeOut, status_code=201)
async def create_theme(body: ThemeCreate, db: AsyncSession = Depends(get_db)):
exists = await db.scalar(select(KeywordTheme.id).where(KeywordTheme.name == body.name))
if exists:
raise HTTPException(409, f"Theme '{body.name}' already exists")
theme = KeywordTheme(name=body.name, color=body.color, enabled=body.enabled)
db.add(theme)
await db.flush()
await db.refresh(theme)
keyword_scan_cache.clear()
return _theme_to_out(theme)
@router.put("/themes/{theme_id}", response_model=ThemeOut)
async def update_theme(theme_id: str, body: ThemeUpdate, db: AsyncSession = Depends(get_db)):
theme = await db.get(KeywordTheme, theme_id)
if not theme:
raise HTTPException(404, "Theme not found")
if body.name is not None:
dup = await db.scalar(
select(KeywordTheme.id).where(KeywordTheme.name == body.name, KeywordTheme.id != theme_id)
)
if dup:
raise HTTPException(409, f"Theme '{body.name}' already exists")
theme.name = body.name
if body.color is not None:
theme.color = body.color
if body.enabled is not None:
theme.enabled = body.enabled
await db.flush()
await db.refresh(theme)
keyword_scan_cache.clear()
return _theme_to_out(theme)
@router.delete("/themes/{theme_id}", status_code=204)
async def delete_theme(theme_id: str, db: AsyncSession = Depends(get_db)):
theme = await db.get(KeywordTheme, theme_id)
if not theme:
raise HTTPException(404, "Theme not found")
await db.delete(theme)
keyword_scan_cache.clear()
@router.post("/themes/{theme_id}/keywords", response_model=KeywordOut, status_code=201)
async def add_keyword(theme_id: str, body: KeywordCreate, db: AsyncSession = Depends(get_db)):
theme = await db.get(KeywordTheme, theme_id)
if not theme:
raise HTTPException(404, "Theme not found")
kw = Keyword(theme_id=theme_id, value=body.value, is_regex=body.is_regex)
db.add(kw)
await db.flush()
await db.refresh(kw)
keyword_scan_cache.clear()
return KeywordOut(
id=kw.id, theme_id=kw.theme_id, value=kw.value,
is_regex=kw.is_regex, created_at=kw.created_at.isoformat(),
)
@router.post("/themes/{theme_id}/keywords/bulk", response_model=dict, status_code=201)
async def add_keywords_bulk(theme_id: str, body: KeywordBulkCreate, db: AsyncSession = Depends(get_db)):
theme = await db.get(KeywordTheme, theme_id)
if not theme:
raise HTTPException(404, "Theme not found")
added = 0
for val in body.values:
val = val.strip()
if not val:
continue
db.add(Keyword(theme_id=theme_id, value=val, is_regex=body.is_regex))
added += 1
await db.flush()
keyword_scan_cache.clear()
return {"added": added, "theme_id": theme_id}
@router.delete("/keywords/{keyword_id}", status_code=204)
async def delete_keyword(keyword_id: int, db: AsyncSession = Depends(get_db)):
kw = await db.get(Keyword, keyword_id)
if not kw:
raise HTTPException(404, "Keyword not found")
await db.delete(kw)
keyword_scan_cache.clear()
@router.post("/scan", response_model=ScanResponse)
async def run_scan(body: ScanRequest, db: AsyncSession = Depends(get_db)):
scanner = KeywordScanner(db)
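# The warm per-dataset cache only covers dataset-row scans, so extra sources or a forced rescan fall through to a live scan.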
can_use_cache = (
body.prefer_cache
and not body.force_rescan
and bool(body.dataset_ids)
and not body.scan_hunts
and not body.scan_annotations
and not body.scan_messages
)
if can_use_cache:
themes = await scanner._load_themes(body.theme_ids)
allowed_theme_names = {t.name for t in themes}
keywords_scanned = sum(len(theme.keywords) for theme in themes)
cached_entries: list[dict] = []
missing: list[str] = []
for dataset_id in (body.dataset_ids or []):
entry = keyword_scan_cache.get(dataset_id)
if not entry:
missing.append(dataset_id)
continue
cached_entries.append({"result": entry.result, "built_at": entry.built_at})
if not missing and cached_entries:
merged = _merge_cached_results(cached_entries, allowed_theme_names if body.theme_ids else None)
return {
"total_hits": merged["total_hits"],
"hits": merged["hits"],
"themes_scanned": len(themes),
"keywords_scanned": keywords_scanned,
"rows_scanned": merged["rows_scanned"],
"cache_used": True,
"cache_status": "hit",
"cached_at": merged["cached_at"],
}
result = await scanner.scan(
dataset_ids=body.dataset_ids,
theme_ids=body.theme_ids,
scan_hunts=body.scan_hunts,
scan_annotations=body.scan_annotations,
scan_messages=body.scan_messages,
)
return {
**result,
"cache_used": False,
"cache_status": "miss",
"cached_at": None,
}
@router.get("/scan/quick", response_model=ScanResponse)
async def quick_scan(
dataset_id: str = Query(..., description="Dataset to scan"),
db: AsyncSession = Depends(get_db),
):
entry = keyword_scan_cache.get(dataset_id)
if entry is not None:
result = entry.result
return {
**result,
"cache_used": True,
"cache_status": "hit",
"cached_at": entry.built_at,
}
scanner = KeywordScanner(db)
result = await scanner.scan(dataset_ids=[dataset_id])
keyword_scan_cache.put(dataset_id, result)
return {
**result,
"cache_used": False,
"cache_status": "miss",
"cached_at": None,
}
'''
p.write_text(new_text,encoding='utf-8')
print('updated keywords.py')

View File

@@ -1,31 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/main.py')
t=p.read_text(encoding='utf-8')
old=''' # Start job queue
from app.services.job_queue import job_queue, register_all_handlers, JobType
register_all_handlers()
await job_queue.start()
logger.info("Job queue started (%d workers)", job_queue._max_workers)
'''
new=''' # Start job queue
from app.services.job_queue import (
job_queue,
register_all_handlers,
reconcile_stale_processing_tasks,
JobType,
)
if settings.STARTUP_RECONCILE_STALE_TASKS:
reconciled = await reconcile_stale_processing_tasks()
if reconciled:
logger.info("Startup reconciliation marked %d stale tasks", reconciled)
register_all_handlers()
await job_queue.start()
logger.info("Job queue started (%d workers)", job_queue._max_workers)
'''
if old not in t:
raise SystemExit('startup queue block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('wired startup reconciliation in main lifespan')

View File

@@ -1,45 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/db/models.py')
t=p.read_text(encoding='utf-8')
if 'class ProcessingTask(Base):' in t:
print('processing task model already exists')
raise SystemExit(0)
insert='''
# -- Persistent Processing Tasks (Phase 2) ---
class ProcessingTask(Base):
__tablename__ = "processing_tasks"
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
hunt_id: Mapped[Optional[str]] = mapped_column(
String(32), ForeignKey("hunts.id", ondelete="CASCADE"), nullable=True, index=True
)
dataset_id: Mapped[Optional[str]] = mapped_column(
String(32), ForeignKey("datasets.id", ondelete="CASCADE"), nullable=True, index=True
)
job_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, index=True)
stage: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
status: Mapped[str] = mapped_column(String(20), default="queued", index=True)
progress: Mapped[float] = mapped_column(Float, default=0.0)
message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
completed_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
)
__table_args__ = (
Index("ix_processing_tasks_hunt_stage", "hunt_id", "stage"),
Index("ix_processing_tasks_dataset_stage", "dataset_id", "stage"),
)
'''
# insert before Playbook section
marker='\n\n# -- Playbook / Investigation Templates (Feature 3) ---\n'
if marker not in t:
raise SystemExit('marker not found for insertion')
t=t.replace(marker, insert+marker)
p.write_text(t,encoding='utf-8')
print('added ProcessingTask model')

View File

@@ -1,59 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')
insert='''
function isPointOnNodeLabel(node: GNode, wx: number, wy: number, vp: Viewport): boolean {
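// Approximate the bounding box of the label (name plus optional first IP line) drawn above the node,
// using an average character width per glyph, then test the world-space point against that rect.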
const fontSize = Math.max(9, Math.round(12 / vp.scale));
const approxCharW = Math.max(5, fontSize * 0.58);
const line1 = node.label || '';
const line2 = node.meta.ips.length > 0 ? node.meta.ips[0] : '';
const tw = Math.max(line1.length * approxCharW, line2 ? line2.length * approxCharW : 0);
const px = 5, py = 2;
const totalH = line2 ? fontSize * 2 + py * 2 : fontSize + py * 2;
const lx = node.x, ly = node.y - node.radius - 6;
const rx = lx - tw / 2 - px;
const ry = ly - totalH;
const rw = tw + px * 2;
const rh = totalH;
return wx >= rx && wx <= (rx + rw) && wy >= ry && wy <= (ry + rh);
}
'''
if 'function isPointOnNodeLabel' not in t:
t=t.replace('// == Hit-test =============================================================\n', '// == Hit-test =============================================================\n'+insert)
old='''function hitTest(
graph: Graph, canvas: HTMLCanvasElement, clientX: number, clientY: number, vp: Viewport,
): GNode | null {
const { wx, wy } = screenToWorld(canvas, clientX, clientY, vp);
for (const n of graph.nodes) {
const dx = n.x - wx, dy = n.y - wy;
if (dx * dx + dy * dy < (n.radius + 5) ** 2) return n;
}
return null;
}
'''
new='''function hitTest(
graph: Graph, canvas: HTMLCanvasElement, clientX: number, clientY: number, vp: Viewport,
): GNode | null {
const { wx, wy } = screenToWorld(canvas, clientX, clientY, vp);
// Node-circle hit has priority
for (const n of graph.nodes) {
const dx = n.x - wx, dy = n.y - wy;
if (dx * dx + dy * dy < (n.radius + 5) ** 2) return n;
}
// Then label hit (so clicking text works too)
for (const n of graph.nodes) {
if (isPointOnNodeLabel(n, wx, wy, vp)) return n;
}
return null;
}
'''
if old not in t:
raise SystemExit('hitTest block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated NetworkMap hit-test for labels')

View File

@@ -1,272 +0,0 @@
from pathlib import Path
p = Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
text = p.read_text(encoding='utf-8')
new_text = '''"""AUP Keyword Scanner searches dataset rows, hunts, annotations, and
messages for keyword matches.
Scanning is done in Python (not SQL LIKE on JSON columns) for portability
across SQLite / PostgreSQL and to provide per-cell match context.
"""
import logging
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models import (
KeywordTheme,
DatasetRow,
Dataset,
Hunt,
Annotation,
Message,
)
logger = logging.getLogger(__name__)
BATCH_SIZE = 200
@dataclass
class ScanHit:
theme_name: str
theme_color: str
keyword: str
source_type: str # dataset_row | hunt | annotation | message
source_id: str | int
field: str
matched_value: str
row_index: int | None = None
dataset_name: str | None = None
@dataclass
class ScanResult:
total_hits: int = 0
hits: list[ScanHit] = field(default_factory=list)
themes_scanned: int = 0
keywords_scanned: int = 0
rows_scanned: int = 0
@dataclass
class KeywordScanCacheEntry:
dataset_id: str
result: dict
built_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
class KeywordScanCache:
"""In-memory per-dataset cache for dataset-only keyword scans.
This enables fast-path reads when users run AUP scans against datasets that
were already scanned during upload pipeline processing.
"""
def __init__(self):
self._entries: dict[str, KeywordScanCacheEntry] = {}
def put(self, dataset_id: str, result: dict):
self._entries[dataset_id] = KeywordScanCacheEntry(dataset_id=dataset_id, result=result)
def get(self, dataset_id: str) -> KeywordScanCacheEntry | None:
return self._entries.get(dataset_id)
def invalidate_dataset(self, dataset_id: str):
self._entries.pop(dataset_id, None)
def clear(self):
self._entries.clear()
keyword_scan_cache = KeywordScanCache()
class KeywordScanner:
"""Scans multiple data sources for keyword/regex matches."""
def __init__(self, db: AsyncSession):
self.db = db
# Public API
async def scan(
self,
dataset_ids: list[str] | None = None,
theme_ids: list[str] | None = None,
scan_hunts: bool = False,
scan_annotations: bool = False,
scan_messages: bool = False,
) -> dict:
"""Run a full AUP scan and return dict matching ScanResponse."""
# Load themes + keywords
themes = await self._load_themes(theme_ids)
if not themes:
return ScanResult().__dict__
# Pre-compile patterns per theme
patterns = self._compile_patterns(themes)
result = ScanResult(
themes_scanned=len(themes),
keywords_scanned=sum(len(kws) for kws in patterns.values()),
)
# Scan dataset rows
await self._scan_datasets(patterns, result, dataset_ids)
# Scan hunts
if scan_hunts:
await self._scan_hunts(patterns, result)
# Scan annotations
if scan_annotations:
await self._scan_annotations(patterns, result)
# Scan messages
if scan_messages:
await self._scan_messages(patterns, result)
result.total_hits = len(result.hits)
return {
"total_hits": result.total_hits,
"hits": [h.__dict__ for h in result.hits],
"themes_scanned": result.themes_scanned,
"keywords_scanned": result.keywords_scanned,
"rows_scanned": result.rows_scanned,
}
# Internal
async def _load_themes(self, theme_ids: list[str] | None) -> list[KeywordTheme]:
q = select(KeywordTheme).where(KeywordTheme.enabled == True) # noqa: E712
if theme_ids:
q = q.where(KeywordTheme.id.in_(theme_ids))
result = await self.db.execute(q)
return list(result.scalars().all())
def _compile_patterns(
self, themes: list[KeywordTheme]
) -> dict[tuple[str, str, str], list[tuple[str, re.Pattern]]]:
"""Returns {(theme_id, theme_name, theme_color): [(keyword_value, compiled_pattern), ...]}"""
patterns: dict[tuple[str, str, str], list[tuple[str, re.Pattern]]] = {}
for theme in themes:
key = (theme.id, theme.name, theme.color)
compiled = []
for kw in theme.keywords:
try:
if kw.is_regex:
pat = re.compile(kw.value, re.IGNORECASE)
else:
pat = re.compile(re.escape(kw.value), re.IGNORECASE)
compiled.append((kw.value, pat))
except re.error:
logger.warning("Invalid regex pattern '%s' in theme '%s', skipping",
kw.value, theme.name)
patterns[key] = compiled
return patterns
def _match_text(
self,
text: str,
patterns: dict,
source_type: str,
source_id: str | int,
field_name: str,
hits: list[ScanHit],
row_index: int | None = None,
dataset_name: str | None = None,
) -> None:
"""Check text against all compiled patterns, append hits."""
if not text:
return
for (theme_id, theme_name, theme_color), keyword_patterns in patterns.items():
for kw_value, pat in keyword_patterns:
if pat.search(text):
matched_preview = text[:200] + ("…" if len(text) > 200 else "")
hits.append(ScanHit(
theme_name=theme_name,
theme_color=theme_color,
keyword=kw_value,
source_type=source_type,
source_id=source_id,
field=field_name,
matched_value=matched_preview,
row_index=row_index,
dataset_name=dataset_name,
))
async def _scan_datasets(
self, patterns: dict, result: ScanResult, dataset_ids: list[str] | None
) -> None:
"""Scan dataset rows in batches."""
ds_q = select(Dataset.id, Dataset.name)
if dataset_ids:
ds_q = ds_q.where(Dataset.id.in_(dataset_ids))
ds_result = await self.db.execute(ds_q)
ds_map = {r[0]: r[1] for r in ds_result.fetchall()}
if not ds_map:
return
offset = 0
row_q_base = select(DatasetRow).where(
DatasetRow.dataset_id.in_(list(ds_map.keys()))
).order_by(DatasetRow.id)
while True:
rows_result = await self.db.execute(
row_q_base.offset(offset).limit(BATCH_SIZE)
)
rows = rows_result.scalars().all()
if not rows:
break
for row in rows:
result.rows_scanned += 1
data = row.data or {}
for col_name, cell_value in data.items():
if cell_value is None:
continue
text = str(cell_value)
self._match_text(
text, patterns, "dataset_row", row.id,
col_name, result.hits,
row_index=row.row_index,
dataset_name=ds_map.get(row.dataset_id),
)
offset += BATCH_SIZE
import asyncio
await asyncio.sleep(0)
if len(rows) < BATCH_SIZE:
break
async def _scan_hunts(self, patterns: dict, result: ScanResult) -> None:
"""Scan hunt names and descriptions."""
hunts_result = await self.db.execute(select(Hunt))
for hunt in hunts_result.scalars().all():
self._match_text(hunt.name, patterns, "hunt", hunt.id, "name", result.hits)
if hunt.description:
self._match_text(hunt.description, patterns, "hunt", hunt.id, "description", result.hits)
async def _scan_annotations(self, patterns: dict, result: ScanResult) -> None:
"""Scan annotation text."""
ann_result = await self.db.execute(select(Annotation))
for ann in ann_result.scalars().all():
self._match_text(ann.text, patterns, "annotation", ann.id, "text", result.hits)
async def _scan_messages(self, patterns: dict, result: ScanResult) -> None:
"""Scan conversation messages (user messages only)."""
msg_result = await self.db.execute(
select(Message).where(Message.role == "user")
)
for msg in msg_result.scalars().all():
self._match_text(msg.content, patterns, "message", msg.id, "content", result.hits)
'''
p.write_text(new_text, encoding='utf-8')
print('updated scanner.py')

View File

@@ -1,31 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/tests/test_api.py')
t=p.read_text(encoding='utf-8')
insert='''
async def test_hunt_progress(self, client):
create = await client.post("/api/hunts", json={"name": "Progress Hunt"})
hunt_id = create.json()["id"]
# attach one dataset so progress has scope
from tests.conftest import SAMPLE_CSV
import io
files = {"file": ("progress.csv", io.BytesIO(SAMPLE_CSV), "text/csv")}
up = await client.post(f"/api/datasets/upload?hunt_id={hunt_id}", files=files)
assert up.status_code == 200
res = await client.get(f"/api/hunts/{hunt_id}/progress")
assert res.status_code == 200
body = res.json()
assert body["hunt_id"] == hunt_id
assert "progress_percent" in body
assert "dataset_total" in body
assert "network_status" in body
'''
needle=''' async def test_get_nonexistent_hunt(self, client):
resp = await client.get("/api/hunts/nonexistent-id")
assert resp.status_code == 404
'''
if needle in t and 'test_hunt_progress' not in t:
t=t.replace(needle, needle+'\n'+insert)
p.write_text(t,encoding='utf-8')
print('updated test_api.py')

View File

@@ -1,32 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/tests/test_keywords.py')
t=p.read_text(encoding='utf-8')
add='''
@pytest.mark.asyncio
async def test_quick_scan_cache_hit(client: AsyncClient):
"""Second quick scan should return cache hit metadata."""
theme_res = await client.post("/api/keywords/themes", json={"name": "Quick Cache Theme", "color": "#00aa00"})
tid = theme_res.json()["id"]
await client.post(f"/api/keywords/themes/{tid}/keywords", json={"value": "chrome.exe"})
from tests.conftest import SAMPLE_CSV
import io
files = {"file": ("cache_quick.csv", io.BytesIO(SAMPLE_CSV), "text/csv")}
upload = await client.post("/api/datasets/upload", files=files)
ds_id = upload.json()["id"]
first = await client.get(f"/api/keywords/scan/quick?dataset_id={ds_id}")
assert first.status_code == 200
assert first.json().get("cache_status") in ("miss", "hit")
second = await client.get(f"/api/keywords/scan/quick?dataset_id={ds_id}")
assert second.status_code == 200
body = second.json()
assert body.get("cache_used") is True
assert body.get("cache_status") == "hit"
'''
if 'test_quick_scan_cache_hit' not in t:
t=t + add
p.write_text(t,encoding='utf-8')
print('updated test_keywords.py')

View File

@@ -1,26 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/FileUpload.tsx')
t=p.read_text(encoding='utf-8')
# import useEffect
t=t.replace("import React, { useState, useCallback, useRef } from 'react';","import React, { useState, useCallback, useRef, useEffect } from 'react';")
# import HuntProgress type
t=t.replace("import { datasets, hunts, type UploadResult, type Hunt } from '../api/client';","import { datasets, hunts, type UploadResult, type Hunt, type HuntProgress } from '../api/client';")
# add state
if 'const [huntProgress, setHuntProgress]' not in t:
t=t.replace(" const [huntList, setHuntList] = useState<Hunt[]>([]);\n const [huntId, setHuntId] = useState('');"," const [huntList, setHuntList] = useState<Hunt[]>([]);\n const [huntId, setHuntId] = useState('');\n const [huntProgress, setHuntProgress] = useState<HuntProgress | null>(null);")
# add polling effect after hunts list effect
marker=" React.useEffect(() => {\n hunts.list(0, 100).then(r => setHuntList(r.hunts)).catch(() => {});\n }, []);\n"
if marker in t and 'setInterval' not in t.split(marker,1)[1][:500]:
add='''\n useEffect(() => {\n let timer: any = null;\n let cancelled = false;\n\n const pull = async () => {\n if (!huntId) {\n if (!cancelled) setHuntProgress(null);\n return;\n }\n try {\n const p = await hunts.progress(huntId);\n if (!cancelled) setHuntProgress(p);\n } catch {\n if (!cancelled) setHuntProgress(null);\n }\n };\n\n pull();\n if (huntId) timer = setInterval(pull, 2000);\n return () => { cancelled = true; if (timer) clearInterval(timer); };\n }, [huntId, jobs.length]);\n'''
t=t.replace(marker, marker+add)
# insert master progress UI after overall summary
insert_after=''' {overallTotal > 0 && (\n <Stack direction="row" alignItems="center" spacing={1} sx={{ mt: 2 }}>\n <Typography variant="body2" color="text.secondary">\n {overallDone + overallErr} / {overallTotal} files processed\n {overallErr > 0 && ` ({overallErr} failed)`}\n </Typography>\n <Box sx={{ flexGrow: 1 }} />\n {overallDone + overallErr === overallTotal && overallTotal > 0 && (\n <Tooltip title="Clear completed">\n <IconButton size="small" onClick={clearCompleted}><ClearIcon fontSize="small" /></IconButton>\n </Tooltip>\n )}\n </Stack>\n )}\n'''
add_block='''\n {huntId && huntProgress && (\n <Paper sx={{ p: 1.5, mt: 1.5 }}>\n <Stack direction="row" alignItems="center" spacing={1} sx={{ mb: 0.8 }}>\n <Typography variant="body2" sx={{ fontWeight: 600 }}>\n Master Processing Progress\n </Typography>\n <Chip\n size="small"\n label={huntProgress.status.toUpperCase()}\n color={huntProgress.status === 'ready' ? 'success' : huntProgress.status === 'processing' ? 'warning' : 'default'}\n variant="outlined"\n />\n <Box sx={{ flexGrow: 1 }} />\n <Typography variant="caption" color="text.secondary">\n {huntProgress.progress_percent.toFixed(1)}%\n </Typography>\n </Stack>\n <LinearProgress\n variant="determinate"\n value={Math.max(0, Math.min(100, huntProgress.progress_percent))}\n sx={{ height: 8, borderRadius: 4 }}\n />\n <Stack direction="row" spacing={1} sx={{ mt: 1 }} flexWrap="wrap" useFlexGap>\n <Chip size="small" label={`Datasets ${huntProgress.dataset_completed}/${huntProgress.dataset_total}`} variant="outlined" />\n <Chip size="small" label={`Active jobs ${huntProgress.active_jobs}`} variant="outlined" />\n <Chip size="small" label={`Queued jobs ${huntProgress.queued_jobs}`} variant="outlined" />\n <Chip size="small" label={`Network ${huntProgress.network_status}`} variant="outlined" />\n </Stack>\n </Paper>\n )}\n'''
if insert_after in t:
t=t.replace(insert_after, insert_after+add_block)
else:
print('warning: summary block not found')
p.write_text(t,encoding='utf-8')
print('updated FileUpload.tsx')

View File

@@ -1,42 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/FileUpload.tsx')
t=p.read_text(encoding='utf-8')
marker=''' {/* Per-file progress list */}
'''
add=''' {huntId && huntProgress && (
<Paper sx={{ p: 1.5, mt: 1.5 }}>
<Stack direction="row" alignItems="center" spacing={1} sx={{ mb: 0.8 }}>
<Typography variant="body2" sx={{ fontWeight: 600 }}>
Master Processing Progress
</Typography>
<Chip
size="small"
label={huntProgress.status.toUpperCase()}
color={huntProgress.status === 'ready' ? 'success' : huntProgress.status === 'processing' ? 'warning' : 'default'}
variant="outlined"
/>
<Box sx={{ flexGrow: 1 }} />
<Typography variant="caption" color="text.secondary">
{huntProgress.progress_percent.toFixed(1)}%
</Typography>
</Stack>
<LinearProgress
variant="determinate"
value={Math.max(0, Math.min(100, huntProgress.progress_percent))}
sx={{ height: 8, borderRadius: 4 }}
/>
<Stack direction="row" spacing={1} sx={{ mt: 1 }} flexWrap="wrap" useFlexGap>
<Chip size="small" label={`Datasets ${huntProgress.dataset_completed}/${huntProgress.dataset_total}`} variant="outlined" />
<Chip size="small" label={`Active jobs ${huntProgress.active_jobs}`} variant="outlined" />
<Chip size="small" label={`Queued jobs ${huntProgress.queued_jobs}`} variant="outlined" />
<Chip size="small" label={`Network ${huntProgress.network_status}`} variant="outlined" />
</Stack>
</Paper>
)}
'''
if marker not in t:
raise SystemExit('marker not found')
t=t.replace(marker, add+marker)
p.write_text(t,encoding='utf-8')
print('inserted master progress block')

View File

@@ -1,55 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
t=p.read_text(encoding='utf-8')
if 'from app.config import settings' not in t:
t=t.replace('from sqlalchemy.ext.asyncio import AsyncSession\n','from sqlalchemy.ext.asyncio import AsyncSession\n\nfrom app.config import settings\n')
old=''' import asyncio
for ds_id, ds_name in ds_map.items():
last_id = 0
while True:
'''
new=''' import asyncio
max_rows = max(0, int(settings.SCANNER_MAX_ROWS_PER_SCAN))
budget_reached = False
for ds_id, ds_name in ds_map.items():
if max_rows and result.rows_scanned >= max_rows:
budget_reached = True
break
last_id = 0
while True:
if max_rows and result.rows_scanned >= max_rows:
budget_reached = True
break
'''
if old not in t:
raise SystemExit('scanner loop block not found')
t=t.replace(old,new)
old2=''' if len(rows) < BATCH_SIZE:
break
'''
new2=''' if len(rows) < BATCH_SIZE:
break
if budget_reached:
break
if budget_reached:
logger.warning(
"AUP scan row budget reached (%d rows). Returning partial results.",
result.rows_scanned,
)
'''
if old2 not in t:
raise SystemExit('scanner break block not found')
t=t.replace(old2,new2,1)
p.write_text(t,encoding='utf-8')
print('added scanner global row budget enforcement')

View File

@@ -1,12 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
t=p.read_text(encoding='utf-8')
old=''' }, [selectedDs, selectedThemes, scanHunts, scanAnnotations, scanMessages, enqueueSnackbar]);
'''
new=''' }, [selectedHuntId, selectedDs, selectedThemes, scanHunts, scanAnnotations, scanMessages, enqueueSnackbar]);
'''
if old not in t:
raise SystemExit('runScan deps block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('fixed AUPScanner runScan dependency list')

View File

@@ -1,7 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/datasets.py')
t=p.read_text(encoding='utf-8')
if 'from app.db.models import ProcessingTask' not in t:
t=t.replace('from app.db import get_db\n', 'from app.db import get_db\nfrom app.db.models import ProcessingTask\n')
p.write_text(t, encoding='utf-8')
print('added ProcessingTask import')

View File

@@ -1,25 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
t=p.read_text(encoding='utf-8')
old=''' if not body.dataset_ids and not body.scan_hunts and not body.scan_annotations and not body.scan_messages:
raise HTTPException(400, "Select at least one dataset or enable additional sources (hunts/annotations/messages)")
'''
new=''' if not body.dataset_ids and not body.scan_hunts and not body.scan_annotations and not body.scan_messages:
return {
"total_hits": 0,
"hits": [],
"themes_scanned": 0,
"keywords_scanned": 0,
"rows_scanned": 0,
"cache_used": False,
"cache_status": "miss",
"cached_at": None,
}
'''
if old not in t:
raise SystemExit('scope guard block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('adjusted empty scan guard to return fast empty result (200)')

View File

@@ -1,47 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')
# Add label selector in toolbar before refresh button
insert_after=""" <TextField
size=\"small\"
placeholder=\"Search hosts, IPs, users\\u2026\"
value={search}
onChange={e => setSearch(e.target.value)}
sx={{ width: 220, '& .MuiInputBase-input': { py: 0.8 } }}
slotProps={{
input: {
startAdornment: <SearchIcon sx={{ mr: 0.5, fontSize: 18, color: 'text.secondary' }} />,
},
}}
/>
"""
label_ctrl="""
<FormControl size=\"small\" sx={{ minWidth: 150 }}>
<InputLabel id=\"label-mode-selector\">Labels</InputLabel>
<Select
labelId=\"label-mode-selector\"
value={labelMode}
label=\"Labels\"
onChange={e => setLabelMode(e.target.value as LabelMode)}
sx={{ '& .MuiSelect-select': { py: 0.8 } }}
>
<MenuItem value=\"none\">None</MenuItem>
<MenuItem value=\"highlight\">Selected/Search</MenuItem>
<MenuItem value=\"all\">All</MenuItem>
</Select>
</FormControl>
"""
if 'label-mode-selector' not in t:
if insert_after not in t:
raise SystemExit('search block not found for label selector insertion')
t=t.replace(insert_after, insert_after+label_ctrl)
# Fix useCallback dependency for startAnimLoop
old=' }, [canvasSize]);'
new=' }, [canvasSize, labelMode]);'
if old in t:
t=t.replace(old,new,1)
p.write_text(t,encoding='utf-8')
print('inserted label selector UI and fixed callback dependency')

View File

@@ -1,10 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')
count=t.count('}, [canvasSize]);')
if count:
t=t.replace('}, [canvasSize]);','}, [canvasSize, labelMode]);')
# In case formatter created spaced variant
t=t.replace('}, [canvasSize ]);','}, [canvasSize, labelMode]);')
p.write_text(t,encoding='utf-8')
print('patched remaining canvasSize callback deps:', count)

View File

@@ -1,71 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/AUPScanner.tsx')
t=p.read_text(encoding='utf-8')
# Auto-select first hunt with datasets after load
old=''' const [tRes, hRes] = await Promise.all([
keywords.listThemes(),
hunts.list(0, 200),
]);
setThemes(tRes.themes);
setHuntList(hRes.hunts);
'''
new=''' const [tRes, hRes] = await Promise.all([
keywords.listThemes(),
hunts.list(0, 200),
]);
setThemes(tRes.themes);
setHuntList(hRes.hunts);
if (!selectedHuntId && hRes.hunts.length > 0) {
const best = hRes.hunts.find(h => h.dataset_count > 0) || hRes.hunts[0];
setSelectedHuntId(best.id);
}
'''
if old not in t:
raise SystemExit('loadData block not found')
t=t.replace(old,new)
# Guard runScan
old2=''' const runScan = useCallback(async () => {
setScanning(true);
setScanResult(null);
try {
'''
new2=''' const runScan = useCallback(async () => {
if (!selectedHuntId) {
enqueueSnackbar('Please select a hunt before running AUP scan', { variant: 'warning' });
return;
}
if (selectedDs.size === 0) {
enqueueSnackbar('No datasets selected for this hunt', { variant: 'warning' });
return;
}
setScanning(true);
setScanResult(null);
try {
'''
if old2 not in t:
raise SystemExit('runScan header not found')
t=t.replace(old2,new2)
# update loadData deps
old3=''' }, [enqueueSnackbar]);
'''
new3=''' }, [enqueueSnackbar, selectedHuntId]);
'''
if old3 not in t:
raise SystemExit('loadData deps not found')
t=t.replace(old3,new3,1)
# disable button if no hunt or no datasets
old4=''' onClick={runScan} disabled={scanning}
'''
new4=''' onClick={runScan} disabled={scanning || !selectedHuntId || selectedDs.size === 0}
'''
if old4 not in t:
raise SystemExit('scan button props not found')
t=t.replace(old4,new4)
p.write_text(t,encoding='utf-8')
print('hardened AUPScanner to require explicit hunt/dataset scope')

View File

@@ -1,84 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/api/routes/keywords.py')
t=p.read_text(encoding='utf-8')
old=''' if can_use_cache:
themes = await scanner._load_themes(body.theme_ids)
allowed_theme_names = {t.name for t in themes}
keywords_scanned = sum(len(theme.keywords) for theme in themes)
cached_entries: list[dict] = []
missing: list[str] = []
for dataset_id in (body.dataset_ids or []):
entry = keyword_scan_cache.get(dataset_id)
if not entry:
missing.append(dataset_id)
continue
cached_entries.append({"result": entry.result, "built_at": entry.built_at})
if not missing and cached_entries:
merged = _merge_cached_results(cached_entries, allowed_theme_names if body.theme_ids else None)
return {
"total_hits": merged["total_hits"],
"hits": merged["hits"],
"themes_scanned": len(themes),
"keywords_scanned": keywords_scanned,
"rows_scanned": merged["rows_scanned"],
"cache_used": True,
"cache_status": "hit",
"cached_at": merged["cached_at"],
}
'''
new=''' if can_use_cache:
themes = await scanner._load_themes(body.theme_ids)
allowed_theme_names = {t.name for t in themes}
keywords_scanned = sum(len(theme.keywords) for theme in themes)
cached_entries: list[dict] = []
missing: list[str] = []
for dataset_id in (body.dataset_ids or []):
entry = keyword_scan_cache.get(dataset_id)
if not entry:
missing.append(dataset_id)
continue
cached_entries.append({"result": entry.result, "built_at": entry.built_at})
if not missing and cached_entries:
merged = _merge_cached_results(cached_entries, allowed_theme_names if body.theme_ids else None)
return {
"total_hits": merged["total_hits"],
"hits": merged["hits"],
"themes_scanned": len(themes),
"keywords_scanned": keywords_scanned,
"rows_scanned": merged["rows_scanned"],
"cache_used": True,
"cache_status": "hit",
"cached_at": merged["cached_at"],
}
if missing:
missing_entries: list[dict] = []
for dataset_id in missing:
partial = await scanner.scan(dataset_ids=[dataset_id], theme_ids=body.theme_ids)
keyword_scan_cache.put(dataset_id, partial)
missing_entries.append({"result": partial, "built_at": None})
merged = _merge_cached_results(
cached_entries + missing_entries,
allowed_theme_names if body.theme_ids else None,
)
return {
"total_hits": merged["total_hits"],
"hits": merged["hits"],
"themes_scanned": len(themes),
"keywords_scanned": keywords_scanned,
"rows_scanned": merged["rows_scanned"],
"cache_used": len(cached_entries) > 0,
"cache_status": "partial" if cached_entries else "miss",
"cached_at": merged["cached_at"],
}
'''
if old not in t:
raise SystemExit('cache block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated keyword /scan to use partial cache + scan missing datasets only')

View File

@@ -1,61 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/scanner.py')
t=p.read_text(encoding='utf-8')
start=t.index(' async def _scan_datasets(')
end=t.index(' async def _scan_hunts', start)
new_func=''' async def _scan_datasets(
self, patterns: dict, result: ScanResult, dataset_ids: list[str] | None
) -> None:
"""Scan dataset rows in batches using keyset pagination (no OFFSET)."""
ds_q = select(Dataset.id, Dataset.name)
if dataset_ids:
ds_q = ds_q.where(Dataset.id.in_(dataset_ids))
ds_result = await self.db.execute(ds_q)
ds_map = {r[0]: r[1] for r in ds_result.fetchall()}
if not ds_map:
return
import asyncio
for ds_id, ds_name in ds_map.items():
last_id = 0
while True:
rows_result = await self.db.execute(
select(DatasetRow)
.where(DatasetRow.dataset_id == ds_id)
.where(DatasetRow.id > last_id)
.order_by(DatasetRow.id)
.limit(BATCH_SIZE)
)
rows = rows_result.scalars().all()
if not rows:
break
for row in rows:
result.rows_scanned += 1
data = row.data or {}
for col_name, cell_value in data.items():
if cell_value is None:
continue
text = str(cell_value)
self._match_text(
text,
patterns,
"dataset_row",
row.id,
col_name,
result.hits,
row_index=row.row_index,
dataset_name=ds_name,
)
last_id = rows[-1].id
await asyncio.sleep(0)
if len(rows) < BATCH_SIZE:
break
'''
out=t[:start]+new_func+t[end:]
p.write_text(out,encoding='utf-8')
print('optimized scanner _scan_datasets to keyset pagination')

View File

@@ -1,36 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
t=p.read_text(encoding='utf-8')
old=''' return {
"hosts": host_list,
"connections": conn_list,
"stats": {
"total_hosts": len(host_list),
"total_datasets_scanned": len(all_datasets),
"datasets_with_hosts": ds_with_hosts,
"total_rows_scanned": total_rows,
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
"hosts_with_users": sum(1 for h in host_list if h['users']),
},
}
'''
new=''' return {
"hosts": host_list,
"connections": conn_list,
"stats": {
"total_hosts": len(host_list),
"total_datasets_scanned": len(all_datasets),
"datasets_with_hosts": ds_with_hosts,
"total_rows_scanned": total_rows,
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
"hosts_with_users": sum(1 for h in host_list if h['users']),
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0,
},
}
'''
if old not in t:
raise SystemExit('return block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('patched inventory stats metadata')

View File

@@ -1,10 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
t=p.read_text(encoding='utf-8')
needle=' "hosts_with_users": sum(1 for h in host_list if h[\'users\']),\n'
if '"row_budget_per_dataset"' not in t:
if needle not in t:
raise SystemExit('needle not found')
t=t.replace(needle, needle + ' "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,\n "sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0,\n')
p.write_text(t,encoding='utf-8')
print('inserted inventory budget stats lines')

View File

@@ -1,14 +0,0 @@
from pathlib import Path
p = Path(r"d:\Projects\Dev\ThreatHunt\frontend\src\components\NetworkMap.tsx")
text = p.read_text(encoding="utf-8")
anchor = " useEffect(() => { canvasSizeRef.current = canvasSize; }, [canvasSize]);\n"
insert = anchor + "\n const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));\n"
if "const sleep = (ms: number)" not in text and anchor in text:
text = text.replace(anchor, insert)
text = text.replace("await new Promise(r => setTimeout(r, delayMs + jitter));", "await sleep(delayMs + jitter);")
p.write_text(text, encoding="utf-8")
print("Patched sleep helper + polling awaits")

View File

@@ -1,37 +0,0 @@
from pathlib import Path
import re
p = Path(r"d:\Projects\Dev\ThreatHunt\frontend\src\components\NetworkMap.tsx")
text = p.read_text(encoding="utf-8")
pattern = re.compile(r"const waitUntilReady = async \(\): Promise<boolean> => \{[\s\S]*?\n\s*\};", re.M)
replacement = '''const waitUntilReady = async (): Promise<boolean> => {
// Poll inventory-status with exponential backoff until 'ready' (or cancelled)
setProgress('Host inventory is being prepared in the background');
setLoading(true);
let delayMs = 1500;
const startedAt = Date.now();
for (;;) {
const jitter = Math.floor(Math.random() * 250);
await new Promise(r => setTimeout(r, delayMs + jitter));
if (cancelled) return false;
try {
const st = await network.inventoryStatus(selectedHuntId);
if (cancelled) return false;
if (st.status === 'ready') return true;
if (Date.now() - startedAt > 5 * 60 * 1000) {
setError('Host inventory build timed out. Please retry.');
return false;
}
delayMs = Math.min(10000, Math.floor(delayMs * 1.5));
// still building or none (job may not have started yet) - keep polling
} catch {
if (cancelled) return false;
delayMs = Math.min(10000, Math.floor(delayMs * 1.5));
}
}
};'''
new_text, n = pattern.subn(replacement, text, count=1)
if n != 1:
raise SystemExit(f"Failed to patch waitUntilReady, matches={n}")
p.write_text(new_text, encoding="utf-8")
print("Patched waitUntilReady")

View File

@@ -1,26 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
t=p.read_text(encoding='utf-8')
old=''' NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
default=25000,
description="Row budget per dataset when building host inventory (0 = unlimited)",
)
'''
new=''' NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
default=5000,
description="Row budget per dataset when building host inventory (0 = unlimited)",
)
NETWORK_INVENTORY_MAX_TOTAL_ROWS: int = Field(
default=120000,
description="Global row budget across all datasets for host inventory build (0 = unlimited)",
)
NETWORK_INVENTORY_MAX_CONNECTIONS: int = Field(
default=120000,
description="Max unique connection tuples retained during host inventory build",
)
'''
if old not in t:
raise SystemExit('network inventory block not found')
t=t.replace(old,new)
p.write_text(t,encoding='utf-8')
print('updated network inventory budgets in config')

View File

@@ -1,164 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
t=p.read_text(encoding='utf-8')
# insert budget vars near existing counters
old=''' connections: dict[tuple, int] = defaultdict(int)
total_rows = 0
ds_with_hosts = 0
'''
new=''' connections: dict[tuple, int] = defaultdict(int)
total_rows = 0
ds_with_hosts = 0
sampled_dataset_count = 0
total_row_budget = max(0, int(settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS))
max_connections = max(0, int(settings.NETWORK_INVENTORY_MAX_CONNECTIONS))
global_budget_reached = False
dropped_connections = 0
'''
if old not in t:
raise SystemExit('counter block not found')
t=t.replace(old,new)
# update batch size and sampled count increments + global budget checks
old2=''' batch_size = 10000
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
rows_scanned_this_dataset = 0
sampled_dataset = False
last_row_index = -1
while True:
'''
new2=''' batch_size = 5000
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
rows_scanned_this_dataset = 0
sampled_dataset = False
last_row_index = -1
while True:
if total_row_budget and total_rows >= total_row_budget:
global_budget_reached = True
break
'''
if old2 not in t:
    raise SystemExit('batch block not found')
t=t.replace(old2,new2)
old3=''' if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
sampled_dataset = True
break
data = ro.data or {}
total_rows += 1
rows_scanned_this_dataset += 1
'''
new3=''' if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
sampled_dataset = True
break
if total_row_budget and total_rows >= total_row_budget:
sampled_dataset = True
global_budget_reached = True
break
data = ro.data or {}
total_rows += 1
rows_scanned_this_dataset += 1
'''
if old3 not in t:
    raise SystemExit('row scan block not found')
t=t.replace(old3,new3)
# cap connection map growth
old4=''' for c in cols['remote_ip']:
rip = _clean(data.get(c))
if _is_valid_ip(rip):
rport = ''
for pc in cols['remote_port']:
rport = _clean(data.get(pc))
if rport:
break
connections[(host_key, rip, rport)] += 1
'''
new4=''' for c in cols['remote_ip']:
rip = _clean(data.get(c))
if _is_valid_ip(rip):
rport = ''
for pc in cols['remote_port']:
rport = _clean(data.get(pc))
if rport:
break
conn_key = (host_key, rip, rport)
if max_connections and len(connections) >= max_connections and conn_key not in connections:
dropped_connections += 1
continue
connections[conn_key] += 1
'''
if old4 not in t:
    raise SystemExit('connection block not found')
t=t.replace(old4,new4)
# sampled_dataset counter
old5=''' if sampled_dataset:
logger.info(
"Host inventory row budget reached for dataset %s (%d rows)",
ds.id,
rows_scanned_this_dataset,
)
break
'''
new5=''' if sampled_dataset:
sampled_dataset_count += 1
logger.info(
"Host inventory row budget reached for dataset %s (%d rows)",
ds.id,
rows_scanned_this_dataset,
)
break
'''
if old5 not in t:
    raise SystemExit('sampled block not found')
t=t.replace(old5,new5)
# break dataset loop if global budget reached
old6=''' if len(rows) < batch_size:
break
# Post-process hosts
'''
new6=''' if len(rows) < batch_size:
break
if global_budget_reached:
logger.info(
"Host inventory global row budget reached for hunt %s at %d rows",
hunt_id,
total_rows,
)
break
# Post-process hosts
'''
if old6 not in t:
    raise SystemExit('post-process boundary block not found')
t=t.replace(old6,new6)
# add stats
old7=''' "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0,
},
}
'''
new7=''' "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
"row_budget_total": settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS,
"connection_budget": settings.NETWORK_INVENTORY_MAX_CONNECTIONS,
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0 or settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS > 0,
"sampled_datasets": sampled_dataset_count,
"global_budget_reached": global_budget_reached,
"dropped_connections": dropped_connections,
},
}
'''
if old7 not in t:
    raise SystemExit('stats block not found')
t=t.replace(old7,new7)
p.write_text(t,encoding='utf-8')
print('updated host inventory with global row and connection budgets')
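The connection cap added here keeps the connections map from growing without bound: once the map holds max_connections unique tuples, new tuples are dropped and counted, while tuples already present keep accumulating. A minimal standalone sketch of that behaviour with made-up data:

from collections import defaultdict

connections: dict[tuple, int] = defaultdict(int)
max_connections = 2            # tiny cap for illustration; the patch defaults to 120000
dropped_connections = 0

for conn_key in [("hostA", "10.0.0.5", "443"),
                 ("hostA", "10.0.0.5", "443"),    # repeat of an existing tuple: still counted
                 ("hostB", "10.0.0.9", "80"),
                 ("hostC", "10.0.0.7", "53")]:    # third unique tuple: dropped
    if max_connections and len(connections) >= max_connections and conn_key not in connections:
        dropped_connections += 1
        continue
    connections[conn_key] += 1

assert len(connections) == 2 and dropped_connections == 1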

View File

@@ -1,39 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')
repls={
"const LARGE_HUNT_SUBGRAPH_HOSTS = 350;":"const LARGE_HUNT_SUBGRAPH_HOSTS = 220;",
"const LARGE_HUNT_SUBGRAPH_EDGES = 2500;":"const LARGE_HUNT_SUBGRAPH_EDGES = 1200;",
"const RENDER_SIMPLIFY_NODE_THRESHOLD = 220;":"const RENDER_SIMPLIFY_NODE_THRESHOLD = 120;",
"const RENDER_SIMPLIFY_EDGE_THRESHOLD = 1200;":"const RENDER_SIMPLIFY_EDGE_THRESHOLD = 500;",
"const EDGE_DRAW_TARGET = 1000;":"const EDGE_DRAW_TARGET = 600;"
}
for a,b in repls.items():
    if a not in t:
        raise SystemExit(f'missing constant: {a}')
    t=t.replace(a,b)
old=''' // Then label hit (so clicking text works too)
for (const n of graph.nodes) {
if (isPointOnNodeLabel(n, wx, wy, vp)) return n;
}
'''
new=''' // Then label hit (so clicking text works too on manageable graph sizes)
if (graph.nodes.length <= 220) {
for (const n of graph.nodes) {
if (isPointOnNodeLabel(n, wx, wy, vp)) return n;
}
}
'''
if old not in t:
    raise SystemExit('label hit block not found')
t=t.replace(old,new)
old2='simulate(g, w / 2, h / 2, 60);'
if t.count(old2) < 2:
    raise SystemExit('expected two simulate calls')
t=t.replace(old2,'simulate(g, w / 2, h / 2, 20);',1)
t=t.replace(old2,'simulate(g, w / 2, h / 2, 30);',1)
p.write_text(t,encoding='utf-8')
print('tightened network map rendering + load limits')

View File

@@ -1,107 +0,0 @@
from pathlib import Path
# config updates
cfg=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
t=cfg.read_text(encoding='utf-8')
anchor=''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
)
'''
ins=''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
)
NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
default=200000,
description="Row budget per dataset when building host inventory (0 = unlimited)",
)
'''
if 'NETWORK_INVENTORY_MAX_ROWS_PER_DATASET' not in t:
    if anchor not in t:
        raise SystemExit('config network anchor not found')
    t=t.replace(anchor,ins)
    cfg.write_text(t,encoding='utf-8')
# host inventory updates
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
t=p.read_text(encoding='utf-8')
if 'from app.config import settings' not in t:
    t=t.replace('from app.db.models import Dataset, DatasetRow\n', 'from app.db.models import Dataset, DatasetRow\nfrom app.config import settings\n')
t=t.replace(' batch_size = 5000\n last_row_index = -1\n while True:\n', ' batch_size = 10000\n max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))\n rows_scanned_this_dataset = 0\n sampled_dataset = False\n last_row_index = -1\n while True:\n')
old=''' for ro in rows:
data = ro.data or {}
total_rows += 1
fqdn = ''
'''
new=''' for ro in rows:
if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
sampled_dataset = True
break
data = ro.data or {}
total_rows += 1
rows_scanned_this_dataset += 1
fqdn = ''
'''
if old not in t:
    raise SystemExit('row loop anchor not found')
t=t.replace(old,new)
old2=''' last_row_index = rows[-1].row_index
if len(rows) < batch_size:
break
'''
new2=''' if sampled_dataset:
logger.info(
"Host inventory row budget reached for dataset %s (%d rows)",
ds.id,
rows_scanned_this_dataset,
)
break
last_row_index = rows[-1].row_index
if len(rows) < batch_size:
break
'''
if old2 not in t:
    raise SystemExit('batch loop end anchor not found')
t=t.replace(old2,new2)
old3=''' return {
"hosts": host_list,
"connections": conn_list,
"stats": {
"total_hosts": len(host_list),
"total_datasets_scanned": len(all_datasets),
"datasets_with_hosts": ds_with_hosts,
"total_rows_scanned": total_rows,
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
"hosts_with_users": sum(1 for h in host_list if h['users']),
},
}
'''
new3=''' sampled = settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0
return {
"hosts": host_list,
"connections": conn_list,
"stats": {
"total_hosts": len(host_list),
"total_datasets_scanned": len(all_datasets),
"datasets_with_hosts": ds_with_hosts,
"total_rows_scanned": total_rows,
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
"hosts_with_users": sum(1 for h in host_list if h['users']),
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
"sampled_mode": sampled,
},
}
'''
if old3 not in t:
    raise SystemExit('return stats anchor not found')
t=t.replace(old3,new3)
p.write_text(t,encoding='utf-8')
print('patched config + host inventory row budget')

View File

@@ -1,38 +0,0 @@
from pathlib import Path
cfg=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/config.py')
t=cfg.read_text(encoding='utf-8')
if 'NETWORK_INVENTORY_MAX_ROWS_PER_DATASET' not in t:
    t=t.replace(
''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
)
''',
''' NETWORK_SUBGRAPH_MAX_EDGES: int = Field(
default=3000, description="Hard cap for edges returned by network subgraph endpoint"
)
NETWORK_INVENTORY_MAX_ROWS_PER_DATASET: int = Field(
default=200000,
description="Row budget per dataset when building host inventory (0 = unlimited)",
)
''')
cfg.write_text(t,encoding='utf-8')
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
t=p.read_text(encoding='utf-8')
if 'from app.config import settings' not in t:
    t=t.replace('from app.db.models import Dataset, DatasetRow\n','from app.db.models import Dataset, DatasetRow\nfrom app.config import settings\n')
t=t.replace(' batch_size = 5000\n last_row_index = -1\n while True:\n',
' batch_size = 10000\n max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))\n rows_scanned_this_dataset = 0\n sampled_dataset = False\n last_row_index = -1\n while True:\n')
t=t.replace(' for ro in rows:\n data = ro.data or {}\n total_rows += 1\n\n',
' for ro in rows:\n if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:\n sampled_dataset = True\n break\n\n data = ro.data or {}\n total_rows += 1\n rows_scanned_this_dataset += 1\n\n')
t=t.replace(' last_row_index = rows[-1].row_index\n if len(rows) < batch_size:\n break\n',
' if sampled_dataset:\n logger.info(\n "Host inventory row budget reached for dataset %s (%d rows)",\n ds.id,\n rows_scanned_this_dataset,\n )\n break\n\n last_row_index = rows[-1].row_index\n if len(rows) < batch_size:\n break\n')
t=t.replace(' return {\n "hosts": host_list,\n "connections": conn_list,\n "stats": {\n "total_hosts": len(host_list),\n "total_datasets_scanned": len(all_datasets),\n "datasets_with_hosts": ds_with_hosts,\n "total_rows_scanned": total_rows,\n "hosts_with_ips": sum(1 for h in host_list if h[\'ips\']),\n "hosts_with_users": sum(1 for h in host_list if h[\'users\']),\n },\n }\n',
' sampled = settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0\n\n return {\n "hosts": host_list,\n "connections": conn_list,\n "stats": {\n "total_hosts": len(host_list),\n "total_datasets_scanned": len(all_datasets),\n "datasets_with_hosts": ds_with_hosts,\n "total_rows_scanned": total_rows,\n "hosts_with_ips": sum(1 for h in host_list if h[\'ips\']),\n "hosts_with_users": sum(1 for h in host_list if h[\'users\']),\n "row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,\n "sampled_mode": sampled,\n },\n }\n')
p.write_text(t,encoding='utf-8')
print('patched backend inventory performance settings')

View File

@@ -1,220 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')
# constants
if 'RENDER_SIMPLIFY_NODE_THRESHOLD' not in t:
    t=t.replace(
"const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\n",
"const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\nconst RENDER_SIMPLIFY_NODE_THRESHOLD = 220;\nconst RENDER_SIMPLIFY_EDGE_THRESHOLD = 1200;\nconst EDGE_DRAW_TARGET = 1000;\n")
# drawBackground signature
t_old='''function drawBackground(
ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,
) {
'''
if t_old in t:
    t=t.replace(t_old,
'''function drawBackground(
ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,
simplify: boolean,
) {
''')
# skip grid when simplify
if 'if (!simplify) {' not in t:
    t=t.replace(
''' ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
const startX = -vp.x / vp.scale - GRID_SPACING;
const startY = -vp.y / vp.scale - GRID_SPACING;
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
ctx.fillStyle = GRID_DOT_COLOR;
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
}
}
ctx.restore();
''',
''' if (!simplify) {
ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
const startX = -vp.x / vp.scale - GRID_SPACING;
const startY = -vp.y / vp.scale - GRID_SPACING;
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
ctx.fillStyle = GRID_DOT_COLOR;
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
}
}
ctx.restore();
}
''')
# drawEdges signature
t=t.replace('''function drawEdges(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
nodeMap: Map<string, GNode>, animTime: number,
) {
for (const e of graph.edges) {
''',
'''function drawEdges(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
nodeMap: Map<string, GNode>, animTime: number,
simplify: boolean,
) {
const edgeStep = simplify ? Math.max(1, Math.ceil(graph.edges.length / EDGE_DRAW_TARGET)) : 1;
for (let ei = 0; ei < graph.edges.length; ei += edgeStep) {
const e = graph.edges[ei];
''')
# simplify edge path
t=t.replace('ctx.beginPath(); ctx.moveTo(a.x, a.y); ctx.quadraticCurveTo(cpx, cpy, b.x, b.y);',
'ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }')
t=t.replace('ctx.beginPath(); ctx.moveTo(a.x, a.y); ctx.quadraticCurveTo(cpx, cpy, b.x, b.y);',
'ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }')
# reduce glow when simplify
t=t.replace(''' ctx.save();
ctx.shadowColor = 'rgba(96,165,250,0.5)'; ctx.shadowBlur = 8;
ctx.strokeStyle = 'rgba(96,165,250,0.3)';
ctx.lineWidth = Math.min(5, 2 + e.weight * 0.2);
ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }
ctx.stroke(); ctx.restore();
''',
''' if (!simplify) {
ctx.save();
ctx.shadowColor = 'rgba(96,165,250,0.5)'; ctx.shadowBlur = 8;
ctx.strokeStyle = 'rgba(96,165,250,0.3)';
ctx.lineWidth = Math.min(5, 2 + e.weight * 0.2);
ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }
ctx.stroke(); ctx.restore();
}
''')
# drawLabels signature and early return
t=t.replace('''function drawLabels(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
search: string, matchSet: Set<string>, vp: Viewport,
) {
''',
'''function drawLabels(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
search: string, matchSet: Set<string>, vp: Viewport,
simplify: boolean,
) {
''')
if 'if (simplify && !search && !hovered && !selected) {' not in t:
    t=t.replace(' const dimmed = search.length > 0;\n',
' const dimmed = search.length > 0;\n if (simplify && !search && !hovered && !selected) {\n return;\n }\n')
# drawGraph adapt
t=t.replace(''' drawBackground(ctx, w, h, vp, dpr);
ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime);
drawNodes(ctx, graph, hovered, selected, search, matchSet);
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp);
ctx.restore();
''',
''' const simplify = graph.nodes.length > RENDER_SIMPLIFY_NODE_THRESHOLD || graph.edges.length > RENDER_SIMPLIFY_EDGE_THRESHOLD;
drawBackground(ctx, w, h, vp, dpr, simplify);
ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime, simplify);
drawNodes(ctx, graph, hovered, selected, search, matchSet);
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify);
ctx.restore();
''')
# hover RAF ref
if 'const hoverRafRef = useRef<number>(0);' not in t:
    t=t.replace(' const graphRef = useRef<Graph | null>(null);\n', ' const graphRef = useRef<Graph | null>(null);\n const hoverRafRef = useRef<number>(0);\n')
# throttle hover hit test on mousemove
old_mm=''' const node = hitTest(graph, canvasRef.current, e.clientX, e.clientY, vpRef.current);
setHovered(node?.id ?? null);
}, [graph, redraw, startAnimLoop]);
'''
new_mm=''' cancelAnimationFrame(hoverRafRef.current);
const clientX = e.clientX;
const clientY = e.clientY;
hoverRafRef.current = requestAnimationFrame(() => {
const node = hitTest(graph, canvasRef.current as HTMLCanvasElement, clientX, clientY, vpRef.current);
setHovered(prev => (prev === (node?.id ?? null) ? prev : (node?.id ?? null)));
});
}, [graph, redraw, startAnimLoop]);
'''
if old_mm in t:
    t=t.replace(old_mm,new_mm)
# cleanup hover raf on unmount in existing animation cleanup effect
if 'cancelAnimationFrame(hoverRafRef.current);' not in t:
    t=t.replace(''' useEffect(() => {
if (graph) startAnimLoop();
return () => { cancelAnimationFrame(animFrameRef.current); isAnimatingRef.current = false; };
}, [graph, startAnimLoop]);
''',
''' useEffect(() => {
if (graph) startAnimLoop();
return () => {
cancelAnimationFrame(animFrameRef.current);
cancelAnimationFrame(hoverRafRef.current);
isAnimatingRef.current = false;
};
}, [graph, startAnimLoop]);
''')
# connectedNodes optimization map
if 'const nodeById = useMemo(() => {' not in t:
    t=t.replace(''' const connectionCount = selectedNode && graph
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
: 0;
const connectedNodes = useMemo(() => {
''',
''' const connectionCount = selectedNode && graph
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
: 0;
const nodeById = useMemo(() => {
const m = new Map<string, GNode>();
if (!graph) return m;
for (const n of graph.nodes) m.set(n.id, n);
return m;
}, [graph]);
const connectedNodes = useMemo(() => {
''')
t=t.replace(''' const n = graph.nodes.find(x => x.id === e.target);
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
} else if (e.target === selectedNode.id) {
const n = graph.nodes.find(x => x.id === e.source);
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
''',
''' const n = nodeById.get(e.target);
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
} else if (e.target === selectedNode.id) {
const n = nodeById.get(e.source);
if (n) neighbors.push({ id: n.id, type: n.meta.type, weight: e.weight });
''')
t=t.replace(' }, [selectedNode, graph]);\n', ' }, [selectedNode, graph, nodeById]);\n')
p.write_text(t,encoding='utf-8')
print('patched NetworkMap adaptive render + hover throttle')
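The edge down-sampling used in the simplify path is a simple stride over the edge list; the arithmetic (EDGE_DRAW_TARGET = 1000 in this version of the patch) works out as in the sketch below, where the function name is illustrative only:

import math

def edge_stride(edge_count: int, target: int = 1000) -> int:
    """Stride applied when simplify is on: draw roughly `target` edges, at most every edge."""
    return max(1, math.ceil(edge_count / target))

assert edge_stride(5000) == 5   # about 1000 of 5000 edges drawn
assert edge_stride(800) == 1    # below target: every edge drawn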

View File

@@ -1,153 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/frontend/src/components/NetworkMap.tsx')
t=p.read_text(encoding='utf-8')
if 'RENDER_SIMPLIFY_NODE_THRESHOLD' not in t:
    t=t.replace('const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\n', 'const LARGE_HUNT_SUBGRAPH_EDGES = 2500;\nconst RENDER_SIMPLIFY_NODE_THRESHOLD = 220;\nconst RENDER_SIMPLIFY_EDGE_THRESHOLD = 1200;\nconst EDGE_DRAW_TARGET = 1000;\n')
t=t.replace('function drawBackground(\n ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,\n) {', 'function drawBackground(\n ctx: CanvasRenderingContext2D, w: number, h: number, vp: Viewport, dpr: number,\n simplify: boolean,\n) {')
t=t.replace(''' ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
const startX = -vp.x / vp.scale - GRID_SPACING;
const startY = -vp.y / vp.scale - GRID_SPACING;
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
ctx.fillStyle = GRID_DOT_COLOR;
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
}
}
ctx.restore();
''',''' if (!simplify) {
ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
const startX = -vp.x / vp.scale - GRID_SPACING;
const startY = -vp.y / vp.scale - GRID_SPACING;
const endX = startX + w / (vp.scale * dpr) + GRID_SPACING * 2;
const endY = startY + h / (vp.scale * dpr) + GRID_SPACING * 2;
ctx.fillStyle = GRID_DOT_COLOR;
for (let gx = Math.floor(startX / GRID_SPACING) * GRID_SPACING; gx < endX; gx += GRID_SPACING) {
for (let gy = Math.floor(startY / GRID_SPACING) * GRID_SPACING; gy < endY; gy += GRID_SPACING) {
ctx.beginPath(); ctx.arc(gx, gy, 1, 0, Math.PI * 2); ctx.fill();
}
}
ctx.restore();
}
''')
t=t.replace('''function drawEdges(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
nodeMap: Map<string, GNode>, animTime: number,
) {
for (const e of graph.edges) {
''','''function drawEdges(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
nodeMap: Map<string, GNode>, animTime: number,
simplify: boolean,
) {
const edgeStep = simplify ? Math.max(1, Math.ceil(graph.edges.length / EDGE_DRAW_TARGET)) : 1;
for (let ei = 0; ei < graph.edges.length; ei += edgeStep) {
const e = graph.edges[ei];
''')
t=t.replace('ctx.beginPath(); ctx.moveTo(a.x, a.y); ctx.quadraticCurveTo(cpx, cpy, b.x, b.y);', 'ctx.beginPath(); ctx.moveTo(a.x, a.y); if (simplify) { ctx.lineTo(b.x, b.y); } else { ctx.quadraticCurveTo(cpx, cpy, b.x, b.y); }')
t=t.replace('''function drawLabels(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
search: string, matchSet: Set<string>, vp: Viewport,
) {
const dimmed = search.length > 0;
''','''function drawLabels(
ctx: CanvasRenderingContext2D, graph: Graph,
hovered: string | null, selected: string | null,
search: string, matchSet: Set<string>, vp: Viewport,
simplify: boolean,
) {
const dimmed = search.length > 0;
if (simplify && !search && !hovered && !selected) {
return;
}
''')
t=t.replace(''' drawBackground(ctx, w, h, vp, dpr);
ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime);
drawNodes(ctx, graph, hovered, selected, search, matchSet);
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp);
ctx.restore();
''',''' const simplify = graph.nodes.length > RENDER_SIMPLIFY_NODE_THRESHOLD || graph.edges.length > RENDER_SIMPLIFY_EDGE_THRESHOLD;
drawBackground(ctx, w, h, vp, dpr, simplify);
ctx.save();
ctx.translate(vp.x * dpr, vp.y * dpr);
ctx.scale(vp.scale * dpr, vp.scale * dpr);
drawEdges(ctx, graph, hovered, selected, nodeMap, animTime, simplify);
drawNodes(ctx, graph, hovered, selected, search, matchSet);
drawLabels(ctx, graph, hovered, selected, search, matchSet, vp, simplify);
ctx.restore();
''')
if 'const hoverRafRef = useRef<number>(0);' not in t:
    t=t.replace('const graphRef = useRef<Graph | null>(null);\n', 'const graphRef = useRef<Graph | null>(null);\n const hoverRafRef = useRef<number>(0);\n')
t=t.replace(''' const node = hitTest(graph, canvasRef.current, e.clientX, e.clientY, vpRef.current);
setHovered(node?.id ?? null);
}, [graph, redraw, startAnimLoop]);
''',''' cancelAnimationFrame(hoverRafRef.current);
const clientX = e.clientX;
const clientY = e.clientY;
hoverRafRef.current = requestAnimationFrame(() => {
const node = hitTest(graph, canvasRef.current as HTMLCanvasElement, clientX, clientY, vpRef.current);
setHovered(prev => (prev === (node?.id ?? null) ? prev : (node?.id ?? null)));
});
}, [graph, redraw, startAnimLoop]);
''')
t=t.replace(''' useEffect(() => {
if (graph) startAnimLoop();
return () => { cancelAnimationFrame(animFrameRef.current); isAnimatingRef.current = false; };
}, [graph, startAnimLoop]);
''',''' useEffect(() => {
if (graph) startAnimLoop();
return () => {
cancelAnimationFrame(animFrameRef.current);
cancelAnimationFrame(hoverRafRef.current);
isAnimatingRef.current = false;
};
}, [graph, startAnimLoop]);
''')
if 'const nodeById = useMemo(() => {' not in t:
    t=t.replace(''' const connectionCount = selectedNode && graph
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
: 0;
const connectedNodes = useMemo(() => {
''',''' const connectionCount = selectedNode && graph
? graph.edges.filter(e => e.source === selectedNode.id || e.target === selectedNode.id).length
: 0;
const nodeById = useMemo(() => {
const m = new Map<string, GNode>();
if (!graph) return m;
for (const n of graph.nodes) m.set(n.id, n);
return m;
}, [graph]);
const connectedNodes = useMemo(() => {
''')
t=t.replace('const n = graph.nodes.find(x => x.id === e.target);','const n = nodeById.get(e.target);')
t=t.replace('const n = graph.nodes.find(x => x.id === e.source);','const n = nodeById.get(e.source);')
t=t.replace(' }, [selectedNode, graph]);',' }, [selectedNode, graph, nodeById]);')
p.write_text(t,encoding='utf-8')
print('patched NetworkMap performance')

View File

@@ -1,227 +0,0 @@
from pathlib import Path
p=Path(r'd:/Projects/Dev/ThreatHunt/backend/app/services/host_inventory.py')
t=p.read_text(encoding='utf-8')
start=t.index('async def build_host_inventory(')
# Locate the end of the function: searching for '\n\n' from the start would stop at the
# first blank line inside the body, so anchor on the last ' }' (the closing brace of the
# returned dict) and take the first blank line after it, falling back to EOF.
ret_idx=t.rfind(' }')
end=t.find('\n\n', ret_idx)
if end==-1:
    end=len(t)
new_func='''async def build_host_inventory(hunt_id: str, db: AsyncSession) -> dict:
"""Build a deduplicated host inventory from all datasets in a hunt.
Returns dict with 'hosts', 'connections', and 'stats'.
Each host has: id, hostname, fqdn, client_id, ips, os, users, datasets, row_count.
"""
ds_result = await db.execute(
select(Dataset).where(Dataset.hunt_id == hunt_id)
)
all_datasets = ds_result.scalars().all()
if not all_datasets:
return {"hosts": [], "connections": [], "stats": {
"total_hosts": 0, "total_datasets_scanned": 0,
"total_rows_scanned": 0,
}}
hosts: dict[str, dict] = {} # fqdn -> host record
ip_to_host: dict[str, str] = {} # local-ip -> fqdn
connections: dict[tuple, int] = defaultdict(int)
total_rows = 0
ds_with_hosts = 0
sampled_dataset_count = 0
total_row_budget = max(0, int(settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS))
max_connections = max(0, int(settings.NETWORK_INVENTORY_MAX_CONNECTIONS))
global_budget_reached = False
dropped_connections = 0
for ds in all_datasets:
if total_row_budget and total_rows >= total_row_budget:
global_budget_reached = True
break
cols = _identify_columns(ds)
if not cols['fqdn'] and not cols['host_id']:
continue
ds_with_hosts += 1
batch_size = 5000
max_rows_per_dataset = max(0, int(settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET))
rows_scanned_this_dataset = 0
sampled_dataset = False
last_row_index = -1
while True:
if total_row_budget and total_rows >= total_row_budget:
sampled_dataset = True
global_budget_reached = True
break
rr = await db.execute(
select(DatasetRow)
.where(DatasetRow.dataset_id == ds.id)
.where(DatasetRow.row_index > last_row_index)
.order_by(DatasetRow.row_index)
.limit(batch_size)
)
rows = rr.scalars().all()
if not rows:
break
for ro in rows:
if max_rows_per_dataset and rows_scanned_this_dataset >= max_rows_per_dataset:
sampled_dataset = True
break
if total_row_budget and total_rows >= total_row_budget:
sampled_dataset = True
global_budget_reached = True
break
data = ro.data or {}
total_rows += 1
rows_scanned_this_dataset += 1
fqdn = ''
for c in cols['fqdn']:
fqdn = _clean(data.get(c))
if fqdn:
break
client_id = ''
for c in cols['host_id']:
client_id = _clean(data.get(c))
if client_id:
break
if not fqdn and not client_id:
continue
host_key = fqdn or client_id
if host_key not in hosts:
short = fqdn.split('.')[0] if fqdn and '.' in fqdn else fqdn
hosts[host_key] = {
'id': host_key,
'hostname': short or client_id,
'fqdn': fqdn,
'client_id': client_id,
'ips': set(),
'os': '',
'users': set(),
'datasets': set(),
'row_count': 0,
}
h = hosts[host_key]
h['datasets'].add(ds.name)
h['row_count'] += 1
if client_id and not h['client_id']:
h['client_id'] = client_id
for c in cols['username']:
u = _extract_username(_clean(data.get(c)))
if u:
h['users'].add(u)
for c in cols['local_ip']:
ip = _clean(data.get(c))
if _is_valid_ip(ip):
h['ips'].add(ip)
ip_to_host[ip] = host_key
for c in cols['os']:
ov = _clean(data.get(c))
if ov and not h['os']:
h['os'] = ov
for c in cols['remote_ip']:
rip = _clean(data.get(c))
if _is_valid_ip(rip):
rport = ''
for pc in cols['remote_port']:
rport = _clean(data.get(pc))
if rport:
break
conn_key = (host_key, rip, rport)
if max_connections and len(connections) >= max_connections and conn_key not in connections:
dropped_connections += 1
continue
connections[conn_key] += 1
if sampled_dataset:
sampled_dataset_count += 1
logger.info(
"Host inventory sampling for dataset %s (%d rows scanned)",
ds.id,
rows_scanned_this_dataset,
)
break
last_row_index = rows[-1].row_index
if len(rows) < batch_size:
break
if global_budget_reached:
logger.info(
"Host inventory global row budget reached for hunt %s at %d rows",
hunt_id,
total_rows,
)
break
# Post-process hosts
for h in hosts.values():
if not h['os'] and h['fqdn']:
h['os'] = _infer_os(h['fqdn'])
h['ips'] = sorted(h['ips'])
h['users'] = sorted(h['users'])
h['datasets'] = sorted(h['datasets'])
# Build connections, resolving IPs to host keys
conn_list = []
seen = set()
for (src, dst_ip, dst_port), cnt in connections.items():
if dst_ip in _IGNORE_IPS:
continue
dst_host = ip_to_host.get(dst_ip, '')
if dst_host == src:
continue
key = tuple(sorted([src, dst_host or dst_ip]))
if key in seen:
continue
seen.add(key)
conn_list.append({
'source': src,
'target': dst_host or dst_ip,
'target_ip': dst_ip,
'port': dst_port,
'count': cnt,
})
host_list = sorted(hosts.values(), key=lambda x: x['row_count'], reverse=True)
return {
"hosts": host_list,
"connections": conn_list,
"stats": {
"total_hosts": len(host_list),
"total_datasets_scanned": len(all_datasets),
"datasets_with_hosts": ds_with_hosts,
"total_rows_scanned": total_rows,
"hosts_with_ips": sum(1 for h in host_list if h['ips']),
"hosts_with_users": sum(1 for h in host_list if h['users']),
"row_budget_per_dataset": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET,
"row_budget_total": settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS,
"connection_budget": settings.NETWORK_INVENTORY_MAX_CONNECTIONS,
"sampled_mode": settings.NETWORK_INVENTORY_MAX_ROWS_PER_DATASET > 0 or settings.NETWORK_INVENTORY_MAX_TOTAL_ROWS > 0,
"sampled_datasets": sampled_dataset_count,
"global_budget_reached": global_budget_reached,
"dropped_connections": dropped_connections,
},
}
'''
out=t[:start]+new_func+t[end:]
p.write_text(out,encoding='utf-8')
print('replaced build_host_inventory with hard-budget fast mode')
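Worth noting in the rewritten function: connection pairs are deduplicated direction-insensitively by keying on the sorted endpoint pair, so A-to-B and B-to-A collapse into a single edge. A quick illustration with placeholder hosts:

seen = set()
for src, dst in [("hostA", "hostB"), ("hostB", "hostA"), ("hostA", "hostC")]:
    key = tuple(sorted([src, dst]))   # identical key for either direction
    if key in seen:
        continue                      # the reverse direction is skipped, as in build_host_inventory
    seen.add(key)
print(sorted(seen))                   # [('hostA', 'hostB'), ('hostA', 'hostC')]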

9
backend/.env Normal file
View File

@@ -0,0 +1,9 @@
FLASK_ENV=development
FLASK_DEBUG=True
SECRET_KEY=development-secret-key-change-in-production
MAX_CONTENT_LENGTH=104857600
UPLOAD_FOLDER=uploaded
OUTPUT_FOLDER=output
VIRUSTOTAL_API_KEY=
DATABASE_URL=sqlite:///threat_hunter.db
REDIS_URL=redis://localhost:6379/0
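On the Flask side these variables would typically be read from the environment; the sketch below only shows how the values above might be consumed (MAX_CONTENT_LENGTH=104857600 is exactly 100 MiB) and assumes python-dotenv is available. It is not code from the repository.

import os
from dotenv import load_dotenv   # assumption: python-dotenv is installed

load_dotenv()  # reads backend/.env into the process environment

MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH", str(100 * 1024 * 1024)))  # 104857600 bytes = 100 MiB
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///threat_hunter.db")
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")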

0
backend/.env.example Normal file
View File

23
backend/Dockerfile Normal file
View File

@@ -0,0 +1,23 @@
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
postgresql-client \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements first for better caching
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Create directories
RUN mkdir -p uploads output
EXPOSE 5000
CMD ["python", "app.py"]

24
backend/Dockerfile.prod Normal file
View File

@@ -0,0 +1,24 @@
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
postgresql-client \
&& rm -rf /var/lib/apt/lists/*
# Create non-root user
RUN useradd --create-home --shell /bin/bash app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
RUN chown -R app:app /app
USER app
EXPOSE 5000
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "app:app"]

Some files were not shown because too many files have changed in this diff.