diff --git a/.env.example b/.env.example index 38f5089..e37407f 100644 --- a/.env.example +++ b/.env.example @@ -1,27 +1,53 @@ -# Docker environment configuration -# Copy this to .env and customize for your deployment +# ── ThreatHunt Configuration ────────────────────────────────────────── +# All backend env vars are prefixed with TH_ and match AppConfig field names. +# Copy this file to .env and adjust values. -# Agent Configuration -# Choose one: local, networked, online, auto -THREAT_HUNT_AGENT_PROVIDER=auto +# ── General ─────────────────────────────────────────────────────────── +TH_DEBUG=false -# Local Provider (on-device or on-prem models) -# THREAT_HUNT_LOCAL_MODEL_PATH=/models/model.gguf +# ── Database ────────────────────────────────────────────────────────── +# SQLite for local dev (zero-config): +TH_DATABASE_URL=sqlite+aiosqlite:///./threathunt.db +# PostgreSQL for production: +# TH_DATABASE_URL=postgresql+asyncpg://threathunt:password@localhost:5432/threathunt -# Networked Provider (shared internal inference service) -# THREAT_HUNT_NETWORKED_ENDPOINT=http://inference-service:5000 -# THREAT_HUNT_NETWORKED_KEY=api-key-here +# ── CORS ────────────────────────────────────────────────────────────── +TH_ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000 -# Online Provider (external hosted APIs) -# THREAT_HUNT_ONLINE_API_KEY=sk-your-api-key -# THREAT_HUNT_ONLINE_PROVIDER=openai -# THREAT_HUNT_ONLINE_MODEL=gpt-3.5-turbo +# ── File uploads ────────────────────────────────────────────────────── +TH_MAX_UPLOAD_SIZE_MB=500 -# Agent Behavior -THREAT_HUNT_AGENT_MAX_TOKENS=1024 -THREAT_HUNT_AGENT_REASONING=true -THREAT_HUNT_AGENT_HISTORY_LENGTH=10 -THREAT_HUNT_AGENT_FILTER_SENSITIVE=true +# ── LLM Cluster (Wile & Roadrunner) ────────────────────────────────── +TH_OPENWEBUI_URL=https://ai.guapo613.beer +TH_OPENWEBUI_API_KEY= +TH_WILE_HOST=100.110.190.12 +TH_WILE_OLLAMA_PORT=11434 +TH_ROADRUNNER_HOST=100.110.190.11 +TH_ROADRUNNER_OLLAMA_PORT=11434 -# Frontend 
+# ── Default models (auto-selected by TaskRouter) ───────────────────── +TH_DEFAULT_FAST_MODEL=llama3.1:latest +TH_DEFAULT_HEAVY_MODEL=llama3.1:70b-instruct-q4_K_M +TH_DEFAULT_CODE_MODEL=qwen2.5-coder:32b +TH_DEFAULT_VISION_MODEL=llama3.2-vision:11b +TH_DEFAULT_EMBEDDING_MODEL=bge-m3:latest + +# ── Agent behaviour ────────────────────────────────────────────────── +TH_AGENT_MAX_TOKENS=2048 +TH_AGENT_TEMPERATURE=0.3 +TH_AGENT_HISTORY_LENGTH=10 +TH_FILTER_SENSITIVE_DATA=true + +# ── Enrichment API keys (optional) ─────────────────────────────────── +TH_VIRUSTOTAL_API_KEY= +TH_ABUSEIPDB_API_KEY= +TH_SHODAN_API_KEY= + +# ── Auth ───────────────────────────────────────────────────────────── +TH_JWT_SECRET=CHANGE-ME-IN-PRODUCTION-USE-A-REAL-SECRET +TH_JWT_ACCESS_TOKEN_MINUTES=60 +TH_JWT_REFRESH_TOKEN_DAYS=7 + +# ── Frontend ───────────────────────────────────────────────────────── REACT_APP_API_URL=http://localhost:8000 + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3cb1044 --- /dev/null +++ b/.gitignore @@ -0,0 +1,56 @@ +# ── Python ──────────────────────────────────── +__pycache__/ +*.py[cod] +*$py.class +*.egg-info/ +dist/ +build/ +*.egg +.eggs/ + +# ── Virtual environments ───────────────────── +venv/ +.venv/ +env/ + +# ── IDE / Editor ───────────────────────────── +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# ── OS ──────────────────────────────────────── +.DS_Store +Thumbs.db + +# ── Environment / Secrets ──────────────────── +.env +*.env.local + +# ── Database ───────────────────────────────── +*.db +*.sqlite3 + +# ── Uploads ────────────────────────────────── +uploads/ + +# ── Node / Frontend ────────────────────────── +node_modules/ +frontend/build/ +frontend/.env.local +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# ── Docker ─────────────────────────────────── +docker-compose.override.yml + +# ── Test / Coverage ────────────────────────── +.coverage +htmlcov/ +.pytest_cache/ +.mypy_cache/ + +# ── Alembic 
────────────────────────────────── +alembic/versions/*.pyc diff --git a/Dockerfile.backend b/Dockerfile.backend index cf1011b..999eca4 100644 --- a/Dockerfile.backend +++ b/Dockerfile.backend @@ -1,11 +1,11 @@ -# ThreatHunt Backend API - Python 3.11 -FROM python:3.11-slim +# ThreatHunt Backend API - Python 3.13 +FROM python:3.13-slim WORKDIR /app # Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc \ + gcc curl \ && rm -rf /var/lib/apt/lists/* # Copy requirements @@ -17,16 +17,16 @@ RUN pip install --no-cache-dir -r requirements.txt # Copy backend code COPY backend/ . -# Create non-root user -RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app +# Create non-root user & data directory +RUN useradd -m -u 1000 appuser && mkdir -p /app/data && chown -R appuser:appuser /app USER appuser # Expose port EXPOSE 8000 # Health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ - CMD python -c "import requests; requests.get('http://localhost:8000/api/agent/health')" +HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \ + CMD curl -f http://localhost:8000/ || exit 1 -# Run application -CMD ["python", "run.py"] +# Run Alembic migrations, then exec the server so it replaces the shell as PID 1 and receives SIGTERM from `docker stop` +CMD ["sh", "-c", "python -m alembic upgrade head && exec python run.py"] diff --git a/Dockerfile.frontend b/Dockerfile.frontend index e28fab3..c99551c 100644 --- a/Dockerfile.frontend +++ b/Dockerfile.frontend @@ -1,5 +1,5 @@ # ThreatHunt Frontend - Node.js React -FROM node:18-alpine AS builder +FROM node:20-alpine AS builder WORKDIR /app @@ -17,20 +17,14 @@ COPY frontend/tsconfig.json ./ # Build application RUN npm run build -# Production stage -FROM node:18-alpine +# Production stage — nginx reverse-proxy + static files +FROM nginx:alpine -WORKDIR /app +# Copy built React app +COPY --from=builder /app/build /usr/share/nginx/html -# Install serve to serve the static files -RUN npm install -g serve - -# Copy built 
application from builder -COPY --from=builder /app/build ./build - -# Create non-root user -RUN addgroup -g 1000 appuser && adduser -D -u 1000 -G appuser appuser -USER appuser +# Copy custom nginx config (proxies /api to backend) +COPY frontend/nginx.conf /etc/nginx/conf.d/default.conf # Expose port EXPOSE 3000 @@ -39,5 +33,4 @@ EXPOSE 3000 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ CMD wget --quiet --tries=1 --spider http://localhost:3000/ || exit 1 -# Serve application -CMD ["serve", "-s", "build", "-l", "3000"] +CMD ["nginx", "-g", "daemon off;"] diff --git a/SKILLS/00-operating-model.md b/SKILLS/00-operating-model.md new file mode 100644 index 0000000..98d16b4 --- /dev/null +++ b/SKILLS/00-operating-model.md @@ -0,0 +1,21 @@ + +# Operating Model + +## Default cadence +- Prefer iterative progress over big bangs. +- Keep diffs small: target ≤ 300 changed lines per PR unless justified. +- Update tests/docs as part of the same change when possible. + +## Working agreement +- Start with a PLAN for non-trivial tasks. +- Implement the smallest slice that satisfies acceptance criteria. +- Verify via DoD. +- Write a crisp PR summary: what changed, why, and how verified. + +## Stop conditions (plan first) +Stop and produce a PLAN (do not code yet) if: +- scope is unclear +- more than 3 files will change +- data model changes +- auth/security boundaries +- performance-critical paths diff --git a/SKILLS/05-agent-taxonomy.md b/SKILLS/05-agent-taxonomy.md new file mode 100644 index 0000000..5063494 --- /dev/null +++ b/SKILLS/05-agent-taxonomy.md @@ -0,0 +1,36 @@ +# Agent Types & Roles (Practical Taxonomy) + +Use this skill to choose the *right* kind of agent workflow for the job. + +## Common agent "types" (in practice) + +### 1) Chat assistant (no tools) +Best for: explanations, brainstorming, small edits. +Risk: can hallucinate; no grounding in repo state. 
+ +### 2) Tool-using single agent +Best for: well-scoped tasks where the agent can read/write files and run commands. +Key control: strict DoD gates + minimal permissions. + +### 3) Planner + Executor (2-role pattern) +Best for: medium complexity work (multi-file changes, feature work). +Flow: Planner writes plan + acceptance criteria → Executor implements → Reviewer checks. + +### 4) Multi-agent (specialists) +Best for: bigger features with separable workstreams (UI, backend, docs, tests). +Rule: isolate context per role; use separate branches/worktrees. + +### 5) Supervisor / orchestrator +Best for: long-running workflows with checkpoints (pipelines, report generation, PAD docs). +Rule: supervisor delegates, enforces gates, and composes final output. + +## Decision rules (fast) +- If you can describe it in ≤ 5 steps → single tool-using agent. +- If you need tradeoffs/design → Planner + Executor. +- If UI + backend + docs/tests all move → multi-agent specialists. +- If it's a pipeline that runs repeatedly → orchestrator. + +## Guardrails (always) +- DoD is the truth gate. +- Separate branches/worktrees for parallel work. +- Log decisions + commands in AGENT_LOG.md. 
diff --git a/SKILLS/10-definition-of-done.md b/SKILLS/10-definition-of-done.md new file mode 100644 index 0000000..d99148e --- /dev/null +++ b/SKILLS/10-definition-of-done.md @@ -0,0 +1,24 @@ + +# Definition of Done (DoD) + +A change is "done" only when: + +## Code correctness +- Builds successfully (if applicable) +- Tests pass +- Linting/formatting passes +- Types/checks pass (if applicable) + +## Quality +- No new warnings introduced +- Edge cases handled (inputs validated, errors meaningful) +- Hot paths not regressed (if applicable) + +## Hygiene +- No secrets committed +- Docs updated if behavior or usage changed +- PR summary includes verification steps + +## Commands +- macOS/Linux: `./scripts/dod.sh` +- Windows: `.\scripts\dod.ps1` diff --git a/SKILLS/20-repo-map.md b/SKILLS/20-repo-map.md new file mode 100644 index 0000000..810f986 --- /dev/null +++ b/SKILLS/20-repo-map.md @@ -0,0 +1,16 @@ + +# Repo Mapping Skill + +When entering a repo: +1) Read README.md +2) Identify entrypoints (app main / server startup / CLI) +3) Identify config (env vars, .env.example, config files) +4) Identify test/lint scripts (package.json, pyproject.toml, Makefile, etc.) +5) Write a 10-line "repo map" in the PLAN before changing code + +Output format: +- Purpose: +- Key modules: +- Data flow: +- Commands: +- Risks: diff --git a/SKILLS/25-algorithms-performance.md b/SKILLS/25-algorithms-performance.md new file mode 100644 index 0000000..4bbe6b2 --- /dev/null +++ b/SKILLS/25-algorithms-performance.md @@ -0,0 +1,20 @@ +# Algorithms & Performance + +Use this skill when performance matters (large inputs, hot paths, or repeated calls). + +## Checklist +- Identify the **state** you're recomputing. +- Add **memoization / caching** when the same subproblem repeats. +- Prefer **linear scans** + caches over nested loops when possible. +- If you can write it as a **recurrence**, you can test it. + +## Practical heuristics +- Measure first when possible (timing + input sizes). 
+- Optimize the biggest wins: avoid repeated I/O, repeated parsing, repeated network calls. +- Keep caches bounded (size/TTL) and invalidate safely. +- Choose data structures intentionally: dict/set for membership, heap for top-k, deque for queues. + +## Review notes (for PRs) +- Call out accidental O(n²) patterns. +- Suggest table/DP or memoization when repeated work is obvious. +- Add tests that cover base cases + typical cases + worst-case size. diff --git a/SKILLS/26-vibe-coding-fundamentals.md b/SKILLS/26-vibe-coding-fundamentals.md new file mode 100644 index 0000000..01c4b4e --- /dev/null +++ b/SKILLS/26-vibe-coding-fundamentals.md @@ -0,0 +1,31 @@ +# Vibe Coding With Fundamentals (Safety Rails) + +Use this skill when you're using "vibe coding" (fast, conversational building) but want production-grade outcomes. + +## The good +- Rapid scaffolding and iteration +- Fast UI prototypes +- Quick exploration of architectures and options + +## The failure mode +- "It works on my machine" code with weak tests +- Security foot-guns (auth, input validation, secrets) +- Performance cliffs (accidental O(n²), repeated I/O) +- Unmaintainable abstractions + +## Safety rails (apply every time) +- Always start with acceptance criteria (what "done" means). +- Prefer small PRs; never dump a huge AI diff. +- Require DoD gates (lint/test/build) before merge. +- Write tests for behavior changes. +- For anything security/data related: do a Reviewer pass. + +## When to slow down +- Auth/session/token work +- Anything touching payments, PII, secrets +- Data migrations/schema changes +- Performance-critical paths +- "It's flaky" or "it only fails in CI" + +## Practical prompt pattern (use in PLAN) +- "State assumptions, list files to touch, propose tests, and include rollback steps." 
diff --git a/SKILLS/27-performance-profiling.md b/SKILLS/27-performance-profiling.md new file mode 100644 index 0000000..6dc5504 --- /dev/null +++ b/SKILLS/27-performance-profiling.md @@ -0,0 +1,31 @@ +# Performance Profiling (Bun/Node) + +Use this skill when: +- a hot path feels slow +- CPU usage is high +- you suspect accidental O(n²) or repeated work +- you need evidence before optimizing + +## Bun CPU profiling +Bun supports CPU profiling via `--cpu-prof` (generates a `.cpuprofile` you can open in Chrome DevTools). + +Upcoming: `bun --cpu-prof-md