Add ThreatHunt agent backend/frontend scaffolding

2026-03-01 05:50:21 -05:00 · 2025-12-29 10:22:57 -05:00
parent dc2dcd02c1
commit d0c9f88268
35 changed files with 21929 additions and 42 deletions
--- a/backend/app/init.py
+++ b/backend/app/init.py
@@ -0,0 +1 @@
+"""Backend initialization."""
--- a/backend/app/agent/debate.py
+++ b/backend/app/agent/debate.py
@@ -0,0 +1,67 @@
+import asyncio
+
+async def debated_generate(provider, prompt: str) -> str:
+    """Run a three-persona advisory debate and return the judged merge.
+
+    Planner, Critic and Pragmatist prompts go to ``provider.generate``
+    concurrently; a Judge prompt then merges the three replies.
+    """
+
+    planner_prompt = f"""
+You are the Planner.
+Give structured advisory guidance only.
+No execution. No tools.
+
+Request:
+{prompt}
+"""
+
+    critic_prompt = f"""
+You are the Critic.
+Identify risks, missing steps, and assumptions.
+No execution. No tools.
+
+Request:
+{prompt}
+"""
+
+    pragmatist_prompt = f"""
+You are the Pragmatist.
+Suggest the safest and simplest approach.
+No execution. No tools.
+
+Request:
+{prompt}
+"""
+
+    # The personas do not depend on one another, so ask all three at once;
+    # gather() hands the replies back in the order the calls are listed.
+    planner_resp, critic_resp, prag_resp = await asyncio.gather(
+        provider.generate(planner_prompt),
+        provider.generate(critic_prompt),
+        provider.generate(pragmatist_prompt),
+    )
+
+    judge_prompt = f"""
+You are the Judge.
+
+Merge the three responses into ONE final advisory answer.
+
+Rules:
+- Advisory only
+- No execution
+- Clearly list risks and assumptions
+- Be concise
+
+Planner:
+{planner_resp}
+
+Critic:
+{critic_resp}
+
+Pragmatist:
+{prag_resp}
+"""
+
+    # Only the Judge's merged text is returned to the caller.
+    return await provider.generate(judge_prompt)
--- a/backend/app/agents/init.py
+++ b/backend/app/agents/init.py
@@ -0,0 +1,16 @@
+"""Analyst-assist agent module for ThreatHunt.
+
+Provides read-only guidance on CSV artifact data, analytical pivots, and hypotheses.
+Agents are advisory only and do not execute actions or modify data.
+"""
+
+from .core import ThreatHuntAgent
+from .providers import LLMProvider, LocalProvider, NetworkedProvider, OnlineProvider
+
+__all__ = [
+    "ThreatHuntAgent",
+    "LLMProvider",
+    "LocalProvider",
+    "NetworkedProvider",
+    "OnlineProvider",
+]
--- a/backend/app/agents/config.py
+++ b/backend/app/agents/config.py
@@ -0,0 +1,59 @@
+"""Configuration for agent settings."""
+
+import os
+from typing import Literal
+
+
+class AgentConfig:
+    """Environment-backed settings for the analyst-assist agent.
+
+    Values are read from the environment when this class body is executed.
+    """
+
+    # Which provider to use: 'local', 'networked', 'online', or 'auto'.
+    PROVIDER_TYPE: Literal["local", "networked", "online", "auto"] = os.getenv(
+        "THREAT_HUNT_AGENT_PROVIDER", "auto"
+    )
+
+    # Local provider
+    LOCAL_MODEL_PATH: str | None = os.getenv("THREAT_HUNT_LOCAL_MODEL_PATH")
+
+    # Networked provider
+    NETWORKED_ENDPOINT: str | None = os.getenv("THREAT_HUNT_NETWORKED_ENDPOINT")
+    NETWORKED_API_KEY: str | None = os.getenv("THREAT_HUNT_NETWORKED_KEY")
+
+    # Online provider
+    ONLINE_API_PROVIDER: str = os.getenv("THREAT_HUNT_ONLINE_PROVIDER", "openai")
+    ONLINE_API_KEY: str | None = os.getenv("THREAT_HUNT_ONLINE_API_KEY")
+    ONLINE_MODEL: str | None = os.getenv("THREAT_HUNT_ONLINE_MODEL")
+
+    # Agent behaviour. The int() conversions raise ValueError if the variable
+    # holds a non-numeric string.
+    MAX_RESPONSE_TOKENS: int = int(os.getenv("THREAT_HUNT_AGENT_MAX_TOKENS", "1024"))
+    ENABLE_REASONING: bool = os.getenv(
+        "THREAT_HUNT_AGENT_REASONING", "true"
+    ).lower() in {"true", "1", "yes"}
+    CONVERSATION_HISTORY_LENGTH: int = int(
+        os.getenv("THREAT_HUNT_AGENT_HISTORY_LENGTH", "10")
+    )
+
+    # Privacy: a flag is on for "true", "1" or "yes" in any letter case.
+    FILTER_SENSITIVE_DATA: bool = os.getenv(
+        "THREAT_HUNT_AGENT_FILTER_SENSITIVE", "true"
+    ).lower() in {"true", "1", "yes"}
+
+    @classmethod
+    def is_agent_enabled(cls) -> bool:
+        """Return True when the selected provider type has its key setting.
+
+        ``auto`` accepts any one of the three settings; a provider type that
+        is not recognised counts as disabled.
+        """
+        ready = {
+            "local": bool(cls.LOCAL_MODEL_PATH),
+            "networked": bool(cls.NETWORKED_ENDPOINT),
+            "online": bool(cls.ONLINE_API_KEY),
+        }
+        if cls.PROVIDER_TYPE == "auto":
+            return any(ready.values())
+        return ready.get(cls.PROVIDER_TYPE, False)
--- a/backend/app/agents/core.py
+++ b/backend/app/agents/core.py
@@ -0,0 +1,208 @@
+"""Core ThreatHunt analyst-assist agent.
+
+Provides read-only guidance on CSV artifact data, analytical pivots, and hypotheses.
+Agents are advisory only - no execution, no alerts, no data modifications.
+"""
+
+import logging
+from typing import Optional
+from pydantic import BaseModel, Field
+
+from .providers import LLMProvider, get_provider
+
+logger = logging.getLogger(__name__)
+
+
+class AgentContext(BaseModel):
+    """Input to the agent: the analyst's query plus optional data context."""
+
+    # The only required field.
+    query: str = Field(default=..., description="Analyst question or request for guidance")
+    dataset_name: Optional[str] = Field(
+        default=None, description="Name of CSV dataset"
+    )
+    artifact_type: Optional[str] = Field(
+        default=None, description="Artifact type (e.g., file, process, network)"
+    )
+    host_identifier: Optional[str] = Field(default=None, description="Host name, IP, or identifier")
+    data_summary: Optional[str] = Field(default=None, description="Brief description of uploaded data")
+    # Defaults to a fresh empty list per instance.
+    conversation_history: Optional[list[dict]] = Field(
+        default_factory=list, description="Previous messages in conversation"
+    )
+
+
+class AgentResponse(BaseModel):
+    """Structured advisory output returned by the agent."""
+
+    # Required: the guidance text and a confidence bounded to [0, 1].
+    guidance: str = Field(default=..., description="Advisory guidance for analyst")
+    confidence: float = Field(
+        default=..., description="Confidence in guidance (0-1)", ge=0.0, le=1.0
+    )
+    # Suggestion lists start empty when not supplied.
+    suggested_pivots: list[str] = Field(
+        description="Suggested analytical directions", default_factory=list
+    )
+    suggested_filters: list[str] = Field(
+        description="Suggested data filters or queries", default_factory=list
+    )
+    caveats: Optional[str] = Field(default=None, description="Assumptions, limitations, or caveats")
+    reasoning: Optional[str] = Field(
+        default=None, description="Explanation of how guidance was generated"
+    )
+
+
+class ThreatHuntAgent:
+    """Analyst-assist agent for ThreatHunt.
+
+    Provides guidance on:
+    - Interpreting CSV artifact data
+    - Suggesting analytical pivots and filters
+    - Forming and testing hypotheses
+
+    Policy:
+    - Advisory guidance only (no execution)
+    - No database or schema changes
+    - No alert escalation
+    - Transparent reasoning
+    """
+
+    def __init__(self, provider: Optional[LLMProvider] = None):
+        """Initialize agent with LLM provider.
+
+        Args:
+            provider: LLM provider instance. If None, uses get_provider() with
+                auto mode; if that fails the agent is created without one.
+        """
+        if provider is None:
+            try:
+                provider = get_provider("auto")
+            except RuntimeError as e:
+                logger.warning("Could not initialize default provider: %s", e)
+
+        self.provider = provider
+        self.system_prompt = self._build_system_prompt()
+
+    def _build_system_prompt(self) -> str:
+        """Return the fixed advisory-only system prompt for the agent."""
+        return """You are an analyst-assist agent for ThreatHunt, a threat hunting platform.
+
+Your role:
+- Interpret and explain CSV artifact data from Velociraptor
+- Suggest analytical pivots, filters, and hypotheses
+- Highlight anomalies, patterns, or points of interest
+- Guide analysts without replacing their judgment
+
+Your constraints:
+- You ONLY provide guidance and suggestions
+- You do NOT execute actions or tools
+- You do NOT modify data or escalate alerts
+- You do NOT make autonomous decisions
+- You ONLY analyze data presented to you
+- You explain your reasoning transparently
+- You acknowledge limitations and assumptions
+- You suggest next investigative steps
+
+When responding:
+1. Start with a clear, direct answer to the query
+2. Explain your reasoning based on the data context provided
+3. Suggest 2-4 analytical pivots the analyst might explore
+4. Suggest 2-4 data filters or queries that might be useful
+5. Include relevant caveats or assumptions
+6. Be honest about what you cannot determine from the data
+
+Remember: The analyst is the decision-maker. You are an assistant."""
+
+    async def assist(self, context: AgentContext) -> AgentResponse:
+        """Provide guidance on artifact data and analysis.
+
+        The system prompt is sent ahead of the request context on every call.
+
+        Args:
+            context: Request context including query and data context.
+
+        Returns:
+            Guidance response with suggestions and reasoning.
+
+        Raises:
+            RuntimeError: If no provider is available.
+        """
+        if not self.provider:
+            raise RuntimeError(
+                "No LLM provider available. Configure at least one of: "
+                "THREAT_HUNT_LOCAL_MODEL_PATH, THREAT_HUNT_NETWORKED_ENDPOINT, "
+                "or THREAT_HUNT_ONLINE_API_KEY"
+            )
+
+        # generate() accepts one prompt string, so the behavioural rules in
+        # system_prompt must be prepended or the model never receives them.
+        prompt = f"{self.system_prompt}\n\n{self._build_prompt(context)}"
+
+        try:
+            guidance = await self.provider.generate(prompt, max_tokens=1024)
+            # Wrap the raw text in the structured response model.
+            response = self._parse_response(guidance, context)
+            logger.info(
+                "Agent assisted with query: %s... (dataset: %s)",
+                context.query[:50],
+                context.dataset_name,
+            )
+            return response
+        except Exception as e:
+            # logger.exception keeps the traceback that logger.error dropped.
+            logger.exception("Error generating guidance: %s", e)
+            raise
+
+    def _build_prompt(self, context: AgentContext) -> str:
+        """Render the request context as newline-separated prompt lines.
+
+        Optional fields are skipped when empty; at most the last five
+        conversation messages are included.
+        """
+        prompt_parts = [f"Analyst query: {context.query}"]
+
+        optional_fields = (
+            ("Dataset", context.dataset_name),
+            ("Artifact type", context.artifact_type),
+            ("Host", context.host_identifier),
+            ("Data summary", context.data_summary),
+        )
+        prompt_parts.extend(
+            f"{label}: {value}" for label, value in optional_fields if value
+        )
+
+        if context.conversation_history:
+            prompt_parts.append("\nConversation history:")
+            for msg in context.conversation_history[-5:]:
+                prompt_parts.append(f"  {msg.get('role', 'unknown')}: {msg.get('content', '')}")
+
+        return "\n".join(prompt_parts)
+
+    def _parse_response(self, response_text: str, context: AgentContext) -> AgentResponse:
+        """Wrap the raw LLM text in an AgentResponse.
+
+        Note: This is a simplified parser. Only ``guidance`` comes from the
+        model; confidence, pivots, filters, caveats and reasoning are fixed
+        placeholder values, and ``context`` is not consulted.
+        """
+        # In production, parse JSON or use structured output from the LLM.
+        return AgentResponse(
+            guidance=response_text,
+            confidence=0.8,  # Placeholder
+            suggested_pivots=[
+                "Analyze temporal patterns",
+                "Cross-reference with known indicators",
+                "Examine outliers in the dataset",
+                "Compare with baseline behavior",
+            ],
+            suggested_filters=[
+                "Filter by high-risk indicators",
+                "Sort by timestamp for timeline analysis",
+                "Group by host or user",
+                "Filter by anomaly score",
+            ],
+            caveats="Guidance is based on available data context. "
+            "Analysts should verify findings with additional sources.",
+            reasoning="Analysis generated based on artifact data patterns and analyst query.",
+        )
--- a/backend/app/agents/providers.py
+++ b/backend/app/agents/providers.py
@@ -0,0 +1,190 @@
+"""Pluggable LLM provider interface for analyst-assist agents.
+
+Supports three provider types:
+- Local: On-device or on-prem models
+- Networked: Shared internal inference services
+- Online: External hosted APIs
+"""
+
+import os
+from abc import ABC, abstractmethod
+from typing import Optional
+
+
+class LLMProvider(ABC):
+    """Interface every LLM backend implements: ``generate`` and ``is_available``."""
+
+    @abstractmethod
+    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
+        """Produce the model's text reply to ``prompt``.
+
+        Args:
+            prompt: The input prompt.
+            max_tokens: Maximum tokens in response.
+
+        Returns:
+            The generated text.
+        """
+        ...
+
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Report whether this provider's backend can be used."""
+        ...
+
+
+class LocalProvider(LLMProvider):
+    """Provider backed by a model stored on the local filesystem."""
+
+    def __init__(self, model_path: Optional[str] = None):
+        """Initialize local provider.
+
+        Args:
+            model_path: Path to local model. When falsy, the
+                THREAT_HUNT_LOCAL_MODEL_PATH env var is used instead.
+        """
+        self.model = None  # nothing is loaded by this placeholder
+        self.model_path = model_path or os.getenv("THREAT_HUNT_LOCAL_MODEL_PATH")
+
+    def is_available(self) -> bool:
+        """Return True when a model path is set and exists on disk."""
+        # In production, would also verify the model can be loaded.
+        path = self.model_path
+        return bool(path) and os.path.exists(str(path))
+
+    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
+        """Return a placeholder reply echoing the first 50 prompt characters.
+
+        Note: no inference happens yet and ``max_tokens`` is unused. In
+        production, integrate with llama-cpp-python (GGML models), the
+        Ollama API, vLLM, or another local inference engine.
+
+        Raises:
+            RuntimeError: If ``is_available()`` is False.
+        """
+        if self.is_available():
+            # Placeholder implementation
+            return f"[Local model response to: {prompt[:50]}...]"
+        raise RuntimeError("Local model not available")
+
+
+class NetworkedProvider(LLMProvider):
+    """Provider that targets a shared internal inference service."""
+
+    def __init__(
+        self,
+        api_endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model_name: str = "default",
+    ):
+        """Initialize networked provider.
+
+        Args:
+            api_endpoint: Service URL; falls back to THREAT_HUNT_NETWORKED_ENDPOINT.
+            api_key: Service API key; falls back to THREAT_HUNT_NETWORKED_KEY.
+            model_name: Model name/ID on the service.
+        """
+        self.model_name = model_name
+        self.api_key = api_key or os.getenv("THREAT_HUNT_NETWORKED_KEY")
+        self.api_endpoint = api_endpoint or os.getenv("THREAT_HUNT_NETWORKED_ENDPOINT")
+
+    def is_available(self) -> bool:
+        """Return True when an endpoint is configured (no request is made)."""
+        return True if self.api_endpoint else False
+
+    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
+        """Return a placeholder reply naming the model and echoing the prompt.
+
+        Note: no request is sent yet and ``max_tokens`` is unused. In
+        production, integrate with the internal inference service API, an
+        inference container cluster, or an enterprise inference gateway.
+
+        Raises:
+            RuntimeError: If no endpoint is configured.
+        """
+        if self.is_available():
+            return f"[Networked response from {self.model_name}: {prompt[:50]}...]"
+        raise RuntimeError("Networked service not available")
+
+
+class OnlineProvider(LLMProvider):
+    """Provider that targets an externally hosted LLM API."""
+
+    def __init__(
+        self,
+        api_provider: str = "openai",
+        api_key: Optional[str] = None,
+        model_name: Optional[str] = None,
+    ):
+        """Initialize online provider.
+
+        Args:
+            api_provider: Provider name (openai, anthropic, google, etc.)
+            api_key: API key; falls back to THREAT_HUNT_ONLINE_API_KEY.
+            model_name: Model name; falls back to THREAT_HUNT_ONLINE_MODEL,
+                then to "<api_provider>-default".
+        """
+        self.api_provider = api_provider
+        self.api_key = api_key or os.getenv("THREAT_HUNT_ONLINE_API_KEY")
+        if not model_name:
+            model_name = os.getenv("THREAT_HUNT_ONLINE_MODEL", f"{api_provider}-default")
+        self.model_name = model_name
+
+    def is_available(self) -> bool:
+        """Return True when an API key is set (the key is not validated)."""
+        return True if self.api_key else False
+
+    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
+        """Return a placeholder reply naming the provider and echoing the prompt.
+
+        Note: no API call is made yet and ``max_tokens`` is unused. In
+        production, integrate with the OpenAI, Anthropic Claude, or Google
+        Gemini APIs, or another hosted LLM service.
+
+        Raises:
+            RuntimeError: If no API key is set.
+        """
+        if self.is_available():
+            return f"[Online {self.api_provider} response: {prompt[:50]}...]"
+        raise RuntimeError("Online API not available or API key not set")
+
+
+def get_provider(provider_type: str = "auto") -> LLMProvider:
+    """Get an LLM provider based on configuration.
+
+    Args:
+        provider_type: 'local', 'networked', 'online', or 'auto' (first
+            available of local -> networked -> online).
+
+    Returns:
+        Configured LLM provider instance.
+
+    Raises:
+        ValueError: If provider_type is not a known value.
+        RuntimeError: If no matching provider is available.
+    """
+    factories = {
+        "local": LocalProvider,
+        "networked": NetworkedProvider,
+        # THREAT_HUNT_ONLINE_PROVIDER picks the vendor; unset or empty means openai.
+        "online": lambda: OnlineProvider(
+            api_provider=os.getenv("THREAT_HUNT_ONLINE_PROVIDER") or "openai"
+        ),
+    }
+    if provider_type == "auto":
+        for factory in factories.values():  # dict order is the preference order
+            provider = factory()
+            if provider.is_available():
+                return provider
+        raise RuntimeError(
+            "No LLM provider available. Configure at least one of: "
+            "THREAT_HUNT_LOCAL_MODEL_PATH, THREAT_HUNT_NETWORKED_ENDPOINT, "
+            "or THREAT_HUNT_ONLINE_API_KEY"
+        )
+    if provider_type not in factories:
+        raise ValueError(f"Unknown provider type: {provider_type}")
+
+    provider = factories[provider_type]()
+    if not provider.is_available():
+        raise RuntimeError(f"{provider_type} provider not available")
+    return provider
--- a/backend/app/api/init.py
+++ b/backend/app/api/init.py
@@ -0,0 +1 @@
+"""API routes initialization."""
--- a/backend/app/api/routes/init.py
+++ b/backend/app/api/routes/init.py
@@ -0,0 +1 @@
+"""API route modules."""
--- a/backend/app/api/routes/agent.py
+++ b/backend/app/api/routes/agent.py
@@ -0,0 +1,170 @@
+"""API routes for analyst-assist agent."""
+
+import logging
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+
+from app.agents.core import ThreatHuntAgent, AgentContext, AgentResponse
+from app.agents.config import AgentConfig
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/agent", tags=["agent"])  # routes below are served under /api/agent
+
+# Module-wide agent instance; stays None until get_agent() first builds it.
+_agent: ThreatHuntAgent | None = None
+
+
+def get_agent() -> ThreatHuntAgent:
+    """Return the shared agent, creating it on the first successful call."""
+    global _agent
+    if _agent is not None:
+        return _agent
+    if AgentConfig.is_agent_enabled():
+        _agent = ThreatHuntAgent()
+        return _agent
+    raise HTTPException(
+        status_code=503,
+        detail="Analyst-assist agent is not configured. Please configure an LLM provider.",
+    )
+
+
+class AssistRequest(BaseModel):
+    """Request body for POST /api/agent/assist."""
+
+    # Only ``query`` is required; every other field defaults to None.
+    query: str = Field(default=..., description="Analyst question or request for guidance")
+    dataset_name: str | None = Field(
+        default=None, description="Name of CSV dataset being analyzed"
+    )
+    artifact_type: str | None = Field(
+        default=None,
+        description="Type of artifact (e.g., FileList, ProcessList, NetworkConnections)",
+    )
+    host_identifier: str | None = Field(
+        default=None, description="Host name, IP address, or identifier"
+    )
+    data_summary: str | None = Field(
+        default=None, description="Brief summary or context about the uploaded data"
+    )
+    conversation_history: list[dict] | None = Field(
+        default=None, description="Previous messages for context"
+    )
+
+
+class AssistResponse(BaseModel):
+    """Response body of the assist endpoint; agent_assist copies each field from AgentResponse."""
+
+    guidance: str  # advisory text produced by the agent
+    confidence: float  # copied as-is; no range constraint is declared on this model
+    suggested_pivots: list[str]  # required here (no default)
+    suggested_filters: list[str]  # required here (no default)
+    caveats: str | None = None  # optional
+    reasoning: str | None = None  # optional
+
+
+@router.post(
+    "/assist",
+    response_model=AssistResponse,
+    summary="Get analyst-assist guidance",
+    description="Request guidance on CSV artifact data, analytical pivots, and hypotheses. "
+    "Agent provides advisory guidance only - no execution.",
+)
+async def agent_assist(request: AssistRequest) -> AssistResponse:
+    """Provide analyst-assist guidance on artifact data.
+
+    The agent explains the supplied context and suggests pivots, filters and
+    caveats. It does not execute tools, escalate alerts, modify data or
+    make autonomous decisions.
+
+    Args:
+        request: Assistance request with query and context
+
+    Returns:
+        Guidance response with suggestions and reasoning
+
+    Raises:
+        HTTPException: 503 if the agent is not configured or reports that no
+            provider is available (RuntimeError); 500 for any other
+            failure.
+    """
+    try:
+        agent = get_agent()
+
+        # Build context
+        context = AgentContext(
+            query=request.query,
+            dataset_name=request.dataset_name,
+            artifact_type=request.artifact_type,
+            host_identifier=request.host_identifier,
+            data_summary=request.data_summary,
+            conversation_history=request.conversation_history or [],
+        )
+
+        # Get guidance
+        response = await agent.assist(context)
+
+        logger.info(
+            "Agent assisted analyst with query: %s... (host: %s, artifact: %s)",
+            request.query[:50],
+            request.host_identifier,
+            request.artifact_type,
+        )
+
+        return AssistResponse(
+            guidance=response.guidance,
+            confidence=response.confidence,
+            suggested_pivots=response.suggested_pivots,
+            suggested_filters=response.suggested_filters,
+            caveats=response.caveats,
+            reasoning=response.reasoning,
+        )
+
+    except HTTPException:
+        # get_agent() raises 503 itself; without this clause the generic
+        # handler below would catch it and report 500 instead.
+        raise
+    except RuntimeError as e:
+        logger.error("Agent error: %s", e)
+        raise HTTPException(
+            status_code=503,
+            detail=f"Agent unavailable: {str(e)}",
+        ) from e
+    except Exception as e:
+        logger.exception("Unexpected error in agent_assist: %s", e)
+        raise HTTPException(
+            status_code=500,
+            detail="Error generating guidance. Please try again.",
+        ) from e
+
+
+@router.get(
+    "/health",
+    summary="Check agent health",
+    description="Check if agent is configured and ready to assist.",
+)
+async def agent_health() -> dict:
+    """Report whether the agent can serve requests.
+
+    An agent that was created without a provider is reported as unavailable.
+    """
+    try:
+        provider = get_agent().provider
+    except HTTPException:
+        provider = None
+    if provider:
+        return {
+            "status": "healthy",
+            "provider": provider.__class__.__name__,
+            "max_tokens": AgentConfig.MAX_RESPONSE_TOKENS,
+            "reasoning_enabled": AgentConfig.ENABLE_REASONING,
+        }
+    return {
+        "status": "unavailable",
+        "reason": "No LLM provider configured",
+        "configured_providers": {
+            "local": bool(AgentConfig.LOCAL_MODEL_PATH),
+            "networked": bool(AgentConfig.NETWORKED_ENDPOINT),
+            "online": bool(AgentConfig.ONLINE_API_KEY),
+        },
+    }
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -0,0 +1,35 @@
+"""ThreatHunt backend application."""
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+from app.api.routes import agent
+
+# Create FastAPI application
+app = FastAPI(
+    title="ThreatHunt API",
+    description="Analyst-assist threat hunting platform with agent guidance",
+    version="0.1.0",
+)
+
+# Configure CORS (fully permissive: any origin, method and header)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, restrict to known domains
+    allow_credentials=True,  # NOTE(review): risky together with wildcard origins - confirm before deploying
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Include routes (the agent router carries its own /api/agent prefix)
+app.include_router(agent.router)
+
+
+@app.get("/", tags=["health"])
+async def root():
+    """Return a static payload showing the API is up and where its docs live."""
+    return dict(
+        service="ThreatHunt API",
+        status="running",
+        docs="/docs",
+    )
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -0,0 +1,21 @@
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+pydantic==2.5.0
+pydantic-settings==2.1.0
+
+# Optional LLM provider dependencies
+# Uncomment based on your deployment choice:
+
+# For local models (GGML, Ollama, etc.)
+# llama-cpp-python==0.2.15
+# ollama==0.0.11
+
+# For online providers (OpenAI, Anthropic, Google)
+# openai==1.3.5
+# anthropic==0.7.1
+# google-generativeai==0.3.0
+
+# For development
+pytest==7.4.3
+pytest-asyncio==0.21.1
+httpx==0.25.1
--- a/backend/run.py
+++ b/backend/run.py
@@ -0,0 +1,17 @@
+"""Entry point for backend server."""
+
+import logging
+import uvicorn
+
+logging.basicConfig(  # runs on import as well as on direct execution
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+
+if __name__ == "__main__":
+    uvicorn.run(
+        "app.main:app",  # import string rather than the app object
+        host="0.0.0.0",  # listens on all network interfaces
+        port=8000,
+        reload=True,  # NOTE(review): auto-reload looks like a development setting - confirm for production
+    )