"""Core ThreatHunt analyst-assist agent — v2. Uses TaskRouter to select the right model/node for each query, real LLM providers (Ollama/OpenWebUI), and structured response parsing. Integrates SANS RAG context from Open WebUI. """ import json import logging import re import time from typing import AsyncIterator, Optional from pydantic import BaseModel, Field from app.config import settings from app.services.sans_rag import sans_rag from .router import TaskRouter, TaskType, RoutingDecision, task_router from .providers_v2 import OllamaProvider, OpenWebUIProvider logger = logging.getLogger(__name__) # ── Models ──────────────────────────────────────────────────────────── class AgentContext(BaseModel): """Context for agent guidance requests.""" query: str = Field(..., description="Analyst question or request for guidance") dataset_name: Optional[str] = Field(None, description="Name of CSV dataset") artifact_type: Optional[str] = Field(None, description="Artifact type") host_identifier: Optional[str] = Field(None, description="Host name, IP, or identifier") data_summary: Optional[str] = Field(None, description="Brief description of data") conversation_history: Optional[list[dict]] = Field( default_factory=list, description="Previous messages" ) active_hypotheses: Optional[list[str]] = Field( default_factory=list, description="Active investigation hypotheses" ) annotations_summary: Optional[str] = Field( None, description="Summary of analyst annotations" ) enrichment_summary: Optional[str] = Field( None, description="Summary of enrichment results" ) mode: str = Field(default="quick", description="quick | deep | debate") model_override: Optional[str] = Field(None, description="Force a specific model") class Perspective(BaseModel): """A single perspective from the debate agent.""" role: str content: str model_used: str node_used: str latency_ms: int class AgentResponse(BaseModel): """Response from analyst-assist agent.""" guidance: str = Field(..., description="Advisory guidance for analyst") confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence (0-1)") suggested_pivots: list[str] = Field(default_factory=list) suggested_filters: list[str] = Field(default_factory=list) caveats: Optional[str] = None reasoning: Optional[str] = None sans_references: list[str] = Field( default_factory=list, description="SANS course references" ) model_used: str = Field(default="", description="Model that generated the response") node_used: str = Field(default="", description="Node that processed the request") latency_ms: int = Field(default=0, description="Total latency in ms") perspectives: Optional[list[Perspective]] = Field( None, description="Debate perspectives (only in debate mode)" ) # ── System prompt ───────────────────────────────────────────────────── SYSTEM_PROMPT = """You are an analyst-assist agent for ThreatHunt, a threat hunting platform. You have access to 300GB of SANS cybersecurity course material for reference. Your role: - Interpret and explain CSV artifact data from Velociraptor and other forensic tools - Suggest analytical pivots, filters, and hypotheses - Highlight anomalies, patterns, or points of interest - Reference relevant SANS methodologies and techniques when applicable - Guide analysts without replacing their judgment Your constraints: - You ONLY provide guidance and suggestions - You do NOT execute actions or tools - You do NOT modify data or escalate alerts - You explain your reasoning transparently RESPONSE FORMAT — you MUST respond with valid JSON: { "guidance": "Your main guidance text here", "confidence": 0.85, "suggested_pivots": ["Pivot 1", "Pivot 2"], "suggested_filters": ["filter expression 1", "filter expression 2"], "caveats": "Any assumptions or limitations", "reasoning": "How you arrived at this guidance", "sans_references": ["SANS SEC504: ...", "SANS FOR508: ..."] } Respond ONLY with the JSON object. No markdown, no code fences, no extra text.""" # ── Agent ───────────────────────────────────────────────────────────── class ThreatHuntAgent: """Analyst-assist agent backed by Wile + Roadrunner LLM cluster.""" def __init__(self, router: TaskRouter | None = None): self.router = router or task_router self.system_prompt = SYSTEM_PROMPT async def assist(self, context: AgentContext) -> AgentResponse: """Provide guidance on artifact data and analysis.""" start = time.monotonic() if context.mode == "debate": return await self._debate_assist(context) # Classify task and route task_type = self.router.classify_task(context.query) if context.mode == "deep": task_type = TaskType.DEEP_ANALYSIS decision = self.router.route(task_type, model_override=context.model_override) logger.info(f"Routing: {decision.reason}") # Enrich prompt with SANS RAG context prompt = self._build_prompt(context) try: rag_context = await sans_rag.enrich_prompt( context.query, investigation_context=context.data_summary or "", ) if rag_context: prompt = f"{prompt}\n\n{rag_context}" except Exception as e: logger.warning(f"SANS RAG enrichment failed: {e}") # Call LLM provider = self.router.get_provider(decision) if isinstance(provider, OpenWebUIProvider): messages = [ {"role": "system", "content": self.system_prompt}, {"role": "user", "content": prompt}, ] result = await provider.chat( messages, max_tokens=settings.AGENT_MAX_TOKENS, temperature=settings.AGENT_TEMPERATURE, ) else: result = await provider.generate( prompt, system=self.system_prompt, max_tokens=settings.AGENT_MAX_TOKENS, temperature=settings.AGENT_TEMPERATURE, ) raw_text = result.get("response", "") latency_ms = result.get("_latency_ms", 0) # Parse structured response response = self._parse_response(raw_text, context) response.model_used = decision.model response.node_used = decision.node.value response.latency_ms = latency_ms total_ms = int((time.monotonic() - start) * 1000) logger.info( f"Agent assist: {context.query[:60]}... → " f"{decision.model} on {decision.node.value} " f"({total_ms}ms total, {latency_ms}ms LLM)" ) return response async def assist_stream( self, context: AgentContext, ) -> AsyncIterator[str]: """Stream agent response tokens.""" task_type = self.router.classify_task(context.query) decision = self.router.route(task_type, model_override=context.model_override) prompt = self._build_prompt(context) provider = self.router.get_provider(decision) if isinstance(provider, OllamaProvider): async for token in provider.generate_stream( prompt, system=self.system_prompt, max_tokens=settings.AGENT_MAX_TOKENS, temperature=settings.AGENT_TEMPERATURE, ): yield token elif isinstance(provider, OpenWebUIProvider): messages = [ {"role": "system", "content": self.system_prompt}, {"role": "user", "content": prompt}, ] async for token in provider.chat_stream( messages, max_tokens=settings.AGENT_MAX_TOKENS, temperature=settings.AGENT_TEMPERATURE, ): yield token async def _debate_assist(self, context: AgentContext) -> AgentResponse: """Multi-perspective analysis using diverse models on Wile.""" import asyncio start = time.monotonic() prompt = self._build_prompt(context) # Route each perspective to a different heavy model roles = { TaskType.DEBATE_PLANNER: ( "Planner", "You are the Planner for a threat hunting investigation.\n" "Provide a structured investigation strategy. Reference SANS methodologies.\n" "Focus on: investigation steps, data sources to examine, MITRE ATT&CK mapping.\n" "Be specific to the data context provided.\n\n", ), TaskType.DEBATE_CRITIC: ( "Critic", "You are the Critic for a threat hunting investigation.\n" "Identify risks, false positive scenarios, missing evidence, and assumptions.\n" "Reference SANS training on common analyst mistakes.\n" "Challenge the obvious interpretation.\n\n", ), TaskType.DEBATE_PRAGMATIST: ( "Pragmatist", "You are the Pragmatist for a threat hunting investigation.\n" "Suggest the most actionable, efficient next steps.\n" "Reference SANS incident response playbooks.\n" "Focus on: quick wins, triage priorities, what to escalate.\n\n", ), } async def _call_perspective(task_type: TaskType, role_name: str, prefix: str): decision = self.router.route(task_type) provider = self.router.get_provider(decision) full_prompt = prefix + prompt if isinstance(provider, OpenWebUIProvider): result = await provider.generate( full_prompt, system=f"You are the {role_name}. Provide analysis only. No execution.", max_tokens=settings.AGENT_MAX_TOKENS, temperature=0.4, ) else: result = await provider.generate( full_prompt, system=f"You are the {role_name}. Provide analysis only. No execution.", max_tokens=settings.AGENT_MAX_TOKENS, temperature=0.4, ) return Perspective( role=role_name, content=result.get("response", ""), model_used=decision.model, node_used=decision.node.value, latency_ms=result.get("_latency_ms", 0), ) # Run perspectives in parallel perspective_tasks = [ _call_perspective(tt, name, prefix) for tt, (name, prefix) in roles.items() ] perspectives = await asyncio.gather(*perspective_tasks) # Judge merges the perspectives judge_prompt = ( "You are the Judge. Merge these three threat hunting perspectives into " "ONE final advisory answer.\n\n" "Rules:\n" "- Advisory only — no execution\n" "- Clearly list risks and assumptions\n" "- Highlight where perspectives agree and disagree\n" "- Provide a unified recommendation\n" "- Reference SANS methodologies where relevant\n\n" ) for p in perspectives: judge_prompt += f"=== {p.role} (via {p.model_used}) ===\n{p.content}\n\n" judge_prompt += ( f"\nOriginal analyst query:\n{context.query}\n\n" "Respond with the merged analysis in this JSON format:\n" '{"guidance": "...", "confidence": 0.85, "suggested_pivots": [...], ' '"suggested_filters": [...], "caveats": "...", "reasoning": "...", ' '"sans_references": [...]}' ) judge_decision = self.router.route(TaskType.DEBATE_JUDGE) judge_provider = self.router.get_provider(judge_decision) if isinstance(judge_provider, OpenWebUIProvider): judge_result = await judge_provider.generate( judge_prompt, system="You are the Judge. Merge perspectives into a final advisory answer. Respond with JSON only.", max_tokens=settings.AGENT_MAX_TOKENS, temperature=0.2, ) else: judge_result = await judge_provider.generate( judge_prompt, system="You are the Judge. Merge perspectives into a final advisory answer. Respond with JSON only.", max_tokens=settings.AGENT_MAX_TOKENS, temperature=0.2, ) raw_text = judge_result.get("response", "") response = self._parse_response(raw_text, context) response.model_used = judge_decision.model response.node_used = judge_decision.node.value response.latency_ms = int((time.monotonic() - start) * 1000) response.perspectives = list(perspectives) return response def _build_prompt(self, context: AgentContext) -> str: """Build the prompt with all available context.""" parts = [f"Analyst query: {context.query}"] if context.dataset_name: parts.append(f"Dataset: {context.dataset_name}") if context.artifact_type: parts.append(f"Artifact type: {context.artifact_type}") if context.host_identifier: parts.append(f"Host: {context.host_identifier}") if context.data_summary: parts.append(f"Data summary: {context.data_summary}") if context.active_hypotheses: parts.append(f"Active hypotheses: {'; '.join(context.active_hypotheses)}") if context.annotations_summary: parts.append(f"Analyst annotations: {context.annotations_summary}") if context.enrichment_summary: parts.append(f"Enrichment data: {context.enrichment_summary}") if context.conversation_history: parts.append("\nRecent conversation:") for msg in context.conversation_history[-settings.AGENT_HISTORY_LENGTH:]: parts.append(f" {msg.get('role', 'unknown')}: {msg.get('content', '')[:500]}") return "\n".join(parts) def _parse_response(self, raw: str, context: AgentContext) -> AgentResponse: """Parse LLM output into structured AgentResponse. Tries JSON extraction first, falls back to raw text with defaults. """ parsed = self._try_parse_json(raw) if parsed: return AgentResponse( guidance=parsed.get("guidance", raw), confidence=min(max(float(parsed.get("confidence", 0.7)), 0.0), 1.0), suggested_pivots=parsed.get("suggested_pivots", [])[:6], suggested_filters=parsed.get("suggested_filters", [])[:6], caveats=parsed.get("caveats"), reasoning=parsed.get("reasoning"), sans_references=parsed.get("sans_references", []), ) # Fallback: use raw text as guidance return AgentResponse( guidance=raw.strip() or "No guidance generated. Please try rephrasing your question.", confidence=0.5, suggested_pivots=[], suggested_filters=[], caveats="Response was not in structured format. Pivots and filters may be embedded in the guidance text.", reasoning=None, sans_references=[], ) def _try_parse_json(self, text: str) -> dict | None: """Try to extract JSON from LLM output.""" # Direct parse try: return json.loads(text.strip()) except json.JSONDecodeError: pass # Extract from code fences patterns = [ r"```json\s*(.*?)\s*```", r"```\s*(.*?)\s*```", r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", ] for pattern in patterns: match = re.search(pattern, text, re.DOTALL) if match: try: return json.loads(match.group(1) if match.lastindex else match.group(0)) except (json.JSONDecodeError, IndexError): continue return None