Mirror of https://github.com/mblanke/ThreatHunt.git
Add ThreatHunt agent backend/frontend scaffolding
backend/app/agents/__init__.py (new file, 16 lines)
@@ -0,0 +1,16 @@
"""Analyst-assist agent module for ThreatHunt.

Provides read-only guidance on CSV artifact data, analytical pivots, and hypotheses.
Agents are advisory only and do not execute actions or modify data.
"""

from .core import ThreatHuntAgent
from .providers import LLMProvider, LocalProvider, NetworkedProvider, OnlineProvider

__all__ = [
    "ThreatHuntAgent",
    "LLMProvider",
    "LocalProvider",
    "NetworkedProvider",
    "OnlineProvider",
]
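The module's public surface is small; a minimal import sketch (not part of the commit, and the import path and model path are assumptions based on the file layout above):

# Sketch only: import path and model path are illustrative assumptions.
from app.agents import ThreatHuntAgent, LocalProvider

# Construct the agent with an explicit provider instead of auto-detection.
agent = ThreatHuntAgent(provider=LocalProvider(model_path="/models/example.gguf"))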
backend/app/agents/config.py (new file, 59 lines)
@@ -0,0 +1,59 @@
"""Configuration settings for analyst-assist agents."""

import os
from typing import Literal


class AgentConfig:
    """Configuration for analyst-assist agents."""

    # Provider type: 'local', 'networked', 'online', or 'auto'
    PROVIDER_TYPE: Literal["local", "networked", "online", "auto"] = os.getenv(
        "THREAT_HUNT_AGENT_PROVIDER", "auto"
    )

    # Local provider settings
    LOCAL_MODEL_PATH: str | None = os.getenv("THREAT_HUNT_LOCAL_MODEL_PATH")

    # Networked provider settings
    NETWORKED_ENDPOINT: str | None = os.getenv("THREAT_HUNT_NETWORKED_ENDPOINT")
    NETWORKED_API_KEY: str | None = os.getenv("THREAT_HUNT_NETWORKED_KEY")

    # Online provider settings
    ONLINE_API_PROVIDER: str = os.getenv("THREAT_HUNT_ONLINE_PROVIDER", "openai")
    ONLINE_API_KEY: str | None = os.getenv("THREAT_HUNT_ONLINE_API_KEY")
    ONLINE_MODEL: str | None = os.getenv("THREAT_HUNT_ONLINE_MODEL")

    # Agent behavior settings
    MAX_RESPONSE_TOKENS: int = int(
        os.getenv("THREAT_HUNT_AGENT_MAX_TOKENS", "1024")
    )
    ENABLE_REASONING: bool = os.getenv(
        "THREAT_HUNT_AGENT_REASONING", "true"
    ).lower() in ("true", "1", "yes")
    CONVERSATION_HISTORY_LENGTH: int = int(
        os.getenv("THREAT_HUNT_AGENT_HISTORY_LENGTH", "10")
    )

    # Privacy settings
    FILTER_SENSITIVE_DATA: bool = os.getenv(
        "THREAT_HUNT_AGENT_FILTER_SENSITIVE", "true"
    ).lower() in ("true", "1", "yes")

    @classmethod
    def is_agent_enabled(cls) -> bool:
        """Check whether the agent is enabled and properly configured."""
        # The agent is disabled if no provider can be used.
        if cls.PROVIDER_TYPE == "auto":
            return bool(
                cls.LOCAL_MODEL_PATH
                or cls.NETWORKED_ENDPOINT
                or cls.ONLINE_API_KEY
            )
        elif cls.PROVIDER_TYPE == "local":
            return bool(cls.LOCAL_MODEL_PATH)
        elif cls.PROVIDER_TYPE == "networked":
            return bool(cls.NETWORKED_ENDPOINT)
        elif cls.PROVIDER_TYPE == "online":
            return bool(cls.ONLINE_API_KEY)
        return False
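A hedged usage sketch for AgentConfig (not part of the commit). Because the class attributes read the environment when the class body executes, variables must be set before the module is imported; the values below are placeholders:

# Sketch only: env values are placeholders; set them before importing the module,
# since AgentConfig reads the environment at class-definition time.
import os

os.environ["THREAT_HUNT_AGENT_PROVIDER"] = "online"   # hypothetical choice
os.environ["THREAT_HUNT_ONLINE_API_KEY"] = "sk-placeholder"  # placeholder key

from app.agents.config import AgentConfig  # assumed import path

if AgentConfig.is_agent_enabled():
    print(f"Agent enabled via {AgentConfig.PROVIDER_TYPE} provider")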
backend/app/agents/core.py (new file, 208 lines)
@@ -0,0 +1,208 @@
"""Core ThreatHunt analyst-assist agent.

Provides read-only guidance on CSV artifact data, analytical pivots, and hypotheses.
Agents are advisory only: no execution, no alerts, no data modifications.
"""

import logging
from typing import Optional

from pydantic import BaseModel, Field

from .providers import LLMProvider, get_provider

logger = logging.getLogger(__name__)


class AgentContext(BaseModel):
    """Context for agent guidance requests."""

    query: str = Field(
        ..., description="Analyst question or request for guidance"
    )
    dataset_name: Optional[str] = Field(None, description="Name of CSV dataset")
    artifact_type: Optional[str] = Field(None, description="Artifact type (e.g., file, process, network)")
    host_identifier: Optional[str] = Field(
        None, description="Host name, IP, or identifier"
    )
    data_summary: Optional[str] = Field(
        None, description="Brief description of uploaded data"
    )
    conversation_history: Optional[list[dict]] = Field(
        default_factory=list, description="Previous messages in conversation"
    )


class AgentResponse(BaseModel):
    """Response from the analyst-assist agent."""

    guidance: str = Field(..., description="Advisory guidance for the analyst")
    confidence: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence in guidance (0-1)"
    )
    suggested_pivots: list[str] = Field(
        default_factory=list, description="Suggested analytical directions"
    )
    suggested_filters: list[str] = Field(
        default_factory=list, description="Suggested data filters or queries"
    )
    caveats: Optional[str] = Field(
        None, description="Assumptions, limitations, or caveats"
    )
    reasoning: Optional[str] = Field(
        None, description="Explanation of how the guidance was generated"
    )


class ThreatHuntAgent:
    """Analyst-assist agent for ThreatHunt.

    Provides guidance on:
    - Interpreting CSV artifact data
    - Suggesting analytical pivots and filters
    - Forming and testing hypotheses

    Policy:
    - Advisory guidance only (no execution)
    - No database or schema changes
    - No alert escalation
    - Transparent reasoning
    """

    def __init__(self, provider: Optional[LLMProvider] = None):
        """Initialize the agent with an LLM provider.

        Args:
            provider: LLM provider instance. If None, uses get_provider() in auto mode.
        """
        if provider is None:
            try:
                provider = get_provider("auto")
            except RuntimeError as e:
                logger.warning(f"Could not initialize default provider: {e}")
                provider = None

        self.provider = provider
        self.system_prompt = self._build_system_prompt()

    def _build_system_prompt(self) -> str:
        """Build the system prompt that governs agent behavior."""
        return """You are an analyst-assist agent for ThreatHunt, a threat hunting platform.

Your role:
- Interpret and explain CSV artifact data from Velociraptor
- Suggest analytical pivots, filters, and hypotheses
- Highlight anomalies, patterns, or points of interest
- Guide analysts without replacing their judgment

Your constraints:
- You ONLY provide guidance and suggestions
- You do NOT execute actions or tools
- You do NOT modify data or escalate alerts
- You do NOT make autonomous decisions
- You ONLY analyze data presented to you
- You explain your reasoning transparently
- You acknowledge limitations and assumptions
- You suggest next investigative steps

When responding:
1. Start with a clear, direct answer to the query
2. Explain your reasoning based on the data context provided
3. Suggest 2-4 analytical pivots the analyst might explore
4. Suggest 2-4 data filters or queries that might be useful
5. Include relevant caveats or assumptions
6. Be honest about what you cannot determine from the data

Remember: The analyst is the decision-maker. You are an assistant."""

    async def assist(self, context: AgentContext) -> AgentResponse:
        """Provide guidance on artifact data and analysis.

        Args:
            context: Request context including the query and data context.

        Returns:
            Guidance response with suggestions and reasoning.

        Raises:
            RuntimeError: If no provider is available.
        """
        if not self.provider:
            raise RuntimeError(
                "No LLM provider available. Configure at least one of: "
                "THREAT_HUNT_LOCAL_MODEL_PATH, THREAT_HUNT_NETWORKED_ENDPOINT, "
                "or THREAT_HUNT_ONLINE_API_KEY"
            )

        # Build the prompt with context
        prompt = self._build_prompt(context)

        try:
            # Get guidance from the LLM provider
            guidance = await self.provider.generate(prompt, max_tokens=1024)

            # Parse the response into a structured format
            response = self._parse_response(guidance, context)

            logger.info(
                f"Agent assisted with query: {context.query[:50]}... "
                f"(dataset: {context.dataset_name})"
            )

            return response

        except Exception as e:
            logger.error(f"Error generating guidance: {e}")
            raise

    def _build_prompt(self, context: AgentContext) -> str:
        """Build the prompt for the LLM."""
        prompt_parts = [
            f"Analyst query: {context.query}",
        ]

        if context.dataset_name:
            prompt_parts.append(f"Dataset: {context.dataset_name}")

        if context.artifact_type:
            prompt_parts.append(f"Artifact type: {context.artifact_type}")

        if context.host_identifier:
            prompt_parts.append(f"Host: {context.host_identifier}")

        if context.data_summary:
            prompt_parts.append(f"Data summary: {context.data_summary}")

        if context.conversation_history:
            prompt_parts.append("\nConversation history:")
            for msg in context.conversation_history[-5:]:  # last 5 messages for context
                prompt_parts.append(f"  {msg.get('role', 'unknown')}: {msg.get('content', '')}")

        return "\n".join(prompt_parts)

    def _parse_response(self, response_text: str, context: AgentContext) -> AgentResponse:
        """Parse the LLM response into a structured format.

        Note: This is a simplified parser. In production, use structured output
        from the LLM (JSON mode, function calling, etc.) for better reliability.
        """
        # For now, return a structured response based on the raw guidance.
        # In production, parse JSON or use structured output from the LLM.
        return AgentResponse(
            guidance=response_text,
            confidence=0.8,  # placeholder
            suggested_pivots=[
                "Analyze temporal patterns",
                "Cross-reference with known indicators",
                "Examine outliers in the dataset",
                "Compare with baseline behavior",
            ],
            suggested_filters=[
                "Filter by high-risk indicators",
                "Sort by timestamp for timeline analysis",
                "Group by host or user",
                "Filter by anomaly score",
            ],
            caveats="Guidance is based on available data context. "
            "Analysts should verify findings with additional sources.",
            reasoning="Analysis generated based on artifact data patterns and analyst query.",
        )
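A short driver sketch for the assist flow (not part of the commit; the query and dataset values are invented for illustration, the import path assumes the backend package layout above, and a configured provider is required, see config.py):

# Sketch only: illustrative values; requires at least one configured provider.
import asyncio

from app.agents.core import AgentContext, ThreatHuntAgent


async def main() -> None:
    agent = ThreatHuntAgent()  # falls back to get_provider("auto") internally
    context = AgentContext(
        query="Which processes have unusual parent/child relationships?",
        dataset_name="pslist.csv",  # hypothetical upload
        artifact_type="process",
        data_summary="Velociraptor process listing from 12 hosts",
    )
    response = await agent.assist(context)
    print(response.guidance)
    for pivot in response.suggested_pivots:
        print(" -", pivot)


asyncio.run(main())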
backend/app/agents/providers.py (new file, 190 lines)
@@ -0,0 +1,190 @@
"""Pluggable LLM provider interface for analyst-assist agents.

Supports three provider types:
- Local: on-device or on-prem models
- Networked: shared internal inference services
- Online: external hosted APIs
"""

import os
from abc import ABC, abstractmethod
from typing import Optional


class LLMProvider(ABC):
    """Abstract base class for LLM providers."""

    @abstractmethod
    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
        """Generate a response from the LLM.

        Args:
            prompt: The input prompt
            max_tokens: Maximum tokens in the response

        Returns:
            Generated text response
        """
        pass

    @abstractmethod
    def is_available(self) -> bool:
        """Check if the provider backend is available."""
        pass


class LocalProvider(LLMProvider):
    """Local LLM provider (on-device or on-prem models)."""

    def __init__(self, model_path: Optional[str] = None):
        """Initialize the local provider.

        Args:
            model_path: Path to the local model. If None, uses the THREAT_HUNT_LOCAL_MODEL_PATH env var.
        """
        self.model_path = model_path or os.getenv("THREAT_HUNT_LOCAL_MODEL_PATH")
        self.model = None

    def is_available(self) -> bool:
        """Check if the local model is available."""
        if not self.model_path:
            return False
        # In production, this would verify the model file exists and can be loaded.
        return os.path.exists(str(self.model_path))

    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
        """Generate a response using the local model.

        Note: This is a placeholder. In production, integrate with:
        - llama-cpp-python for GGML models
        - Ollama API
        - vLLM
        - Other local inference engines
        """
        if not self.is_available():
            raise RuntimeError("Local model not available")

        # Placeholder implementation
        return f"[Local model response to: {prompt[:50]}...]"


class NetworkedProvider(LLMProvider):
    """Networked LLM provider (shared internal inference services)."""

    def __init__(
        self,
        api_endpoint: Optional[str] = None,
        api_key: Optional[str] = None,
        model_name: str = "default",
    ):
        """Initialize the networked provider.

        Args:
            api_endpoint: URL of the inference service. Defaults to the THREAT_HUNT_NETWORKED_ENDPOINT env var.
            api_key: API key for the service. Defaults to the THREAT_HUNT_NETWORKED_KEY env var.
            model_name: Model name/ID on the service.
        """
        self.api_endpoint = api_endpoint or os.getenv("THREAT_HUNT_NETWORKED_ENDPOINT")
        self.api_key = api_key or os.getenv("THREAT_HUNT_NETWORKED_KEY")
        self.model_name = model_name

    def is_available(self) -> bool:
        """Check if the networked service is available."""
        return bool(self.api_endpoint)

    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
        """Generate a response using the networked service.

        Note: This is a placeholder. In production, integrate with:
        - An internal inference service API
        - An LLM inference container cluster
        - An enterprise inference gateway
        """
        if not self.is_available():
            raise RuntimeError("Networked service not available")

        # Placeholder implementation
        return f"[Networked response from {self.model_name}: {prompt[:50]}...]"


class OnlineProvider(LLMProvider):
    """Online LLM provider (external hosted APIs)."""

    def __init__(
        self,
        api_provider: str = "openai",
        api_key: Optional[str] = None,
        model_name: Optional[str] = None,
    ):
        """Initialize the online provider.

        Args:
            api_provider: Provider name (openai, anthropic, google, etc.)
            api_key: API key. Defaults to the THREAT_HUNT_ONLINE_API_KEY env var.
            model_name: Model name. Defaults to the THREAT_HUNT_ONLINE_MODEL env var.
        """
        self.api_provider = api_provider
        self.api_key = api_key or os.getenv("THREAT_HUNT_ONLINE_API_KEY")
        self.model_name = model_name or os.getenv(
            "THREAT_HUNT_ONLINE_MODEL", f"{api_provider}-default"
        )

    def is_available(self) -> bool:
        """Check if the online API is available."""
        return bool(self.api_key)

    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
        """Generate a response using the online API.

        Note: This is a placeholder. In production, integrate with:
        - OpenAI API (GPT-3.5, GPT-4, etc.)
        - Anthropic Claude API
        - Google Gemini API
        - Other hosted LLM services
        """
        if not self.is_available():
            raise RuntimeError("Online API not available or API key not set")

        # Placeholder implementation
        return f"[Online {self.api_provider} response: {prompt[:50]}...]"


def get_provider(provider_type: str = "auto") -> LLMProvider:
    """Get an LLM provider based on configuration.

    Args:
        provider_type: Type of provider to use: 'local', 'networked', 'online', or 'auto'.
            'auto' attempts to use the first available provider in order:
            local -> networked -> online.

    Returns:
        Configured LLM provider instance.

    Raises:
        RuntimeError: If no provider is available.
    """
    # Explicit provider selection
    if provider_type == "local":
        provider = LocalProvider()
    elif provider_type == "networked":
        provider = NetworkedProvider()
    elif provider_type == "online":
        provider = OnlineProvider()
    elif provider_type == "auto":
        # Try providers in order of preference
        for provider_cls in [LocalProvider, NetworkedProvider, OnlineProvider]:
            provider = provider_cls()
            if provider.is_available():
                return provider
        raise RuntimeError(
            "No LLM provider available. Configure at least one of: "
            "THREAT_HUNT_LOCAL_MODEL_PATH, THREAT_HUNT_NETWORKED_ENDPOINT, "
            "or THREAT_HUNT_ONLINE_API_KEY"
        )
    else:
        raise ValueError(f"Unknown provider type: {provider_type}")

    if not provider.is_available():
        raise RuntimeError(f"{provider_type} provider not available")

    return provider
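To illustrate the extension point, a minimal custom provider implementing the two abstract methods, plus provider selection via get_provider (a sketch, not part of the commit; EchoProvider is a made-up example and the import path is assumed):

# Sketch only: EchoProvider is hypothetical and exists purely to show the interface.
from app.agents.providers import LLMProvider, get_provider


class EchoProvider(LLMProvider):
    """Toy provider that echoes the prompt back; always reports available."""

    def is_available(self) -> bool:
        return True

    async def generate(self, prompt: str, max_tokens: int = 1024) -> str:
        # A real implementation would call an inference backend here.
        return f"[echo] {prompt[:200]}"


# Auto mode falls back local -> networked -> online and raises if none is configured.
provider = get_provider("auto")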