mirror of
https://github.com/mblanke/ThreatHunt.git
synced 2026-03-01 14:00:20 -05:00
feat: interactive network map, IOC highlighting, AUP hunt selector, type filters
- NetworkMap: hunt-scoped force-directed graph with click-to-inspect popover - NetworkMap: zoom/pan (wheel, drag, buttons), viewport transform - NetworkMap: clickable IP/Host/Domain/URL legend chips to filter node types - NetworkMap: brighter colors, 20% smaller nodes - DatasetViewer: IOC columns highlighted with colored headers + cell tinting - AUPScanner: hunt dropdown replacing dataset checkboxes, auto-select all - Rename 'Social Media (Personal)' theme to 'Social Media' with DB migration - Fix /api/hunts timeout: Dataset.rows lazy='noload' (was selectin cascade) - Add OS column mapping to normalizer - Full backend services, DB models, alembic migrations, new routes - New components: Dashboard, HuntManager, FileUpload, NetworkMap, etc. - Docker Compose deployment with nginx reverse proxy
This commit is contained in:
346
backend/app/services/sans_rag.py
Normal file
346
backend/app/services/sans_rag.py
Normal file
@@ -0,0 +1,346 @@
|
||||
"""SANS RAG service — queries the 300GB SANS courseware indexed in Open WebUI.
|
||||
|
||||
Provides contextual SANS references for threat hunting guidance.
|
||||
Uses two approaches:
|
||||
1. Open WebUI RAG pipeline (if configured with a knowledge collection)
|
||||
2. Embedding-based semantic search against locally indexed SANS content
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
from app.agents.providers_v2 import _get_client
|
||||
from app.agents.registry import Node
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── SANS course catalog for reference matching ────────────────────────
|
||||
|
||||
SANS_COURSES = {
|
||||
"SEC401": "Security Essentials",
|
||||
"SEC504": "Hacker Tools, Techniques, and Incident Handling",
|
||||
"SEC503": "Network Monitoring and Threat Detection In-Depth",
|
||||
"SEC505": "Securing Windows and PowerShell Automation",
|
||||
"SEC506": "Securing Linux/Unix",
|
||||
"SEC510": "Public Cloud Security: AWS, Azure, and GCP",
|
||||
"SEC511": "Continuous Monitoring and Security Operations",
|
||||
"SEC530": "Defensible Security Architecture and Engineering",
|
||||
"SEC540": "Cloud Security and DevSecOps Automation",
|
||||
"SEC555": "SIEM with Tactical Analytics",
|
||||
"SEC560": "Enterprise Penetration Testing",
|
||||
"SEC565": "Red Team Operations and Adversary Emulation",
|
||||
"SEC573": "Automating Information Security with Python",
|
||||
"SEC575": "Mobile Device Security and Ethical Hacking",
|
||||
"SEC588": "Cloud Penetration Testing",
|
||||
"SEC599": "Defeating Advanced Adversaries - Purple Team Tactics",
|
||||
"FOR408": "Windows Forensic Analysis",
|
||||
"FOR498": "Digital Acquisition and Rapid Triage",
|
||||
"FOR500": "Windows Forensic Analysis",
|
||||
"FOR508": "Advanced Incident Response, Threat Hunting, and Digital Forensics",
|
||||
"FOR509": "Enterprise Cloud Forensics and Incident Response",
|
||||
"FOR518": "Mac and iOS Forensic Analysis and Incident Response",
|
||||
"FOR572": "Advanced Network Forensics: Threat Hunting, Analysis, and Incident Response",
|
||||
"FOR578": "Cyber Threat Intelligence",
|
||||
"FOR585": "Smartphone Forensic Analysis In-Depth",
|
||||
"FOR610": "Reverse-Engineering Malware: Malware Analysis Tools and Techniques",
|
||||
"FOR710": "Reverse-Engineering Malware: Advanced Code Analysis",
|
||||
"ICS410": "ICS/SCADA Security Essentials",
|
||||
"ICS515": "ICS Visibility, Detection, and Response",
|
||||
}
|
||||
|
||||
# Topic-to-course mapping for fallback recommendations
|
||||
TOPIC_COURSE_MAP = {
|
||||
"malware": ["FOR610", "FOR710", "SEC504"],
|
||||
"reverse engineer": ["FOR610", "FOR710"],
|
||||
"incident response": ["FOR508", "SEC504"],
|
||||
"forensic": ["FOR508", "FOR500", "FOR408"],
|
||||
"windows forensic": ["FOR500", "FOR408"],
|
||||
"network forensic": ["FOR572"],
|
||||
"threat hunting": ["FOR508", "SEC504", "FOR578"],
|
||||
"threat intelligence": ["FOR578"],
|
||||
"powershell": ["SEC505", "FOR508"],
|
||||
"lateral movement": ["SEC504", "FOR508"],
|
||||
"persistence": ["FOR508", "SEC504"],
|
||||
"privilege escalation": ["SEC504", "SEC560"],
|
||||
"credential": ["SEC504", "SEC560"],
|
||||
"memory forensic": ["FOR508"],
|
||||
"disk forensic": ["FOR500", "FOR408"],
|
||||
"registry": ["FOR500", "FOR408"],
|
||||
"event log": ["FOR508", "SEC555"],
|
||||
"siem": ["SEC555"],
|
||||
"log analysis": ["SEC555", "SEC503"],
|
||||
"network monitor": ["SEC503"],
|
||||
"pcap": ["SEC503", "FOR572"],
|
||||
"cloud": ["SEC510", "SEC540", "FOR509"],
|
||||
"aws": ["SEC510", "SEC540", "FOR509"],
|
||||
"azure": ["SEC510", "FOR509"],
|
||||
"linux": ["SEC506"],
|
||||
"mobile": ["SEC575", "FOR585"],
|
||||
"penetration test": ["SEC560", "SEC565"],
|
||||
"red team": ["SEC565", "SEC599"],
|
||||
"purple team": ["SEC599"],
|
||||
"python": ["SEC573"],
|
||||
"automation": ["SEC573", "SEC540"],
|
||||
"deobfusc": ["FOR610", "SEC504"],
|
||||
"base64": ["FOR610", "SEC504"],
|
||||
"shellcode": ["FOR610", "FOR710"],
|
||||
"ransomware": ["FOR508", "FOR610"],
|
||||
"phishing": ["SEC504", "FOR578"],
|
||||
"c2": ["FOR508", "SEC504", "FOR572"],
|
||||
"command and control": ["FOR508", "SEC504"],
|
||||
"exfiltration": ["FOR508", "FOR572", "SEC503"],
|
||||
"dns": ["FOR572", "SEC503"],
|
||||
"ioc": ["FOR508", "FOR578"],
|
||||
"mitre": ["FOR508", "SEC504", "SEC599"],
|
||||
"att&ck": ["FOR508", "SEC504"],
|
||||
"velociraptor": ["FOR508"],
|
||||
"volatility": ["FOR508"],
|
||||
"scheduled task": ["FOR508", "SEC504"],
|
||||
"service": ["FOR508", "SEC504"],
|
||||
"wmi": ["FOR508", "SEC504"],
|
||||
"process": ["FOR508"],
|
||||
"dll": ["FOR610", "FOR508"],
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class RAGResult:
|
||||
"""Result from a RAG query."""
|
||||
query: str
|
||||
context: str # Retrieved relevant text
|
||||
sources: list[str] = field(default_factory=list) # Source document names
|
||||
course_references: list[str] = field(default_factory=list) # SANS course IDs
|
||||
confidence: float = 0.0
|
||||
latency_ms: int = 0
|
||||
|
||||
|
||||
class SANSRAGService:
|
||||
"""Service for querying SANS courseware via Open WebUI RAG pipeline."""
|
||||
|
||||
def __init__(self):
|
||||
self.openwebui_url = settings.OPENWEBUI_URL.rstrip("/")
|
||||
self.api_key = settings.OPENWEBUI_API_KEY
|
||||
self.rag_model = settings.DEFAULT_FAST_MODEL
|
||||
self._available: bool | None = None
|
||||
|
||||
def _headers(self) -> dict:
|
||||
h = {"Content-Type": "application/json"}
|
||||
if self.api_key:
|
||||
h["Authorization"] = f"Bearer {self.api_key}"
|
||||
return h
|
||||
|
||||
async def query(
|
||||
self,
|
||||
question: str,
|
||||
context: str = "",
|
||||
max_tokens: int = 1024,
|
||||
) -> RAGResult:
|
||||
"""Query SANS courseware for relevant context.
|
||||
|
||||
Uses Open WebUI's RAG-enabled chat to retrieve from indexed SANS content.
|
||||
Falls back to topic-based course recommendations if RAG is unavailable.
|
||||
"""
|
||||
start = time.monotonic()
|
||||
|
||||
# Try Open WebUI RAG pipeline first
|
||||
try:
|
||||
result = await self._query_openwebui_rag(question, context, max_tokens)
|
||||
result.latency_ms = int((time.monotonic() - start) * 1000)
|
||||
|
||||
# Enrich with course references if not already present
|
||||
if not result.course_references:
|
||||
result.course_references = self._match_courses(question)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"RAG query failed, using fallback: {e}")
|
||||
# Fallback to topic-based matching
|
||||
courses = self._match_courses(question)
|
||||
return RAGResult(
|
||||
query=question,
|
||||
context="",
|
||||
sources=[],
|
||||
course_references=courses,
|
||||
confidence=0.3 if courses else 0.0,
|
||||
latency_ms=int((time.monotonic() - start) * 1000),
|
||||
)
|
||||
|
||||
async def _query_openwebui_rag(
|
||||
self,
|
||||
question: str,
|
||||
context: str,
|
||||
max_tokens: int,
|
||||
) -> RAGResult:
|
||||
"""Query Open WebUI with RAG context retrieval.
|
||||
|
||||
Open WebUI automatically retrieves from its indexed knowledge base
|
||||
when the model is configured with a knowledge collection.
|
||||
"""
|
||||
client = _get_client()
|
||||
|
||||
system_msg = (
|
||||
"You are a SANS cybersecurity knowledge assistant. "
|
||||
"Use your indexed SANS courseware to answer the question. "
|
||||
"Always cite the specific SANS course (e.g., FOR508, SEC504) "
|
||||
"and relevant section when referencing material. "
|
||||
"If the question relates to threat hunting procedures, "
|
||||
"reference the specific SANS methodology or framework."
|
||||
)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_msg},
|
||||
]
|
||||
|
||||
if context:
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": f"Investigation context:\n{context}\n\nQuestion: {question}",
|
||||
})
|
||||
else:
|
||||
messages.append({"role": "user", "content": question})
|
||||
|
||||
payload = {
|
||||
"model": self.rag_model,
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": 0.2,
|
||||
"stream": False,
|
||||
}
|
||||
|
||||
resp = await client.post(
|
||||
f"{self.openwebui_url}/v1/chat/completions",
|
||||
json=payload,
|
||||
headers=self._headers(),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
content = ""
|
||||
if data.get("choices"):
|
||||
content = data["choices"][0].get("message", {}).get("content", "")
|
||||
|
||||
# Extract course references from response
|
||||
course_refs = self._extract_course_refs(content)
|
||||
sources = self._extract_sources(data)
|
||||
|
||||
return RAGResult(
|
||||
query=question,
|
||||
context=content,
|
||||
sources=sources,
|
||||
course_references=course_refs,
|
||||
confidence=0.8 if content else 0.0,
|
||||
)
|
||||
|
||||
def _extract_course_refs(self, text: str) -> list[str]:
|
||||
"""Extract SANS course references from response text."""
|
||||
refs = set()
|
||||
# Match patterns like SEC504, FOR508, ICS410
|
||||
pattern = r'\b(SEC|FOR|ICS|MGT|AUD|DEV|LEG)\d{3}\b'
|
||||
matches = re.findall(pattern, text, re.IGNORECASE)
|
||||
# Need to get the full match
|
||||
full_pattern = r'\b(?:SEC|FOR|ICS|MGT|AUD|DEV|LEG)\d{3}\b'
|
||||
full_matches = re.findall(full_pattern, text, re.IGNORECASE)
|
||||
for m in full_matches:
|
||||
course_id = m.upper()
|
||||
if course_id in SANS_COURSES:
|
||||
refs.add(f"{course_id}: {SANS_COURSES[course_id]}")
|
||||
else:
|
||||
refs.add(course_id)
|
||||
return sorted(refs)
|
||||
|
||||
def _extract_sources(self, api_response: dict) -> list[str]:
|
||||
"""Extract source document references from Open WebUI response metadata."""
|
||||
sources = []
|
||||
# Open WebUI may include source metadata in various formats
|
||||
if "sources" in api_response:
|
||||
for src in api_response["sources"]:
|
||||
if isinstance(src, dict):
|
||||
sources.append(src.get("name", src.get("title", str(src))))
|
||||
else:
|
||||
sources.append(str(src))
|
||||
# Check in metadata
|
||||
for choice in api_response.get("choices", []):
|
||||
meta = choice.get("metadata", {})
|
||||
if "sources" in meta:
|
||||
for src in meta["sources"]:
|
||||
if isinstance(src, dict):
|
||||
sources.append(src.get("name", str(src)))
|
||||
else:
|
||||
sources.append(str(src))
|
||||
return sources[:10] # Limit
|
||||
|
||||
def _match_courses(self, query: str) -> list[str]:
|
||||
"""Match query keywords to SANS courses using topic map."""
|
||||
q = query.lower()
|
||||
matched = set()
|
||||
for topic, courses in TOPIC_COURSE_MAP.items():
|
||||
if topic in q:
|
||||
for course_id in courses:
|
||||
if course_id in SANS_COURSES:
|
||||
matched.add(f"{course_id}: {SANS_COURSES[course_id]}")
|
||||
return sorted(matched)[:5]
|
||||
|
||||
async def get_course_context(self, course_id: str) -> str:
|
||||
"""Get a brief course description for context injection."""
|
||||
course_id = course_id.upper().split(":")[0].strip()
|
||||
if course_id in SANS_COURSES:
|
||||
return f"{course_id}: {SANS_COURSES[course_id]}"
|
||||
return ""
|
||||
|
||||
async def enrich_prompt(
|
||||
self,
|
||||
query: str,
|
||||
investigation_context: str = "",
|
||||
) -> str:
|
||||
"""Generate SANS-enriched context to inject into agent prompts.
|
||||
|
||||
Returns a context string with relevant SANS references.
|
||||
"""
|
||||
result = await self.query(query, context=investigation_context, max_tokens=512)
|
||||
|
||||
parts = []
|
||||
if result.context:
|
||||
parts.append(f"SANS Reference Context:\n{result.context}")
|
||||
if result.course_references:
|
||||
parts.append(f"Relevant SANS Courses: {', '.join(result.course_references)}")
|
||||
if result.sources:
|
||||
parts.append(f"Sources: {', '.join(result.sources[:5])}")
|
||||
|
||||
return "\n".join(parts) if parts else ""
|
||||
|
||||
async def health_check(self) -> dict:
|
||||
"""Check RAG service availability."""
|
||||
try:
|
||||
client = _get_client()
|
||||
resp = await client.get(
|
||||
f"{self.openwebui_url}/v1/models",
|
||||
headers=self._headers(),
|
||||
timeout=5,
|
||||
)
|
||||
available = resp.status_code == 200
|
||||
self._available = available
|
||||
return {
|
||||
"available": available,
|
||||
"url": self.openwebui_url,
|
||||
"model": self.rag_model,
|
||||
}
|
||||
except Exception as e:
|
||||
self._available = False
|
||||
return {
|
||||
"available": False,
|
||||
"url": self.openwebui_url,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
|
||||
# Singleton
|
||||
sans_rag = SANSRAGService()
|
||||
Reference in New Issue
Block a user