mirror of
https://github.com/mblanke/ThreatHunt.git
synced 2026-03-01 14:00:20 -05:00
feat: host-centric network map, analysis dashboard, deduped inventory
- Rewrote NetworkMap to use deduplicated host inventory (163 hosts from 394K rows) - New host_inventory.py service: scans datasets, groups by FQDN/ClientId, extracts IPs/users/OS - New /api/network/host-inventory endpoint - Added AnalysisDashboard with 6 tabs (IOC, anomaly, host profile, query, triage, reports) - Added 16 analysis API endpoints with job queue and load balancer - Added 4 AI/analysis ORM models (ProcessingJob, AnalysisResult, HostProfile, IOCEntry) - Filters system accounts (DWM-*, UMFD-*, LOCAL/NETWORK SERVICE) - Infers OS from hostname patterns (W10-* -> Windows 10) - Canvas 2D force-directed graph with host/external-IP node types - Click popover shows hostname, FQDN, IPs, OS, users, datasets, connections
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""SQLAlchemy ORM models for ThreatHunt.
|
||||
|
||||
All persistent entities: datasets, hunts, conversations, annotations,
|
||||
hypotheses, enrichment results, and users.
|
||||
hypotheses, enrichment results, users, and AI analysis tables.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
@@ -32,8 +32,7 @@ def _new_id() -> str:
|
||||
return uuid.uuid4().hex
|
||||
|
||||
|
||||
# ── Users ──────────────────────────────────────────────────────────────
|
||||
|
||||
# -- Users ---
|
||||
|
||||
class User(Base):
|
||||
__tablename__ = "users"
|
||||
@@ -42,17 +41,15 @@ class User(Base):
|
||||
username: Mapped[str] = mapped_column(String(64), unique=True, nullable=False, index=True)
|
||||
email: Mapped[str] = mapped_column(String(256), unique=True, nullable=False)
|
||||
hashed_password: Mapped[str] = mapped_column(String(256), nullable=False)
|
||||
role: Mapped[str] = mapped_column(String(16), default="analyst") # analyst | admin | viewer
|
||||
role: Mapped[str] = mapped_column(String(16), default="analyst")
|
||||
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
# relationships
|
||||
hunts: Mapped[list["Hunt"]] = relationship(back_populates="owner", lazy="selectin")
|
||||
annotations: Mapped[list["Annotation"]] = relationship(back_populates="author", lazy="selectin")
|
||||
|
||||
|
||||
# ── Hunts ──────────────────────────────────────────────────────────────
|
||||
|
||||
# -- Hunts ---
|
||||
|
||||
class Hunt(Base):
|
||||
__tablename__ = "hunts"
|
||||
@@ -60,7 +57,7 @@ class Hunt(Base):
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
name: Mapped[str] = mapped_column(String(256), nullable=False)
|
||||
description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
status: Mapped[str] = mapped_column(String(32), default="active") # active | closed | archived
|
||||
status: Mapped[str] = mapped_column(String(32), default="active")
|
||||
owner_id: Mapped[Optional[str]] = mapped_column(
|
||||
String(32), ForeignKey("users.id"), nullable=True
|
||||
)
|
||||
@@ -69,15 +66,15 @@ class Hunt(Base):
|
||||
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
|
||||
)
|
||||
|
||||
# relationships
|
||||
owner: Mapped[Optional["User"]] = relationship(back_populates="hunts", lazy="selectin")
|
||||
datasets: Mapped[list["Dataset"]] = relationship(back_populates="hunt", lazy="selectin")
|
||||
conversations: Mapped[list["Conversation"]] = relationship(back_populates="hunt", lazy="selectin")
|
||||
hypotheses: Mapped[list["Hypothesis"]] = relationship(back_populates="hunt", lazy="selectin")
|
||||
host_profiles: Mapped[list["HostProfile"]] = relationship(back_populates="hunt", lazy="noload")
|
||||
reports: Mapped[list["HuntReport"]] = relationship(back_populates="hunt", lazy="noload")
|
||||
|
||||
|
||||
# ── Datasets ───────────────────────────────────────────────────────────
|
||||
|
||||
# -- Datasets ---
|
||||
|
||||
class Dataset(Base):
|
||||
__tablename__ = "datasets"
|
||||
@@ -85,36 +82,44 @@ class Dataset(Base):
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
name: Mapped[str] = mapped_column(String(256), nullable=False, index=True)
|
||||
filename: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
source_tool: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) # velociraptor, etc.
|
||||
source_tool: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
row_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
column_schema: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
normalized_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
ioc_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True) # auto-detected IOC columns
|
||||
ioc_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
file_size_bytes: Mapped[int] = mapped_column(Integer, default=0)
|
||||
encoding: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
|
||||
delimiter: Mapped[Optional[str]] = mapped_column(String(4), nullable=True)
|
||||
time_range_start: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
time_range_end: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# New Phase 1-2 columns
|
||||
processing_status: Mapped[str] = mapped_column(String(20), default="ready")
|
||||
artifact_type: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
file_path: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
|
||||
|
||||
hunt_id: Mapped[Optional[str]] = mapped_column(
|
||||
String(32), ForeignKey("hunts.id"), nullable=True
|
||||
)
|
||||
uploaded_by: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
# relationships
|
||||
hunt: Mapped[Optional["Hunt"]] = relationship(back_populates="datasets", lazy="selectin")
|
||||
rows: Mapped[list["DatasetRow"]] = relationship(
|
||||
back_populates="dataset", lazy="noload", cascade="all, delete-orphan"
|
||||
)
|
||||
triage_results: Mapped[list["TriageResult"]] = relationship(
|
||||
back_populates="dataset", lazy="noload", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index("ix_datasets_hunt", "hunt_id"),
|
||||
Index("ix_datasets_status", "processing_status"),
|
||||
)
|
||||
|
||||
|
||||
class DatasetRow(Base):
|
||||
"""Individual row from a CSV dataset, stored as JSON blob."""
|
||||
__tablename__ = "dataset_rows"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
@@ -125,7 +130,6 @@ class DatasetRow(Base):
|
||||
data: Mapped[dict] = mapped_column(JSON, nullable=False)
|
||||
normalized_data: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
|
||||
# relationships
|
||||
dataset: Mapped["Dataset"] = relationship(back_populates="rows")
|
||||
annotations: Mapped[list["Annotation"]] = relationship(
|
||||
back_populates="row", lazy="noload"
|
||||
@@ -137,8 +141,7 @@ class DatasetRow(Base):
|
||||
)
|
||||
|
||||
|
||||
# ── Conversations ─────────────────────────────────────────────────────
|
||||
|
||||
# -- Conversations ---
|
||||
|
||||
class Conversation(Base):
|
||||
__tablename__ = "conversations"
|
||||
@@ -156,7 +159,6 @@ class Conversation(Base):
|
||||
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
|
||||
)
|
||||
|
||||
# relationships
|
||||
hunt: Mapped[Optional["Hunt"]] = relationship(back_populates="conversations", lazy="selectin")
|
||||
messages: Mapped[list["Message"]] = relationship(
|
||||
back_populates="conversation", lazy="selectin", cascade="all, delete-orphan",
|
||||
@@ -171,16 +173,15 @@ class Message(Base):
|
||||
conversation_id: Mapped[str] = mapped_column(
|
||||
String(32), ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
role: Mapped[str] = mapped_column(String(16), nullable=False) # user | agent | system
|
||||
role: Mapped[str] = mapped_column(String(16), nullable=False)
|
||||
content: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
model_used: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) # wile | roadrunner | cluster
|
||||
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
token_count: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
latency_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
response_meta: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
# relationships
|
||||
conversation: Mapped["Conversation"] = relationship(back_populates="messages")
|
||||
|
||||
__table_args__ = (
|
||||
@@ -188,8 +189,7 @@ class Message(Base):
|
||||
)
|
||||
|
||||
|
||||
# ── Annotations ───────────────────────────────────────────────────────
|
||||
|
||||
# -- Annotations ---
|
||||
|
||||
class Annotation(Base):
|
||||
__tablename__ = "annotations"
|
||||
@@ -205,19 +205,14 @@ class Annotation(Base):
|
||||
String(32), ForeignKey("users.id"), nullable=True
|
||||
)
|
||||
text: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
severity: Mapped[str] = mapped_column(
|
||||
String(16), default="info"
|
||||
) # info | low | medium | high | critical
|
||||
tag: Mapped[Optional[str]] = mapped_column(
|
||||
String(32), nullable=True
|
||||
) # suspicious | benign | needs-review
|
||||
severity: Mapped[str] = mapped_column(String(16), default="info")
|
||||
tag: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
|
||||
highlight_color: Mapped[Optional[str]] = mapped_column(String(16), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
|
||||
)
|
||||
|
||||
# relationships
|
||||
row: Mapped[Optional["DatasetRow"]] = relationship(back_populates="annotations")
|
||||
author: Mapped[Optional["User"]] = relationship(back_populates="annotations")
|
||||
|
||||
@@ -227,8 +222,7 @@ class Annotation(Base):
|
||||
)
|
||||
|
||||
|
||||
# ── Hypotheses ────────────────────────────────────────────────────────
|
||||
|
||||
# -- Hypotheses ---
|
||||
|
||||
class Hypothesis(Base):
|
||||
__tablename__ = "hypotheses"
|
||||
@@ -240,9 +234,7 @@ class Hypothesis(Base):
|
||||
title: Mapped[str] = mapped_column(String(256), nullable=False)
|
||||
description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
mitre_technique: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
|
||||
status: Mapped[str] = mapped_column(
|
||||
String(16), default="draft"
|
||||
) # draft | active | confirmed | rejected
|
||||
status: Mapped[str] = mapped_column(String(16), default="draft")
|
||||
evidence_row_ids: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
evidence_notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
@@ -250,7 +242,6 @@ class Hypothesis(Base):
|
||||
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
|
||||
)
|
||||
|
||||
# relationships
|
||||
hunt: Mapped[Optional["Hunt"]] = relationship(back_populates="hypotheses", lazy="selectin")
|
||||
|
||||
__table_args__ = (
|
||||
@@ -258,21 +249,16 @@ class Hypothesis(Base):
|
||||
)
|
||||
|
||||
|
||||
# ── Enrichment Results ────────────────────────────────────────────────
|
||||
|
||||
# -- Enrichment Results ---
|
||||
|
||||
class EnrichmentResult(Base):
|
||||
__tablename__ = "enrichment_results"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
ioc_value: Mapped[str] = mapped_column(String(512), nullable=False, index=True)
|
||||
ioc_type: Mapped[str] = mapped_column(
|
||||
String(32), nullable=False
|
||||
) # ip | hash_md5 | hash_sha1 | hash_sha256 | domain | url
|
||||
source: Mapped[str] = mapped_column(String(32), nullable=False) # virustotal | abuseipdb | shodan | ai
|
||||
verdict: Mapped[Optional[str]] = mapped_column(
|
||||
String(16), nullable=True
|
||||
) # clean | suspicious | malicious | unknown
|
||||
ioc_type: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||
source: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||
verdict: Mapped[Optional[str]] = mapped_column(String(16), nullable=True)
|
||||
confidence: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
|
||||
raw_result: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
@@ -287,28 +273,24 @@ class EnrichmentResult(Base):
|
||||
)
|
||||
|
||||
|
||||
# ── AUP Keyword Themes & Keywords ────────────────────────────────────
|
||||
|
||||
# -- AUP Keyword Themes & Keywords ---
|
||||
|
||||
class KeywordTheme(Base):
|
||||
"""A named category of keywords for AUP scanning (e.g. gambling, gaming)."""
|
||||
__tablename__ = "keyword_themes"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
name: Mapped[str] = mapped_column(String(128), unique=True, nullable=False, index=True)
|
||||
color: Mapped[str] = mapped_column(String(16), default="#9e9e9e") # hex chip color
|
||||
color: Mapped[str] = mapped_column(String(16), default="#9e9e9e")
|
||||
enabled: Mapped[bool] = mapped_column(Boolean, default=True)
|
||||
is_builtin: Mapped[bool] = mapped_column(Boolean, default=False) # seed-provided
|
||||
is_builtin: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
# relationships
|
||||
keywords: Mapped[list["Keyword"]] = relationship(
|
||||
back_populates="theme", lazy="selectin", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
|
||||
class Keyword(Base):
|
||||
"""Individual keyword / pattern belonging to a theme."""
|
||||
__tablename__ = "keywords"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
@@ -319,10 +301,102 @@ class Keyword(Base):
|
||||
is_regex: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
# relationships
|
||||
theme: Mapped["KeywordTheme"] = relationship(back_populates="keywords")
|
||||
|
||||
__table_args__ = (
|
||||
Index("ix_keywords_theme", "theme_id"),
|
||||
Index("ix_keywords_value", "value"),
|
||||
)
|
||||
|
||||
|
||||
# -- AI Analysis Tables (Phase 2) ---
|
||||
|
||||
class TriageResult(Base):
|
||||
__tablename__ = "triage_results"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
dataset_id: Mapped[str] = mapped_column(
|
||||
String(32), ForeignKey("datasets.id", ondelete="CASCADE"), nullable=False, index=True
|
||||
)
|
||||
row_start: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
row_end: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
risk_score: Mapped[float] = mapped_column(Float, default=0.0)
|
||||
verdict: Mapped[str] = mapped_column(String(20), default="pending")
|
||||
findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
suspicious_indicators: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
mitre_techniques: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
model_used: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
dataset: Mapped["Dataset"] = relationship(back_populates="triage_results")
|
||||
|
||||
|
||||
class HostProfile(Base):
|
||||
__tablename__ = "host_profiles"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
hunt_id: Mapped[str] = mapped_column(
|
||||
String(32), ForeignKey("hunts.id", ondelete="CASCADE"), nullable=False, index=True
|
||||
)
|
||||
hostname: Mapped[str] = mapped_column(String(256), nullable=False)
|
||||
fqdn: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
|
||||
client_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
risk_score: Mapped[float] = mapped_column(Float, default=0.0)
|
||||
risk_level: Mapped[str] = mapped_column(String(20), default="unknown")
|
||||
artifact_summary: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
timeline_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
suspicious_findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
mitre_techniques: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
llm_analysis: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
model_used: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
|
||||
)
|
||||
|
||||
hunt: Mapped["Hunt"] = relationship(back_populates="host_profiles")
|
||||
|
||||
|
||||
class HuntReport(Base):
|
||||
__tablename__ = "hunt_reports"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
hunt_id: Mapped[str] = mapped_column(
|
||||
String(32), ForeignKey("hunts.id", ondelete="CASCADE"), nullable=False, index=True
|
||||
)
|
||||
status: Mapped[str] = mapped_column(String(20), default="pending")
|
||||
exec_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
full_report: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
recommendations: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
mitre_mapping: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
|
||||
ioc_table: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
host_risk_summary: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
models_used: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
|
||||
generation_time_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
|
||||
)
|
||||
|
||||
hunt: Mapped["Hunt"] = relationship(back_populates="reports")
|
||||
|
||||
|
||||
class AnomalyResult(Base):
|
||||
__tablename__ = "anomaly_results"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
|
||||
dataset_id: Mapped[str] = mapped_column(
|
||||
String(32), ForeignKey("datasets.id", ondelete="CASCADE"), nullable=False, index=True
|
||||
)
|
||||
row_id: Mapped[Optional[int]] = mapped_column(
|
||||
Integer, ForeignKey("dataset_rows.id", ondelete="CASCADE"), nullable=True
|
||||
)
|
||||
anomaly_score: Mapped[float] = mapped_column(Float, default=0.0)
|
||||
distance_from_centroid: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
|
||||
cluster_id: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
is_outlier: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
explanation: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
Reference in New Issue
Block a user