mirror of
https://github.com/mblanke/ThreatHunt.git
synced 2026-03-01 14:00:20 -05:00
- Rewrote NetworkMap to use deduplicated host inventory (163 hosts from 394K rows)
- New host_inventory.py service: scans datasets, groups by FQDN/ClientId, extracts IPs/users/OS
- New /api/network/host-inventory endpoint
- Added AnalysisDashboard with 6 tabs (IOC, anomaly, host profile, query, triage, reports)
- Added 16 analysis API endpoints with job queue and load balancer
- Added 4 AI/analysis ORM models (ProcessingJob, AnalysisResult, HostProfile, IOCEntry)
- Filters system accounts (DWM-*, UMFD-*, LOCAL/NETWORK SERVICE)
- Infers OS from hostname patterns (W10-* -> Windows 10)
- Canvas 2D force-directed graph with host/external-IP node types
- Click popover shows hostname, FQDN, IPs, OS, users, datasets, connections
402 lines
17 KiB
Python
402 lines
17 KiB
Python
"""SQLAlchemy ORM models for ThreatHunt.
|
|
|
|
All persistent entities: datasets, hunts, conversations, annotations,
|
|
hypotheses, enrichment results, users, and AI analysis tables.
|
|
"""
|
|
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from typing import Optional
|
|
|
|
from sqlalchemy import (
|
|
Boolean,
|
|
DateTime,
|
|
Float,
|
|
ForeignKey,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
JSON,
|
|
Index,
|
|
)
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from .engine import Base
|
|
|
|
|
|
def _utcnow() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC.

    Used as the ``default`` / ``onupdate`` callable for timestamp columns.
    """
    return datetime.now(tz=timezone.utc)
|
|
|
|
|
|
def _new_id() -> str:
    """Return a new random identifier: a UUID4 rendered as 32 hex characters.

    Used as the ``default`` callable for string primary keys.
    """
    fresh = uuid.uuid4()
    return fresh.hex
|
|
|
|
|
|
# -- Users ---
|
|
|
|
class User(Base):
    """Account row stored in the ``users`` table.

    Linked to the hunts the user owns and the annotations the user authored.
    """

    __tablename__ = "users"

    # Identity and credentials.
    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    username: Mapped[str] = mapped_column(
        String(64), unique=True, nullable=False, index=True
    )
    email: Mapped[str] = mapped_column(
        String(256), unique=True, nullable=False
    )
    hashed_password: Mapped[str] = mapped_column(String(256), nullable=False)

    # Role, activation flag and creation timestamp.
    role: Mapped[str] = mapped_column(String(16), default="analyst")
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )

    # Both collections use the "selectin" eager-loading strategy.
    hunts: Mapped[list["Hunt"]] = relationship(
        back_populates="owner", lazy="selectin"
    )
    annotations: Mapped[list["Annotation"]] = relationship(
        back_populates="author", lazy="selectin"
    )
|
|
|
|
|
|
# -- Hunts ---
|
|
|
|
class Hunt(Base):
    """Hunt row stored in the ``hunts`` table.

    A hunt may have an owning user and is the parent of datasets,
    conversations, hypotheses, host profiles and reports.
    """

    __tablename__ = "hunts"

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    name: Mapped[str] = mapped_column(String(256), nullable=False)
    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    status: Mapped[str] = mapped_column(String(32), default="active")
    # Nullable: a hunt does not have to be attached to a user.
    owner_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("users.id"),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )
    # Refreshed through ``onupdate`` whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
        onupdate=_utcnow,
    )

    # Relationships using the "selectin" eager-loading strategy.
    owner: Mapped[Optional["User"]] = relationship(
        back_populates="hunts", lazy="selectin"
    )
    datasets: Mapped[list["Dataset"]] = relationship(
        back_populates="hunt", lazy="selectin"
    )
    conversations: Mapped[list["Conversation"]] = relationship(
        back_populates="hunt", lazy="selectin"
    )
    hypotheses: Mapped[list["Hypothesis"]] = relationship(
        back_populates="hunt", lazy="selectin"
    )

    # Relationships declared with lazy="noload": not fetched on access.
    host_profiles: Mapped[list["HostProfile"]] = relationship(
        back_populates="hunt", lazy="noload"
    )
    reports: Mapped[list["HuntReport"]] = relationship(
        back_populates="hunt", lazy="noload"
    )
|
|
|
|
|
|
# -- Datasets ---
|
|
|
|
class Dataset(Base):
    """Dataset row stored in the ``datasets`` table.

    Holds file-level metadata and processing state for a dataset; the
    individual records live in ``DatasetRow`` and are reachable via ``rows``.
    """

    __tablename__ = "datasets"
    __table_args__ = (
        Index("ix_datasets_hunt", "hunt_id"),
        Index("ix_datasets_status", "processing_status"),
    )

    # File and content metadata.
    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    name: Mapped[str] = mapped_column(
        String(256), nullable=False, index=True
    )
    filename: Mapped[str] = mapped_column(String(512), nullable=False)
    source_tool: Mapped[Optional[str]] = mapped_column(
        String(64), nullable=True
    )
    row_count: Mapped[int] = mapped_column(Integer, default=0)
    column_schema: Mapped[Optional[dict]] = mapped_column(
        JSON, nullable=True
    )
    normalized_columns: Mapped[Optional[dict]] = mapped_column(
        JSON, nullable=True
    )
    ioc_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
    file_size_bytes: Mapped[int] = mapped_column(Integer, default=0)
    encoding: Mapped[Optional[str]] = mapped_column(
        String(32), nullable=True
    )
    delimiter: Mapped[Optional[str]] = mapped_column(
        String(4), nullable=True
    )
    time_range_start: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    time_range_end: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    # Columns introduced in Phase 1-2.
    processing_status: Mapped[str] = mapped_column(
        String(20), default="ready"
    )
    artifact_type: Mapped[Optional[str]] = mapped_column(
        String(128), nullable=True
    )
    error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    file_path: Mapped[Optional[str]] = mapped_column(
        String(512), nullable=True
    )

    # Ownership and provenance.
    hunt_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("hunts.id"),
        nullable=True,
    )
    # Plain string column; no foreign key is declared on it.
    uploaded_by: Mapped[Optional[str]] = mapped_column(
        String(32), nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )

    hunt: Mapped[Optional["Hunt"]] = relationship(
        back_populates="datasets", lazy="selectin"
    )
    # Child collections: lazy="noload", with ORM-level cascade of all
    # operations plus delete-orphan.
    rows: Mapped[list["DatasetRow"]] = relationship(
        back_populates="dataset",
        lazy="noload",
        cascade="all, delete-orphan",
    )
    triage_results: Mapped[list["TriageResult"]] = relationship(
        back_populates="dataset",
        lazy="noload",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class DatasetRow(Base):
    """Single record of a dataset, stored in the ``dataset_rows`` table.

    Each row keeps its position (``row_index``), its JSON payload (``data``)
    and an optional JSON ``normalized_data`` payload.
    """

    __tablename__ = "dataset_rows"
    __table_args__ = (
        Index("ix_dataset_rows_dataset", "dataset_id"),
        Index("ix_dataset_rows_dataset_idx", "dataset_id", "row_index"),
    )

    id: Mapped[int] = mapped_column(
        Integer, primary_key=True, autoincrement=True
    )
    # The foreign key is declared with ON DELETE CASCADE.
    dataset_id: Mapped[str] = mapped_column(
        String(32),
        ForeignKey("datasets.id", ondelete="CASCADE"),
        nullable=False,
    )
    row_index: Mapped[int] = mapped_column(Integer, nullable=False)
    data: Mapped[dict] = mapped_column(JSON, nullable=False)
    normalized_data: Mapped[Optional[dict]] = mapped_column(
        JSON, nullable=True
    )

    dataset: Mapped["Dataset"] = relationship(back_populates="rows")
    annotations: Mapped[list["Annotation"]] = relationship(
        back_populates="row", lazy="noload"
    )
|
|
|
|
|
|
# -- Conversations ---
|
|
|
|
class Conversation(Base):
    """Conversation row stored in the ``conversations`` table.

    Optionally tied to a hunt and/or a dataset; owns an ordered list of
    ``Message`` rows.
    """

    __tablename__ = "conversations"

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    title: Mapped[Optional[str]] = mapped_column(String(256), nullable=True)
    hunt_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("hunts.id"),
        nullable=True,
    )
    dataset_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("datasets.id"),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )
    # Refreshed through ``onupdate`` whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
        onupdate=_utcnow,
    )

    hunt: Mapped[Optional["Hunt"]] = relationship(
        back_populates="conversations", lazy="selectin"
    )
    # Messages are loaded with the conversation, ordered by creation time.
    messages: Mapped[list["Message"]] = relationship(
        back_populates="conversation",
        lazy="selectin",
        cascade="all, delete-orphan",
        order_by="Message.created_at",
    )
|
|
|
|
|
|
class Message(Base):
    """One message of a conversation, stored in the ``messages`` table.

    Besides role and content, a row can record which model and node were
    used, a token count, a latency in milliseconds and a JSON metadata blob.
    """

    __tablename__ = "messages"
    __table_args__ = (
        Index("ix_messages_conversation", "conversation_id"),
    )

    id: Mapped[int] = mapped_column(
        Integer, primary_key=True, autoincrement=True
    )
    # The foreign key is declared with ON DELETE CASCADE.
    conversation_id: Mapped[str] = mapped_column(
        String(32),
        ForeignKey("conversations.id", ondelete="CASCADE"),
        nullable=False,
    )
    role: Mapped[str] = mapped_column(String(16), nullable=False)
    content: Mapped[str] = mapped_column(Text, nullable=False)

    # Optional generation metadata.
    model_used: Mapped[Optional[str]] = mapped_column(
        String(128), nullable=True
    )
    node_used: Mapped[Optional[str]] = mapped_column(
        String(64), nullable=True
    )
    token_count: Mapped[Optional[int]] = mapped_column(
        Integer, nullable=True
    )
    latency_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    response_meta: Mapped[Optional[dict]] = mapped_column(
        JSON, nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )

    conversation: Mapped["Conversation"] = relationship(
        back_populates="messages"
    )
|
|
|
|
|
|
# -- Annotations ---
|
|
|
|
class Annotation(Base):
    """Annotation row stored in the ``annotations`` table.

    An annotation carries free text plus severity, tag and highlight colour,
    and may reference a dataset row, a dataset and an authoring user.
    """

    __tablename__ = "annotations"
    __table_args__ = (
        Index("ix_annotations_dataset", "dataset_id"),
        Index("ix_annotations_row", "row_id"),
    )

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    # ON DELETE SET NULL: the foreign key is nulled if the row is deleted.
    row_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        ForeignKey("dataset_rows.id", ondelete="SET NULL"),
        nullable=True,
    )
    dataset_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("datasets.id"),
        nullable=True,
    )
    author_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("users.id"),
        nullable=True,
    )

    # Annotation content and presentation.
    text: Mapped[str] = mapped_column(Text, nullable=False)
    severity: Mapped[str] = mapped_column(String(16), default="info")
    tag: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
    highlight_color: Mapped[Optional[str]] = mapped_column(
        String(16), nullable=True
    )

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )
    # Refreshed through ``onupdate`` whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
        onupdate=_utcnow,
    )

    row: Mapped[Optional["DatasetRow"]] = relationship(
        back_populates="annotations"
    )
    author: Mapped[Optional["User"]] = relationship(
        back_populates="annotations"
    )
|
|
|
|
|
|
# -- Hypotheses ---
|
|
|
|
class Hypothesis(Base):
    """Hypothesis row stored in the ``hypotheses`` table.

    Optionally attached to a hunt; records a title, description, a MITRE
    technique string, a status and supporting evidence.
    """

    __tablename__ = "hypotheses"
    __table_args__ = (
        Index("ix_hypotheses_hunt", "hunt_id"),
    )

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    hunt_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("hunts.id"),
        nullable=True,
    )
    title: Mapped[str] = mapped_column(String(256), nullable=False)
    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    mitre_technique: Mapped[Optional[str]] = mapped_column(
        String(32), nullable=True
    )
    status: Mapped[str] = mapped_column(String(16), default="draft")

    # Evidence: a JSON list of row ids plus free-text notes.
    evidence_row_ids: Mapped[Optional[list]] = mapped_column(
        JSON, nullable=True
    )
    evidence_notes: Mapped[Optional[str]] = mapped_column(
        Text, nullable=True
    )

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )
    # Refreshed through ``onupdate`` whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
        onupdate=_utcnow,
    )

    hunt: Mapped[Optional["Hunt"]] = relationship(
        back_populates="hypotheses", lazy="selectin"
    )
|
|
|
|
|
|
# -- Enrichment Results ---
|
|
|
|
class EnrichmentResult(Base):
    """Enrichment result row stored in the ``enrichment_results`` table.

    Records what a given source reported for an IOC value, together with
    ``cached_at`` and optional ``expires_at`` timestamps.
    """

    __tablename__ = "enrichment_results"
    __table_args__ = (
        Index("ix_enrichment_ioc_source", "ioc_value", "source"),
    )

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )

    # What was looked up, and where.
    ioc_value: Mapped[str] = mapped_column(
        String(512), nullable=False, index=True
    )
    ioc_type: Mapped[str] = mapped_column(String(32), nullable=False)
    source: Mapped[str] = mapped_column(String(32), nullable=False)

    # What came back.
    verdict: Mapped[Optional[str]] = mapped_column(String(16), nullable=True)
    confidence: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    raw_result: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
    summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    dataset_id: Mapped[Optional[str]] = mapped_column(
        String(32),
        ForeignKey("datasets.id"),
        nullable=True,
    )
    cached_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )
    expires_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
|
|
|
|
|
|
# -- AUP Keyword Themes & Keywords ---
|
|
|
|
class KeywordTheme(Base):
    """Keyword theme row stored in the ``keyword_themes`` table.

    A uniquely named theme with a display colour and enabled/built-in flags;
    it owns a collection of ``Keyword`` rows.
    """

    __tablename__ = "keyword_themes"

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    name: Mapped[str] = mapped_column(
        String(128), unique=True, nullable=False, index=True
    )
    color: Mapped[str] = mapped_column(String(16), default="#9e9e9e")
    enabled: Mapped[bool] = mapped_column(Boolean, default=True)
    is_builtin: Mapped[bool] = mapped_column(Boolean, default=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )

    # Loaded with the theme ("selectin"); ORM cascade of all operations
    # plus delete-orphan.
    keywords: Mapped[list["Keyword"]] = relationship(
        back_populates="theme",
        lazy="selectin",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class Keyword(Base):
    """Keyword row stored in the ``keywords`` table.

    Belongs to exactly one ``KeywordTheme``; ``is_regex`` is a boolean flag
    stored alongside ``value``.
    """

    __tablename__ = "keywords"
    __table_args__ = (
        Index("ix_keywords_theme", "theme_id"),
        Index("ix_keywords_value", "value"),
    )

    id: Mapped[int] = mapped_column(
        Integer, primary_key=True, autoincrement=True
    )
    # The foreign key is declared with ON DELETE CASCADE.
    theme_id: Mapped[str] = mapped_column(
        String(32),
        ForeignKey("keyword_themes.id", ondelete="CASCADE"),
        nullable=False,
    )
    value: Mapped[str] = mapped_column(String(256), nullable=False)
    is_regex: Mapped[bool] = mapped_column(Boolean, default=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )

    theme: Mapped["KeywordTheme"] = relationship(back_populates="keywords")
|
|
|
|
|
|
# -- AI Analysis Tables (Phase 2) ---
|
|
|
|
class TriageResult(Base):
    """Triage result row stored in the ``triage_results`` table.

    Covers the span ``row_start``..``row_end`` of one dataset and stores a
    risk score, a verdict and JSON lists of findings, indicators and MITRE
    techniques.
    """

    __tablename__ = "triage_results"

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    # The foreign key is declared with ON DELETE CASCADE.
    dataset_id: Mapped[str] = mapped_column(
        String(32),
        ForeignKey("datasets.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    row_start: Mapped[int] = mapped_column(Integer, nullable=False)
    row_end: Mapped[int] = mapped_column(Integer, nullable=False)

    # Assessment.
    risk_score: Mapped[float] = mapped_column(Float, default=0.0)
    verdict: Mapped[str] = mapped_column(String(20), default="pending")
    findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
    suspicious_indicators: Mapped[Optional[list]] = mapped_column(
        JSON, nullable=True
    )
    mitre_techniques: Mapped[Optional[list]] = mapped_column(
        JSON, nullable=True
    )

    # Which model / node produced the result.
    model_used: Mapped[Optional[str]] = mapped_column(
        String(128), nullable=True
    )
    node_used: Mapped[Optional[str]] = mapped_column(
        String(64), nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )

    dataset: Mapped["Dataset"] = relationship(
        back_populates="triage_results"
    )
|
|
|
|
|
|
class HostProfile(Base):
    """Host profile row stored in the ``host_profiles`` table.

    One profile belongs to one hunt and stores host identity fields, a risk
    score/level and several summary and analysis fields.
    """

    __tablename__ = "host_profiles"

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    # The foreign key is declared with ON DELETE CASCADE.
    hunt_id: Mapped[str] = mapped_column(
        String(32),
        ForeignKey("hunts.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Host identity.
    hostname: Mapped[str] = mapped_column(String(256), nullable=False)
    fqdn: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
    client_id: Mapped[Optional[str]] = mapped_column(
        String(64), nullable=True
    )

    # Risk assessment and analysis output.
    risk_score: Mapped[float] = mapped_column(Float, default=0.0)
    risk_level: Mapped[str] = mapped_column(String(20), default="unknown")
    artifact_summary: Mapped[Optional[dict]] = mapped_column(
        JSON, nullable=True
    )
    timeline_summary: Mapped[Optional[str]] = mapped_column(
        Text, nullable=True
    )
    suspicious_findings: Mapped[Optional[list]] = mapped_column(
        JSON, nullable=True
    )
    mitre_techniques: Mapped[Optional[list]] = mapped_column(
        JSON, nullable=True
    )
    llm_analysis: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Which model / node produced the profile.
    model_used: Mapped[Optional[str]] = mapped_column(
        String(128), nullable=True
    )
    node_used: Mapped[Optional[str]] = mapped_column(
        String(64), nullable=True
    )

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )
    # Refreshed through ``onupdate`` whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
        onupdate=_utcnow,
    )

    hunt: Mapped["Hunt"] = relationship(back_populates="host_profiles")
|
|
|
|
|
|
class HuntReport(Base):
    """Hunt report row stored in the ``hunt_reports`` table.

    One report belongs to one hunt and stores a status, summary and
    full-report text, several JSON sections and a generation time in
    milliseconds.
    """

    __tablename__ = "hunt_reports"

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    # The foreign key is declared with ON DELETE CASCADE.
    hunt_id: Mapped[str] = mapped_column(
        String(32),
        ForeignKey("hunts.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    status: Mapped[str] = mapped_column(String(20), default="pending")

    # Report text.
    exec_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    full_report: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Structured report sections, stored as JSON.
    findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
    recommendations: Mapped[Optional[list]] = mapped_column(
        JSON, nullable=True
    )
    mitre_mapping: Mapped[Optional[dict]] = mapped_column(
        JSON, nullable=True
    )
    ioc_table: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
    host_risk_summary: Mapped[Optional[list]] = mapped_column(
        JSON, nullable=True
    )

    # Generation metadata.
    models_used: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
    generation_time_ms: Mapped[Optional[int]] = mapped_column(
        Integer, nullable=True
    )

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )
    # Refreshed through ``onupdate`` whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
        onupdate=_utcnow,
    )

    hunt: Mapped["Hunt"] = relationship(back_populates="reports")
|
|
|
|
|
|
class AnomalyResult(Base):
    """Anomaly result row stored in the ``anomaly_results`` table.

    References a dataset and, optionally, one of its rows, and stores an
    anomaly score, an optional centroid distance and cluster id, an outlier
    flag and an optional explanation.
    """

    __tablename__ = "anomaly_results"

    id: Mapped[str] = mapped_column(
        String(32), primary_key=True, default=_new_id
    )
    # Both foreign keys are declared with ON DELETE CASCADE.
    dataset_id: Mapped[str] = mapped_column(
        String(32),
        ForeignKey("datasets.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    row_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        ForeignKey("dataset_rows.id", ondelete="CASCADE"),
        nullable=True,
    )

    # Scoring output.
    anomaly_score: Mapped[float] = mapped_column(Float, default=0.0)
    distance_from_centroid: Mapped[Optional[float]] = mapped_column(
        Float, nullable=True
    )
    cluster_id: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    is_outlier: Mapped[bool] = mapped_column(Boolean, default=False)
    explanation: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow
    )