feat: host-centric network map, analysis dashboard, deduped inventory

- Rewrote NetworkMap to use deduplicated host inventory (163 hosts from 394K rows)
- New host_inventory.py service: scans datasets, groups by FQDN/ClientId, extracts IPs/users/OS
- New /api/network/host-inventory endpoint
- Added AnalysisDashboard with 6 tabs (IOC, anomaly, host profile, query, triage, reports)
- Added 16 analysis API endpoints with job queue and load balancer
- Added 4 AI/analysis ORM models (ProcessingJob, AnalysisResult, HostProfile, IOCEntry)
- Filters system accounts (DWM-*, UMFD-*, LOCAL/NETWORK SERVICE)
- Infers OS from hostname patterns (W10-* -> Windows 10)
- Canvas 2D force-directed graph with host/external-IP node types
- Click popover shows hostname, FQDN, IPs, OS, users, datasets, connections
This commit is contained in:
2026-02-20 07:16:17 -05:00
parent 9b98ab9614
commit 04a9946891
24 changed files with 4774 additions and 620 deletions

View File

@@ -3,6 +3,7 @@
Uses async SQLAlchemy with aiosqlite for local dev and asyncpg for production PostgreSQL.
"""
from sqlalchemy import event
from sqlalchemy.ext.asyncio import (
AsyncSession,
async_sessionmaker,
@@ -12,12 +13,32 @@ from sqlalchemy.orm import DeclarativeBase
from app.config import settings
engine = create_async_engine(
settings.DATABASE_URL,
_is_sqlite = settings.DATABASE_URL.startswith("sqlite")
_engine_kwargs: dict = dict(
echo=settings.DEBUG,
future=True,
)
if _is_sqlite:
_engine_kwargs["connect_args"] = {"timeout": 30}
_engine_kwargs["pool_size"] = 1
_engine_kwargs["max_overflow"] = 0
engine = create_async_engine(settings.DATABASE_URL, **_engine_kwargs)
@event.listens_for(engine.sync_engine, "connect")
def _set_sqlite_pragmas(dbapi_conn, connection_record):
"""Enable WAL mode and tune busy-timeout for SQLite connections."""
if _is_sqlite:
cursor = dbapi_conn.cursor()
cursor.execute("PRAGMA journal_mode=WAL")
cursor.execute("PRAGMA busy_timeout=5000")
cursor.execute("PRAGMA synchronous=NORMAL")
cursor.close()
async_session_factory = async_sessionmaker(
engine,
class_=AsyncSession,
@@ -51,4 +72,4 @@ async def init_db() -> None:
async def dispose_db() -> None:
"""Dispose of the engine connection pool."""
await engine.dispose()
await engine.dispose()

View File

@@ -1,7 +1,7 @@
"""SQLAlchemy ORM models for ThreatHunt.
All persistent entities: datasets, hunts, conversations, annotations,
hypotheses, enrichment results, and users.
hypotheses, enrichment results, users, and AI analysis tables.
"""
import uuid
@@ -32,8 +32,7 @@ def _new_id() -> str:
return uuid.uuid4().hex
# ── Users ──────────────────────────────────────────────────────────────
# -- Users ---
class User(Base):
__tablename__ = "users"
@@ -42,17 +41,15 @@ class User(Base):
username: Mapped[str] = mapped_column(String(64), unique=True, nullable=False, index=True)
email: Mapped[str] = mapped_column(String(256), unique=True, nullable=False)
hashed_password: Mapped[str] = mapped_column(String(256), nullable=False)
role: Mapped[str] = mapped_column(String(16), default="analyst") # analyst | admin | viewer
role: Mapped[str] = mapped_column(String(16), default="analyst")
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
# relationships
hunts: Mapped[list["Hunt"]] = relationship(back_populates="owner", lazy="selectin")
annotations: Mapped[list["Annotation"]] = relationship(back_populates="author", lazy="selectin")
# ── Hunts ──────────────────────────────────────────────────────────────
# -- Hunts ---
class Hunt(Base):
__tablename__ = "hunts"
@@ -60,7 +57,7 @@ class Hunt(Base):
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
name: Mapped[str] = mapped_column(String(256), nullable=False)
description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
status: Mapped[str] = mapped_column(String(32), default="active") # active | closed | archived
status: Mapped[str] = mapped_column(String(32), default="active")
owner_id: Mapped[Optional[str]] = mapped_column(
String(32), ForeignKey("users.id"), nullable=True
)
@@ -69,15 +66,15 @@ class Hunt(Base):
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
)
# relationships
owner: Mapped[Optional["User"]] = relationship(back_populates="hunts", lazy="selectin")
datasets: Mapped[list["Dataset"]] = relationship(back_populates="hunt", lazy="selectin")
conversations: Mapped[list["Conversation"]] = relationship(back_populates="hunt", lazy="selectin")
hypotheses: Mapped[list["Hypothesis"]] = relationship(back_populates="hunt", lazy="selectin")
host_profiles: Mapped[list["HostProfile"]] = relationship(back_populates="hunt", lazy="noload")
reports: Mapped[list["HuntReport"]] = relationship(back_populates="hunt", lazy="noload")
# ── Datasets ───────────────────────────────────────────────────────────
# -- Datasets ---
class Dataset(Base):
__tablename__ = "datasets"
@@ -85,36 +82,44 @@ class Dataset(Base):
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
name: Mapped[str] = mapped_column(String(256), nullable=False, index=True)
filename: Mapped[str] = mapped_column(String(512), nullable=False)
source_tool: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) # velociraptor, etc.
source_tool: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
row_count: Mapped[int] = mapped_column(Integer, default=0)
column_schema: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
normalized_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
ioc_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True) # auto-detected IOC columns
ioc_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
file_size_bytes: Mapped[int] = mapped_column(Integer, default=0)
encoding: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
delimiter: Mapped[Optional[str]] = mapped_column(String(4), nullable=True)
time_range_start: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
time_range_end: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
# New Phase 1-2 columns
processing_status: Mapped[str] = mapped_column(String(20), default="ready")
artifact_type: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
file_path: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
hunt_id: Mapped[Optional[str]] = mapped_column(
String(32), ForeignKey("hunts.id"), nullable=True
)
uploaded_by: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
# relationships
hunt: Mapped[Optional["Hunt"]] = relationship(back_populates="datasets", lazy="selectin")
rows: Mapped[list["DatasetRow"]] = relationship(
back_populates="dataset", lazy="noload", cascade="all, delete-orphan"
)
triage_results: Mapped[list["TriageResult"]] = relationship(
back_populates="dataset", lazy="noload", cascade="all, delete-orphan"
)
__table_args__ = (
Index("ix_datasets_hunt", "hunt_id"),
Index("ix_datasets_status", "processing_status"),
)
class DatasetRow(Base):
"""Individual row from a CSV dataset, stored as JSON blob."""
__tablename__ = "dataset_rows"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
@@ -125,7 +130,6 @@ class DatasetRow(Base):
data: Mapped[dict] = mapped_column(JSON, nullable=False)
normalized_data: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
# relationships
dataset: Mapped["Dataset"] = relationship(back_populates="rows")
annotations: Mapped[list["Annotation"]] = relationship(
back_populates="row", lazy="noload"
@@ -137,8 +141,7 @@ class DatasetRow(Base):
)
# ── Conversations ─────────────────────────────────────────────────────
# -- Conversations ---
class Conversation(Base):
__tablename__ = "conversations"
@@ -156,7 +159,6 @@ class Conversation(Base):
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
)
# relationships
hunt: Mapped[Optional["Hunt"]] = relationship(back_populates="conversations", lazy="selectin")
messages: Mapped[list["Message"]] = relationship(
back_populates="conversation", lazy="selectin", cascade="all, delete-orphan",
@@ -171,16 +173,15 @@ class Message(Base):
conversation_id: Mapped[str] = mapped_column(
String(32), ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False
)
role: Mapped[str] = mapped_column(String(16), nullable=False) # user | agent | system
role: Mapped[str] = mapped_column(String(16), nullable=False)
content: Mapped[str] = mapped_column(Text, nullable=False)
model_used: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) # wile | roadrunner | cluster
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
token_count: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
latency_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
response_meta: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
# relationships
conversation: Mapped["Conversation"] = relationship(back_populates="messages")
__table_args__ = (
@@ -188,8 +189,7 @@ class Message(Base):
)
# ── Annotations ───────────────────────────────────────────────────────
# -- Annotations ---
class Annotation(Base):
__tablename__ = "annotations"
@@ -205,19 +205,14 @@ class Annotation(Base):
String(32), ForeignKey("users.id"), nullable=True
)
text: Mapped[str] = mapped_column(Text, nullable=False)
severity: Mapped[str] = mapped_column(
String(16), default="info"
) # info | low | medium | high | critical
tag: Mapped[Optional[str]] = mapped_column(
String(32), nullable=True
) # suspicious | benign | needs-review
severity: Mapped[str] = mapped_column(String(16), default="info")
tag: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
highlight_color: Mapped[Optional[str]] = mapped_column(String(16), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
)
# relationships
row: Mapped[Optional["DatasetRow"]] = relationship(back_populates="annotations")
author: Mapped[Optional["User"]] = relationship(back_populates="annotations")
@@ -227,8 +222,7 @@ class Annotation(Base):
)
# ── Hypotheses ────────────────────────────────────────────────────────
# -- Hypotheses ---
class Hypothesis(Base):
__tablename__ = "hypotheses"
@@ -240,9 +234,7 @@ class Hypothesis(Base):
title: Mapped[str] = mapped_column(String(256), nullable=False)
description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
mitre_technique: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
status: Mapped[str] = mapped_column(
String(16), default="draft"
) # draft | active | confirmed | rejected
status: Mapped[str] = mapped_column(String(16), default="draft")
evidence_row_ids: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
evidence_notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
@@ -250,7 +242,6 @@ class Hypothesis(Base):
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
)
# relationships
hunt: Mapped[Optional["Hunt"]] = relationship(back_populates="hypotheses", lazy="selectin")
__table_args__ = (
@@ -258,21 +249,16 @@ class Hypothesis(Base):
)
# ── Enrichment Results ────────────────────────────────────────────────
# -- Enrichment Results ---
class EnrichmentResult(Base):
__tablename__ = "enrichment_results"
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
ioc_value: Mapped[str] = mapped_column(String(512), nullable=False, index=True)
ioc_type: Mapped[str] = mapped_column(
String(32), nullable=False
) # ip | hash_md5 | hash_sha1 | hash_sha256 | domain | url
source: Mapped[str] = mapped_column(String(32), nullable=False) # virustotal | abuseipdb | shodan | ai
verdict: Mapped[Optional[str]] = mapped_column(
String(16), nullable=True
) # clean | suspicious | malicious | unknown
ioc_type: Mapped[str] = mapped_column(String(32), nullable=False)
source: Mapped[str] = mapped_column(String(32), nullable=False)
verdict: Mapped[Optional[str]] = mapped_column(String(16), nullable=True)
confidence: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
raw_result: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
@@ -287,28 +273,24 @@ class EnrichmentResult(Base):
)
# ── AUP Keyword Themes & Keywords ────────────────────────────────────
# -- AUP Keyword Themes & Keywords ---
class KeywordTheme(Base):
"""A named category of keywords for AUP scanning (e.g. gambling, gaming)."""
__tablename__ = "keyword_themes"
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
name: Mapped[str] = mapped_column(String(128), unique=True, nullable=False, index=True)
color: Mapped[str] = mapped_column(String(16), default="#9e9e9e") # hex chip color
color: Mapped[str] = mapped_column(String(16), default="#9e9e9e")
enabled: Mapped[bool] = mapped_column(Boolean, default=True)
is_builtin: Mapped[bool] = mapped_column(Boolean, default=False) # seed-provided
is_builtin: Mapped[bool] = mapped_column(Boolean, default=False)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
# relationships
keywords: Mapped[list["Keyword"]] = relationship(
back_populates="theme", lazy="selectin", cascade="all, delete-orphan"
)
class Keyword(Base):
"""Individual keyword / pattern belonging to a theme."""
__tablename__ = "keywords"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
@@ -319,10 +301,102 @@ class Keyword(Base):
is_regex: Mapped[bool] = mapped_column(Boolean, default=False)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
# relationships
theme: Mapped["KeywordTheme"] = relationship(back_populates="keywords")
__table_args__ = (
Index("ix_keywords_theme", "theme_id"),
Index("ix_keywords_value", "value"),
)
# -- AI Analysis Tables (Phase 2) ---
class TriageResult(Base):
__tablename__ = "triage_results"
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
dataset_id: Mapped[str] = mapped_column(
String(32), ForeignKey("datasets.id", ondelete="CASCADE"), nullable=False, index=True
)
row_start: Mapped[int] = mapped_column(Integer, nullable=False)
row_end: Mapped[int] = mapped_column(Integer, nullable=False)
risk_score: Mapped[float] = mapped_column(Float, default=0.0)
verdict: Mapped[str] = mapped_column(String(20), default="pending")
findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
suspicious_indicators: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
mitre_techniques: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
model_used: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
dataset: Mapped["Dataset"] = relationship(back_populates="triage_results")
class HostProfile(Base):
__tablename__ = "host_profiles"
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
hunt_id: Mapped[str] = mapped_column(
String(32), ForeignKey("hunts.id", ondelete="CASCADE"), nullable=False, index=True
)
hostname: Mapped[str] = mapped_column(String(256), nullable=False)
fqdn: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
client_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
risk_score: Mapped[float] = mapped_column(Float, default=0.0)
risk_level: Mapped[str] = mapped_column(String(20), default="unknown")
artifact_summary: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
timeline_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
suspicious_findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
mitre_techniques: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
llm_analysis: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
model_used: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
node_used: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
)
hunt: Mapped["Hunt"] = relationship(back_populates="host_profiles")
class HuntReport(Base):
__tablename__ = "hunt_reports"
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
hunt_id: Mapped[str] = mapped_column(
String(32), ForeignKey("hunts.id", ondelete="CASCADE"), nullable=False, index=True
)
status: Mapped[str] = mapped_column(String(20), default="pending")
exec_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
full_report: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
findings: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
recommendations: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
mitre_mapping: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
ioc_table: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
host_risk_summary: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
models_used: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
generation_time_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
)
hunt: Mapped["Hunt"] = relationship(back_populates="reports")
class AnomalyResult(Base):
__tablename__ = "anomaly_results"
id: Mapped[str] = mapped_column(String(32), primary_key=True, default=_new_id)
dataset_id: Mapped[str] = mapped_column(
String(32), ForeignKey("datasets.id", ondelete="CASCADE"), nullable=False, index=True
)
row_id: Mapped[Optional[int]] = mapped_column(
Integer, ForeignKey("dataset_rows.id", ondelete="CASCADE"), nullable=True
)
anomaly_score: Mapped[float] = mapped_column(Float, default=0.0)
distance_from_centroid: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
cluster_id: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
is_outlier: Mapped[bool] = mapped_column(Boolean, default=False)
explanation: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)