feat: interactive network map, IOC highlighting, AUP hunt selector, type filters

- NetworkMap: hunt-scoped force-directed graph with click-to-inspect popover
- NetworkMap: zoom/pan (wheel, drag, buttons), viewport transform
- NetworkMap: clickable IP/Host/Domain/URL legend chips to filter node types
- NetworkMap: brighter colors, 20% smaller nodes
- DatasetViewer: IOC columns highlighted with colored headers + cell tinting
- AUPScanner: hunt dropdown replacing dataset checkboxes, auto-select all
- Rename 'Social Media (Personal)' theme to 'Social Media' with DB migration
- Fix /api/hunts timeout: Dataset.rows lazy='noload' (was selectin cascade)
- Add OS column mapping to normalizer
- Full backend services, DB models, alembic migrations, new routes
- New components: Dashboard, HuntManager, FileUpload, NetworkMap, etc.
- Docker Compose deployment with nginx reverse proxy
2026-03-01 14:00:20 -05:00 · 2026-02-19 15:41:15 -05:00
parent d0c9f88268
commit 9b98ab9614
92 changed files with 13042 additions and 1089 deletions
--- a/backend/app/services/reports.py
+++ b/backend/app/services/reports.py
@@ -0,0 +1,425 @@
+"""Report generation — JSON, HTML, and CSV export for hunt investigations.
+
+Generates comprehensive investigation reports including:
+- Hunt metadata and status
+- Dataset summaries with IOC counts
+- Hypotheses and their evidence
+- Annotations timeline
+- Enrichment verdicts
+- Agent conversation history
+- Cross-hunt correlations (listed for completeness; not populated by ReportGenerator in this module)
+"""
+
+import csv
+import io
+import json
+import logging
+from dataclasses import asdict
+from datetime import datetime, timezone
+from typing import Optional
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db.models import (
+    Hunt, Dataset, DatasetRow, Hypothesis,
+    Annotation, Conversation, Message, EnrichmentResult,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class ReportGenerator:
+    """Build exportable reports (JSON dict, HTML page, CSV text) for one hunt.
+
+    The class defines no ``__init__`` and stores nothing on ``self``; every
+    method works purely from its arguments.
+
+    ``generate_hunt_report`` is the entry point. It loads the report data with
+    ``_gather_hunt_data`` and either returns that dict unchanged or hands it
+    to ``_render_html`` / ``_render_csv``.
+    """
+
+    # Upper bound on enrichment records loaded into a report.
+    _MAX_ENRICHMENTS = 100
+    # Upper bound on table rows rendered per HTML section
+    # (annotations and enrichments).
+    _MAX_HTML_ROWS = 50
+    # Annotation text is cut to this many characters in the HTML table.
+    _MAX_ANNOTATION_CHARS = 200
+    # Timestamps are displayed as the first 19 characters of their string
+    # form, which for an ISO-8601 value is ``YYYY-MM-DDTHH:MM:SS``.
+    _TIMESTAMP_CHARS = 19
+    # CSV layout, one tuple per section:
+    # (banner label, key in the report dict, header row, record field names).
+    _CSV_SECTIONS = (
+        (
+            "HYPOTHESES",
+            "hypotheses",
+            ("Title", "MITRE Technique", "Status", "Description", "Evidence Notes"),
+            ("title", "mitre_technique", "status", "description", "evidence_notes"),
+        ),
+        (
+            "ANNOTATIONS",
+            "annotations",
+            ("Severity", "Tag", "Text", "Dataset ID", "Row ID", "Created"),
+            ("severity", "tag", "text", "dataset_id", "row_id", "created_at"),
+        ),
+        (
+            "ENRICHMENTS",
+            "enrichments",
+            ("IOC Value", "IOC Type", "Source", "Verdict", "Score", "Country"),
+            ("ioc_value", "ioc_type", "source", "verdict", "score", "country"),
+        ),
+    )
+
+    async def generate_hunt_report(
+        self,
+        hunt_id: str,
+        db: AsyncSession,
+        format: str = "json",
+        include_rows: bool = False,
+        max_rows: int = 500,
+    ) -> dict | str:
+        """Generate a comprehensive report for a hunt investigation.
+
+        Args:
+            hunt_id: Primary key of the hunt to report on.
+            db: Session used for every query.
+            format: ``"json"``, ``"html"`` or ``"csv"``; matched
+                case-insensitively. Any other value falls back to JSON and
+                logs a warning.
+            include_rows: When true, each dataset summary also carries its
+                row payloads.
+            max_rows: Per-dataset cap on rows loaded when ``include_rows``
+                is set.
+
+        Returns:
+            The report dict for JSON, a string for HTML or CSV, or
+            ``{"error": "Hunt not found"}`` when the hunt does not exist.
+        """
+        report_data = await self._gather_hunt_data(
+            hunt_id, db, include_rows=include_rows, max_rows=max_rows,
+        )
+        if not report_data:
+            return {"error": "Hunt not found"}
+
+        requested = (format or "json").strip().lower()
+        if requested == "html":
+            return self._render_html(report_data)
+        if requested == "csv":
+            return self._render_csv(report_data)
+        if requested != "json":
+            # Keep the historical fallback, but make the mismatch visible.
+            logger.warning(
+                "Unknown report format %r; falling back to JSON", format,
+            )
+        return report_data
+
+    async def _gather_hunt_data(
+        self,
+        hunt_id: str,
+        db: AsyncSession,
+        include_rows: bool = False,
+        max_rows: int = 500,
+    ) -> dict | None:
+        """Gather all data for a hunt report.
+
+        Queries run in a fixed order: hunt, datasets (plus optional rows),
+        hypotheses, annotations, conversations with their messages, and
+        enrichments.
+
+        Returns:
+            A dict with the keys ``report_metadata``, ``hunt``, ``summary``,
+            ``datasets``, ``hypotheses``, ``annotations``, ``conversations``
+            and ``enrichments``; or ``None`` when no hunt has ``hunt_id``.
+        """
+        hunt_result = await db.execute(select(Hunt).where(Hunt.id == hunt_id))
+        hunt = hunt_result.scalar_one_or_none()
+        if not hunt:
+            return None
+
+        ds_result = await db.execute(
+            select(Dataset).where(Dataset.hunt_id == hunt_id)
+        )
+        datasets = ds_result.scalars().all()
+
+        dataset_summaries = []
+        for ds in datasets:
+            dataset_summaries.append(
+                await self._summarize_dataset(
+                    ds, db, include_rows=include_rows, max_rows=max_rows,
+                )
+            )
+
+        hypotheses_data = await self._load_hypotheses(hunt_id, db)
+        annotations_data = await self._load_annotations(
+            [ds.id for ds in datasets], db,
+        )
+        conversations_data = await self._load_conversations(hunt_id, db)
+        enrichment_data = await self._load_enrichments(datasets, db)
+
+        now = datetime.now(timezone.utc)
+        return {
+            "report_metadata": {
+                "generated_at": now.isoformat(),
+                "format_version": "1.0",
+                "generator": "ThreatHunt Report Engine",
+            },
+            "hunt": {
+                "id": hunt.id,
+                "name": hunt.name,
+                "description": hunt.description,
+                "status": hunt.status,
+                "created_at": hunt.created_at.isoformat(),
+                "updated_at": hunt.updated_at.isoformat(),
+            },
+            "summary": {
+                "dataset_count": len(datasets),
+                # A dataset without a recorded row count contributes zero
+                # instead of breaking the sum.
+                "total_rows": sum(ds.row_count or 0 for ds in datasets),
+                "hypothesis_count": len(hypotheses_data),
+                "confirmed_hypotheses": sum(
+                    1 for h in hypotheses_data if h["status"] == "confirmed"
+                ),
+                "annotation_count": len(annotations_data),
+                "critical_annotations": sum(
+                    1 for a in annotations_data if a["severity"] == "critical"
+                ),
+                "conversation_count": len(conversations_data),
+                "enrichment_count": len(enrichment_data),
+                "malicious_iocs": sum(
+                    1 for e in enrichment_data if e["verdict"] == "malicious"
+                ),
+            },
+            "datasets": dataset_summaries,
+            "hypotheses": hypotheses_data,
+            "annotations": annotations_data,
+            "conversations": conversations_data,
+            "enrichments": enrichment_data,
+        }
+
+    async def _summarize_dataset(
+        self,
+        ds,
+        db: AsyncSession,
+        include_rows: bool = False,
+        max_rows: int = 500,
+    ) -> dict:
+        """Describe one dataset, optionally with up to ``max_rows`` rows."""
+        summary = {
+            "id": ds.id,
+            "name": ds.name,
+            "filename": ds.filename,
+            "source_tool": ds.source_tool,
+            "row_count": ds.row_count,
+            "columns": list((ds.column_schema or {}).keys()),
+            "ioc_columns": ds.ioc_columns or {},
+            "time_range": {
+                "start": ds.time_range_start,
+                "end": ds.time_range_end,
+            },
+            "created_at": ds.created_at.isoformat(),
+        }
+
+        if include_rows:
+            rows_result = await db.execute(
+                select(DatasetRow)
+                .where(DatasetRow.dataset_id == ds.id)
+                .order_by(DatasetRow.row_index)
+                .limit(max_rows)
+            )
+            summary["rows"] = [row.data for row in rows_result.scalars().all()]
+
+        return summary
+
+    async def _load_hypotheses(self, hunt_id: str, db: AsyncSession) -> list[dict]:
+        """Load every hypothesis attached to the hunt as plain dicts."""
+        hyp_result = await db.execute(
+            select(Hypothesis).where(Hypothesis.hunt_id == hunt_id)
+        )
+        return [
+            {
+                "id": h.id,
+                "title": h.title,
+                "description": h.description,
+                "mitre_technique": h.mitre_technique,
+                "status": h.status,
+                "evidence_row_ids": h.evidence_row_ids,
+                "evidence_notes": h.evidence_notes,
+                "created_at": h.created_at.isoformat(),
+                "updated_at": h.updated_at.isoformat(),
+            }
+            for h in hyp_result.scalars().all()
+        ]
+
+    async def _load_annotations(
+        self, dataset_ids: list, db: AsyncSession,
+    ) -> list[dict]:
+        """Load annotations for the given datasets, oldest first."""
+        if not dataset_ids:
+            # No datasets means nothing to match; skip the query entirely.
+            return []
+
+        ann_result = await db.execute(
+            select(Annotation)
+            .where(Annotation.dataset_id.in_(dataset_ids))
+            .order_by(Annotation.created_at)
+        )
+        return [
+            {
+                "id": a.id,
+                "dataset_id": a.dataset_id,
+                "row_id": a.row_id,
+                "text": a.text,
+                "severity": a.severity,
+                "tag": a.tag,
+                "created_at": a.created_at.isoformat(),
+            }
+            for a in ann_result.scalars().all()
+        ]
+
+    async def _load_conversations(
+        self, hunt_id: str, db: AsyncSession,
+    ) -> list[dict]:
+        """Load the hunt's conversations, each with its messages in time order."""
+        conv_result = await db.execute(
+            select(Conversation).where(Conversation.hunt_id == hunt_id)
+        )
+
+        conversations_data = []
+        for conv in conv_result.scalars().all():
+            msg_result = await db.execute(
+                select(Message)
+                .where(Message.conversation_id == conv.id)
+                .order_by(Message.created_at)
+            )
+            conversations_data.append({
+                "id": conv.id,
+                "title": conv.title,
+                "messages": [
+                    {
+                        "role": m.role,
+                        "content": m.content,
+                        "model_used": m.model_used,
+                        "node_used": m.node_used,
+                        "latency_ms": m.latency_ms,
+                        "created_at": m.created_at.isoformat(),
+                    }
+                    for m in msg_result.scalars().all()
+                ],
+            })
+        return conversations_data
+
+    async def _load_enrichments(self, datasets, db: AsyncSession) -> list[dict]:
+        """Load at most ``_MAX_ENRICHMENTS`` enrichment records.
+
+        Nothing is loaded unless at least one dataset declares IOC columns.
+        The query runs exactly once per report: running it once per dataset
+        would append the same rows repeatedly, duplicating entries and
+        inflating the ``enrichment_count`` / ``malicious_iocs`` totals.
+
+        NOTE(review): the query is not filtered by hunt, dataset or IOC
+        value, so it can return enrichment rows unrelated to this hunt.
+        Scoping it requires knowing how EnrichmentResult relates to dataset
+        values - confirm against the model before tightening.
+        """
+        if not any(ds.ioc_columns for ds in datasets):
+            return []
+
+        enrich_result = await db.execute(
+            select(EnrichmentResult)
+            .where(EnrichmentResult.source.isnot(None))
+            .limit(self._MAX_ENRICHMENTS)
+        )
+        return [
+            {
+                "ioc_value": e.ioc_value,
+                "ioc_type": e.ioc_type,
+                "source": e.source,
+                "verdict": e.verdict,
+                "score": e.score,
+                "tags": e.tags,
+                "country": e.country,
+            }
+            for e in enrich_result.scalars().all()
+        ]
+
+    def _render_html(self, data: dict) -> str:
+        """Render report as self-contained HTML.
+
+        Every value taken from the report dict is HTML-escaped before it is
+        interpolated: hunt names, analyst notes and IOC values are free text
+        (IOC values are frequently attacker-controlled URLs or domains) and
+        must not be able to inject markup into the page. Missing or ``None``
+        values render as a placeholder rather than the literal ``None``.
+
+        Args:
+            data: Report dict shaped like the return value of
+                ``_gather_hunt_data``. Absent sections are skipped.
+
+        Returns:
+            A complete HTML document as one string.
+        """
+        # Imported here so this method brings its own dependency with it.
+        from html import escape
+
+        def esc(value, default=""):
+            """HTML-escape *value*; ``None`` and ``""`` render as *default*."""
+            if value is None or value == "":
+                value = default
+            return escape(str(value))
+
+        def stamp(value):
+            """Escape a timestamp after trimming it to whole seconds."""
+            return esc(str(value or "")[: self._TIMESTAMP_CHARS])
+
+        hunt = data.get("hunt") or {}
+        summary = data.get("summary") or {}
+        hypotheses = data.get("hypotheses") or []
+        annotations = data.get("annotations") or []
+        datasets = data.get("datasets") or []
+        enrichments = data.get("enrichments") or []
+        meta = data.get("report_metadata") or {}
+
+        # Fragments are collected and joined once at the end.
+        parts = [f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>ThreatHunt Report: {esc(hunt.get('name'), 'Unknown')}</title>
+<style>
+  :root {{ --bg: #0d1117; --surface: #161b22; --border: #30363d; --text: #c9d1d9; --accent: #58a6ff; --red: #f85149; --orange: #d29922; --green: #3fb950; }}
+  * {{ box-sizing: border-box; margin: 0; padding: 0; }}
+  body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); line-height: 1.6; padding: 2rem; }}
+  .container {{ max-width: 1200px; margin: 0 auto; }}
+  h1 {{ color: var(--accent); border-bottom: 2px solid var(--border); padding-bottom: 0.5rem; margin-bottom: 1rem; }}
+  h2 {{ color: var(--accent); margin: 1.5rem 0 0.75rem; }}
+  h3 {{ color: var(--text); margin: 1rem 0 0.5rem; }}
+  .card {{ background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; margin: 0.75rem 0; }}
+  .stat-grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 0.75rem; }}
+  .stat {{ background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; text-align: center; }}
+  .stat .value {{ font-size: 2rem; font-weight: 700; color: var(--accent); }}
+  .stat .label {{ font-size: 0.85rem; color: #8b949e; }}
+  table {{ width: 100%; border-collapse: collapse; margin: 0.5rem 0; }}
+  th, td {{ padding: 0.5rem 0.75rem; border: 1px solid var(--border); text-align: left; }}
+  th {{ background: var(--surface); color: var(--accent); }}
+  .badge {{ display: inline-block; padding: 0.15rem 0.5rem; border-radius: 999px; font-size: 0.8rem; font-weight: 600; }}
+  .badge-malicious {{ background: var(--red); color: white; }}
+  .badge-suspicious {{ background: var(--orange); color: #000; }}
+  .badge-clean {{ background: var(--green); color: #000; }}
+  .badge-critical {{ background: var(--red); color: white; }}
+  .badge-high {{ background: #da3633; color: white; }}
+  .badge-medium {{ background: var(--orange); color: #000; }}
+  .badge-confirmed {{ background: var(--green); color: #000; }}
+  .badge-active {{ background: var(--accent); color: #000; }}
+  .footer {{ margin-top: 2rem; padding-top: 1rem; border-top: 1px solid var(--border); color: #8b949e; font-size: 0.85rem; }}
+</style>
+</head>
+<body>
+<div class="container">
+<h1>🔍 ThreatHunt Report: {esc(hunt.get('name'), 'Untitled')}</h1>
+<p><strong>Hunt ID:</strong> {esc(hunt.get('id'))}<br>
+<strong>Status:</strong> {esc(hunt.get('status'), 'unknown')}<br>
+<strong>Description:</strong> {esc(hunt.get('description'), 'N/A')}<br>
+<strong>Created:</strong> {esc(hunt.get('created_at'))}</p>
+
+<h2>Summary</h2>
+<div class="stat-grid">
+  <div class="stat"><div class="value">{esc(summary.get('dataset_count'), 0)}</div><div class="label">Datasets</div></div>
+  <div class="stat"><div class="value">{(summary.get('total_rows') or 0):,}</div><div class="label">Total Rows</div></div>
+  <div class="stat"><div class="value">{esc(summary.get('hypothesis_count'), 0)}</div><div class="label">Hypotheses</div></div>
+  <div class="stat"><div class="value">{esc(summary.get('confirmed_hypotheses'), 0)}</div><div class="label">Confirmed</div></div>
+  <div class="stat"><div class="value">{esc(summary.get('annotation_count'), 0)}</div><div class="label">Annotations</div></div>
+  <div class="stat"><div class="value">{esc(summary.get('malicious_iocs'), 0)}</div><div class="label">Malicious IOCs</div></div>
+</div>
+"""]
+
+        # Hypotheses section
+        if hypotheses:
+            parts.append("<h2>Hypotheses</h2>\n")
+            parts.append(
+                "<table><tr><th>Title</th><th>MITRE</th><th>Status</th><th>Description</th></tr>\n"
+            )
+            for h in hypotheses:
+                status = h.get("status")
+                # Only statuses that have a CSS rule get a coloured badge.
+                status_class = (
+                    f"badge-{status}" if status in ("confirmed", "active") else ""
+                )
+                parts.append(
+                    f"<tr><td>{esc(h.get('title'))}</td>"
+                    f"<td>{esc(h.get('mitre_technique'), 'N/A')}</td>"
+                    f"<td><span class='badge {status_class}'>{esc(status)}</span></td>"
+                    f"<td>{esc(h.get('description'))}</td></tr>\n"
+                )
+            parts.append("</table>\n")
+
+        # Datasets section
+        if datasets:
+            parts.append("<h2>Datasets</h2>\n")
+            for ds in datasets:
+                time_range = ds.get("time_range") or {}
+                parts.append(f"""<div class="card">
+<h3>{esc(ds.get('name'))} ({esc(ds.get('filename'))})</h3>
+<p><strong>Source:</strong> {esc(ds.get('source_tool'), 'N/A')} |
+<strong>Rows:</strong> {(ds.get('row_count') or 0):,} |
+<strong>IOC Columns:</strong> {len(ds.get('ioc_columns') or {})} |
+<strong>Time Range:</strong> {esc(time_range.get('start'), 'N/A')} to {esc(time_range.get('end'), 'N/A')}</p>
+</div>\n""")
+
+        # Annotations
+        if annotations:
+            critical = [
+                a for a in annotations
+                if a.get("severity") in ("critical", "high")
+            ]
+            parts.append(
+                f"<h2>Annotations ({len(annotations)} total, {len(critical)} critical/high)</h2>\n"
+            )
+            parts.append(
+                "<table><tr><th>Severity</th><th>Tag</th><th>Text</th><th>Created</th></tr>\n"
+            )
+            for a in annotations[: self._MAX_HTML_ROWS]:
+                severity = a.get("severity")
+                sev_class = (
+                    f"badge-{severity}"
+                    if severity in ("critical", "high", "medium")
+                    else ""
+                )
+                # Truncate before escaping so an entity is never cut in half.
+                text = (a.get("text") or "")[: self._MAX_ANNOTATION_CHARS]
+                parts.append(
+                    f"<tr><td><span class='badge {sev_class}'>{esc(severity)}</span></td>"
+                    f"<td>{esc(a.get('tag'), 'N/A')}</td>"
+                    f"<td>{esc(text)}</td>"
+                    f"<td>{stamp(a.get('created_at'))}</td></tr>\n"
+                )
+            parts.append("</table>\n")
+
+        # Enrichments
+        if enrichments:
+            malicious = [e for e in enrichments if e.get("verdict") == "malicious"]
+            parts.append(
+                f"<h2>IOC Enrichment ({len(enrichments)} results, {len(malicious)} malicious)</h2>\n"
+            )
+            parts.append(
+                "<table><tr><th>IOC</th><th>Type</th><th>Source</th><th>Verdict</th><th>Score</th></tr>\n"
+            )
+            for e in enrichments[: self._MAX_HTML_ROWS]:
+                verdict = e.get("verdict")
+                # The verdict lands inside a quoted attribute, so it is
+                # escaped there as well as in the visible cell text.
+                verdict_class = f"badge-{esc(verdict)}" if verdict else ""
+                parts.append(
+                    f"<tr><td><code>{esc(e.get('ioc_value'))}</code></td>"
+                    f"<td>{esc(e.get('ioc_type'))}</td>"
+                    f"<td>{esc(e.get('source'))}</td>"
+                    f"<td><span class='badge {verdict_class}'>{esc(verdict)}</span></td>"
+                    f"<td>{esc(e.get('score'), 0)}</td></tr>\n"
+                )
+            parts.append("</table>\n")
+
+        parts.append(f"""
+<div class="footer">
+  <p>Generated by ThreatHunt Report Engine | {stamp(meta.get('generated_at'))}</p>
+</div>
+</div>
+</body>
+</html>""")
+
+        return "".join(parts)
+
+    def _render_csv(self, data: dict) -> str:
+        """Render key report data as CSV.
+
+        The output holds three sections (hypotheses, annotations,
+        enrichments), each introduced by a ``=== NAME ===`` banner line and
+        a header row; sections after the first are preceded by a blank line.
+        Missing fields and ``None`` values become empty cells.
+
+        NOTE(review): banner lines end with ``\\n`` while ``csv.writer`` ends
+        rows with its default ``\\r\\n``. The mix is kept so existing
+        consumers receive byte-identical output.
+
+        NOTE(review): cell values are written verbatim. Text beginning with
+        ``=``, ``+``, ``-`` or ``@`` may be treated as a formula by
+        spreadsheet applications; decide whether exports need neutralising.
+
+        Args:
+            data: Report dict shaped like the return value of
+                ``_gather_hunt_data``.
+
+        Returns:
+            The CSV text.
+        """
+        output = io.StringIO()
+        writer = csv.writer(output)
+
+        for index, (label, data_key, header, fields) in enumerate(self._CSV_SECTIONS):
+            separator = "\n" if index else ""
+            output.write(f"{separator}=== {label} ===\n")
+            writer.writerow(header)
+            for record in data.get(data_key, []):
+                writer.writerow([record.get(field, "") for field in fields])
+
+        return output.getvalue()
+
+
+# Shared module-level instance. ReportGenerator defines no __init__ and keeps
+# no per-instance state, so this single object can be reused by every importer.
+report_generator = ReportGenerator()