feat: Add Playbook Manager, Saved Searches, and Timeline View components

- Implemented PlaybookManager for creating and managing investigation playbooks with templates.
- Added SavedSearches component for managing bookmarked queries and recurring scans.
- Introduced TimelineView for visualizing forensic event timelines with zoomable charts.
- Enhanced backend processing with auto-queued jobs for dataset uploads and improved database concurrency.
- Updated frontend components for better user experience and performance optimizations.
- Documented changes in update log for future reference.
This commit is contained in:
2026-02-23 14:23:07 -05:00
parent 37a9584d0c
commit 5a2ad8ec1c
110 changed files with 10537 additions and 1185 deletions

View File

@@ -0,0 +1,124 @@
"""Tests for execution-mode behavior in /api/agent/assist."""
import io
import pytest
@pytest.mark.asyncio
async def test_agent_assist_policy_query_executes_scan(client):
    """A policy-oriented query should route through the keyword-scanner execution path."""
    # Create a hunt to scope the scan.
    hunt_res = await client.post("/api/hunts", json={"name": "Policy Hunt"})
    assert hunt_res.status_code == 200
    hunt_id = hunt_res.json()["id"]
    # Upload a small browser-history-style CSV containing one policy hit.
    rows = [
        b"User,visited_url,title,ClientId,Fqdn",
        b"Alice,https://www.pornhub.com/view_video.php,site,HOST-A,host-a.local",
        b"Bob,https://news.example.org/article,news,HOST-B,host-b.local",
    ]
    payload = b"\n".join(rows) + b"\n"
    upload_res = await client.post(
        f"/api/datasets/upload?hunt_id={hunt_id}",
        files={"file": ("web_history.csv", io.BytesIO(payload), "text/csv")},
    )
    assert upload_res.status_code == 200
    # Ensure the policy theme and keyword exist (201 on create, 409 if pre-existing).
    theme_res = await client.post(
        "/api/keywords/themes",
        json={"name": "Adult Content", "color": "#e91e63", "enabled": True},
    )
    assert theme_res.status_code in (201, 409)
    listing = await client.get("/api/keywords/themes")
    assert listing.status_code == 200
    adult_theme = next(t for t in listing.json()["themes"] if t["name"] == "Adult Content")
    kw_res = await client.post(
        f"/api/keywords/themes/{adult_theme['id']}/keywords",
        json={"value": "pornhub", "is_regex": False},
    )
    assert kw_res.status_code in (201, 409)
    # Execution-mode query: the assistant should run the scanner and report hits.
    assist_res = await client.post(
        "/api/agent/assist",
        json={
            "query": "Analyze browser history for policy-violating domains and summarize by user and host.",
            "hunt_id": hunt_id,
        },
    )
    assert assist_res.status_code == 200
    body = assist_res.json()
    assert body["model_used"] == "execution:keyword_scanner"
    assert body["execution"] is not None
    assert body["execution"]["policy_hits"] >= 1
    assert len(body["execution"]["top_user_hosts"]) >= 1
@pytest.mark.asyncio
async def test_agent_assist_execution_preference_off_stays_advisory(client):
    """With execution_preference=off the assist endpoint must stay advisory-only."""
    created = await client.post("/api/hunts", json={"name": "No Exec Hunt"})
    assert created.status_code == 200
    request_body = {
        "query": "Analyze browser history for policy-violating domains and summarize by user and host.",
        "hunt_id": created.json()["id"],
        "execution_preference": "off",
    }
    res = await client.post("/api/agent/assist", json=request_body)
    assert res.status_code == 200
    payload = res.json()
    # No scanner run: advisory model, no execution payload.
    assert payload["model_used"] != "execution:keyword_scanner"
    assert payload["execution"] is None
@pytest.mark.asyncio
async def test_agent_assist_execution_preference_force_executes(client):
    """execution_preference=force runs the scanner even for a generic query."""
    # Hunt + dataset are needed even though the query text is not policy-specific.
    hunt_res = await client.post("/api/hunts", json={"name": "Force Exec Hunt"})
    assert hunt_res.status_code == 200
    hunt_id = hunt_res.json()["id"]
    payload = (
        b"User,visited_url,title,ClientId,Fqdn\n"
        b"Alice,https://www.pornhub.com/view_video.php,site,HOST-A,host-a.local\n"
    )
    upload_res = await client.post(
        f"/api/datasets/upload?hunt_id={hunt_id}",
        files={"file": ("web_history.csv", io.BytesIO(payload), "text/csv")},
    )
    assert upload_res.status_code == 200
    theme_res = await client.post(
        "/api/keywords/themes",
        json={"name": "Adult Content", "color": "#e91e63", "enabled": True},
    )
    assert theme_res.status_code in (201, 409)
    listing = await client.get("/api/keywords/themes")
    assert listing.status_code == 200
    adult_theme = next(t for t in listing.json()["themes"] if t["name"] == "Adult Content")
    kw_res = await client.post(
        f"/api/keywords/themes/{adult_theme['id']}/keywords",
        json={"value": "pornhub", "is_regex": False},
    )
    assert kw_res.status_code in (201, 409)
    # Generic query, but force flag should still trigger execution.
    res = await client.post(
        "/api/agent/assist",
        json={
            "query": "Summarize notable activity in this hunt.",
            "hunt_id": hunt_id,
            "execution_preference": "force",
        },
    )
    assert res.status_code == 200
    body = res.json()
    assert body["model_used"] == "execution:keyword_scanner"
    assert body["execution"] is not None

View File

@@ -77,6 +77,26 @@ class TestHuntEndpoints:
assert resp.status_code == 404
async def test_hunt_progress(self, client):
    """Progress endpoint reports scope/status fields for a hunt with a dataset."""
    created = await client.post("/api/hunts", json={"name": "Progress Hunt"})
    hunt_id = created.json()["id"]
    # Attach one dataset so the progress calculation has scope.
    import io
    from tests.conftest import SAMPLE_CSV
    files = {"file": ("progress.csv", io.BytesIO(SAMPLE_CSV), "text/csv")}
    uploaded = await client.post(f"/api/datasets/upload?hunt_id={hunt_id}", files=files)
    assert uploaded.status_code == 200
    res = await client.get(f"/api/hunts/{hunt_id}/progress")
    assert res.status_code == 200
    payload = res.json()
    assert payload["hunt_id"] == hunt_id
    for field in ("progress_percent", "dataset_total", "network_status"):
        assert field in payload
@pytest.mark.asyncio
class TestDatasetEndpoints:
"""Test dataset upload and retrieval."""

View File

@@ -1,4 +1,4 @@
"""Tests for CSV parser and normalizer services."""
"""Tests for CSV parser and normalizer services."""
import pytest
from app.services.csv_parser import parse_csv_bytes, detect_encoding, detect_delimiter, infer_column_types
@@ -43,8 +43,9 @@ class TestCSVParser:
assert len(rows) == 2
def test_parse_empty_file(self):
with pytest.raises(Exception):
parse_csv_bytes(b"")
rows, meta = parse_csv_bytes(b"")
assert len(rows) == 0
assert meta["row_count"] == 0
def test_detect_encoding_utf8(self):
enc = detect_encoding(SAMPLE_CSV)
@@ -53,17 +54,15 @@ class TestCSVParser:
def test_infer_column_types(self):
types = infer_column_types(
["192.168.1.1", "10.0.0.1", "8.8.8.8"],
"src_ip",
[{"src_ip": "192.168.1.1"}, {"src_ip": "10.0.0.1"}, {"src_ip": "8.8.8.8"}],
)
assert types == "ip"
assert types["src_ip"] == "ip"
def test_infer_column_types_hash(self):
types = infer_column_types(
["d41d8cd98f00b204e9800998ecf8427e"],
"hash",
[{"hash": "d41d8cd98f00b204e9800998ecf8427e"}],
)
assert types == "hash_md5"
assert types["hash"] == "hash_md5"
class TestNormalizer:
@@ -94,7 +93,7 @@ class TestNormalizer:
start, end = detect_time_range(rows, column_mapping)
# Should detect time range from timestamp column
if start:
assert "2025" in start
assert "2025" in str(start)
def test_normalize_rows(self):
rows = [{"SourceAddr": "10.0.0.1", "ProcessName": "cmd.exe"}]
@@ -102,3 +101,6 @@ class TestNormalizer:
normalized = normalize_rows(rows, mapping)
assert len(normalized) == 1
assert normalized[0].get("src_ip") == "10.0.0.1"

View File

@@ -197,3 +197,27 @@ async def test_quick_scan(client: AsyncClient):
assert "total_hits" in data
# powershell should match at least one row
assert data["total_hits"] > 0
@pytest.mark.asyncio
async def test_quick_scan_cache_hit(client: AsyncClient):
    """Second quick scan should return cache hit metadata.

    Fix: setup responses (theme create, keyword add, dataset upload) are now
    asserted before their JSON bodies are dereferenced, so a setup failure
    surfaces as a clear status assertion rather than a KeyError on "id".
    """
    theme_res = await client.post("/api/keywords/themes", json={"name": "Quick Cache Theme", "color": "#00aa00"})
    assert theme_res.status_code in (200, 201), theme_res.text
    tid = theme_res.json()["id"]
    kw_res = await client.post(f"/api/keywords/themes/{tid}/keywords", json={"value": "chrome.exe"})
    assert kw_res.status_code in (200, 201, 409), kw_res.text
    from tests.conftest import SAMPLE_CSV
    import io
    files = {"file": ("cache_quick.csv", io.BytesIO(SAMPLE_CSV), "text/csv")}
    upload = await client.post("/api/datasets/upload", files=files)
    assert upload.status_code == 200, upload.text
    ds_id = upload.json()["id"]
    # First scan may be a miss (cold) or hit (warmed by earlier tests).
    first = await client.get(f"/api/keywords/scan/quick?dataset_id={ds_id}")
    assert first.status_code == 200
    assert first.json().get("cache_status") in ("miss", "hit")
    # Second scan over the same dataset must be served from cache.
    second = await client.get(f"/api/keywords/scan/quick?dataset_id={ds_id}")
    assert second.status_code == 200
    body = second.json()
    assert body.get("cache_used") is True
    assert body.get("cache_status") == "hit"

View File

@@ -0,0 +1,84 @@
"""Tests for network inventory endpoints and cache/polling behavior."""
import io
import pytest
from app.services.host_inventory import inventory_cache
from tests.conftest import SAMPLE_CSV
@pytest.mark.asyncio
async def test_inventory_status_none_for_unknown_hunt(client):
    """Status endpoint reports "none" for a hunt with no cached inventory."""
    missing_hunt = "hunt-does-not-exist"
    # Guarantee a cold cache for the probe.
    inventory_cache.invalidate(missing_hunt)
    inventory_cache.clear_building(missing_hunt)
    res = await client.get(f"/api/network/inventory-status?hunt_id={missing_hunt}")
    assert res.status_code == 200
    payload = res.json()
    assert payload["hunt_id"] == missing_hunt
    assert payload["status"] == "none"
@pytest.mark.asyncio
async def test_host_inventory_cold_cache_returns_202(client):
    """A cold inventory cache should answer 202 with a "building" status."""
    # Create a hunt and attach a dataset to it.
    hunt_res = await client.post("/api/hunts", json={"name": "Net Hunt"})
    hunt_id = hunt_res.json()["id"]
    upload_res = await client.post(
        "/api/datasets/upload",
        files={"file": ("network.csv", io.BytesIO(SAMPLE_CSV), "text/csv")},
        params={"hunt_id": hunt_id},
    )
    assert upload_res.status_code == 200
    # Force the cache cold so the endpoint must defer to a build.
    inventory_cache.invalidate(hunt_id)
    inventory_cache.clear_building(hunt_id)
    res = await client.get(f"/api/network/host-inventory?hunt_id={hunt_id}")
    assert res.status_code == 202
    assert res.json()["status"] == "building"
@pytest.mark.asyncio
async def test_host_inventory_ready_cache_returns_200(client):
    """A warm cache is served directly with 200 and reported as ready."""
    hunt_res = await client.post("/api/hunts", json={"name": "Ready Hunt"})
    hunt_id = hunt_res.json()["id"]
    # Seed the cache with a single-host inventory.
    host = {
        "id": "host-1",
        "hostname": "HOST-1",
        "fqdn": "HOST-1.local",
        "client_id": "C.1234abcd",
        "ips": ["10.0.0.10"],
        "os": "Windows 10",
        "users": ["alice"],
        "datasets": ["test"],
        "row_count": 5,
    }
    stats = {
        "total_hosts": 1,
        "hosts_with_ips": 1,
        "hosts_with_users": 1,
        "total_datasets_scanned": 1,
        "total_rows_scanned": 5,
    }
    inventory_cache.put(hunt_id, {"hosts": [host], "connections": [], "stats": stats})
    res = await client.get(f"/api/network/host-inventory?hunt_id={hunt_id}")
    assert res.status_code == 200
    payload = res.json()
    assert payload["stats"]["total_hosts"] == 1
    assert len(payload["hosts"]) == 1
    assert payload["hosts"][0]["hostname"] == "HOST-1"
    # The status endpoint should agree that the inventory is ready.
    status_res = await client.get(f"/api/network/inventory-status?hunt_id={hunt_id}")
    assert status_res.status_code == 200
    assert status_res.json()["status"] == "ready"

View File

@@ -0,0 +1,82 @@
"""Scale-oriented network endpoint tests (summary/subgraph/backpressure)."""
import pytest
from app.config import settings
from app.services.host_inventory import inventory_cache
@pytest.mark.asyncio
async def test_network_summary_from_cache(client):
    """Summary endpoint serves stats and a ranked top-N straight from cache."""
    hunt_id = "scale-hunt-summary"
    cached = {
        "hosts": [
            {"id": "h1", "hostname": "H1", "ips": ["10.0.0.1"], "users": ["a"], "row_count": 50},
            {"id": "h2", "hostname": "H2", "ips": [], "users": [], "row_count": 10},
        ],
        "connections": [
            {"source": "h1", "target": "8.8.8.8", "count": 7},
            {"source": "h1", "target": "h2", "count": 3},
        ],
        "stats": {"total_hosts": 2, "total_rows_scanned": 60},
    }
    inventory_cache.put(hunt_id, cached)
    res = await client.get(f"/api/network/summary?hunt_id={hunt_id}&top_n=1")
    assert res.status_code == 200
    payload = res.json()
    assert payload["stats"]["total_hosts"] == 2
    # top_n=1 keeps only the busiest host, which is h1 (row_count 50 > 10).
    assert len(payload["top_hosts"]) == 1
    assert payload["top_hosts"][0]["id"] == "h1"
@pytest.mark.asyncio
async def test_network_subgraph_truncates(client):
    """Subgraph endpoint enforces max_hosts/max_edges and flags truncation."""
    hunt_id = "scale-hunt-subgraph"
    hosts = [
        {"id": f"h{i}", "hostname": f"H{i}", "ips": [], "users": [], "row_count": 100 - i}
        for i in range(1, 8)
    ]
    edges = [
        {"source": "h1", "target": "h2", "count": 20},
        {"source": "h1", "target": "h3", "count": 15},
        {"source": "h2", "target": "h4", "count": 5},
        {"source": "h3", "target": "h5", "count": 4},
    ]
    inventory_cache.put(
        hunt_id,
        {"hosts": hosts, "connections": edges, "stats": {"total_hosts": 7, "total_rows_scanned": 999}},
    )
    # Ask for a view far smaller than the cached graph.
    res = await client.get(f"/api/network/subgraph?hunt_id={hunt_id}&max_hosts=3&max_edges=2")
    assert res.status_code == 200
    payload = res.json()
    assert len(payload["hosts"]) <= 3
    assert len(payload["connections"]) <= 2
    assert payload["stats"]["truncated"] is True
@pytest.mark.asyncio
async def test_manual_job_submit_backpressure_returns_429(client):
    """With a zero backlog budget, manual job submission must be rejected."""
    saved_backlog = settings.JOB_QUEUE_MAX_BACKLOG
    settings.JOB_QUEUE_MAX_BACKLOG = 0
    try:
        res = await client.post("/api/analysis/jobs/submit/triage", json={"params": {"dataset_id": "abc"}})
        assert res.status_code == 429
    finally:
        # Always restore the global setting for subsequent tests.
        settings.JOB_QUEUE_MAX_BACKLOG = saved_backlog
@pytest.mark.asyncio
async def test_network_host_inventory_deferred_when_queue_backlogged(client):
    """Cold-cache inventory builds are deferred when the job queue is saturated."""
    hunt_id = "deferred-hunt"
    # Start from a cold cache so a build would normally be queued.
    inventory_cache.invalidate(hunt_id)
    inventory_cache.clear_building(hunt_id)
    saved_backlog = settings.JOB_QUEUE_MAX_BACKLOG
    settings.JOB_QUEUE_MAX_BACKLOG = 0
    try:
        res = await client.get(f"/api/network/host-inventory?hunt_id={hunt_id}")
        assert res.status_code == 202
        assert res.json()["status"] == "deferred"
    finally:
        # Restore the global limit so later tests are unaffected.
        settings.JOB_QUEUE_MAX_BACKLOG = saved_backlog

View File

@@ -0,0 +1,203 @@
"""Tests for new feature API routes: MITRE, Timeline, Playbooks, Saved Searches."""
import pytest
import pytest_asyncio
class TestMitreRoutes:
    """Tests for /api/mitre endpoints."""

    @pytest.mark.asyncio
    async def test_mitre_coverage_empty(self, client):
        """Coverage starts empty but still enumerates every tactic."""
        resp = await client.get("/api/mitre/coverage")
        assert resp.status_code == 200
        payload = resp.json()
        assert "tactics" in payload
        assert "technique_count" in payload
        assert payload["technique_count"] == 0
        # 14 MITRE tactics
        assert len(payload["tactics"]) == 14

    @pytest.mark.asyncio
    async def test_mitre_coverage_with_hunt_filter(self, client):
        """Filtering by an unknown hunt yields zero techniques."""
        resp = await client.get("/api/mitre/coverage?hunt_id=nonexistent")
        assert resp.status_code == 200
        assert resp.json()["technique_count"] == 0
class TestTimelineRoutes:
    """Tests for /api/timeline endpoints."""

    @pytest.mark.asyncio
    async def test_timeline_hunt_not_found(self, client):
        """An unknown hunt id yields 404."""
        res = await client.get("/api/timeline/hunt/nonexistent")
        assert res.status_code == 404

    @pytest.mark.asyncio
    async def test_timeline_with_hunt(self, client):
        """A freshly created hunt returns a timeline envelope for that hunt."""
        created = await client.post("/api/hunts", json={"name": "Timeline Test"})
        assert created.status_code in (200, 201)
        hunt_id = created.json()["id"]
        res = await client.get(f"/api/timeline/hunt/{hunt_id}")
        assert res.status_code == 200
        payload = res.json()
        assert payload["hunt_id"] == hunt_id
        for key in ("events", "datasets"):
            assert key in payload
class TestPlaybookRoutes:
    """Tests for /api/playbooks endpoints.

    Fix: every create/fetch response is asserted before its JSON body is
    dereferenced, so a failed POST surfaces as a clear status assertion
    instead of a KeyError on the missing "id" field.
    """

    @pytest.mark.asyncio
    async def test_list_playbooks_empty(self, client):
        resp = await client.get("/api/playbooks")
        assert resp.status_code == 200
        assert resp.json()["playbooks"] == []

    @pytest.mark.asyncio
    async def test_get_templates(self, client):
        resp = await client.get("/api/playbooks/templates")
        assert resp.status_code == 200
        templates = resp.json()["templates"]
        assert len(templates) >= 2
        assert templates[0]["name"] == "Standard Threat Hunt"

    @pytest.mark.asyncio
    async def test_create_playbook(self, client):
        resp = await client.post("/api/playbooks", json={
            "name": "My Investigation",
            "description": "Test playbook",
            "steps": [
                {"title": "Step 1", "description": "Upload data", "step_type": "upload", "target_route": "/upload"},
                {"title": "Step 2", "description": "Triage", "step_type": "analysis", "target_route": "/analysis"},
            ],
        })
        assert resp.status_code == 201
        data = resp.json()
        assert data["name"] == "My Investigation"
        assert len(data["steps"]) == 2

    @pytest.mark.asyncio
    async def test_playbook_crud(self, client):
        # Create
        resp = await client.post("/api/playbooks", json={
            "name": "CRUD Test",
            "steps": [{"title": "Do something"}],
        })
        assert resp.status_code == 201
        pb_id = resp.json()["id"]
        # Get
        resp = await client.get(f"/api/playbooks/{pb_id}")
        assert resp.status_code == 200
        assert resp.json()["name"] == "CRUD Test"
        assert len(resp.json()["steps"]) == 1
        # Update
        resp = await client.put(f"/api/playbooks/{pb_id}", json={"name": "Updated"})
        assert resp.status_code == 200
        # Delete
        resp = await client.delete(f"/api/playbooks/{pb_id}")
        assert resp.status_code == 200

    @pytest.mark.asyncio
    async def test_playbook_step_completion(self, client):
        # Create with step; fail loudly if creation did not succeed.
        resp = await client.post("/api/playbooks", json={
            "name": "Step Test",
            "steps": [{"title": "Task 1"}],
        })
        assert resp.status_code == 201
        pb_id = resp.json()["id"]
        # Get to find step ID; assert the fetch itself worked.
        resp = await client.get(f"/api/playbooks/{pb_id}")
        assert resp.status_code == 200
        steps = resp.json()["steps"]
        step_id = steps[0]["id"]
        assert steps[0]["is_completed"] is False
        # Mark complete
        resp = await client.put(f"/api/playbooks/steps/{step_id}", json={"is_completed": True, "notes": "Done!"})
        assert resp.status_code == 200
        assert resp.json()["is_completed"] is True
class TestSavedSearchRoutes:
    """Tests for /api/searches endpoints.

    Fix: the create response in test_search_crud is asserted before "id"
    is read from its JSON body, so a setup failure produces a clear
    assertion error instead of a KeyError.
    """

    @pytest.mark.asyncio
    async def test_list_empty(self, client):
        resp = await client.get("/api/searches")
        assert resp.status_code == 200
        assert resp.json()["searches"] == []

    @pytest.mark.asyncio
    async def test_create_saved_search(self, client):
        resp = await client.post("/api/searches", json={
            "name": "Suspicious IPs",
            "search_type": "ioc_search",
            "query_params": {"ioc_value": "203.0.113"},
        })
        assert resp.status_code == 201
        data = resp.json()
        assert data["name"] == "Suspicious IPs"
        assert data["search_type"] == "ioc_search"

    @pytest.mark.asyncio
    async def test_search_crud(self, client):
        # Create; fail loudly if creation did not succeed.
        resp = await client.post("/api/searches", json={
            "name": "Test Query",
            "search_type": "keyword_scan",
            "query_params": {"theme": "malware"},
        })
        assert resp.status_code == 201
        s_id = resp.json()["id"]
        # Get
        resp = await client.get(f"/api/searches/{s_id}")
        assert resp.status_code == 200
        # Update
        resp = await client.put(f"/api/searches/{s_id}", json={"name": "Updated Query"})
        assert resp.status_code == 200
        # Run
        resp = await client.post(f"/api/searches/{s_id}/run")
        assert resp.status_code == 200
        data = resp.json()
        assert "result_count" in data
        assert "delta" in data
        # Delete
        resp = await client.delete(f"/api/searches/{s_id}")
        assert resp.status_code == 200
class TestStixExport:
"""Tests for /api/export/stix endpoints."""
@pytest.mark.asyncio
async def test_stix_export_hunt_not_found(self, client):
    """Exporting an unknown hunt id yields 404."""
    res = await client.get("/api/export/stix/nonexistent-id")
    assert res.status_code == 404
@pytest.mark.asyncio
async def test_stix_export_empty_hunt(self, client):
"""Export from a real hunt with no data returns valid but minimal bundle."""
# Create an empty hunt whose export is then requested.
hunt_resp = await client.post("/api/hunts", json={"name": "STIX Test Hunt"})
assert hunt_resp.status_code in (200, 201)
hunt_id = hunt_resp.json()["id"]
resp = await client.get(f"/api/export/stix/{hunt_id}")
assert resp.status_code == 200
data = resp.json()
assert data["type"] == "bundle"
# NOTE(review): this indexes data["objects"] before the membership check
# below — consider asserting "objects" in data first so a missing key
# fails as an assertion rather than a KeyError/IndexError.
assert data["objects"][0]["spec_version"] == "2.1"  # spec_version is on objects, not bundle
assert "objects" in data
# At minimum should have the identity object
types = [o["type"] for o in data["objects"]]
assert "identity" in types