feat: interactive network map, IOC highlighting, AUP hunt selector, type filters

- NetworkMap: hunt-scoped force-directed graph with click-to-inspect popover
- NetworkMap: zoom/pan (wheel, drag, buttons), viewport transform
- NetworkMap: clickable IP/Host/Domain/URL legend chips to filter node types
- NetworkMap: brighter colors, 20% smaller nodes
- DatasetViewer: IOC columns highlighted with colored headers + cell tinting
- AUPScanner: hunt dropdown replacing dataset checkboxes, auto-select all
- Rename 'Social Media (Personal)' theme to 'Social Media' with DB migration
- Fix /api/hunts timeout: Dataset.rows lazy='noload' (was selectin cascade)
- Add OS column mapping to normalizer
- Full backend services, DB models, alembic migrations, new routes
- New components: Dashboard, HuntManager, FileUpload, NetworkMap, etc.
- Docker Compose deployment with nginx reverse proxy
This commit is contained in:
2026-02-19 15:41:15 -05:00
parent d0c9f88268
commit 9b98ab9614
92 changed files with 13042 additions and 1089 deletions

View File

@@ -0,0 +1,104 @@
"""Tests for CSV parser and normalizer services."""
import pytest
from app.services.csv_parser import parse_csv_bytes, detect_encoding, detect_delimiter, infer_column_types
from app.services.normalizer import normalize_columns, normalize_rows, detect_ioc_columns, detect_time_range
from tests.conftest import SAMPLE_CSV, SAMPLE_HASH_CSV, make_csv_bytes
class TestCSVParser:
"""Tests for CSV parsing."""
def test_parse_csv_basic(self):
rows, meta = parse_csv_bytes(SAMPLE_CSV)
assert len(rows) == 5
assert "timestamp" in meta["columns"]
assert "hostname" in meta["columns"]
assert meta["encoding"] is not None
assert meta["delimiter"] == ","
def test_parse_csv_columns(self):
rows, meta = parse_csv_bytes(SAMPLE_CSV)
assert meta["columns"] == ["timestamp", "hostname", "src_ip", "dst_ip", "process_name", "command_line"]
def test_parse_csv_row_data(self):
rows, meta = parse_csv_bytes(SAMPLE_CSV)
assert rows[0]["hostname"] == "DESKTOP-ABC"
assert rows[0]["src_ip"] == "192.168.1.100"
assert rows[2]["process_name"] == "chrome.exe"
def test_parse_csv_hash_file(self):
rows, meta = parse_csv_bytes(SAMPLE_HASH_CSV)
assert len(rows) == 2
assert "md5" in meta["columns"]
assert "sha256" in meta["columns"]
def test_parse_tsv(self):
tsv_data = make_csv_bytes(
["host", "ip", "port"],
[["server1", "10.0.0.1", "443"], ["server2", "10.0.0.2", "80"]],
delimiter="\t",
)
rows, meta = parse_csv_bytes(tsv_data)
assert len(rows) == 2
def test_parse_empty_file(self):
with pytest.raises(Exception):
parse_csv_bytes(b"")
def test_detect_encoding_utf8(self):
enc = detect_encoding(SAMPLE_CSV)
assert enc is not None
assert "ascii" in enc.lower() or "utf" in enc.lower()
def test_infer_column_types(self):
types = infer_column_types(
["192.168.1.1", "10.0.0.1", "8.8.8.8"],
"src_ip",
)
assert types == "ip"
def test_infer_column_types_hash(self):
types = infer_column_types(
["d41d8cd98f00b204e9800998ecf8427e"],
"hash",
)
assert types == "hash_md5"
class TestNormalizer:
"""Tests for column normalization."""
def test_normalize_columns(self):
mapping = normalize_columns(["SourceAddr", "DestAddr", "ProcessName"])
assert "SourceAddr" in mapping
# Should map to canonical names
assert mapping.get("SourceAddr") in ("src_ip", "source_address", None) or isinstance(mapping.get("SourceAddr"), str)
def test_normalize_known_columns(self):
mapping = normalize_columns(["timestamp", "hostname", "src_ip"])
assert mapping.get("timestamp") == "timestamp"
assert mapping.get("hostname") == "hostname"
assert mapping.get("src_ip") == "src_ip"
def test_detect_ioc_columns(self):
rows, meta = parse_csv_bytes(SAMPLE_CSV)
column_mapping = normalize_columns(meta["columns"])
iocs = detect_ioc_columns(meta["columns"], meta["column_types"], column_mapping)
# Should detect IP columns
assert isinstance(iocs, dict)
def test_detect_time_range(self):
rows, meta = parse_csv_bytes(SAMPLE_CSV)
column_mapping = normalize_columns(meta["columns"])
start, end = detect_time_range(rows, column_mapping)
# Should detect time range from timestamp column
if start:
assert "2025" in start
def test_normalize_rows(self):
rows = [{"SourceAddr": "10.0.0.1", "ProcessName": "cmd.exe"}]
mapping = {"SourceAddr": "src_ip", "ProcessName": "process_name"}
normalized = normalize_rows(rows, mapping)
assert len(normalized) == 1
assert normalized[0].get("src_ip") == "10.0.0.1"