mirror of
https://github.com/mblanke/ThreatHunt.git
synced 2026-03-01 14:00:20 -05:00
465 lines
19 KiB
Python
465 lines
19 KiB
Python
"""Pluggable Analyzer Framework for ThreatHunt.
|
|
|
|
Each analyzer implements a simple protocol:
|
|
- name / description properties
|
|
- async analyze(rows, config) -> list[AlertCandidate]
|
|
|
|
The AnalyzerRegistry discovers and runs all enabled analyzers against
|
|
a dataset, producing alert candidates that the alert system can persist.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import math
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
from collections import Counter, defaultdict
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Optional, Sequence
|
|
|
|
# Module-level logger, named after this module per stdlib logging convention.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ── Alert Candidate DTO ──────────────────────────────────────────────
|
|
|
|
|
|
@dataclass
class AlertCandidate:
    """A single finding from an analyzer, before it becomes a persisted Alert.

    Produced by ``BaseAnalyzer.analyze`` implementations; the alert system
    (outside this module) persists these as real alerts.
    """

    # Name of the analyzer that produced this finding (matches BaseAnalyzer.name).
    analyzer: str
    # Short human-readable headline for the finding.
    title: str
    severity: str  # critical | high | medium | low | info
    # Longer explanation, typically referencing the row index and matched value.
    description: str
    # Supporting artifacts backing the finding.
    evidence: list[dict] = field(default_factory=list)  # [{row_index, field, value, ...}]
    # MITRE ATT&CK technique ID (e.g. "T1003"); None when not applicable.
    mitre_technique: Optional[str] = None
    # Free-form labels for filtering/grouping.
    tags: list[str] = field(default_factory=list)
    # Analyzer-assigned priority score.
    score: float = 0.0  # 0-100
|
|
|
|
|
|
# ── Base Analyzer ────────────────────────────────────────────────────
|
|
|
|
|
|
class BaseAnalyzer(ABC):
    """Interface every analyzer must implement.

    Concrete analyzers supply a unique ``name``, a human-readable
    ``description``, and an async ``analyze`` method that inspects a list
    of row dicts and returns zero or more :class:`AlertCandidate` findings.
    Subclasses in this module satisfy the abstract properties with plain
    class attributes, which is enough to pass the ABC instantiation check.
    """

    @property
    @abstractmethod
    def name(self) -> str: ...  # unique registry key, e.g. "entropy"

    @property
    @abstractmethod
    def description(self) -> str: ...  # one-line summary for analyzer listings

    @abstractmethod
    async def analyze(
        self, rows: list[dict[str, Any]], config: dict[str, Any] | None = None
    ) -> list[AlertCandidate]: ...  # scan rows, return findings (never raise for bad rows)
|
|
|
|
|
|
# ── Built-in Analyzers ──────────────────────────────────────────────
|
|
|
|
|
|
class EntropyAnalyzer(BaseAnalyzer):
    """Detects high-entropy strings (encoded payloads, obfuscated commands)."""

    name = "entropy"
    description = "Flags fields with high Shannon entropy (possible encoding/obfuscation)"

    # Field names (across data sources) worth measuring for entropy.
    ENTROPY_FIELDS = [
        "command_line", "commandline", "process_command_line", "cmdline",
        "powershell_command", "script_block", "url", "uri", "path",
        "file_path", "target_filename", "query", "dns_query",
    ]
    DEFAULT_THRESHOLD = 4.5

    @staticmethod
    def _shannon(s: str) -> float:
        """Shannon entropy of *s* in bits per character.

        Strings shorter than 8 characters yield 0.0 — too little data for a
        meaningful entropy estimate.
        """
        if not s or len(s) < 8:
            return 0.0
        total = len(s)
        entropy = 0.0
        for count in Counter(s).values():
            p = count / total
            entropy -= p * math.log2(p)
        return entropy

    async def analyze(self, rows, config=None):
        """Flag every (row, field) pair whose entropy reaches the threshold.

        Config keys: ``entropy_threshold`` (default 4.5 bits/char) and
        ``min_length`` (default 20; shorter values are skipped).
        """
        cfg = config or {}
        threshold = cfg.get("entropy_threshold", self.DEFAULT_THRESHOLD)
        min_length = cfg.get("min_length", 20)
        findings: list[AlertCandidate] = []

        for row_idx, row in enumerate(rows):
            for field_name in self.ENTROPY_FIELDS:
                text = str(row.get(field_name, ""))
                if len(text) < min_length:
                    continue
                ent = self._shannon(text)
                if ent < threshold:
                    continue
                # Severity escalates with measured entropy.
                if ent > 5.5:
                    sev = "critical"
                elif ent > 5.0:
                    sev = "high"
                else:
                    sev = "medium"
                findings.append(AlertCandidate(
                    analyzer=self.name,
                    title=f"High-entropy string in {field_name}",
                    severity=sev,
                    description=f"Shannon entropy {ent:.2f} (threshold {threshold}) in row {row_idx}, field '{field_name}'",
                    evidence=[{"row_index": row_idx, "field": field_name, "value": text[:200], "entropy": round(ent, 3)}],
                    mitre_technique="T1027",  # Obfuscated Files or Information
                    tags=["obfuscation", "entropy"],
                    score=min(100, ent * 18),
                ))
        return findings
|
|
|
|
|
|
class SuspiciousCommandAnalyzer(BaseAnalyzer):
    """Detects known-bad command patterns (credential dumping, lateral movement, persistence)."""

    name = "suspicious_commands"
    description = "Flags processes executing known-suspicious command patterns"

    PATTERNS: list[tuple[str, str, str, str]] = [
        # (regex, title, severity, mitre_technique)
        (r"mimikatz|sekurlsa|lsadump|kerberos::list", "Mimikatz / Credential Dumping", "critical", "T1003"),
        (r"(?i)-enc\s+[A-Za-z0-9+/=]{40,}", "Encoded PowerShell command", "high", "T1059.001"),
        (r"(?i)invoke-(mimikatz|expression|webrequest|shellcode)", "Suspicious PowerShell Invoke", "high", "T1059.001"),
        (r"(?i)net\s+(user|localgroup|group)\s+/add", "Local account creation", "high", "T1136.001"),
        (r"(?i)schtasks\s+/create", "Scheduled task creation", "medium", "T1053.005"),
        (r"(?i)reg\s+add\s+.*\\run", "Registry Run key persistence", "high", "T1547.001"),
        (r"(?i)wmic\s+.*(process\s+call|shadowcopy\s+delete)", "WMI abuse / shadow copy deletion", "critical", "T1047"),
        (r"(?i)psexec|winrm|wmic\s+/node:", "Lateral movement tool", "high", "T1021"),
        (r"(?i)certutil\s+-urlcache", "Certutil download (LOLBin)", "high", "T1105"),
        (r"(?i)bitsadmin\s+/transfer", "BITSAdmin download", "medium", "T1197"),
        (r"(?i)vssadmin\s+delete\s+shadows", "VSS shadow deletion (ransomware)", "critical", "T1490"),
        (r"(?i)bcdedit.*recoveryenabled.*no", "Boot config tamper (ransomware)", "critical", "T1490"),
        (r"(?i)attrib\s+\+h\s+\+s", "Hidden file attribute set", "low", "T1564.001"),
        (r"(?i)netsh\s+advfirewall\s+.*disable", "Firewall disabled", "high", "T1562.004"),
        (r"(?i)whoami\s*/priv", "Privilege enumeration", "medium", "T1033"),
        (r"(?i)nltest\s+/dclist", "Domain controller enumeration", "medium", "T1018"),
        (r"(?i)dsquery|ldapsearch|adfind", "Active Directory enumeration", "medium", "T1087.002"),
        (r"(?i)procdump.*-ma\s+lsass", "LSASS memory dump", "critical", "T1003.001"),
        (r"(?i)rundll32.*comsvcs.*MiniDump", "LSASS dump via comsvcs", "critical", "T1003.001"),
    ]

    # Field names (across data sources) that may carry a command line.
    CMD_FIELDS = [
        "command_line", "commandline", "process_command_line", "cmdline",
        "parent_command_line", "powershell_command",
    ]

    # Severity -> score mapping, hoisted so it is not rebuilt for every match.
    _SEVERITY_SCORES = {"critical": 95, "high": 80, "medium": 60, "low": 30}

    # Compiled once at class-definition time. Previously analyze() recompiled
    # all 19 patterns on every call even though PATTERNS never changes.
    # re.IGNORECASE makes the inline (?i) flags redundant but harmless.
    _COMPILED = [(re.compile(p, re.IGNORECASE), t, s, m) for p, t, s, m in PATTERNS]

    async def analyze(self, rows, config=None):
        """Scan command-line fields of each row against the pattern list.

        Each matching (row, field, pattern) triple yields one alert, so a
        single command line can produce several candidates.
        """
        alerts: list[AlertCandidate] = []

        for idx, row in enumerate(rows):
            for fld in self.CMD_FIELDS:
                val = str(row.get(fld, ""))
                if len(val) < 3:  # too short to match any real command
                    continue
                for pattern, title, sev, mitre in self._COMPILED:
                    if pattern.search(val):
                        alerts.append(AlertCandidate(
                            analyzer=self.name,
                            title=title,
                            severity=sev,
                            description=f"Suspicious command pattern in row {idx}: {val[:200]}",
                            evidence=[{"row_index": idx, "field": fld, "value": val[:300]}],
                            mitre_technique=mitre,
                            tags=["command", "suspicious"],
                            score=self._SEVERITY_SCORES.get(sev, 50),
                        ))
        return alerts
|
|
|
|
|
|
class NetworkAnomalyAnalyzer(BaseAnalyzer):
    """Detects anomalous network patterns (beaconing, unusual ports, large transfers)."""

    name = "network_anomaly"
    description = "Flags anomalous network behavior (beaconing, unusual ports, large transfers)"

    SUSPICIOUS_PORTS = {4444, 5555, 6666, 8888, 9999, 1234, 31337, 12345, 54321, 1337}
    # NOTE(review): C2_PORTS is not referenced by analyze(); kept for backward
    # compatibility with any external users of this attribute.
    C2_PORTS = {443, 8443, 8080, 4443, 9443}

    # Field-name aliases used by different log sources, in priority order.
    _DST_IP_KEYS = ("dst_ip", "destination_ip", "dest_ip")
    _DST_PORT_KEYS = ("dst_port", "destination_port", "dest_port")
    _BYTES_KEYS = ("bytes_sent", "bytes_out", "sent_bytes")

    @staticmethod
    def _first_value(row: dict[str, Any], keys: tuple[str, ...]) -> str:
        """Return the first non-empty value among *keys*, stringified.

        Fixes two defects of the previous chained ``row.get(k, row.get(...))``
        lookups: a key that was present with value None stringified to the
        literal "None" (and was then tracked as a beaconing destination), and
        a present-but-empty key shadowed a populated fallback key.
        """
        for key in keys:
            val = row.get(key)
            if val is not None and val != "":
                return str(val)
        return ""

    async def analyze(self, rows, config=None):
        """Detect large transfers, beaconing, and suspicious destination ports.

        Config keys: ``large_transfer_threshold`` (bytes, default 10,000,000)
        and ``beacon_threshold`` (contact count, default 20).
        """
        config = config or {}
        alerts: list[AlertCandidate] = []

        # Destination IP -> row indices, for beaconing detection.
        dst_freq: dict[str, list[int]] = defaultdict(list)
        port_hits: list[tuple[int, str, int]] = []
        # Invariant per call — hoisted out of the row loop (was a config.get
        # per row).
        large_threshold = config.get("large_transfer_threshold", 10_000_000)

        for idx, row in enumerate(rows):
            dst_ip = self._first_value(row, self._DST_IP_KEYS)
            dst_port = self._first_value(row, self._DST_PORT_KEYS)

            if dst_ip:
                dst_freq[dst_ip].append(idx)

            if dst_port:
                try:
                    port_num = int(dst_port)
                except (ValueError, TypeError):
                    pass  # non-numeric port value; ignore, as before
                else:
                    if port_num in self.SUSPICIOUS_PORTS:
                        port_hits.append((idx, dst_ip, port_num))

            # Large transfer detection
            bytes_val = self._first_value(row, self._BYTES_KEYS)
            try:
                if int(bytes_val or 0) > large_threshold:
                    alerts.append(AlertCandidate(
                        analyzer=self.name,
                        title="Large data transfer detected",
                        severity="medium",
                        description=f"Row {idx}: {bytes_val} bytes sent to {dst_ip}",
                        evidence=[{"row_index": idx, "dst_ip": dst_ip, "bytes": str(bytes_val)}],
                        mitre_technique="T1048",
                        tags=["exfiltration", "network"],
                        score=65,
                    ))
            except (ValueError, TypeError):
                pass  # non-numeric byte count; ignore, as before

        # Beaconing: IPs contacted more than threshold times
        beacon_thresh = config.get("beacon_threshold", 20)
        for ip, indices in dst_freq.items():
            if len(indices) >= beacon_thresh:
                alerts.append(AlertCandidate(
                    analyzer=self.name,
                    title=f"Possible beaconing to {ip}",
                    severity="high",
                    description=f"Destination {ip} contacted {len(indices)} times (threshold: {beacon_thresh})",
                    evidence=[{"dst_ip": ip, "contact_count": len(indices), "sample_rows": indices[:10]}],
                    mitre_technique="T1071",
                    tags=["beaconing", "c2", "network"],
                    score=min(95, 50 + len(indices)),
                ))

        # Suspicious ports
        for idx, ip, port in port_hits:
            alerts.append(AlertCandidate(
                analyzer=self.name,
                title=f"Connection on suspicious port {port}",
                severity="medium",
                description=f"Row {idx}: connection to {ip}:{port}",
                evidence=[{"row_index": idx, "dst_ip": ip, "dst_port": port}],
                mitre_technique="T1571",
                tags=["suspicious_port", "network"],
                score=55,
            ))

        return alerts
|
|
|
|
|
|
class FrequencyAnomalyAnalyzer(BaseAnalyzer):
    """Detects statistically rare values that may indicate anomalies."""

    name = "frequency_anomaly"
    description = "Flags statistically rare field values (potential anomalies)"

    # Categorical fields where a rare value is interesting.
    FIELDS_TO_CHECK = [
        "process_name", "image_name", "parent_process_name",
        "user", "username", "user_name",
        "event_type", "action", "status",
    ]

    async def analyze(self, rows, config=None):
        """Flag values that are both relatively and absolutely rare.

        A value is reported when its share of occurrences is at most
        ``rarity_threshold`` (default 1%) AND it occurs at most 3 times.
        Datasets smaller than ``min_rows`` (default 50) are skipped because
        percentages are meaningless at tiny sample sizes.
        """
        config = config or {}
        rarity_threshold = config.get("rarity_threshold", 0.01)  # <1% occurrence
        min_rows = config.get("min_rows", 50)
        alerts: list[AlertCandidate] = []

        if len(rows) < min_rows:
            return alerts

        for fld in self.FIELDS_TO_CHECK:
            # Collect row indices per value in ONE pass. The previous
            # implementation rescanned every row for each rare value,
            # i.e. O(rows * distinct_values) in the worst case. As before,
            # missing/falsy values are excluded from counts.
            rows_by_value: dict[str, list[int]] = defaultdict(list)
            for i, row in enumerate(rows):
                raw = row.get(fld)
                if raw:
                    rows_by_value[str(raw)].append(i)
            if not rows_by_value:
                continue
            total = sum(len(ix) for ix in rows_by_value.values())

            for val, indices in rows_by_value.items():
                cnt = len(indices)
                pct = cnt / total
                if pct <= rarity_threshold and cnt <= 3:
                    alerts.append(AlertCandidate(
                        analyzer=self.name,
                        title=f"Rare {fld}: {val[:80]}",
                        severity="low",
                        description=f"'{val}' appears {cnt}/{total} times ({pct:.2%}) in field '{fld}'",
                        evidence=[{"field": fld, "value": val[:200], "count": cnt, "total": total, "rows": indices[:5]}],
                        tags=["anomaly", "rare"],
                        score=max(20, 50 - (pct * 5000)),
                    ))

        return alerts
|
|
|
|
|
|
class AuthAnomalyAnalyzer(BaseAnalyzer):
    """Detects authentication anomalies (brute force, unusual logon types)."""

    name = "auth_anomaly"
    description = "Flags authentication anomalies (failed logins, unusual logon types)"

    # Windows logon-type codes of interest: 3 = network, 10 = RemoteInteractive.
    _WATCHED_LOGON_TYPES = ("3", "10")
    # Substrings that mark an event as authentication-related.
    _AUTH_MARKERS = ("logon", "auth", "login")

    async def analyze(self, rows, config=None):
        """Count failed logins per user and occurrences of watched logon types.

        Config key: ``brute_force_threshold`` (default 5 failed logins).
        """
        cfg = config or {}
        findings: list[AlertCandidate] = []

        # Accumulators: user -> rows with failed logins; logon type -> rows.
        failed_rows_by_user: dict[str, list[int]] = defaultdict(list)
        rows_by_logon_type: dict[str, list[int]] = defaultdict(list)

        for row_idx, row in enumerate(rows):
            event_kind = str(row.get("event_type", row.get("action", ""))).lower()
            outcome = str(row.get("status", row.get("result", ""))).lower()
            account = str(row.get("username", row.get("user", row.get("user_name", ""))))
            logon_code = str(row.get("logon_type", ""))

            if any(marker in event_kind for marker in self._AUTH_MARKERS):
                # A failure is flagged by status text or Windows event ID 4625.
                failed = "fail" in outcome or "4625" in str(row.get("event_id", ""))
                if failed and account:
                    failed_rows_by_user[account].append(row_idx)

            if logon_code in self._WATCHED_LOGON_TYPES:
                rows_by_logon_type[logon_code].append(row_idx)

        # Brute force: >5 failed logins for same user
        brute_thresh = cfg.get("brute_force_threshold", 5)
        for account, hit_rows in failed_rows_by_user.items():
            if len(hit_rows) < brute_thresh:
                continue
            findings.append(AlertCandidate(
                analyzer=self.name,
                title=f"Possible brute force: {account}",
                severity="high",
                description=f"User '{account}' had {len(hit_rows)} failed logins",
                evidence=[{"user": account, "failed_count": len(hit_rows), "rows": hit_rows[:10]}],
                mitre_technique="T1110",
                tags=["brute_force", "authentication"],
                score=min(90, 50 + len(hit_rows) * 3),
            ))

        # Unusual logon types
        for logon_code, hit_rows in rows_by_logon_type.items():
            if len(hit_rows) < 3:
                continue
            is_network = logon_code == "3"
            label = "Network logon (Type 3)" if is_network else "Remote Desktop (Type 10)"
            findings.append(AlertCandidate(
                analyzer=self.name,
                title=f"{label} detected",
                severity="medium" if is_network else "high",
                description=f"{len(hit_rows)} {label} events detected",
                evidence=[{"logon_type": logon_code, "count": len(hit_rows), "rows": hit_rows[:10]}],
                mitre_technique="T1021",
                tags=["authentication", "lateral_movement"],
                score=55 if is_network else 70,
            ))

        return findings
|
|
|
|
|
|
class PersistenceAnalyzer(BaseAnalyzer):
    """Detects persistence mechanisms (registry keys, services, scheduled tasks)."""

    name = "persistence"
    description = "Flags persistence mechanism installations"

    # (regex over registry path, alert title, MITRE technique)
    REGISTRY_PATTERNS = [
        (r"(?i)\\CurrentVersion\\Run", "Run key persistence", "T1547.001"),
        (r"(?i)\\Services\\", "Service installation", "T1543.003"),
        (r"(?i)\\Winlogon\\", "Winlogon persistence", "T1547.004"),
        (r"(?i)\\Image File Execution Options\\", "IFEO debugger persistence", "T1546.012"),
        (r"(?i)\\Explorer\\Shell Folders", "Shell folder hijack", "T1547.001"),
    ]

    # Compiled once at class-definition time. Previously analyze() recompiled
    # every pattern on each call even though REGISTRY_PATTERNS never changes.
    _COMPILED = [(re.compile(p), t, m) for p, t, m in REGISTRY_PATTERNS]

    async def analyze(self, rows, config=None):
        """Flag registry writes under known persistence keys and new services."""
        alerts: list[AlertCandidate] = []

        for idx, row in enumerate(rows):
            # Check registry paths
            reg_path = str(row.get("registry_key", row.get("target_object", row.get("registry_path", ""))))
            for pattern, title, mitre in self._COMPILED:
                if pattern.search(reg_path):
                    alerts.append(AlertCandidate(
                        analyzer=self.name,
                        title=title,
                        severity="high",
                        description=f"Row {idx}: {reg_path[:200]}",
                        evidence=[{"row_index": idx, "registry_key": reg_path[:300]}],
                        mitre_technique=mitre,
                        tags=["persistence", "registry"],
                        score=75,
                    ))

            # Check for service creation events (matches e.g. "service_created",
            # "service creation")
            event_type = str(row.get("event_type", "")).lower()
            if "service" in event_type and "creat" in event_type:
                svc_name = row.get("service_name", row.get("target_filename", "unknown"))
                alerts.append(AlertCandidate(
                    analyzer=self.name,
                    title=f"Service created: {svc_name}",
                    severity="medium",
                    description=f"Row {idx}: New service '{svc_name}' created",
                    evidence=[{"row_index": idx, "service_name": str(svc_name)}],
                    mitre_technique="T1543.003",
                    tags=["persistence", "service"],
                    score=60,
                ))

        return alerts
|
|
|
|
|
|
# ── Analyzer Registry ────────────────────────────────────────────────
|
|
|
|
|
|
# Singleton instances of every built-in analyzer, in the order they run.
# Each analyzer keeps no per-run instance state (all mutable state lives
# inside analyze()), so sharing one instance across runs is safe.
_ALL_ANALYZERS: list[BaseAnalyzer] = [
    EntropyAnalyzer(),
    SuspiciousCommandAnalyzer(),
    NetworkAnomalyAnalyzer(),
    FrequencyAnomalyAnalyzer(),
    AuthAnomalyAnalyzer(),
    PersistenceAnalyzer(),
]
|
|
|
|
|
|
def get_available_analyzers() -> list[dict[str, str]]:
    """Return metadata about all registered analyzers."""
    return [
        dict(name=analyzer.name, description=analyzer.description)
        for analyzer in _ALL_ANALYZERS
    ]
|
|
|
|
|
|
def get_analyzer(name: str) -> BaseAnalyzer | None:
    """Get an analyzer by name; None when no registered analyzer matches."""
    return next((a for a in _ALL_ANALYZERS if a.name == name), None)
|
|
|
|
|
|
async def run_all_analyzers(
    rows: list[dict[str, Any]],
    enabled: list[str] | None = None,
    config: dict[str, Any] | None = None,
) -> list[AlertCandidate]:
    """Run all (or selected) analyzers and return combined alert candidates.

    Args:
        rows: Flat list of row dicts (normalized_data or data from DatasetRow).
        enabled: Optional allowlist of analyzer names to run. Runs all if
            None; an empty list runs none. (Previously an empty list was
            tested for truthiness and silently ran every analyzer, which
            contradicted this contract.)
        config: Optional config overrides passed to each analyzer.

    Returns:
        Combined list of AlertCandidate from all analyzers, sorted by score desc.
    """
    config = config or {}
    results: list[AlertCandidate] = []

    for analyzer in _ALL_ANALYZERS:
        # `enabled is None` means "run everything"; any explicit list —
        # including an empty one — is an allowlist.
        if enabled is not None and analyzer.name not in enabled:
            continue
        try:
            candidates = await analyzer.analyze(rows, config)
        except Exception:
            # One broken analyzer must not abort the whole hunt; log with
            # traceback and continue with the remaining analyzers.
            logger.exception("Analyzer %s failed", analyzer.name)
            continue
        results.extend(candidates)
        logger.info("Analyzer %s produced %d alerts", analyzer.name, len(candidates))

    # Sort by score descending so the highest-priority findings come first.
    results.sort(key=lambda a: a.score, reverse=True)
    return results
|