"""
|
|
Threat Intelligence and Machine Learning Module
|
|
|
|
This module provides threat scoring, anomaly detection, and predictive analytics.
|
|
"""
|
|
|
|
from typing import Dict, Any, List, Optional
|
|
import random # For demo purposes - would use actual ML models in production
|
|
|
|
|
|


class ThreatAnalyzer:
    """Analyzes threats using ML models and heuristics."""

    def __init__(self, model_version: str = "1.0"):
        """
        Initialize the threat analyzer.

        Args:
            model_version: Version of the ML models to use
        """
        self.model_version = model_version

    def analyze_host(self, host_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze a host for threats.

        Args:
            host_data: Host information and telemetry

        Returns:
            Dictionary with threat score and indicators
        """
        # In production this would use ML models; for the demo we rely on
        # simple heuristics.
        score = 0.0
        confidence = 0.8
        indicators = []

        # Check for suspicious patterns in the hostname
        hostname = host_data.get("hostname", "")
        if "temp" in hostname.lower() or "test" in hostname.lower():
            score += 0.2
            indicators.append({
                "type": "suspicious_hostname",
                "description": "Hostname contains suspicious keywords",
                "severity": "low"
            })

        # Check metadata for anomalies (unusual processes, connections, etc.)
        metadata = host_data.get("host_metadata", {})
        if metadata and "suspicious_process" in str(metadata):
            score += 0.5
            indicators.append({
                "type": "suspicious_process",
                "description": "Unusual process detected",
                "severity": "high"
            })

        # Clamp the score to the [0, 1] range
        score = min(score, 1.0)

        return {
            "score": score,
            "confidence": confidence,
            "threat_type": self._classify_threat(score),
            "indicators": indicators,
            "ml_model_version": self.model_version
        }

    def analyze_artifact(self, artifact_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze an artifact for threats.

        Args:
            artifact_data: Artifact information

        Returns:
            Dictionary with threat score and indicators
        """
        score = 0.0
        confidence = 0.7
        indicators = []

        artifact_type = artifact_data.get("artifact_type", "")
        value = artifact_data.get("value", "")

        # Hash analysis
        if artifact_type == "hash":
            # In production, check against threat intelligence feeds
            if len(value) == 32:  # MD5 digests are 32 hex characters
                score += 0.3
                indicators.append({
                    "type": "weak_hash",
                    "description": "MD5 hashes are considered weak",
                    "severity": "low"
                })

        # IP analysis
        elif artifact_type == "ip":
            # Check whether the IP falls in a private (RFC 1918) range
            if value.startswith("10.") or value.startswith("192.168."):
                score += 0.1  # Private IP, lower risk
            else:
                score += 0.4  # Public IP, higher scrutiny
                indicators.append({
                    "type": "public_ip",
                    "description": "Communication with public IP",
                    "severity": "medium"
                })

        # Domain analysis
        elif artifact_type == "domain":
            # Check for suspicious TLDs or patterns
            suspicious_tlds = [".ru", ".cn", ".tk", ".xyz"]
            matched_tld = next(
                (tld for tld in suspicious_tlds if value.endswith(tld)), None
            )
            if matched_tld:
                score += 0.6
                indicators.append({
                    "type": "suspicious_tld",
                    "description": f"Domain uses potentially suspicious TLD '{matched_tld}'",
                    "severity": "high"
                })

        score = min(score, 1.0)

        return {
            "score": score,
            "confidence": confidence,
            "threat_type": self._classify_threat(score),
            "indicators": indicators,
            "ml_model_version": self.model_version
        }

    def detect_anomalies(
        self,
        historical_data: List[Dict[str, Any]],
        current_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Detect anomalies in current data compared to a historical baseline.

        Args:
            historical_data: Historical baseline data
            current_data: Current data to analyze

        Returns:
            Anomaly detection results
        """
        # Simple anomaly detection based on statistical deviation; in
        # production, use more sophisticated methods.
        anomalies = []
        score = 0.0

        # Compare the current event volume against the baseline. NOTE:
        # "event_count" is an assumed telemetry key used to illustrate the
        # technique; adjust it to match the actual data schema.
        if historical_data and len(historical_data) >= 3:
            baseline = [d.get("event_count", 0) for d in historical_data]
            mean = statistics.mean(baseline)
            stdev = statistics.stdev(baseline)
            current = current_data.get("event_count", 0)

            # Flag values more than two standard deviations from the mean
            if stdev > 0 and abs(current - mean) > 2 * stdev:
                anomalies.append({
                    "type": "behavioral_anomaly",
                    "description": "Behavior deviates from baseline",
                    "severity": "medium"
                })
                score = 0.5

        return {
            "is_anomaly": score > 0.4,
            "anomaly_score": score,
            "anomalies": anomalies
        }

    def _classify_threat(self, score: float) -> str:
        """Classify the threat level based on its score."""
        if score >= 0.8:
            return "critical"
        elif score >= 0.6:
            return "high"
        elif score >= 0.4:
            return "medium"
        elif score >= 0.2:
            return "low"
        else:
            return "benign"


def get_threat_analyzer(model_version: str = "1.0") -> ThreatAnalyzer:
    """
    Factory function to create a threat analyzer.

    Args:
        model_version: Version of the ML models

    Returns:
        Configured ThreatAnalyzer instance
    """
    return ThreatAnalyzer(model_version)
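

# A minimal usage sketch for local experimentation. The host and artifact
# payloads below are illustrative assumptions rather than a documented schema.
if __name__ == "__main__":
    analyzer = get_threat_analyzer()

    host_result = analyzer.analyze_host({"hostname": "temp-build-01"})
    print("Host analysis:", host_result)

    artifact_result = analyzer.analyze_artifact({
        "artifact_type": "domain",
        "value": "updates.example.xyz",
    })
    print("Artifact analysis:", artifact_result)

    anomaly_result = analyzer.detect_anomalies(
        historical_data=[{"event_count": n} for n in (100, 110, 105)],
        current_data={"event_count": 400},
    )
    print("Anomaly detection:", anomaly_result)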