"""
Nmap Parser Module

Parses Nmap XML or JSON output to extract host information including:
- IP addresses, hostnames
- Operating system detection
- Device type classification (workstation/server/appliance)
- MAC vendor information
- Open ports and services
"""

import xml.etree.ElementTree as ET
import json
from typing import Dict, List, Any, Optional
import re

# Sentinel returned by the detection helpers when no OS could be determined.
# It is a non-empty string, so "is the OS known?" must be tested with
# _os_is_known() rather than plain truthiness.
_UNKNOWN_OS = 'Unknown'

# Short keywords that must match as whole words; as plain substrings they
# produce false positives ('ios' in "bios", 'arch' in "architecture", ...).
_WORD_BOUNDED_KEYWORDS = frozenset({'arch', 'ios', 'aix', 'iot'})

# Lower-cased OS types that identify network infrastructure.
_NETWORK_OS_TYPES = frozenset({
    'cisco', 'juniper', 'fortinet', 'palo alto', 'network device',
})

# Ports whose presence hints at a server role.
_SERVER_PORTS = frozenset({
    # Web servers
    80, 443, 8080, 8443,
    # Database servers
    3306, 5432, 1433, 27017, 6379,
    # Mail servers
    25, 587, 465, 110, 995, 143, 993,
    # File servers
    21, 22, 139, 445, 2049,
    # Directory services
    389, 636, 88, 464,
    # Application servers
    8000, 8001, 8888, 9000, 3000, 5000,
    # Virtualization
    902,
})

# Service names whose presence hints at a server role.
_SERVER_SERVICES = frozenset({
    'http', 'https', 'apache', 'nginx', 'iis',
    'mysql', 'postgresql', 'mssql', 'mongodb', 'redis',
    'smtp', 'pop3', 'imap',
    'ftp', 'ssh', 'smb', 'nfs',
    'ldap', 'ldaps', 'kerberos',
    'vmware',
})

# Services that on their own are enough to call a host a server.
_DEDICATED_SERVER_SERVICES = frozenset({
    'mysql', 'postgresql', 'mongodb', 'apache', 'nginx', 'iis',
})

# Ports treated as Windows indicators when inferring the OS.
_WINDOWS_PORTS = frozenset({135, 139, 445, 3389, 5985, 5986})


def _os_is_known(os_type: Any) -> bool:
    """Return True when *os_type* names an actual OS (not '' / 'Unknown')."""
    return bool(os_type) and os_type != _UNKNOWN_OS


def _to_int(value: Any, default: Optional[int] = 0) -> Optional[int]:
    """Convert *value* to int, returning *default* when it is not numeric."""
    try:
        return int(value)
    except (ValueError, TypeError):
        return default


def _as_text(value: Any) -> str:
    """Return *value* if it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ''


def _mentions(text: str, keywords) -> bool:
    """Return True if *text* contains any of *keywords*.

    Keywords listed in _WORD_BOUNDED_KEYWORDS must match as whole words;
    all others match as plain substrings.
    """
    for keyword in keywords:
        if keyword in _WORD_BOUNDED_KEYWORDS:
            if re.search(rf'\b{re.escape(keyword)}\b', text):
                return True
        elif keyword in text:
            return True
    return False


def _port_numbers(ports: Any) -> set:
    """Collect the integer port numbers from a list of port dictionaries.

    Entries that are not dictionaries, or whose 'port' value is missing or
    non-numeric, are ignored instead of raising.
    """
    numbers = set()
    for entry in ports or []:
        if isinstance(entry, dict):
            number = _to_int(entry.get('port'), None)
            if number is not None:
                numbers.add(number)
    return numbers


def _port_text_values(ports: Any, key: str) -> List[str]:
    """Return the lower-cased string stored under *key* for each port dict."""
    return [
        _as_text(entry.get(key)).lower()
        for entry in ports or []
        if isinstance(entry, dict)
    ]


def parse_nmap_xml(xml_content: str) -> List[Dict[str, Any]]:
    """
    Parse Nmap XML output and extract host information.

    Only hosts whose <status> element reports state "up" and that have an
    IP address are returned.

    Args:
        xml_content: Raw XML string from nmap -oX output

    Returns:
        List of host dictionaries with parsed information; an empty list
        if the XML cannot be parsed (the error is printed).
    """
    try:
        # Clean up XML content - remove any non-XML content before the
        # declaration (or before the root element if there is none).
        xml_start = xml_content.find('<?xml')
        if xml_start < 0:
            xml_start = xml_content.find('<nmaprun')
        if xml_start > 0:
            xml_content = xml_content[xml_start:]

        # NOTE(review): ElementTree is not hardened against maliciously
        # constructed XML (e.g. entity-expansion bombs). If scan files can
        # come from untrusted sources, consider a hardened parser.
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        print(f"XML parsing error: {e}")
        # Return empty list on parse error
        return []

    hosts = []
    for host_elem in root.findall('.//host'):
        # Check if host is up
        status = host_elem.find('status')
        if status is None or status.get('state') != 'up':
            continue

        host = _parse_host_element(host_elem)
        if host.get('ip'):
            hosts.append(host)

    return hosts


def parse_nmap_json(json_content: str) -> List[Dict[str, Any]]:
    """
    Parse Nmap JSON output and extract host information.

    Accepted top-level shapes:
    - a list of host objects;
    - an object with a 'hosts' or 'scan' member that is either a list of
      host objects or an object keyed by IP address (the key is used as
      the IP when the host object itself carries none).

    Args:
        json_content: JSON string from nmap with JSON output

    Returns:
        List of host dictionaries with parsed information; an empty list
        if the JSON cannot be decoded (the error is printed) or has an
        unrecognized structure.
    """
    try:
        data = json.loads(json_content)
    except json.JSONDecodeError as e:
        print(f"JSON parsing error: {e}")
        return []

    # Handle different JSON structures
    if isinstance(data, dict):
        # Try common JSON nmap output structures
        scan_results = data.get('hosts', data.get('scan', []))
    else:
        scan_results = data

    if isinstance(scan_results, dict):
        # Mapping keyed by IP: iterating it directly would only yield the
        # keys, so pair each host object with its key.
        candidates = []
        for address, host_data in scan_results.items():
            if not isinstance(host_data, dict):
                continue
            if not (host_data.get('ip') or host_data.get('address')):
                host_data = {**host_data, 'ip': address}
            candidates.append(host_data)
    elif isinstance(scan_results, list):
        candidates = scan_results
    else:
        return []

    hosts = []
    for host_data in candidates:
        if not isinstance(host_data, dict):
            continue  # skip malformed entries instead of crashing
        host = _parse_host_json(host_data)
        if host.get('ip'):
            hosts.append(host)

    return hosts


def _parse_host_element(host_elem: ET.Element) -> Dict[str, Any]:
    """
    Parse an individual host XML element.

    Args:
        host_elem: XML Element representing a single host

    Returns:
        Dictionary with host information. 'ports' contains only ports
        whose state is "open".
    """
    host = {
        'ip': '',
        'hostname': '',
        'mac': '',
        'vendor': '',
        'os_type': '',
        'os_details': '',
        'device_type': '',
        'ports': [],
        'os_accuracy': 0
    }

    # Extract IP address (prefer IPv4, fall back to IPv6 so that
    # IPv6-only hosts are not silently dropped by the caller)
    addr = host_elem.find("address[@addrtype='ipv4']")
    if addr is None:
        addr = host_elem.find("address[@addrtype='ipv6']")
    if addr is not None:
        host['ip'] = addr.get('addr', '')

    # Extract MAC address and vendor
    mac = host_elem.find("address[@addrtype='mac']")
    if mac is not None:
        host['mac'] = mac.get('addr', '')
        host['vendor'] = mac.get('vendor', '')

    # Extract hostname
    hostname_elem = host_elem.find(".//hostname")
    if hostname_elem is not None:
        host['hostname'] = hostname_elem.get('name', '')

    # Extract OS information
    osmatch = host_elem.find(".//osmatch")
    if osmatch is not None:
        os_name = osmatch.get('name', '')
        host['os_details'] = os_name
        host['os_type'] = detect_os_type(os_name)
        host['os_accuracy'] = _to_int(osmatch.get('accuracy', 0), 0)
    else:
        # Try osclass as fallback
        osclass = host_elem.find(".//osclass")
        if osclass is not None:
            osfamily = osclass.get('osfamily', '')
            osgen = osclass.get('osgen', '')
            host['os_type'] = detect_os_type(osfamily)
            host['os_details'] = f"{osfamily} {osgen}".strip()
            host['os_accuracy'] = _to_int(osclass.get('accuracy', 0), 0)

    # Extract ports
    for port_elem in host_elem.findall(".//port"):
        port_info = {
            'port': _to_int(port_elem.get('portid', 0), 0),
            'protocol': port_elem.get('protocol', 'tcp'),
            'state': '',
            'service': '',
            'product': '',
            'version': ''
        }

        state_elem = port_elem.find('state')
        if state_elem is not None:
            port_info['state'] = state_elem.get('state', '')

        service_elem = port_elem.find('service')
        if service_elem is not None:
            port_info['service'] = service_elem.get('name', '')
            port_info['product'] = service_elem.get('product', '')
            port_info['version'] = service_elem.get('version', '')

            # Use service info to help detect OS if not already detected.
            # 'Unknown' counts as "not detected": it is what detect_os_type
            # returns for an unrecognized OS string.
            if not _os_is_known(host['os_type']):
                product = port_info['product'].lower()
                if 'microsoft' in product or 'windows' in product:
                    host['os_type'] = 'Windows'
                elif 'apache' in product or 'nginx' in product or 'linux' in product:
                    host['os_type'] = 'Linux'

        if port_info['state'] == 'open':
            host['ports'].append(port_info)

    # Infer OS from ports if still unknown
    if not _os_is_known(host['os_type']) and host['ports']:
        host['os_type'] = _infer_os_from_ports(host['ports'])

    # Classify device type
    host['device_type'] = classify_device_type(host)

    return host


def _normalize_json_port(port_info: Dict[str, Any], port_num: Any) -> Optional[Dict[str, Any]]:
    """Build a standard port dictionary from one JSON port entry.

    Returns None when the port number is not numeric or when the entry
    reports a state other than "open" (an absent/empty state is kept,
    since not every JSON format records it).
    """
    number = _to_int(port_num, None)
    if number is None:
        return None

    state = _as_text(port_info.get('state'))
    if state and state.lower() != 'open':
        return None

    # Keep any extra keys the source provided, but guarantee the standard ones.
    normalized = dict(port_info)
    normalized['port'] = number
    normalized['state'] = state
    normalized['service'] = port_info.get('service', port_info.get('name', ''))
    normalized.setdefault('protocol', 'tcp')
    normalized.setdefault('product', '')
    normalized.setdefault('version', '')
    return normalized


def _parse_host_json(host_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Parse host data from JSON format.

    Args:
        host_data: Dictionary containing host information

    Returns:
        Standardized host dictionary. 'ports' contains only entries with a
        numeric port number whose state is "open" or unspecified.
    """
    host = {
        'ip': host_data.get('ip') or host_data.get('address') or '',
        'hostname': host_data.get('hostname') or host_data.get('name') or '',
        'mac': host_data.get('mac') or '',
        'vendor': host_data.get('vendor') or '',
        'os_type': '',
        'os_details': '',
        'device_type': '',
        'ports': [],
        'os_accuracy': 0
    }

    # Extract OS information
    os_info = host_data.get('os', host_data.get('osmatch', {}))
    if isinstance(os_info, list):
        # Some formats give a list of candidate matches; use the first one.
        os_info = next((item for item in os_info if isinstance(item, dict)), {})
    if isinstance(os_info, dict):
        host['os_details'] = _as_text(os_info.get('name', os_info.get('details', '')))
        host['os_accuracy'] = _to_int(os_info.get('accuracy', 0), 0)
    elif isinstance(os_info, str):
        host['os_details'] = os_info

    host['os_type'] = detect_os_type(host['os_details'])

    # Extract ports
    ports_data = host_data.get('ports', host_data.get('tcp', {}))
    if isinstance(ports_data, list):
        entries = [
            (entry.get('port', entry.get('portid')), entry)
            for entry in ports_data
            if isinstance(entry, dict)
        ]
    elif isinstance(ports_data, dict):
        entries = [
            (port_num, port_info)
            for port_num, port_info in ports_data.items()
            if isinstance(port_info, dict)
        ]
    else:
        entries = []

    for port_num, port_info in entries:
        normalized = _normalize_json_port(port_info, port_num)
        if normalized is not None:
            host['ports'].append(normalized)

    # Infer OS from ports if unknown. detect_os_type never returns an empty
    # string, so a plain truthiness test would make this branch unreachable.
    if not _os_is_known(host['os_type']) and host['ports']:
        host['os_type'] = _infer_os_from_ports(host['ports'])

    # Classify device type
    host['device_type'] = classify_device_type(host)

    return host


def detect_os_type(os_string: str) -> str:
    """
    Detect OS type from an OS description string.

    Matching is case-insensitive and keyword based; the first matching
    category wins, in the order the checks appear below.

    Args:
        os_string: OS description from nmap

    Returns:
        Standardized OS type string, or 'Unknown' when nothing matches or
        the input is empty / not a string.
    """
    if not isinstance(os_string, str) or not os_string:
        return _UNKNOWN_OS

    os_lower = os_string.lower()

    # Windows detection
    if _mentions(os_lower, ('windows', 'microsoft', 'win7', 'win10', 'win11', 'server 20')):
        return 'Windows'

    # Linux detection
    if _mentions(os_lower, ('linux', 'ubuntu', 'debian', 'centos', 'red hat', 'rhel',
                            'fedora', 'arch', 'gentoo', 'suse')):
        return 'Linux'

    # Apple iOS must be tested before macOS ('apple') and Cisco ('ios'),
    # otherwise one of those always claims the string first.
    if 'apple' in os_lower and _mentions(os_lower, ('ios',)):
        return 'iOS'

    # macOS detection
    if _mentions(os_lower, ('mac os', 'darwin', 'apple', 'macos')):
        return 'macOS'

    # Unix variants
    if _mentions(os_lower, ('freebsd', 'openbsd', 'netbsd', 'unix', 'solaris', 'aix')):
        return 'Unix'

    # Network devices
    if _mentions(os_lower, ('cisco', 'ios')):
        return 'Cisco'
    if _mentions(os_lower, ('juniper', 'junos')):
        return 'Juniper'
    if _mentions(os_lower, ('fortinet', 'fortigate')):
        return 'Fortinet'
    if _mentions(os_lower, ('palo alto', 'panos')):
        return 'Palo Alto'
    if _mentions(os_lower, ('switch', 'router', 'firewall', 'gateway')):
        return 'Network Device'

    # Virtualization
    if _mentions(os_lower, ('vmware', 'esxi')):
        return 'VMware'
    if 'hyper-v' in os_lower:
        return 'Hyper-V'

    # Mobile
    if 'android' in os_lower:
        return 'Android'

    # Printers and IoT
    if _mentions(os_lower, ('printer', 'hp jetdirect', 'canon', 'epson', 'xerox')):
        return 'Printer'
    if _mentions(os_lower, ('iot', 'embedded')):
        return 'IoT Device'

    return _UNKNOWN_OS


def classify_device_type(host: Dict[str, Any]) -> str:
    """
    Classify the device type based on OS, ports, and services.

    Missing or non-string 'os_type' / 'os_details' values and malformed
    port entries are tolerated (treated as absent).

    Args:
        host: Host dictionary with OS and port information

    Returns:
        Device type classification (workstation, server, network, appliance, etc.)
    """
    os_type = _as_text(host.get('os_type')).lower()
    os_details = _as_text(host.get('os_details')).lower()
    ports = host.get('ports') or []

    port_numbers = _port_numbers(ports)
    services = set(_port_text_values(ports, 'service'))

    # Network infrastructure
    if os_type in _NETWORK_OS_TYPES:
        if 'switch' in os_details or 'catalyst' in os_details:
            return 'Network Switch'
        if 'router' in os_details or 'ios' in os_details:
            return 'Router'
        if 'firewall' in os_details or 'fortigate' in os_details:
            return 'Firewall'
        return 'Network Device'

    # Check for SNMP (common on network devices)
    if 161 in port_numbers or 162 in port_numbers:
        return 'Network Device'

    # Printers
    if os_type == 'printer' or 9100 in port_numbers or 631 in port_numbers:
        return 'Printer'

    # IoT devices
    if os_type == 'iot device':
        return 'IoT Device'

    # Check if it's explicitly a server OS
    if 'server' in os_details:
        return 'Server'

    # Check for server ports/services
    server_port_hits = port_numbers & _SERVER_PORTS
    if server_port_hits or services & _SERVER_SERVICES:
        # Three or more server ports suggests a server
        if len(server_port_hits) >= 3:
            return 'Server'
        # Specific database or web server services
        if services & _DEDICATED_SERVER_SERVICES:
            return 'Server'

    # Virtualization hosts
    if os_type in ('vmware', 'hyper-v') or 'esxi' in os_details:
        return 'Virtualization Host'

    # Windows/macOS are typically workstations unless server indicators
    # were found above (an open RDP port does not change that default).
    if os_type in ('windows', 'macos'):
        return 'Workstation'

    # Linux could be either: desktop Linux if few ports are open
    if os_type == 'linux':
        return 'Workstation' if len(port_numbers) <= 3 else 'Server'

    # Mobile devices
    if os_type in ('android', 'ios'):
        return 'Mobile Device'

    # Default classification
    if len(port_numbers) >= 5:
        return 'Server'
    if len(port_numbers) >= 1:
        return 'Workstation'

    return _UNKNOWN_OS


def _infer_os_from_ports(ports: List[Dict[str, Any]]) -> str:
    """
    Infer OS type from open ports and services.

    Args:
        ports: List of port dictionaries

    Returns:
        Inferred OS type, or 'Unknown' if no indicator matches
    """
    port_numbers = _port_numbers(ports)
    services = _port_text_values(ports, 'service')
    products = _port_text_values(ports, 'product')

    # Windows indicators
    if _WINDOWS_PORTS & port_numbers:
        return 'Windows'
    if any('microsoft' in p or 'windows' in p for p in products):
        return 'Windows'

    # Linux indicators (SSH is common)
    if 22 in port_numbers and 'ssh' in services:
        # Could be Linux or Unix
        return 'Linux'

    # Network device indicators
    if 161 in port_numbers or 162 in port_numbers:  # SNMP
        return 'Network Device'
    if 23 in port_numbers:  # Telnet (often network devices)
        return 'Network Device'

    # Printer indicators
    if 9100 in port_numbers or 631 in port_numbers:
        return 'Printer'

    return _UNKNOWN_OS


def get_os_icon_name(host: Dict[str, Any]) -> str:
    """
    Get the appropriate icon name for a host based on OS and device type.

    Args:
        host: Host dictionary

    Returns:
        Icon filename (without extension); 'unknown' when neither the
        device type nor the OS type maps to an icon.
    """
    os_type = _as_text(host.get('os_type')).lower()
    device_type = _as_text(host.get('device_type')).lower()

    # Device type takes precedence for specialized devices
    if 'server' in device_type:
        return 'server'
    if any(kind in device_type for kind in ('network', 'router', 'switch', 'firewall')):
        return 'network'
    if 'printer' in device_type:
        return 'printer'
    if 'workstation' in device_type:
        return 'workstation'

    # Fall back to OS type
    if 'windows' in os_type:
        return 'windows'
    if 'linux' in os_type or 'unix' in os_type:
        return 'linux'
    if 'mac' in os_type:
        return 'mac'
    if any(net in os_type for net in ('cisco', 'juniper', 'fortinet', 'network')):
        return 'network'

    return 'unknown'