Bot and Scanner Detection

Automated scanners and malicious bots constantly probe web servers for vulnerabilities. Identifying and blocking these tools prevents reconnaissance that often precedes targeted attacks.
Advanced bot detection system:
class BotDetector:
    """Classify HTTP requests as scanner, suspicious client, crawler or human.

    Classification is driven by the request's User-Agent string. Requests that
    claim to be a well-known crawler are additionally checked with a
    forward-confirmed reverse DNS lookup of the source IP.
    """

    # Hostname suffixes that a crawler's reverse DNS name must end with.
    # Crawlers listed in ``legitimate_bots`` but absent here cannot be
    # verified and are therefore reported as impostors (fail closed).
    _VERIFIED_HOST_SUFFIXES = {
        'googlebot': ('.googlebot.com', '.google.com'),
        'bingbot': ('.search.msn.com',),
        'slurp': ('.yahoo.com', '.yahoo.net'),
    }

    def __init__(self):
        # Substrings of User-Agent values used by common attack tools.
        self.known_bad_bots = [
            'sqlmap', 'nikto', 'masscan', 'nmap', 'openvas',
            'nessus', 'acunetix', 'burpsuite', 'zaproxy',
            'havij', 'pangolin', 'sqlninja'
        ]

        self.suspicious_patterns = {
            'user_agent': [
                r'python-requests(?!/\d)',  # Requests without version
                r'curl/\d',
                r'wget/\d',
                r'go-http-client',
                r'java/\d',
                # Empty user agent: either the log placeholder "-" or a
                # missing/blank header (which analyze_request maps to '').
                r'^-?$',
                r'bot|spider|crawl|scraper'
            ],
            # Behavioural thresholds. No method of this class reads them;
            # they are kept as configuration data for callers.
            'behavior': {
                'rapid_requests': 100,  # Requests per minute
                'directory_enumeration': 20,  # 404s per minute
                'parameter_fuzzing': 30,  # Different parameters tested
                'sequential_scanning': 10  # Sequential port/path access
            }
        }

        # Substrings of User-Agent values used by well-known crawlers.
        self.legitimate_bots = [
            'googlebot', 'bingbot', 'slurp', 'duckduckbot',
            'facebookexternalhit', 'twitterbot', 'linkedinbot',
            'whatsapp', 'applebot'
        ]

    def analyze_request(self, request):
        """Classify a single request by its User-Agent.

        Args:
            request: Mapping with an optional ``'user_agent'`` string and an
                optional ``'source_ip'`` string. A missing or ``None``
                user agent is treated as empty.

        Returns:
            ``{'is_bot': False}`` when nothing matches, otherwise a dict with
            ``'is_bot'``, ``'malicious'``, ``'type'`` (``'scanner'``,
            ``'search_engine'``, ``'impostor'`` or ``'suspicious'``) and
            either ``'name'`` or ``'pattern'``.
        """
        import re

        # `or ''` guards against an explicit None value; strip() so a
        # whitespace-only header counts as empty.
        user_agent = (request.get('user_agent') or '').strip().lower()

        # 1. Known attack tools always win.
        for bad_bot in self.known_bad_bots:
            if bad_bot in user_agent:
                return {
                    'is_bot': True,
                    'malicious': True,
                    'type': 'scanner',
                    'name': bad_bot
                }

        # 2. Claimed well-known crawlers. This must run before the generic
        #    patterns: the 'bot|spider|crawl|scraper' pattern matches names
        #    such as "googlebot" and would otherwise make this branch
        #    unreachable for them.
        for good_bot in self.legitimate_bots:
            if good_bot in user_agent:
                if self.verify_bot(request.get('source_ip'), good_bot):
                    return {
                        'is_bot': True,
                        'malicious': False,
                        'type': 'search_engine',
                        'name': good_bot
                    }
                return {
                    'is_bot': True,
                    'malicious': True,
                    'type': 'impostor',
                    'name': f'fake_{good_bot}'
                }

        # 3. Generic suspicious User-Agent patterns.
        for pattern in self.suspicious_patterns['user_agent']:
            if re.search(pattern, user_agent, re.IGNORECASE):
                return {
                    'is_bot': True,
                    'malicious': True,
                    'type': 'suspicious',
                    'pattern': pattern
                }

        return {'is_bot': False}

    def verify_bot(self, ip, bot_name):
        """Verify a claimed crawler with forward-confirmed reverse DNS.

        The source IP is resolved to a hostname, the hostname must end with a
        suffix registered for ``bot_name``, and the hostname must resolve back
        to the same IP. The forward step matters because the owner of an IP
        range controls its PTR records and could otherwise claim any name.

        Args:
            ip: Source IP address as a string (IPv4 or IPv6).
            bot_name: Key identifying the claimed crawler, e.g. ``'googlebot'``.

        Returns:
            True only when every step succeeds. False when the crawler has no
            verification rule, the IP is missing or malformed, or any DNS
            lookup fails.
        """
        import ipaddress
        import socket

        # Check the rule first so unverifiable bots cost no DNS traffic.
        suffixes = self._VERIFIED_HOST_SUFFIXES.get(bot_name)
        if not suffixes or not isinstance(ip, str):
            return False

        try:
            address = ipaddress.ip_address(ip)
        except ValueError:
            return False

        try:
            hostname = socket.gethostbyaddr(str(address))[0]
        except OSError:  # covers socket.herror / socket.gaierror
            return False

        hostname = hostname.rstrip('.').lower()
        if not hostname.endswith(suffixes):
            return False

        # Forward confirmation: the name must map back to the original IP.
        try:
            resolved = socket.getaddrinfo(hostname, None)
        except OSError:
            return False

        for info in resolved:
            try:
                if ipaddress.ip_address(info[4][0]) == address:
                    return True
            except ValueError:
                # Skip address forms ipaddress cannot parse.
                continue
        return False