Bot and Scanner Detection
Bot and Scanner Detection
Automated scanners and malicious bots constantly probe web servers for vulnerabilities. Identifying and blocking these tools prevents reconnaissance that often precedes targeted attacks.
Advanced bot detection system:
class BotDetector:
    """Classify HTTP requests as scanner, suspicious client, crawler, or human.

    Classification is driven by the request's User-Agent string. Crawlers that
    claim to be a well-known bot are additionally verified through DNS so that
    a spoofed User-Agent is reported as an impostor.

    Attributes:
        known_bad_bots: Lower-case substrings identifying scanning tools.
        suspicious_patterns: ``'user_agent'`` holds regex strings matched
            against the lower-cased User-Agent; ``'behavior'`` holds rate
            thresholds that are stored here but not consulted by any method
            of this class.
        legitimate_bots: Lower-case substrings identifying crawlers that are
            accepted once ``verify_bot`` confirms them.
    """

    # Hostname suffixes a crawler's reverse-DNS name must end with.
    # Bots without an entry cannot be verified and are treated as unverified.
    _VERIFIED_SUFFIXES = {
        'googlebot': ('.googlebot.com', '.google.com'),
        'bingbot': ('.search.msn.com',),
        'slurp': ('.yahoo.com', '.yahoo.net'),
    }

    def __init__(self):
        self.known_bad_bots = [
            'sqlmap', 'nikto', 'masscan', 'nmap', 'openvas',
            'nessus', 'acunetix', 'burpsuite', 'zaproxy',
            'havij', 'pangolin', 'sqlninja',
        ]
        self.suspicious_patterns = {
            'user_agent': [
                r'python-requests(?!/\d)',  # Requests without version
                r'curl/\d',
                r'wget/\d',
                r'go-http-client',
                r'java/\d',
                r'^-?$',  # Empty user agent, or the "-" log placeholder
                r'bot|spider|crawl|scraper',
            ],
            'behavior': {
                'rapid_requests': 100,  # Requests per minute
                'directory_enumeration': 20,  # 404s per minute
                'parameter_fuzzing': 30,  # Different parameters tested
                'sequential_scanning': 10,  # Sequential port/path access
            },
        }
        self.legitimate_bots = [
            'googlebot', 'bingbot', 'slurp', 'duckduckbot',
            'facebookexternalhit', 'twitterbot', 'linkedinbot',
            'whatsapp', 'applebot',
        ]

    def analyze_request(self, request):
        """Classify a single request by its User-Agent.

        Args:
            request: Mapping with an optional ``'user_agent'`` string and an
                optional ``'source_ip'`` string.

        Returns:
            A dict that always has ``'is_bot'``. When ``is_bot`` is true it
            also has ``'malicious'`` and ``'type'`` (``'scanner'``,
            ``'search_engine'``, ``'impostor'`` or ``'suspicious'``), plus
            ``'name'`` or ``'pattern'`` identifying what matched.
        """
        import re

        # A missing or None header is normalised to '' so it can be matched
        # by the empty-user-agent pattern instead of raising.
        user_agent = (request.get('user_agent') or '').strip().lower()

        # Check against known bad bots
        for bad_bot in self.known_bad_bots:
            if bad_bot in user_agent:
                return {
                    'is_bot': True,
                    'malicious': True,
                    'type': 'scanner',
                    'name': bad_bot,
                }

        # Check legitimate bots BEFORE the generic patterns: names such as
        # "googlebot" contain "bot" and would otherwise always be reported
        # as suspicious, making this branch unreachable for them.
        for good_bot in self.legitimate_bots:
            if good_bot in user_agent:
                if self.verify_bot(request.get('source_ip'), good_bot):
                    return {
                        'is_bot': True,
                        'malicious': False,
                        'type': 'search_engine',
                        'name': good_bot,
                    }
                return {
                    'is_bot': True,
                    'malicious': True,
                    'type': 'impostor',
                    'name': f'fake_{good_bot}',
                }

        # Check suspicious patterns
        for pattern in self.suspicious_patterns['user_agent']:
            if re.search(pattern, user_agent, re.IGNORECASE):
                return {
                    'is_bot': True,
                    'malicious': True,
                    'type': 'suspicious',
                    'pattern': pattern,
                }

        return {'is_bot': False}

    def verify_bot(self, ip, bot_name):
        """Verify a crawler via forward-confirmed reverse DNS.

        The IP's PTR name must end with a suffix registered for ``bot_name``
        and that name must resolve back to the same IP. The forward step is
        needed because whoever controls the reverse zone for an IP can make
        the PTR record claim any hostname.

        Args:
            ip: Source address of the request as a string.
            bot_name: Key from ``legitimate_bots``.

        Returns:
            True only if both lookups succeed and agree. False for bots with
            no registered suffixes, invalid addresses, or any DNS failure.
        """
        import ipaddress
        import socket

        suffixes = self._VERIFIED_SUFFIXES.get(bot_name)
        # Decide without touching DNS when verification cannot succeed.
        if suffixes is None or not isinstance(ip, str):
            return False
        try:
            claimed = ipaddress.ip_address(ip.strip())
        except ValueError:
            return False
        # Compare IPv4-mapped IPv6 addresses by their IPv4 value.
        mapped = getattr(claimed, 'ipv4_mapped', None)
        if mapped is not None:
            claimed = mapped

        try:
            hostname = socket.gethostbyaddr(str(claimed))[0]
            hostname = hostname.rstrip('.').lower()
            if not hostname.endswith(suffixes):
                return False
            forward = socket.getaddrinfo(hostname, None)
        except (OSError, UnicodeError):
            # Lookup failures (socket.herror / socket.gaierror are OSError
            # subclasses) mean the claim cannot be confirmed.
            return False

        for info in forward:
            try:
                if ipaddress.ip_address(info[4][0]) == claimed:
                    return True
            except ValueError:
                continue
        return False