Intelligent Rate Limiting at Scale

Rate limiting on high-traffic websites requires sophisticated approaches that distinguish between legitimate traffic spikes and attacks. Simple connection-based limits often block real users during viral events or flash sales.

Adaptive rate limiting: instead of fixed thresholds, derive the limits dynamically from observed traffic patterns, as in the following example:

import time
import collections
import numpy as np
from datetime import datetime, timedelta

class AdaptiveRateLimiter:
    """Derive rate limits from a rolling traffic baseline and render iptables rules.

    Samples are recorded with ``update_baseline``; ``calculate_dynamic_limits``
    turns the retained samples into a limits dict, and
    ``generate_firewall_rules`` formats that dict as iptables command strings.
    """

    # Number of samples required before statistics replace the static limits.
    MIN_SAMPLES = 100
    # Burst allowance (multiple of the sustained rate) reported with every
    # limits dict and used as the fallback when a caller omits the key.
    DEFAULT_BURST_MULTIPLIER = 2.0

    def __init__(self):
        self.baseline_window = timedelta(hours=24)
        self.traffic_history = collections.deque()
        self.anomaly_threshold = 3  # Standard deviations

    def update_baseline(self, current_traffic):
        """Record one traffic sample and drop samples older than the window.

        Args:
            current_traffic: Mapping with the keys ``'rps'``, ``'unique_ips'``
                and ``'new_connections'``.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        timestamp = datetime.now()
        self.traffic_history.append({
            'timestamp': timestamp,
            'requests_per_second': current_traffic['rps'],
            'unique_ips': current_traffic['unique_ips'],
            'new_connection_rate': current_traffic['new_connections'],
        })

        # Samples are appended in arrival order, so expired ones sit at the
        # left end of the deque and can be popped until a fresh one is found.
        cutoff_time = timestamp - self.baseline_window
        while self.traffic_history and self.traffic_history[0]['timestamp'] < cutoff_time:
            self.traffic_history.popleft()

    def calculate_dynamic_limits(self):
        """Calculate rate limits from the recorded request-rate history.

        Returns:
            A dict with the keys ``'global_rps'``, ``'per_ip_rps'``,
            ``'new_connections_ps'`` and ``'burst_multiplier'``. With fewer
            than ``MIN_SAMPLES`` samples the static defaults are returned;
            otherwise the values are derived from the mean and standard
            deviation of the recorded requests-per-second values.
        """
        if len(self.traffic_history) < self.MIN_SAMPLES:
            # Not enough data, use static limits. 'burst_multiplier' must be
            # present here too: generate_firewall_rules reads it, and without
            # it the static limits could not be turned into rules.
            return {
                'global_rps': 10000,
                'per_ip_rps': 100,
                'new_connections_ps': 1000,
                'burst_multiplier': self.DEFAULT_BURST_MULTIPLIER,
            }

        rps_values = [h['requests_per_second'] for h in self.traffic_history]
        mean_rps = np.mean(rps_values)
        std_rps = np.std(rps_values)

        # int() truncates, so a quiet baseline (e.g. mean below 100 rps) would
        # otherwise yield a limit of 0; clamp every limit to at least 1.
        return {
            # Baseline plus `anomaly_threshold` standard deviations.
            'global_rps': max(1, int(mean_rps + (self.anomaly_threshold * std_rps))),
            # Assume 100 concurrent users average.
            'per_ip_rps': max(1, int(mean_rps / 100)),
            # 10% new connections.
            'new_connections_ps': max(1, int(mean_rps * 0.1)),
            # Allow 2x burst for short periods.
            'burst_multiplier': self.DEFAULT_BURST_MULTIPLIER,
        }

    def generate_firewall_rules(self, limits):
        """Generate iptables rule strings from a limits dict.

        Args:
            limits: Mapping with ``'global_rps'`` and ``'per_ip_rps'``; an
                optional ``'burst_multiplier'`` scales the burst sizes and
                falls back to ``DEFAULT_BURST_MULTIPLIER`` when absent.

        Returns:
            A list of two strings: the global rule (``-j DROP``) followed by
            the per-IP rule (``-j SET --add-set ratelimit_ips src``).

        Raises:
            KeyError: If ``'global_rps'`` or ``'per_ip_rps'`` is missing.
        """
        burst = limits.get('burst_multiplier', self.DEFAULT_BURST_MULTIPLIER)
        global_rps = limits['global_rps']
        per_ip_rps = limits['per_ip_rps']

        rules = []

        # NOTE: these are ordinary (non-raw) string literals, so each
        # backslash-newline pair below is consumed by Python and every rule is
        # emitted as a single command line.

        # Global rate limiting using hashlimit. The mode is `dstport` only, so
        # all sources share one bucket for port 443; including `srcip` would
        # create one bucket per source address and make this a per-IP limit.
        rules.append(f"""
iptables -A INPUT -p tcp --dport 443 \
    -m hashlimit --hashlimit-above {global_rps}/sec \
    --hashlimit-burst {int(global_rps * burst)} \
    --hashlimit-mode dstport \
    --hashlimit-name https_global \
    -j DROP
""")

        # Per-IP rate limiting: sources above the limit are added to the
        # `ratelimit_ips` set instead of being dropped by this rule.
        rules.append(f"""
iptables -A INPUT -p tcp --dport 443 \
    -m hashlimit --hashlimit-above {per_ip_rps}/sec \
    --hashlimit-burst {int(per_ip_rps * burst)} \
    --hashlimit-mode srcip \
    --hashlimit-name https_per_ip \
    -j SET --add-set ratelimit_ips src
""")

        return rules