Log Analysis Techniques

Log Analysis Techniques

Effective log analysis transforms raw firewall data into actionable security intelligence. Multiple analysis techniques, applied systematically, reveal different aspects of your security posture and threat landscape.

Statistical Analysis identifies deviations from normal patterns. Establish baselines for typical traffic patterns — connection rates, common source countries, port distributions — then flag significant deviations from those baselines:

from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from scipy import stats

class StatisticalAnalyzer:
    """Flag the most recent hour of traffic when its volume departs from a baseline.

    The baseline is the distribution of per-hour connection counts observed
    during the ``baseline_window_days`` days that precede the hour under test.
    """

    def __init__(self, baseline_window_days=30):
        # Length of the look-back period, in days, used to build the baseline.
        self.baseline_window = baseline_window_days

    def analyze_traffic_anomalies(self, log_data):
        """Compare the last hour's connection count with the hourly baseline.

        Args:
            log_data: Records accepted by ``pd.DataFrame`` (for example a list
                of dicts), each carrying a ``timestamp`` value that
                ``pd.to_datetime`` can parse. Timestamps are compared with the
                naive value returned by ``datetime.now()``.

        Returns:
            ``{'anomaly_detected': False}`` when there is no data, no usable
            baseline, or the current hour lies within three standard
            deviations of the baseline mean. Otherwise a dict with the keys
            ``anomaly_detected`` (True), ``type`` ('connection_rate'),
            ``severity`` ('high' above five standard deviations, else
            'medium') and ``details`` (a human-readable summary).

        Raises:
            KeyError: If non-empty ``log_data`` has no ``timestamp`` field.
        """
        df = pd.DataFrame(log_data)
        if df.empty:
            # Nothing to analyse; an empty frame has no 'timestamp' column.
            return {'anomaly_detected': False}
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Read the clock once so both windows share the same reference point.
        now = datetime.now()
        current_start = now - timedelta(hours=1)
        baseline_start = now - timedelta(days=self.baseline_window)

        in_current_hour = df['timestamp'] > current_start
        current_hour_connections = int(in_current_hour.sum())

        # Keep the hour under test out of its own baseline; otherwise a spike
        # inflates the very mean and deviation it is measured against.
        in_baseline = (df['timestamp'] > baseline_start) & ~in_current_hour
        baseline_data = df[in_baseline]
        if baseline_data.empty:
            return {'anomaly_detected': False}

        # Connection rate analysis: one count per clock hour.
        hourly_connections = baseline_data.groupby(
            pd.Grouper(key='timestamp', freq=pd.offsets.Hour())
        ).size()
        mean_rate = float(hourly_connections.mean())
        std_rate = float(hourly_connections.std())

        if pd.isna(std_rate):
            # A single hourly bucket has no sample standard deviation, so a
            # z-score cannot be computed.
            return {'anomaly_detected': False}

        deviation = abs(current_hour_connections - mean_rate)
        if std_rate == 0:
            # Perfectly flat baseline: any departure is infinitely many
            # standard deviations away, and no departure is zero.
            abs_z_score = float('inf') if deviation > 0 else 0.0
        else:
            abs_z_score = deviation / std_rate

        if abs_z_score > 3:  # 3 standard deviations
            return {
                'anomaly_detected': True,
                'type': 'connection_rate',
                'severity': 'high' if abs_z_score > 5 else 'medium',
                'details': f'Current rate {current_hour_connections}/hour, baseline {mean_rate:.0f}/hour'
            }

        return {'anomaly_detected': False}

Pattern Recognition identifies attack signatures across multiple log entries. Attacks often involve sequences of actions rather than single events:

class AttackPatternDetector:
    """Detect multi-event attack patterns in a batch of firewall log entries.

    ``self.patterns`` holds, per pattern name, a list of indicator labels, an
    event-count ``threshold`` and a ``window``. ``detect_patterns`` currently
    evaluates only the ``brute_force`` threshold; the indicator lists, the
    ``window`` values and the other pattern definitions are not consulted by
    it, so callers are responsible for passing an appropriately recent batch.
    """

    def __init__(self):
        self.patterns = {
            'brute_force': {
                'indicators': ['multiple_failed_auth', 'same_source_ip', 'short_time_window'],
                'threshold': 10,
                'window': 300  # 5 minutes
            },
            'vulnerability_scan': {
                'indicators': ['multiple_ports', 'sequential_scanning', 'known_scanner_ua'],
                'threshold': 20,
                'window': 60
            },
            'ddos': {
                'indicators': ['high_connection_rate', 'distributed_sources', 'same_target'],
                'threshold': 1000,
                'window': 60
            }
        }

    def detect_patterns(self, recent_logs):
        """Return one finding per source IP that exceeds the brute-force threshold.

        Args:
            recent_logs: Iterable of dict-like log entries. ``source_ip`` and
                ``rule_name`` are read with ``.get``; entries missing either
                key (or carrying a null ``rule_name``) are tolerated.

        Returns:
            A list of dicts with the keys ``pattern``, ``source_ip``,
            ``event_count`` and ``confidence``, in order of each source's
            first appearance in ``recent_logs``. Empty when nothing matches.
        """
        # Group logs by source IP (entries without one share the key None).
        by_source = {}
        for entry in recent_logs:
            by_source.setdefault(entry.get('source_ip'), []).append(entry)

        threshold = self.patterns['brute_force']['threshold']
        detected_patterns = []

        # Check for brute force
        for source_ip, entries in by_source.items():
            # `or ''` covers a rule_name that is present but None, which would
            # otherwise make the substring test raise TypeError.
            failure_count = sum(
                1 for entry in entries
                if 'auth_fail' in (entry.get('rule_name') or '')
            )
            # Strictly greater than: exactly `threshold` failures is not flagged.
            if failure_count > threshold:
                detected_patterns.append({
                    'pattern': 'brute_force',
                    'source_ip': source_ip,
                    'event_count': failure_count,
                    'confidence': 'high'
                })

        return detected_patterns