Directory Traversal and File Inclusion

Path traversal attacks attempt to access files outside the web root directory, potentially exposing sensitive configuration files, source code, or system files. Modern attacks use various encoding techniques to bypass basic filters.

Comprehensive path traversal detection:

class PathTraversalDetector:
    """Heuristic detector for path traversal / file inclusion attempts.

    A request path is first decoded by :meth:`normalize_path` and then
    checked against a list of regular expressions, a list of sensitive
    file names, and a traversal-depth limit.

    Attributes:
        patterns: Regular-expression strings, searched case-insensitively
            against the normalized path.
        suspicious_files: Lower-case substrings whose presence anywhere in
            the normalized path causes the request to be blocked.
    """

    def __init__(self):
        self.patterns = [
            # Basic traversal ("../" or "..\").
            # NOTE: each entry needs its own trailing comma; adjacent string
            # literals are silently concatenated into a single pattern.
            r'\.\./|\.\.\\',
            r'\.\.%2f|\.\.%5c',

            # Encoded variations (still useful as a backstop when the input
            # is encoded more deeply than normalize_path() unwraps)
            r'%2e%2e%2f|%2e%2e%5c',
            r'%252e%252e%252f',
            r'\.\.%c0%af|\.\.%c1%9c',

            # Hex escapes: inside a regex, \x2e is the literal character
            # ".", so this matches the same text as the basic pattern.
            r'\x2e\x2e\x2f|\x2e\x2e\x5c',

            # Double encoding
            r'%%32%65%%32%65%%32%66',

            # Null byte injection
            r'\.php\x00|\.asp\x00',

            # Windows specific
            # Two consecutive backslashes (optionally preceded by "..").
            r'\.\.\\\\|\\\\',
            # Drive-letter absolute paths such as "c:\". A single escaped
            # backslash is used so ordinary Windows paths are matched.
            r'c:\\|d:\\|e:\\',

            # Unix specific
            r'/etc/passwd|/etc/shadow',
            r'/proc/self/environ',
            r'/var/log/',
        ]

        self.suspicious_files = [
            'passwd', 'shadow', 'hosts', 'config.php',
            'wp-config.php', 'configuration.php', '.env',
            '.git/config', '.ssh/id_rsa', 'web.config',
        ]

    def check_path(self, path):
        """Decide whether *path* looks like a traversal attempt.

        Args:
            path: The raw request path (may be percent-encoded).

        Returns:
            A dict that always contains the key ``'blocked'``. When
            ``'blocked'`` is true it also contains ``'reason'`` and,
            depending on which check fired, ``'pattern'`` (the matching
            regular expression) or ``'file'`` (the matching file name).
        """
        normalized = self.normalize_path(path)

        # Check against patterns
        for pattern in self.patterns:
            if re.search(pattern, normalized, re.IGNORECASE):
                return {
                    'blocked': True,
                    'reason': 'Path traversal pattern detected',
                    'pattern': pattern
                }

        # Check for suspicious files (plain substring match, lower-cased)
        lowered = normalized.lower()
        for file in self.suspicious_files:
            if file in lowered:
                return {
                    'blocked': True,
                    'reason': 'Suspicious file access attempt',
                    'file': file
                }

        # Check path depth. With the default patterns any "../" or "..\" is
        # already blocked above, so this only fires if the traversal
        # patterns have been removed from self.patterns.
        if normalized.count('../') > 2 or normalized.count('..\\') > 2:
            return {
                'blocked': True,
                'reason': 'Excessive directory traversal'
            }

        return {'blocked': False}

    def normalize_path(self, path, max_passes=3):
        """Repeatedly decode *path* to undo layered encodings.

        Each pass percent-decodes the value and rewrites literal ``\\x``
        and ``\\u00`` escape prefixes to ``%`` so that the following pass
        can decode them as well.

        Args:
            path: The raw path to decode.
            max_passes: Upper bound on the number of decoding passes.

        Returns:
            The decoded path. Decoding stops early once a pass leaves the
            value unchanged. If the value cannot be decoded (for example
            it is not string-like) the last successfully decoded value is
            returned.
        """
        normalized = path
        for _ in range(max_passes):
            try:
                decoded = urllib.parse.unquote(normalized)
                decoded = decoded.replace('\\x', '%')
                decoded = decoded.replace('\\u00', '%')
            except (TypeError, AttributeError):
                # Best effort: keep whatever was decoded so far.
                break
            if decoded == normalized:
                # Fixed point reached; further passes would be no-ops.
                break
            normalized = decoded

        return normalized