Comprehensive Validation Strategies

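Effective input validation combines multiple techniques to ensure data conforms to expected patterns. Whitelist (allowlist) validation, where you explicitly define acceptable input, provides stronger security than blacklist (denylist) approaches, which must anticipate every dangerous input and inevitably miss some. Treat validation as a defense-in-depth layer that complements parameterized queries rather than replacing them: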

import re
from datetime import datetime

class InputValidator:
    """Whitelist-style validators for untrusted user input.

    Every validator is a classmethod that returns the accepted (possibly
    cleaned) value and raises ``ValueError`` when the input is rejected.
    The compiled patterns in ``PATTERNS`` can also be used directly.
    """

    # Define validation patterns.
    # Patterns end in \Z rather than $: in Python, $ also matches just before
    # a trailing newline, so "admin\n" would otherwise slip through a
    # whitelist check performed with .match().
    PATTERNS = {
        'username': re.compile(r'^[a-zA-Z0-9_]{3,20}\Z'),
        'email': re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\Z'),
        'phone': re.compile(r'^\+?1?\d{9,15}\Z'),
        'uuid': re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z'),
        'alphanumeric': re.compile(r'^[a-zA-Z0-9\s]+\Z'),
        'numeric_id': re.compile(r'^\d{1,10}\Z')
    }

    # SQL keywords refused as a whole underscore-delimited username token.
    _SQL_KEYWORDS = frozenset(
        {'SELECT', 'INSERT', 'UPDATE', 'DELETE', 'DROP', 'UNION', 'WHERE', 'OR'}
    )

    # Signatures of common SQL injection attempts, compiled once at class
    # creation instead of on every validate_search_term() call.
    _INJECTION_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r"('\s*OR\s*'|\"\s*OR\s*\")",  # 'OR' attacks
            r"(;\s*DROP\s+TABLE)",           # DROP TABLE attempts
            r"(UNION\s+SELECT)",             # UNION attacks
            r"(/\*.*\*/)",                   # SQL comments
            r"(--\s*$)",                     # Line comments
            r"(xp_|sp_)",                    # SQL Server procedures
            r"(EXEC\s*\(|EXECUTE\s*\()",     # Execute statements
        )
    )

    @classmethod
    def validate_username(cls, username):
        """Validate the username format and reject SQL keyword tokens.

        Args:
            username: Candidate username string.

        Returns:
            The username, unchanged, when it is acceptable.

        Raises:
            ValueError: If the username is not 3-20 characters drawn from
                letters, digits and underscore, or if one of its
                underscore-delimited parts is a SQL keyword.
        """
        if not cls.PATTERNS['username'].fullmatch(username):
            raise ValueError("Username must be 3-20 characters, alphanumeric and underscore only")

        # Additional check for SQL keywords (defense in depth). Whole tokens
        # are compared rather than substrings, so ordinary names such as
        # "gordon" or "victor" (which merely contain "OR") are not rejected.
        tokens = username.upper().split('_')
        if not cls._SQL_KEYWORDS.isdisjoint(tokens):
            raise ValueError("Username contains invalid keywords")

        return username

    @classmethod
    def validate_search_term(cls, search_term, max_length=100):
        """Validate search input with length and content restrictions.

        Args:
            search_term: Raw search string supplied by the user.
            max_length: Maximum permitted length of the raw string.

        Returns:
            The search term with every character other than letters, digits,
            whitespace and ``- . , ! ?`` removed.

        Raises:
            ValueError: If the raw term is longer than ``max_length`` or
                matches a known SQL injection pattern.
        """
        if len(search_term) > max_length:
            raise ValueError(f"Search term exceeds maximum length of {max_length}")

        # The injection check runs on the raw input: the cleaning step below
        # strips quotes, semicolons and similar characters, which would hide
        # the very signatures being looked for.
        if any(pattern.search(search_term) for pattern in cls._INJECTION_PATTERNS):
            raise ValueError("Search term contains suspicious patterns")

        # Remove potentially dangerous characters while preserving search
        # functionality: allow letters, numbers, spaces and basic punctuation.
        return re.sub(r'[^a-zA-Z0-9\s\-.,!?]', '', search_term)