Comprehensive Input Validation Strategies

Effective input validation requires a multi-layered approach combining different validation techniques. Whitelisting defines exactly what input is acceptable, rejecting everything else by default. This approach provides the strongest security by explicitly defining allowed values, formats, and ranges. Blacklisting, while less secure, can complement whitelisting by explicitly blocking known dangerous patterns.

# Python comprehensive input validation example
import re
from typing import Any, Dict, List, Optional, Union
from datetime import datetime
from enum import Enum

class ValidationError(Exception):
    """Signal that one input field failed validation.

    Attributes:
        field: Name of the field that was rejected.
        message: Human-readable reason for the rejection.

    ``str(error)`` renders as ``"<field>: <message>"``.
    """

    def __init__(self, field: str, message: str):
        # Initialise the base exception first so its args hold the combined text.
        super().__init__(f"{field}: {message}")
        self.field = field
        self.message = message

class InputValidator:
    """Validators that normalise and check untrusted input values.

    Each ``validate_*`` method returns the cleaned value on success and
    raises ``ValidationError`` (tagged with the field name) on the first
    check that fails.
    """

    # Deny-list applied to every string after the allow-list checks.
    # Compiled once; DOTALL lets ``.`` cross newlines so a payload split over
    # several lines (e.g. a multi-line <script> block) is still detected.
    _DANGEROUS_PATTERNS = tuple(
        re.compile(expr, re.IGNORECASE | re.DOTALL)
        for expr in (
            r'<script.*?>.*?</script>',  # Script tags
            r'javascript:',               # JavaScript protocol
            r'on\w+\s*=',                # Event handlers
            r'--',                       # SQL comments
            r'/\*.*?\*/',                # C-style comments
            r'xp_cmdshell',              # SQL Server command execution
            r'exec\s*\(',                # SQL execution
        )
    )

    _EMAIL_PATTERN = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'

    # A letter followed by up to 63 letters, digits or underscores.
    _IDENTIFIER_PATTERN = r'^[a-zA-Z][a-zA-Z0-9_]{0,63}$'

    _SQL_RESERVED = frozenset({
        'SELECT', 'INSERT', 'UPDATE', 'DELETE', 'DROP', 'CREATE',
        'ALTER', 'TABLE', 'FROM', 'WHERE', 'AND', 'OR',
    })

    def __init__(self):
        # Kept for callers that want to collect errors themselves; the
        # validators in this class raise instead of appending here.
        self.errors: List[ValidationError] = []

    def validate_string(self, value: Any, field_name: str,
                       min_length: int = 0, max_length: int = 1000,
                       pattern: Optional[str] = None,
                       allowed_values: Optional[List[str]] = None) -> str:
        """Validate string input against several constraints.

        Checks run in order: type, length (after stripping surrounding
        whitespace), ``pattern``, ``allowed_values``, then the deny-list of
        dangerous patterns.

        Args:
            value: Raw input; anything that is not a ``str`` is rejected.
            field_name: Name reported in the raised ``ValidationError``.
            min_length: Minimum length of the stripped value.
            max_length: Maximum length of the stripped value.
            pattern: Optional regex applied with ``re.match`` (anchored at
                the start only; include ``$`` to constrain the end).
            allowed_values: Optional whitelist. An empty list permits
                nothing; ``None`` disables the check.

        Returns:
            The value with leading and trailing whitespace removed.

        Raises:
            ValidationError: If any check fails.
        """
        if not isinstance(value, str):
            raise ValidationError(field_name, "Must be a string")

        # Strip whitespace
        value = value.strip()

        # Length validation
        if len(value) < min_length:
            raise ValidationError(field_name, f"Must be at least {min_length} characters")
        if len(value) > max_length:
            raise ValidationError(field_name, f"Must not exceed {max_length} characters")

        # Pattern validation
        if pattern is not None and not re.match(pattern, value):
            raise ValidationError(field_name, "Invalid format")

        # Whitelist validation. Compare against None rather than truthiness
        # so an empty whitelist fails closed instead of disabling the check.
        if allowed_values is not None and value not in allowed_values:
            raise ValidationError(field_name, f"Must be one of: {', '.join(allowed_values)}")

        # Reject common dangerous patterns (defence in depth on top of the
        # allow-list checks above).
        if any(danger.search(value) for danger in self._DANGEROUS_PATTERNS):
            raise ValidationError(field_name, "Contains potentially dangerous content")

        return value

    def validate_integer(self, value: Any, field_name: str,
                        min_value: Optional[int] = None,
                        max_value: Optional[int] = None) -> int:
        """Validate integer input, accepting ints and numeric strings.

        Args:
            value: An ``int`` or a string that ``int()`` can parse.
            field_name: Name reported in the raised ``ValidationError``.
            min_value: Optional inclusive lower bound.
            max_value: Optional inclusive upper bound.

        Returns:
            The value as an ``int``.

        Raises:
            ValidationError: If the value is not an integer (booleans,
                floats and ``None`` are rejected) or is out of range.
        """
        # bool is a subclass of int, so it must be excluded explicitly;
        # otherwise a JSON ``true`` would be accepted as the integer 1.
        if isinstance(value, bool):
            raise ValidationError(field_name, "Must be a valid integer")

        if isinstance(value, str):
            # Handle string numbers
            try:
                value = int(value)
            except ValueError:
                raise ValidationError(field_name, "Must be a valid integer") from None
        elif not isinstance(value, int):
            raise ValidationError(field_name, "Must be a valid integer")

        if min_value is not None and value < min_value:
            raise ValidationError(field_name, f"Must be at least {min_value}")
        if max_value is not None and value > max_value:
            raise ValidationError(field_name, f"Must not exceed {max_value}")

        return value

    def validate_email(self, value: Any, field_name: str) -> str:
        """Validate an email address and return it lower-cased.

        The value first passes through ``validate_string`` (5 to 254
        characters, deny-list applied), then must match the email pattern
        and contain no consecutive dots.

        Raises:
            ValidationError: If any check fails.
        """
        value = self.validate_string(value, field_name, min_length=5, max_length=254)

        if not re.match(self._EMAIL_PATTERN, value):
            raise ValidationError(field_name, "Invalid email format")

        # The pattern's character classes allow '.', so runs of dots need a
        # separate check.
        if '..' in value:
            raise ValidationError(field_name, "Email cannot contain consecutive dots")

        return value.lower()

    def validate_sql_identifier(self, value: Any, field_name: str) -> str:
        """Validate SQL identifiers (table names, column names).

        Only a leading letter followed by letters, digits or underscores is
        accepted, and the identifier must not be one of the reserved words
        in ``_SQL_RESERVED`` (compared case-insensitively).

        Raises:
            ValidationError: If the format is invalid or the word is reserved.
        """
        value = self.validate_string(value, field_name, pattern=self._IDENTIFIER_PATTERN)

        if value.upper() in self._SQL_RESERVED:
            raise ValidationError(field_name, "Cannot use SQL reserved words")

        return value

# Example usage in API endpoint
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/api/users', methods=['POST'])
def create_user():
    """Create a user from a JSON request body after validating each field.

    Reads ``username``, ``email``, ``age`` and ``role`` from the JSON body,
    validates them with ``InputValidator`` and passes the cleaned values to
    ``create_user_safely`` (defined elsewhere).

    Returns:
        The created user as JSON with status 201, or ``{'error': ...}`` with
        status 400 when the body or any field fails validation.
    """
    validator = InputValidator()
    
    try:
        # Validate all inputs
        data = request.get_json()
        
        # The parsed body may be None or a non-object (e.g. a literal
        # ``null`` or a JSON array); without this guard ``data.get`` would
        # raise AttributeError and surface as a 500 instead of a 400.
        if not isinstance(data, dict):
            raise ValidationError('body', "Request body must be a JSON object")
        
        username = validator.validate_string(
            data.get('username'), 
            'username',
            min_length=3,
            max_length=50,
            pattern=r'^[a-zA-Z0-9_]+$'
        )
        
        email = validator.validate_email(data.get('email'), 'email')
        
        age = validator.validate_integer(
            data.get('age'),
            'age',
            min_value=13,
            max_value=120
        )
        
        role = validator.validate_string(
            data.get('role'),
            'role',
            allowed_values=['user', 'admin', 'moderator']
        )
        
        # Safe to use validated inputs
        user = create_user_safely(username, email, age, role)
        return jsonify(user), 201
        
    except ValidationError as e:
        # str(e) is "<field>: <message>", so the client learns which field failed.
        return jsonify({'error': str(e)}), 400

Type validation ensures data matches expected types before processing. Strong typing prevents type confusion attacks where attackers exploit weak type coercion. Validate not just the presence of required fields but also their types, considering that JSON doesn't distinguish between integers and floats, and everything from query parameters arrives as strings.