Comprehensive Input Validation Strategies
Comprehensive Input Validation Strategies
Effective input validation requires a multi-layered approach combining different validation techniques. Whitelisting defines exactly what input is acceptable, rejecting everything else by default. This approach provides the strongest security by explicitly defining allowed values, formats, and ranges. Blacklisting, while less secure, can complement whitelisting by explicitly blocking known dangerous patterns.
# Python comprehensive input validation example
import re
from typing import Any, Dict, List, Optional, Union
from datetime import datetime
from enum import Enum
class ValidationError(Exception):
    """Signal that a single input field failed validation.

    The exception text is rendered as ``"<field>: <message>"``; the two parts
    are also exposed separately as the ``field`` and ``message`` attributes.
    """

    def __init__(self, field: str, message: str):
        # Build the combined text first so str(exc) is ready for error responses.
        super().__init__(f"{field}: {message}")
        # Keep the parts individually addressable for callers that want them.
        self.field, self.message = field, message
class InputValidator:
    """Validators for untrusted input values.

    Every ``validate_*`` method returns the cleaned value on success and
    raises ``ValidationError`` (tagged with ``field_name``) at the first
    check that fails.
    """

    # Deny-list searched by validate_string after its allow-list checks.
    # Compiled once at class creation. DOTALL lets ``.`` span newlines so a
    # payload split across several lines cannot slip past the ``.*?`` parts.
    _DANGEROUS_PATTERNS = tuple(
        re.compile(expr, re.IGNORECASE | re.DOTALL)
        for expr in (
            r'<script.*?>.*?</script>',  # Script tags
            r'javascript:',              # JavaScript protocol
            r'on\w+\s*=',                # Event handlers
            r'--',                       # SQL comments
            r'/\*.*?\*/',                # C-style comments
            r'xp_cmdshell',              # SQL Server command execution
            r'exec\s*\(',                # SQL execution
        )
    )

    _EMAIL_PATTERN = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'

    # A letter followed by up to 63 letters, digits or underscores.
    _SQL_IDENTIFIER_PATTERN = r'^[a-zA-Z][a-zA-Z0-9_]{0,63}$'

    # Compared against the upper-cased identifier in validate_sql_identifier.
    _SQL_RESERVED = frozenset({
        'SELECT', 'INSERT', 'UPDATE', 'DELETE', 'DROP', 'CREATE',
        'ALTER', 'TABLE', 'FROM', 'WHERE', 'AND', 'OR',
    })

    def __init__(self):
        # Starts empty; none of the methods in this class append to it.
        self.errors: List[ValidationError] = []

    def validate_string(self, value: Any, field_name: str,
                        min_length: int = 0, max_length: int = 1000,
                        pattern: Optional[str] = None,
                        allowed_values: Optional[List[str]] = None) -> str:
        """Validate string input with multiple constraints.

        Args:
            value: Candidate input; anything that is not a ``str`` is rejected.
            field_name: Name attached to any ``ValidationError`` raised.
            min_length: Minimum length, measured after stripping whitespace.
            max_length: Maximum length, measured after stripping whitespace.
            pattern: Optional regular expression applied with ``re.match``,
                i.e. anchored at the start of the stripped value only.
            allowed_values: Optional allow-list; ``None`` or an empty list
                disables this check.

        Returns:
            The input with leading and trailing whitespace removed.

        Raises:
            ValidationError: On wrong type, length out of bounds, pattern
                mismatch, value missing from ``allowed_values``, or a hit on
                the built-in deny-list of dangerous patterns.
        """
        if not isinstance(value, str):
            raise ValidationError(field_name, "Must be a string")

        # All subsequent checks operate on the stripped value.
        value = value.strip()

        # Length validation
        if len(value) < min_length:
            raise ValidationError(field_name, f"Must be at least {min_length} characters")
        if len(value) > max_length:
            raise ValidationError(field_name, f"Must not exceed {max_length} characters")

        # Pattern validation
        if pattern and not re.match(pattern, value):
            raise ValidationError(field_name, "Invalid format")

        # Whitelist validation
        if allowed_values and value not in allowed_values:
            raise ValidationError(field_name, f"Must be one of: {', '.join(allowed_values)}")

        # Deny-list pass; runs even when the allow-list checks above succeeded.
        if any(danger.search(value) for danger in self._DANGEROUS_PATTERNS):
            raise ValidationError(field_name, "Contains potentially dangerous content")

        return value

    def validate_integer(self, value: Any, field_name: str,
                         min_value: Optional[int] = None,
                         max_value: Optional[int] = None) -> int:
        """Validate integer input.

        Args:
            value: An ``int``, or a ``str`` that ``int()`` can parse.
            field_name: Name attached to any ``ValidationError`` raised.
            min_value: Inclusive lower bound, or ``None`` for no bound.
            max_value: Inclusive upper bound, or ``None`` for no bound.

        Returns:
            The value as an ``int``.

        Raises:
            ValidationError: If the value is a ``bool``, is neither ``int``
                nor a parseable ``str``, or lies outside the bounds.
        """
        # bool is a subclass of int, so True/False would otherwise be
        # accepted as 1/0; reject them explicitly.
        if isinstance(value, bool):
            raise ValidationError(field_name, "Must be a valid integer")

        if isinstance(value, str):
            # Handle string numbers
            try:
                value = int(value)
            except ValueError:
                raise ValidationError(field_name, "Must be a valid integer") from None
        elif not isinstance(value, int):
            raise ValidationError(field_name, "Must be a valid integer")

        if min_value is not None and value < min_value:
            raise ValidationError(field_name, f"Must be at least {min_value}")
        if max_value is not None and value > max_value:
            raise ValidationError(field_name, f"Must not exceed {max_value}")

        return value

    def validate_email(self, value: Any, field_name: str) -> str:
        """Validate email address.

        The value first goes through ``validate_string`` (5 to 254 characters
        after stripping, deny-list included) and is then matched against a
        simple ``local@domain.tld`` pattern.

        Returns:
            The stripped address converted to lower case.

        Raises:
            ValidationError: If any string check fails, the format does not
                match, or the address contains consecutive dots.
        """
        value = self.validate_string(value, field_name, min_length=5, max_length=254)

        if not re.match(self._EMAIL_PATTERN, value):
            raise ValidationError(field_name, "Invalid email format")

        # The pattern's character classes allow '.', so '..' needs its own check.
        if '..' in value:
            raise ValidationError(field_name, "Email cannot contain consecutive dots")

        return value.lower()

    def validate_sql_identifier(self, value: Any, field_name: str) -> str:
        """Validate SQL identifiers (table names, column names).

        Accepts a letter followed by up to 63 letters, digits or underscores,
        and rejects a fixed set of SQL reserved words case-insensitively.

        Returns:
            The stripped identifier, case unchanged.

        Raises:
            ValidationError: If the identifier has an invalid format or is a
                reserved word.
        """
        value = self.validate_string(value, field_name,
                                     pattern=self._SQL_IDENTIFIER_PATTERN)

        # Check against SQL reserved words
        if value.upper() in self._SQL_RESERVED:
            raise ValidationError(field_name, "Cannot use SQL reserved words")

        return value
# Example usage in API endpoint
from flask import Flask, request, jsonify
# Flask application instance; the example endpoint below is registered on it.
app = Flask(__name__)
@app.route('/api/users', methods=['POST'])
def create_user():
    """Create a user from a JSON request body after validating every field.

    Returns:
        ``(response, 201)`` with the JSON-serialised result of
        ``create_user_safely`` on success, or ``(response, 400)`` with an
        ``{'error': ...}`` payload when the body is not a JSON object or a
        field fails validation.
    """
    validator = InputValidator()
    try:
        data = request.get_json()
        # A syntactically valid body can still be null, a list or a scalar;
        # calling .get() on those would raise AttributeError and surface as
        # a 500 instead of a client error.
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        # Validate all inputs
        username = validator.validate_string(
            data.get('username'),
            'username',
            min_length=3,
            max_length=50,
            pattern=r'^[a-zA-Z0-9_]+$'
        )
        email = validator.validate_email(data.get('email'), 'email')
        age = validator.validate_integer(
            data.get('age'),
            'age',
            min_value=13,
            max_value=120
        )
        role = validator.validate_string(
            data.get('role'),
            'role',
            allowed_values=['user', 'admin', 'moderator']
        )

        # Safe to use validated inputs
        user = create_user_safely(username, email, age, role)
        return jsonify(user), 201
    except ValidationError as e:
        # str(e) is "<field>: <message>", which tells the client what to fix.
        return jsonify({'error': str(e)}), 400
Type validation ensures data matches expected types before processing. Strong typing prevents type confusion attacks where attackers exploit weak type coercion. Validate not just the presence of required fields but also their types, considering that JSON doesn't distinguish between integers and floats, and everything from query parameters arrives as strings.