Comprehensive Validation Strategies
Effective input validation combines multiple techniques to ensure data conforms to expected patterns. Whitelist validation, where you explicitly define acceptable input, provides stronger security than blacklist approaches:
import re
from datetime import datetime
class InputValidator:
    """Validate untrusted string input against explicit whitelists.

    Every validator raises ``ValueError`` when the input is rejected and
    returns the accepted (possibly cleaned) string otherwise. These checks
    are defense in depth only: they complement parameterized queries and
    must never be used as a replacement for them.
    """

    # Define validation patterns
    PATTERNS = {
        'username': re.compile(r'^[a-zA-Z0-9_]{3,20}$'),
        'email': re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'),
        'phone': re.compile(r'^\+?1?\d{9,15}$'),
        'uuid': re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'),
        'alphanumeric': re.compile(r'^[a-zA-Z0-9\s]+$'),
        'numeric_id': re.compile(r'^\d{1,10}$')
    }

    # SQL keywords rejected when they form a whole token of a username.
    _SQL_KEYWORDS = frozenset(
        ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'DROP', 'UNION', 'WHERE', 'OR']
    )

    # Usernames are tokenized on the only non-letter characters the
    # username whitelist permits: underscores and digits.
    _USERNAME_TOKEN_SEPARATOR = re.compile(r'[_0-9]+')

    # Anything outside letters, digits, whitespace and basic punctuation is
    # stripped from search terms.
    _SEARCH_DISALLOWED = re.compile(r'[^a-zA-Z0-9\s\-.,!?]')

    # Known SQL injection signatures, compiled once instead of on every call.
    _INJECTION_PATTERNS = (
        # 'OR' attacks
        re.compile(r"('\s*OR\s*'|\"\s*OR\s*\")", re.IGNORECASE),
        # DROP TABLE attempts
        re.compile(r"(;\s*DROP\s+TABLE)", re.IGNORECASE),
        # UNION attacks, including the UNION ALL variant
        re.compile(r"(UNION\s+(?:ALL\s+)?SELECT)", re.IGNORECASE),
        # SQL comments; DOTALL so a comment spanning several lines is caught
        re.compile(r"(/\*.*\*/)", re.IGNORECASE | re.DOTALL),
        # Line comments
        re.compile(r"(--\s*$)", re.IGNORECASE),
        # SQL Server procedures; \b avoids flagging words like "wasp_nest"
        re.compile(r"\b(xp_|sp_)", re.IGNORECASE),
        # Execute statements
        re.compile(r"\b(EXEC\s*\(|EXECUTE\s*\()", re.IGNORECASE),
    )

    @classmethod
    def validate_username(cls, username: str) -> str:
        """Validate username format and reject SQL keywords.

        Args:
            username: Candidate username from an untrusted source.

        Returns:
            The username unchanged when it is acceptable.

        Raises:
            ValueError: If the value is not a string, does not consist of
                3-20 letters, digits or underscores, or contains an SQL
                keyword as a whole underscore/digit-delimited token.
        """
        if not isinstance(username, str):
            raise ValueError("Username must be a string")

        # fullmatch() rather than match(): with match(), the pattern's `$`
        # also matches just before a trailing newline, letting "name\n" pass.
        if not cls.PATTERNS['username'].fullmatch(username):
            raise ValueError("Username must be 3-20 characters, alphanumeric and underscore only")

        # Additional check for SQL keywords (defense in depth). Whole tokens
        # are compared so ordinary names that merely contain a keyword
        # ("gordon", "reunion") are not rejected.
        tokens = cls._USERNAME_TOKEN_SEPARATOR.split(username.upper())
        if not cls._SQL_KEYWORDS.isdisjoint(tokens):
            raise ValueError("Username contains invalid keywords")

        return username

    @classmethod
    def validate_search_term(cls, search_term: str, max_length: int = 100) -> str:
        """Validate search input with length and content restrictions.

        Args:
            search_term: Raw search text from an untrusted source.
            max_length: Maximum permitted length of the raw text.

        Returns:
            The search term with every character other than letters, digits,
            whitespace and ``- . , ! ?`` removed.

        Raises:
            ValueError: If the value is not a string, is longer than
                ``max_length``, or matches a known SQL injection pattern.
        """
        if not isinstance(search_term, str):
            raise ValueError("Search term must be a string")

        if len(search_term) > max_length:
            raise ValueError(f"Search term exceeds maximum length of {max_length}")

        # Check for SQL injection patterns on the raw input, before cleaning
        # removes the quotes and semicolons the signatures rely on.
        if any(pattern.search(search_term) for pattern in cls._INJECTION_PATTERNS):
            raise ValueError("Search term contains suspicious patterns")

        # Remove potentially dangerous characters while preserving search
        # functionality: letters, numbers, spaces, and basic punctuation stay.
        return cls._SEARCH_DISALLOWED.sub('', search_term)