Input Validation and Sanitization
Input validation forms the first line of defense against many common attacks. In Python, every piece of external input—whether from web forms, API calls, command-line arguments, or file uploads—must be validated before use. Python's dynamic typing means that type confusion attacks are possible if input types aren't explicitly checked. Always validate both the type and content of input data.
import re
from typing import Optional
import ipaddress
from email_validator import validate_email, EmailNotValidError
class InputValidator:
    """Static helpers that validate or sanitize untrusted input.

    The ``validate_*`` methods return the accepted (possibly normalized)
    value and raise ``ValueError`` when the input is rejected.
    ``sanitize_html_input`` returns a cleaned copy of its argument.
    """

    # Compiled once. Used with ``fullmatch`` so the *entire* string must
    # conform; ``^...$`` with ``re.match`` would tolerate a trailing newline.
    _USERNAME_PATTERN = re.compile(r'[a-zA-Z0-9_]+')
    _USERNAME_MIN_LEN = 3
    _USERNAME_MAX_LEN = 20
    _SQL_KEYWORDS = ('DROP', 'DELETE', 'INSERT', 'UPDATE', 'SELECT', 'UNION')

    @staticmethod
    def validate_username(username: str) -> str:
        """Validate a username against strict rules.

        Args:
            username: Candidate username from an untrusted source.

        Returns:
            The username, unchanged, when it passes every check.

        Raises:
            ValueError: If ``username`` is not a ``str``, is shorter than 3 or
                longer than 20 characters, contains anything other than ASCII
                letters, digits and underscores, or contains one of the
                blacklisted SQL keywords (case-insensitive substring match).
        """
        if not isinstance(username, str):
            raise ValueError("Username must be a string")

        min_len = InputValidator._USERNAME_MIN_LEN
        max_len = InputValidator._USERNAME_MAX_LEN
        if not min_len <= len(username) <= max_len:
            raise ValueError("Username must be between 3 and 20 characters")

        # Only allow alphanumeric characters and underscores. fullmatch()
        # rejects "ab\n", which '^...$' accepted and which then shrank below
        # the minimum length once stripped.
        if not InputValidator._USERNAME_PATTERN.fullmatch(username):
            raise ValueError("Username can only contain letters, numbers, and underscores")

        # NOTE(review): defense-in-depth only. The character whitelist above
        # already excludes quotes, spaces and punctuation, and this substring
        # blacklist also rejects harmless names such as "selected". The real
        # protection against SQL injection is parameterized queries.
        upper_name = username.upper()
        if any(keyword in upper_name for keyword in InputValidator._SQL_KEYWORDS):
            raise ValueError("Username contains invalid keywords")

        # No strip() needed: the pattern guarantees there is no whitespace.
        return username

    @staticmethod
    def validate_email_address(email: str) -> str:
        """Validate an email address with the ``email_validator`` library.

        Args:
            email: Candidate email address from an untrusted source.

        Returns:
            The normalized form of the address as reported by the library.

        Raises:
            ValueError: If ``email`` is not a ``str`` or the library rejects
                it; the library's error is chained as ``__cause__``.
        """
        if not isinstance(email, str):
            raise ValueError("Invalid email: Email must be a string")

        try:
            # NOTE(review): depending on the installed email_validator version
            # and its defaults this may perform a DNS deliverability lookup —
            # confirm whether that is wanted on this code path.
            result = validate_email(email)
        except EmailNotValidError as e:
            raise ValueError(f"Invalid email: {str(e)}") from e

        # email_validator 2.x exposes the result as ``normalized`` and
        # deprecates ``email``; fall back for older releases.
        normalized = getattr(result, "normalized", None)
        if normalized is not None:
            return normalized
        return result.email

    @staticmethod
    def validate_ip_address(ip: str) -> str:
        """Validate an IPv4 or IPv6 address and reject private ranges.

        Args:
            ip: Candidate IP address in textual form.

        Returns:
            The canonical string form produced by ``ipaddress``.

        Raises:
            ValueError: "Invalid IP address format" if ``ip`` is not a string
                or cannot be parsed; "Private IP addresses not allowed" if it
                parses but ``is_private`` is true.
        """
        # ipaddress.ip_address() also accepts integers (and therefore bools),
        # so enforce the declared type explicitly.
        if not isinstance(ip, str):
            raise ValueError("Invalid IP address format")

        try:
            # This validates both IPv4 and IPv6
            ip_obj = ipaddress.ip_address(ip)
        except ValueError:
            raise ValueError("Invalid IP address format") from None

        # Kept outside the try block: raising it inside meant the handler
        # above swallowed it and reported a misleading "format" error.
        if ip_obj.is_private:
            raise ValueError("Private IP addresses not allowed")
        return str(ip_obj)

    @staticmethod
    def sanitize_html_input(html: str) -> str:
        """Sanitize HTML input to prevent XSS.

        Only a small set of inline formatting tags is kept and no attributes
        are allowed on any tag.

        Args:
            html: Untrusted HTML fragment.

        Returns:
            The cleaned fragment returned by ``bleach.clean``.
        """
        # Imported lazily so the other validators work without bleach installed.
        import bleach

        # A set rather than a list: bleach 6 documents ``tags`` as a set, and
        # membership checks behave the same on earlier releases.
        allowed_tags = {'p', 'br', 'strong', 'em', 'u', 'i', 'b'}
        allowed_attributes = {}

        # strip=True asks bleach to remove disallowed markup instead of
        # escaping it.
        return bleach.clean(
            html,
            tags=allowed_tags,
            attributes=allowed_attributes,
            strip=True,
        )