Input Validation and Sanitization
Input Validation and Sanitization
Password input validation represents a critical security boundary often implemented incorrectly. While passwords should accept diverse character sets to maximize entropy, certain validations remain necessary for security and system stability. The challenge lies in implementing these validations without reducing password space or creating vulnerabilities.
Length limits prevent both weak passwords and denial-of-service attacks. Minimum lengths should enforce reasonable security—12 characters for standard applications, 16 for high-security contexts. Maximum lengths prevent memory exhaustion attacks but should be generous—128 to 256 characters accommodate passphrases while preventing abuse. Rejecting passwords over 1KB protects against resource exhaustion without limiting legitimate use.
import re
import unicodedata
from typing import Tuple, Optional
class PasswordValidator:
    """Validate candidate passwords against length, encoding and pattern rules.

    Every check returns a ``(ok, message)`` tuple: ``(True, None)`` when the
    password passes, otherwise ``(False, reason)`` for the first rule that
    failed. Nothing is raised for a rejected password.
    """

    # Upper bound on the UTF-8 encoded size, applied in addition to
    # ``max_length`` (which counts characters, not bytes).
    MAX_BYTES = 1024

    # Patterns are compiled once at class creation instead of on every call.
    _SEQUENTIAL_DIGITS = re.compile(
        r'(012|123|234|345|456|567|678|789|890)')
    _SEQUENTIAL_LETTERS = re.compile(
        r'(abc|bcd|cde|def|efg|fgh|ghi|hij|ijk|jkl|klm|lmn|'
        r'mno|nop|opq|pqr|qrs|rst|stu|tuv|uvw|vwx|wxy|xyz)')
    _REPEATED_CHARS = re.compile(r'(.)\1{3,}')

    # Substrings rejected case-insensitively by ``_check_patterns``.
    _COMMON_PATTERNS = ('qwerty', 'asdfgh', 'zxcvbn', '123456', 'password')

    # (description, per-character predicate) for each character class.
    # The predicates are Unicode-aware, so non-ASCII letters count as letters,
    # and every non-alphanumeric character (including '-', '_', '+', space)
    # counts as a special character rather than only a fixed punctuation list.
    _CHARACTER_CLASSES = (
        ("lowercase letter", str.islower),
        ("uppercase letter", str.isupper),
        ("number", str.isdecimal),
        ("special character", lambda ch: not ch.isalnum()),
    )

    def __init__(self, min_length=12, max_length=128, require_complexity=True):
        """Store the validation policy.

        Args:
            min_length: Minimum number of characters accepted.
            max_length: Maximum number of characters accepted.
            require_complexity: When true, ``validate`` also enforces the
                character-class rule in ``_check_complexity``.
        """
        self.min_length = min_length
        self.max_length = max_length
        self.require_complexity = require_complexity

    def validate(self, password: str) -> Tuple[bool, Optional[str]]:
        """Run every configured check and report the first failure.

        Checks run in this order: non-empty, minimum length, maximum length,
        encoded byte size, NFKC normalization, null bytes, complexity (only
        if enabled), weak patterns.

        Args:
            password: The candidate password.

        Returns:
            ``(True, None)`` if all checks pass, else ``(False, message)``.
        """
        if not password:
            return False, "Password cannot be empty"

        length = len(password)
        if length < self.min_length:
            return False, f"Password must be at least {self.min_length} characters"
        if length > self.max_length:
            return False, f"Password cannot exceed {self.max_length} characters"

        # Multi-byte characters can make the encoded form far larger than the
        # character count, so the byte size is capped separately.
        if len(password.encode('utf-8')) > self.MAX_BYTES:
            return False, "Password too long"

        # Reject input that is not already in NFKC form instead of silently
        # rewriting what the user typed.
        if unicodedata.normalize('NFKC', password) != password:
            return False, "Password contains ambiguous characters"

        if '\x00' in password:
            return False, "Password contains invalid characters"

        if self.require_complexity:
            complexity_ok, complexity_msg = self._check_complexity(password)
            if not complexity_ok:
                return False, complexity_msg

        pattern_ok, pattern_msg = self._check_patterns(password)
        if not pattern_ok:
            return False, pattern_msg

        return True, None

    def _check_complexity(self, password: str) -> Tuple[bool, Optional[str]]:
        """Require at least three of the four character classes.

        Args:
            password: The candidate password.

        Returns:
            ``(True, None)`` when at most one class is absent, otherwise
            ``(False, message)`` listing every absent class.
        """
        missing = [
            description
            for description, matches in self._CHARACTER_CLASSES
            if not any(matches(ch) for ch in password)
        ]

        # Four classes exist, so more than one missing means fewer than three
        # are present.
        if len(missing) > 1:
            return False, f"Password must contain at least: {', '.join(missing)}"
        return True, None

    def _check_patterns(self, password: str) -> Tuple[bool, Optional[str]]:
        """Reject passwords containing common weak patterns.

        Looks for, in order: three ascending digits, three ascending letters
        (case-insensitive), any character repeated four or more times in a
        row, and a short list of well-known substrings (case-insensitive).

        Args:
            password: The candidate password.

        Returns:
            ``(True, None)`` when no pattern is found, otherwise
            ``(False, message)`` for the first pattern found.
        """
        password_lower = password.lower()

        if self._SEQUENTIAL_DIGITS.search(password):
            return False, "Password contains sequential numbers"

        if self._SEQUENTIAL_LETTERS.search(password_lower):
            return False, "Password contains sequential letters"

        # Case-sensitive on purpose: the repeat check runs on the raw input.
        if self._REPEATED_CHARS.search(password):
            return False, "Password contains too many repeated characters"

        for pattern in self._COMMON_PATTERNS:
            if pattern in password_lower:
                return False, f"Password contains common pattern: {pattern}"

        return True, None
# Advanced validation with context
class ContextualPasswordValidator(PasswordValidator):
    """Password validation that also considers who the password is for."""

    def validate_with_context(self, password: str, username: str,
                              email: Optional[str] = None,
                              old_password: Optional[str] = None) -> Tuple[bool, Optional[str]]:
        """Validate a password against the base rules and the user's details.

        Runs ``validate`` first, then rejects passwords that contain the
        username, contain a dot-separated piece of the email local part longer
        than three characters, or score above 0.8 in ``_calculate_similarity``
        against the old password. Comparisons are case-insensitive.

        Args:
            password: The candidate password.
            username: The account's username; skipped when empty or ``None``.
            email: Optional email address; skipped when empty or ``None``.
            old_password: Optional previous password; skipped when empty or
                ``None``.

        Returns:
            ``(True, None)`` if every check passes, else ``(False, message)``
            for the first check that failed.
        """
        valid, msg = self.validate(password)
        if not valid:
            return False, msg

        password_lower = password.lower()

        # The empty string is a substring of every string, so an empty
        # username would otherwise reject every password.
        if username and username.lower() in password_lower:
            return False, "Password cannot contain username"

        if email:
            local_part = email.lower().split('@')[0]
            for part in local_part.split('.'):
                # Very short pieces (initials, "dev", ...) are ignored.
                if len(part) > 3 and part in password_lower:
                    return False, "Password cannot contain email address parts"

        if old_password:
            if self._calculate_similarity(password, old_password) > 0.8:
                return False, "New password too similar to old password"

        return True, None

    def _calculate_similarity(self, str1: str, str2: str) -> float:
        """Return the Jaccard similarity of the two strings' character sets.

        Both strings are lowercased and reduced to the set of distinct
        characters they contain, so character order and repetition are
        ignored.

        Args:
            str1: First string.
            str2: Second string.

        Returns:
            Shared distinct characters divided by total distinct characters,
            between 0.0 and 1.0; 0.0 when both strings are empty.
        """
        chars1 = set(str1.lower())
        chars2 = set(str2.lower())
        union = chars1 | chars2
        if not union:
            return 0.0
        return len(chars1 & chars2) / len(union)
Character encoding issues create subtle vulnerabilities. Always handle passwords as UTF-8 encoded strings, properly supporting international characters. Normalize Unicode input to a single form (for example, NFKC) so that the same password entered on different devices or keyboards produces the same byte sequence, and to prevent homograph attacks where visually similar characters bypass security checks. However, avoid modifying passwords beyond normalization—transformations like lowercasing or trimming spaces can confuse users and reduce entropy.