Input Validation and Data Sanitization Standards
Input validation represents the first line of defense against many common attacks. Every piece of data entering the system from external sources must be validated, including user inputs, API calls, file uploads, environment variables, and data from databases or third-party services. The fundamental rule is to never trust any input, regardless of its source.
# Python Input Validation Standards
import ipaddress
import re
from datetime import datetime
from decimal import Decimal
from typing import Any, Dict, List, Optional, Union
class ValidationStandards:
    """Standard validation patterns for common input types.

    Every validator raises ``TypeError`` when the value has the wrong type
    and ``ValueError`` when a value of the right type breaks a constraint.
    """

    # Anchored, whole-value patterns keyed by field type.
    # ``re.ASCII`` on the patterns that use ``\d`` / ``\s`` keeps them from
    # matching non-ASCII digits and whitespace, which Python's Unicode-aware
    # classes would otherwise accept.
    PATTERNS = {
        'username': re.compile(r'^[a-zA-Z0-9_]{3,32}$'),
        'email': re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'),
        'phone': re.compile(r'^\+?1?\d{10,14}$', re.ASCII),
        'uuid': re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$', re.I),
        'alphanumeric': re.compile(r'^[a-zA-Z0-9]+$'),
        'safe_string': re.compile(r'^[a-zA-Z0-9\s\-_.,!?]+$', re.ASCII),
        'url_slug': re.compile(r'^[a-z0-9]+(?:-[a-z0-9]+)*$'),
    }

    # Maximum length per field type; field types not listed fall back to 255.
    MAX_LENGTHS = {
        'username': 32,
        'email': 254,
        'password': 128,
        'name': 100,
        'description': 1000,
        'url': 2048,
        'file_path': 255,
    }

    # Optional sign followed by ASCII decimal digits only. int() on its own
    # also accepts digit-group underscores ("1_000") and non-ASCII digits.
    _INTEGER_RE = re.compile(r'[+-]?[0-9]+')

    @classmethod
    def validate_string(cls, value: Any, field_type: str, required: bool = True) -> Optional[str]:
        """Validate a string against the rules registered for ``field_type``.

        Args:
            value: Candidate value; must be ``str`` or ``None``.
            field_type: Key into ``MAX_LENGTHS`` and ``PATTERNS``; also used
                as the field name in error messages.
            required: When true, ``None`` and blank strings are rejected.

        Returns:
            The value with surrounding whitespace stripped, or ``None`` when
            ``value`` is ``None`` and the field is not required.

        Raises:
            TypeError: If ``value`` is neither ``None`` nor a string.
            ValueError: If the value is missing or blank while required, is
                longer than the limit, or does not match the field pattern.
                A blank value on an optional field is still checked against
                the pattern when one is registered.
        """
        if value is None:
            if required:
                raise ValueError(f"{field_type} is required")
            return None

        if not isinstance(value, str):
            raise TypeError(f"{field_type} must be a string")

        value = value.strip()

        if not value and required:
            raise ValueError(f"{field_type} cannot be empty")

        max_length = cls.MAX_LENGTHS.get(field_type, 255)
        if len(value) > max_length:
            raise ValueError(f"{field_type} exceeds maximum length of {max_length}")

        # fullmatch() guarantees the entire value is covered by the pattern
        # even if a pattern is later registered without ^...$ anchors.
        pattern = cls.PATTERNS.get(field_type)
        if pattern is not None and not pattern.fullmatch(value):
            raise ValueError(f"{field_type} contains invalid characters or format")

        return value

    @staticmethod
    def validate_integer(value: Any, min_val: Optional[int] = None, max_val: Optional[int] = None) -> int:
        """Validate an integer, optionally enforcing an inclusive range.

        Args:
            value: An ``int``, or a string holding an optionally signed run
                of ASCII decimal digits (surrounding whitespace is ignored).
            min_val: Inclusive lower bound, or ``None`` for no lower bound.
            max_val: Inclusive upper bound, or ``None`` for no upper bound.

        Returns:
            The value as an ``int``.

        Raises:
            TypeError: If ``value`` is not an ``int`` or ``str``. ``bool`` is
                rejected even though it subclasses ``int``.
            ValueError: If a string is not a plain decimal integer, or the
                result falls outside the requested range.
        """
        # bool is a subclass of int, so it must be excluded explicitly;
        # otherwise True/False would be accepted as 1/0.
        if isinstance(value, bool) or not isinstance(value, (int, str)):
            raise TypeError("Value must be an integer or string")

        if isinstance(value, str):
            text = value.strip()
            if not ValidationStandards._INTEGER_RE.fullmatch(text):
                raise ValueError("Invalid integer format")
            try:
                int_value = int(text)
            except ValueError:
                # Interpreters that cap str->int conversion length can still
                # reject a very long run of digits here.
                raise ValueError("Invalid integer format") from None
        else:
            int_value = int(value)

        if min_val is not None and int_value < min_val:
            raise ValueError(f"Value must be at least {min_val}")
        if max_val is not None and int_value > max_val:
            raise ValueError(f"Value must be at most {max_val}")

        return int_value

    @staticmethod
    def validate_list(value: Any, allowed_values: Optional[List[Any]] = None, max_length: int = 100) -> List:
        """Validate a list's size and, optionally, its members.

        Args:
            value: Candidate value; must be a ``list``.
            allowed_values: Allow-list of permitted items. ``None`` disables
                the membership check; an empty list permits no items at all.
            max_length: Maximum number of items accepted.

        Returns:
            The same list object that was passed in.

        Raises:
            TypeError: If ``value`` is not a list.
            ValueError: If the list is too long or holds a disallowed item.
        """
        if not isinstance(value, list):
            raise TypeError("Value must be a list")

        if len(value) > max_length:
            raise ValueError(f"List exceeds maximum length of {max_length}")

        # Compare against None rather than truthiness: an empty allow-list
        # means "nothing is allowed", not "skip the check".
        if allowed_values is not None:
            for item in value:
                if item not in allowed_values:
                    raise ValueError(f"Invalid value in list: {item}")

        return value
// JavaScript Input Validation Standards
/**
 * Standard validation and sanitization helpers for common input types.
 *
 * All members are static; call them on the class itself, e.g.
 * `ValidationStandards.validateString(input, 'username')`.
 */
class ValidationStandards {
  /** Anchored, whole-value patterns keyed by field type. */
  static PATTERNS = {
    username: /^[a-zA-Z0-9_]{3,32}$/,
    email: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
    phone: /^\+?1?\d{10,14}$/,
    uuid: /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i,
    alphanumeric: /^[a-zA-Z0-9]+$/,
    safeString: /^[a-zA-Z0-9\s\-_.,!?]+$/,
    urlSlug: /^[a-z0-9]+(?:-[a-z0-9]+)*$/,
    ipv4: /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/,
    ipv6: /^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))$/
  };

  /** Maximum length per field type; unlisted field types fall back to 255. */
  static MAX_LENGTHS = {
    username: 32,
    email: 254,
    password: 128,
    name: 100,
    description: 1000,
    url: 2048,
    filePath: 255
  };

  /**
   * Validate a string against the rules registered for `fieldType`.
   *
   * @param {*} value - Candidate value; must be a string, null or undefined.
   * @param {string} fieldType - Key into MAX_LENGTHS / PATTERNS; also used
   *   as the field name in error messages.
   * @param {Object} [options]
   * @param {boolean} [options.required=true] - Reject null/undefined/blank.
   * @param {number} [options.minLength=0] - Minimum length after trimming.
   * @param {?number} [options.maxLength=null] - Overrides the per-field
   *   maximum; an explicit 0 is honoured.
   * @param {?RegExp} [options.pattern=null] - Overrides the per-field pattern.
   * @returns {?string} The trimmed value, or null when the value is
   *   null/undefined and not required.
   * @throws {TypeError} If the value is present but not a string.
   * @throws {Error} If any constraint is violated.
   */
  static validateString(value, fieldType, options = {}) {
    const { required = true, minLength = 0, maxLength = null, pattern = null } = options;

    if (value === null || value === undefined) {
      if (required) {
        throw new Error(`${fieldType} is required`);
      }
      return null;
    }

    if (typeof value !== 'string') {
      throw new TypeError(`${fieldType} must be a string`);
    }

    const trimmed = value.trim();

    if (!trimmed && required) {
      throw new Error(`${fieldType} cannot be empty`);
    }

    if (trimmed.length < minLength) {
      throw new Error(`${fieldType} must be at least ${minLength} characters`);
    }

    // Own-property lookups only: a fieldType such as "constructor" or
    // "toString" must not resolve to something inherited from Object.prototype.
    const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
    const defaultMax = hasOwn(this.MAX_LENGTHS, fieldType) ? this.MAX_LENGTHS[fieldType] : 255;
    const defaultPattern = hasOwn(this.PATTERNS, fieldType) ? this.PATTERNS[fieldType] : null;

    // ?? rather than || so that an explicit maxLength of 0 is not discarded.
    const maxLen = maxLength ?? defaultMax;
    if (trimmed.length > maxLen) {
      throw new Error(`${fieldType} exceeds maximum length of ${maxLen}`);
    }

    const patternToUse = pattern ?? defaultPattern;
    if (patternToUse) {
      // A caller-supplied regex with the g or y flag keeps state between
      // calls; reset it so every validation starts at index 0.
      patternToUse.lastIndex = 0;
      if (!patternToUse.test(trimmed)) {
        throw new Error(`${fieldType} contains invalid characters or format`);
      }
    }

    return trimmed;
  }

  /**
   * Validate a finite number, optionally enforcing integer-ness and an
   * inclusive range.
   *
   * @param {number|string} value - A number, or a non-blank numeric string.
   * @param {Object} [options]
   * @param {?number} [options.min=null] - Inclusive lower bound.
   * @param {?number} [options.max=null] - Inclusive upper bound.
   * @param {boolean} [options.integer=false] - Require an integer.
   * @returns {number} The parsed number.
   * @throws {Error} If the value is not a finite number or breaks a constraint.
   */
  static validateNumber(value, options = {}) {
    const { min = null, max = null, integer = false } = options;

    // Number() silently coerces '', '   ', null, true and [] to 0 or 1, so
    // only real numbers and non-blank strings are allowed to reach it.
    const isNumericInput =
      typeof value === 'number' || (typeof value === 'string' && value.trim() !== '');
    if (!isNumericInput) {
      throw new Error('Invalid number format');
    }

    const num = Number(value);
    // isFinite rejects NaN as well as +/-Infinity.
    if (!Number.isFinite(num)) {
      throw new Error('Invalid number format');
    }

    if (integer && !Number.isInteger(num)) {
      throw new Error('Value must be an integer');
    }

    if (min !== null && num < min) {
      throw new Error(`Value must be at least ${min}`);
    }

    if (max !== null && num > max) {
      throw new Error(`Value must be at most ${max}`);
    }

    return num;
  }

  /**
   * Make a string safe to insert into HTML.
   *
   * When a global DOMPurify is available the input is sanitized against a
   * small allow-list of tags and attributes. Otherwise every HTML-significant
   * character is escaped, so no markup survives.
   *
   * @param {*} html - Untrusted input; null/undefined are treated as ''.
   * @returns {string} Sanitized (DOMPurify) or fully escaped (fallback) text.
   */
  static sanitizeHTML(html) {
    if (typeof DOMPurify !== 'undefined') {
      return DOMPurify.sanitize(html, {
        ALLOWED_TAGS: ['b', 'i', 'em', 'strong', 'a', 'p', 'br'],
        ALLOWED_ATTR: ['href'],
        ALLOW_DATA_ATTR: false
      });
    }

    // Fallback: escape in a single pass so the "&" of an entity produced for
    // one character can never be escaped a second time.
    const entities = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;',
      "'": '&#x27;',
      '/': '&#x2F;'
    };
    return String(html ?? '').replace(/[&<>"'\/]/g, (ch) => entities[ch]);
  }
}