Recovery and System Restoration
Recovery procedures restore normal operations while ensuring threats have been completely eradicated. Effective recovery balances speed with security, gradually bringing systems back online while monitoring for reinfection. Understanding recovery strategies for different incident types ensures rapid restoration without compromising security.
Implement systematic recovery procedures:
#!/bin/bash
# System Recovery and Restoration Script
#
# Usage: <script> INCIDENT_ID
#
# INCIDENT_ID is embedded in the recovery log file name, so it is required
# and must not contain a path separator.
set -euo pipefail

# Configuration
if [[ -z "${1:-}" ]]; then
  # Without this check, `set -u` aborts with a bare "unbound variable" error.
  printf 'Usage: %s INCIDENT_ID\n' "${0##*/}" >&2
  exit 2
fi
INCIDENT_ID="$1"
if [[ "$INCIDENT_ID" == */* ]]; then
  # A slash would let the ID redirect the log file outside /var/log.
  printf 'ERROR: INCIDENT_ID must not contain "/": %s\n' "$INCIDENT_ID" >&2
  exit 2
fi
RECOVERY_LOG="/var/log/recovery_${INCIDENT_ID}.log"
BACKUP_LOCATION="/backups"
VALIDATION_TESTS="/opt/recovery/validation_tests"
# Logging function
#
# Prefixes the message with a "[YYYY-MM-DD HH:MM:SS]" timestamp, prints it to
# stdout and appends the same line to $RECOVERY_LOG.
# Globals:   RECOVERY_LOG (read) - file the line is appended to
# Arguments: $* - message words, joined into a single line
log() {
  printf '[%s] %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$*" \
    | tee -a "$RECOVERY_LOG"
}
# Validation function
#
# Runs the executable $VALIDATION_TESTS/<test_suite> with the system name as
# its only argument and logs the outcome.
# Globals:   VALIDATION_TESTS (read) - directory holding the test suites
# Arguments: $1 - system name
#            $2 - test suite file name inside $VALIDATION_TESTS
# Returns:   0 if the suite passed, or if no executable suite exists (only a
#            warning is logged in that case); 1 if the suite failed
validate_system() {
  local system="$1"
  local test_suite="$2"

  log "Running validation tests on $system"

  local suite_path="$VALIDATION_TESTS/$test_suite"

  # A missing or non-executable suite is treated as a pass with a warning.
  if [[ ! -x "$suite_path" ]]; then
    log "WARNING: No validation test suite found for $test_suite"
    return 0
  fi

  # Run test suite
  if ! "$suite_path" "$system"; then
    log "ERROR: Validation failed for $system"
    return 1
  fi

  log "Validation passed for $system"
  return 0
}
# Restore from backup
#
# Locates the backup for a system, verifies its checksum manifest when one is
# present, and dispatches to the restore routine matching the system name.
# Globals:   BACKUP_LOCATION (read) - root directory of all backups
# Arguments: $1 - system name; the prefix (web-, db-, app-) selects the routine
#            $2 - backup date, i.e. the sub-directory name under the system
# Returns:   1 if the backup directory is missing or fails verification;
#            otherwise the status of the selected restore routine
restore_system() {
  local system="$1"
  local backup_date="$2"
  local backup_path="$BACKUP_LOCATION/$system/$backup_date"

  log "Starting restoration of $system from backup dated $backup_date"

  # Verify backup integrity
  if [[ ! -d "$backup_path" ]]; then
    log "ERROR: Backup not found at $backup_path"
    return 1
  fi

  # Verify backup hash
  if [[ -f "$backup_path/backup.sha256" ]]; then
    # sha256sum resolves relative manifest entries against the current
    # directory, so run the check from inside the backup. The subshell keeps
    # the caller's working directory untouched; absolute entries still work.
    if ! (cd "$backup_path" && sha256sum -c backup.sha256); then
      log "ERROR: Backup integrity check failed"
      return 1
    fi
  else
    log "WARNING: No backup.sha256 in $backup_path; integrity not verified"
  fi

  # Perform restoration based on system type
  case "$system" in
    web-*)
      restore_web_server "$system" "$backup_path"
      ;;
    db-*)
      restore_database_server "$system" "$backup_path"
      ;;
    app-*)
      restore_application_server "$system" "$backup_path"
      ;;
    *)
      restore_generic_system "$system" "$backup_path"
      ;;
  esac
}
# Web server restoration
#
# Restores web content and server configuration from a backup, applies the
# "web" hardening profile, restarts the web server and validates the result.
#
# Every critical step is checked explicitly: this function is invoked from an
# `if` condition (via restore_system), where `set -e` does not abort on a
# failing command, so an unchecked failure would otherwise be ignored.
# Arguments: $1 - server name (used for logging and validation)
#            $2 - backup directory laid out like the filesystem root
# Returns:   non-zero if content/config restore, hardening or service start
#            fails; otherwise the status of validate_system
restore_web_server() {
  local server="$1"
  local backup_path="$2"
  local conf_dir

  log "Restoring web server $server"

  # Stop web services (best-effort: normally only one of them is installed)
  systemctl stop nginx apache2 2>/dev/null || true

  # Restore web content
  if ! rsync -av --delete "$backup_path/var/www/" /var/www/; then
    log "ERROR: Failed to restore web content for $server"
    return 1
  fi

  # Restore configuration: copy only what the backup contains, but treat a
  # failed copy of an existing directory as an error instead of hiding it.
  for conf_dir in nginx apache2; do
    [[ -d "$backup_path/etc/$conf_dir" ]] || continue
    if ! cp -a "$backup_path/etc/$conf_dir" /etc/; then
      log "ERROR: Failed to restore /etc/$conf_dir for $server"
      return 1
    fi
  done

  # Update security configurations
  if ! apply_security_hardening "web"; then
    log "ERROR: Security hardening failed for $server"
    return 1
  fi

  # Start services: try nginx first, fall back to apache2
  if ! systemctl start nginx 2>/dev/null && ! systemctl start apache2; then
    log "ERROR: Could not start nginx or apache2 on $server"
    return 1
  fi

  # Validate
  validate_system "$server" "web_validation"
}
# Replace the contents of data directory $2 with those of $1, owned by $3.
_replace_data_dir() {
  local src="$1"
  local dst="$2"
  local owner="$3"

  mkdir -p "$dst" || return 1
  # `find -mindepth 1 -delete` also removes dotfiles, which a `*` glob skips.
  find "${dst:?}" -mindepth 1 -delete || return 1
  # The trailing "/." copies hidden entries as well.
  cp -a "$src/." "$dst/" || return 1
  chown -R "$owner" "$dst"
}

# Database restoration
#
# Restores MySQL or PostgreSQL data files from a backup, restarts the service
# and validates the result. For MySQL the root password is reset during a
# temporary --skip-grant-tables instance.
#
# Failures are checked explicitly because this function is reached through an
# `if` condition, where `set -e` does not abort on a failing command.
# Globals:   MYSQL_NEW_ROOT_PASSWORD (read) - new MySQL root password;
#            required when the backup contains MySQL data
# Arguments: $1 - server name (used for logging and validation)
#            $2 - backup directory laid out like the filesystem root
# Returns:   non-zero if the backup holds no database data, the password is
#            missing, or any restore step fails; otherwise the status of
#            validate_system
# NOTE(review): unlike restore_web_server, this function does not call
# apply_security_hardening "database" - confirm whether that is intended.
restore_database_server() {
  local server="$1"
  local backup_path="$2"
  local mysql_src="$backup_path/var/lib/mysql"
  local pg_src="$backup_path/var/lib/postgresql"
  local engine=""

  log "Restoring database server $server"

  # Decide what to restore before touching anything on the live system.
  if [[ -d "$mysql_src" ]]; then
    engine="mysql"
  elif [[ -d "$pg_src" ]]; then
    engine="postgresql"
  else
    log "ERROR: No MySQL or PostgreSQL data found in $backup_path"
    return 1
  fi

  local new_password="${MYSQL_NEW_ROOT_PASSWORD:-}"
  if [[ "$engine" == "mysql" && -z "$new_password" ]]; then
    # Never fall back to a password embedded in the script.
    log "ERROR: MYSQL_NEW_ROOT_PASSWORD must be set to restore MySQL on $server"
    return 1
  fi

  # Stop database services (best-effort: normally only one is installed)
  systemctl stop mysql postgresql 2>/dev/null || true

  if [[ "$engine" == "mysql" ]]; then
    # MySQL restoration
    if ! _replace_data_dir "$mysql_src" /var/lib/mysql mysql:mysql; then
      log "ERROR: Failed to restore MySQL data files on $server"
      return 1
    fi

    # Start MySQL without grant checks to reset credentials. Networking is
    # disabled so the unauthenticated instance is reachable only locally.
    mysqld_safe --skip-grant-tables --skip-networking &
    local safe_pid=$!

    # Poll for readiness instead of relying on a fixed sleep.
    local ready=0
    local attempt
    for attempt in {1..30}; do
      if mysqladmin ping >/dev/null 2>&1; then
        ready=1
        break
      fi
      sleep 1
    done
    if ((ready == 0)); then
      log "ERROR: MySQL did not become ready on $server"
      return 1
    fi

    # Reset passwords and privileges. The statement is sent on stdin so the
    # password never appears in the process list; backslashes and single
    # quotes are escaped for the SQL string literal.
    local sql_password=${new_password//\\/\\\\}
    sql_password=${sql_password//\'/\'\'}
    if ! mysql <<SQL
FLUSH PRIVILEGES;
ALTER USER 'root'@'localhost' IDENTIFIED BY '${sql_password}';
SQL
    then
      log "ERROR: Failed to reset MySQL root password on $server"
      return 1
    fi

    # Stop the temporary instance and start normally
    if ! mysqladmin shutdown; then
      log "ERROR: Failed to shut down temporary MySQL instance on $server"
      return 1
    fi
    # Reap mysqld_safe so the service does not start while it is exiting.
    wait "$safe_pid" 2>/dev/null || true
    if ! systemctl start mysql; then
      log "ERROR: Failed to start mysql on $server"
      return 1
    fi
  else
    # PostgreSQL restoration
    if ! _replace_data_dir "$pg_src" /var/lib/postgresql postgres:postgres; then
      log "ERROR: Failed to restore PostgreSQL data files on $server"
      return 1
    fi
    if ! systemctl start postgresql; then
      log "ERROR: Failed to start postgresql on $server"
      return 1
    fi
  fi

  # Validate
  validate_system "$server" "database_validation"
}
# Application hardening
#
# Applies package updates, resets the ufw firewall to a default-deny policy
# with the ports for the given server type, writes kernel hardening
# parameters and forces a password change for regular user accounts.
# Globals:   RECOVERY_MGMT_SUBNET (read, optional) - subnet allowed to reach
#              SSH and database ports; defaults to 10.0.0.0/24
#            RECOVERY_SYSCTL_CONF (read, optional) - sysctl file to write;
#              defaults to /etc/sysctl.d/99-security.conf
#            RECOVERY_PASSWD_FILE (read, optional) - passwd-format file listing
#              the accounts; defaults to /etc/passwd
# Arguments: $1 - server type: "web" or "database" (other values open no
#                 service ports and log a warning)
# Returns:   1 immediately if the firewall cannot be configured or the sysctl
#            file cannot be written; 1 after finishing the remaining steps if
#            package updates or `sysctl -p` failed; 0 otherwise
apply_security_hardening() {
  local server_type="$1"
  local mgmt_subnet="${RECOVERY_MGMT_SUBNET:-10.0.0.0/24}"
  local sysctl_conf="${RECOVERY_SYSCTL_CONF:-/etc/sysctl.d/99-security.conf}"
  local passwd_file="${RECOVERY_PASSWD_FILE:-/etc/passwd}"
  local rc=0

  log "Applying security hardening for $server_type"

  # Common hardening
  # Update system. Non-interactive so a package prompt cannot hang recovery.
  # A failure is recorded but does not stop the firewall from being applied.
  if ! { DEBIAN_FRONTEND=noninteractive apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get upgrade -y; }; then
    log "WARNING: Package update failed; continuing with remaining hardening"
    rc=1
  fi

  # Configure firewall
  if ! { ufw --force reset \
    && ufw default deny incoming \
    && ufw default allow outgoing; }; then
    log "ERROR: Failed to reset firewall policy"
    return 1
  fi

  case "$server_type" in
    web)
      ufw allow 80/tcp || return 1
      ufw allow 443/tcp || return 1
      ;;
    database)
      ufw allow from "$mgmt_subnet" to any port 3306 || return 1
      ufw allow from "$mgmt_subnet" to any port 5432 || return 1
      ;;
    *)
      log "WARNING: Unknown server type '$server_type'; no service ports opened"
      ;;
  esac

  ufw allow from "$mgmt_subnet" to any port 22 || return 1
  if ! ufw --force enable; then
    log "ERROR: Failed to enable firewall"
    return 1
  fi

  # Update security configurations
  # Kernel parameters
  if ! cat > "$sysctl_conf" << 'EOF'
net.ipv4.tcp_syncookies = 1
net.ipv4.conf.all.rp_filter = 1
net.ipv4.conf.default.rp_filter = 1
net.ipv4.conf.all.accept_redirects = 0
net.ipv4.conf.default.accept_redirects = 0
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0
net.ipv4.conf.all.accept_source_route = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.icmp_echo_ignore_broadcasts = 1
net.ipv4.icmp_ignore_bogus_error_responses = 1
kernel.randomize_va_space = 2
fs.suid_dumpable = 0
EOF
  then
    log "ERROR: Could not write $sysctl_conf"
    return 1
  fi
  if ! sysctl -p "$sysctl_conf"; then
    log "WARNING: Could not load kernel parameters from $sysctl_conf"
    rc=1
  fi

  # Reset all passwords
  log "Forcing password reset for all users"
  local user uid
  while IFS=: read -r user _ uid _; do
    [[ "$uid" =~ ^[0-9]+$ ]] || continue
    # Regular accounts only: UID >= 1000, excluding "nobody" (65534).
    ((10#$uid >= 1000 && 10#$uid != 65534)) || continue
    chage -d 0 "$user" || log "WARNING: Could not expire password for $user"
  done < "$passwd_file"

  return "$rc"
}
# Gradual service restoration
#
# Restores the given systems one at a time from their "latest" backup. After
# each successful restore it waits for a monitoring window and then runs
# check_reinfection; a system that fails the check is isolated.
# Globals:   RECOVERY_MONITOR_SECONDS (read, optional) - monitoring window
#            per system in seconds; defaults to 1800 (30 minutes)
# Arguments: $@ - system names, in restoration order
# Outputs:   logs a final summary of restored and failed systems; each system
#            appears in exactly one of the two lists
gradual_restoration() {
  local restored=()
  local failed=()
  local service
  local monitor_seconds="${RECOVERY_MONITOR_SECONDS:-1800}"

  log "Starting gradual service restoration"

  for service in "$@"; do
    log "Restoring service: $service"

    # Restore service
    if ! restore_system "$service" "latest"; then
      log "ERROR: Restoration failed for $service"
      failed+=("$service")
      continue
    fi

    # Monitor before trusting the restored system
    log "Monitoring $service for stability"
    sleep "$monitor_seconds"

    # check_reinfection returns 0 when the system is clean and 1 when an
    # indicator of compromise was found.
    if check_reinfection "$service"; then
      log "Service $service appears stable"
      restored+=("$service")
    else
      log "WARNING: Possible reinfection detected on $service"
      isolate_system "$service"
      failed+=("$service")
    fi
  done

  # Report results. The "-" default keeps empty arrays safe under `set -u`
  # on older bash versions.
  log "Restoration complete"
  log "Successfully restored: ${restored[*]-}"
  log "Failed to restore: ${failed[*]-}"
}

# Check for reinfection
#
# Looks for indicators of compromise: known-bad file hashes, connections on
# suspicious ports and suspicious process command lines.
# Globals:   RECOVERY_IOC_HASHES (read, optional) - file with one SHA-256
#            hash per line; defaults to /opt/recovery/ioc_hashes.txt
# Arguments: $1 - system name (used in log messages only)
# Outputs:   matching connection/process lines on stdout; findings via log
# Returns:   0 if no indicator was found, 1 if any indicator was found
check_reinfection() {
  local system="$1"
  local ioc_file="${RECOVERY_IOC_HASHES:-/opt/recovery/ioc_hashes.txt}"
  local connections processes matches

  # Check for IOCs
  # File hashes: hash the filesystem once and match all IOCs in one pass.
  if [[ -f "$ioc_file" ]]; then
    # `|| true` keeps a non-zero find (unreadable or vanished files) from
    # masking a grep match under `pipefail`. Blank IOC lines are dropped
    # because an empty pattern would match every file.
    if { find /usr /var /home -type f -exec sha256sum {} + 2>/dev/null || true; } \
      | grep -qiF -f <(grep -v '^[[:space:]]*$' "$ioc_file"); then
      log "ERROR: Known malicious file detected on $system"
      return 1
    fi
  fi

  # Network connections: the port must end the address, so :44441 is not
  # mistaken for :4444.
  if ! connections=$(ss -tn 2>/dev/null); then
    log "WARNING: Could not list network connections on $system"
    connections=""
  fi
  matches=$(grep -E ':(4444|5555|6666|31337)([[:space:]]|$)' <<< "$connections") \
    || matches=""
  if [[ -n "$matches" ]]; then
    printf '%s\n' "$matches"
    log "ERROR: Suspicious network connection detected on $system"
    return 1
  fi

  # Process checks: capture the listing first so the grep used for matching
  # is not itself in the process list (its pattern would match itself).
  if ! processes=$(ps aux 2>/dev/null); then
    log "WARNING: Could not list processes on $system"
    processes=""
  fi
  matches=$(grep -E '(nc -l|/dev/tcp/|curl.*\|.*sh)' <<< "$processes") \
    || matches=""
  if [[ -n "$matches" ]]; then
    printf '%s\n' "$matches"
    log "ERROR: Suspicious process detected on $system"
    return 1
  fi

  return 0
}
# Main recovery workflow
#
# Runs the four recovery phases in order: preparation, restoration,
# validation and monitoring. Each phase is announced through log.
# Globals: INCIDENT_ID (read); restoration_order (written)
main() {
  log "Starting recovery process for incident $INCIDENT_ID"

  # Phase 1: check the backups, then set up the recovery environment
  log "Phase 1: Recovery preparation"
  verify_clean_backups
  prepare_recovery_environment

  # Phase 2: restore systems one by one, least critical first
  log "Phase 2: System restoration"
  restoration_order=("web-dev01" "app-test01" "web-prod01" "app-prod01" "db-prod01")
  gradual_restoration "${restoration_order[@]}"

  # Phase 3: full validation of the restored environment
  log "Phase 3: Post-recovery validation"
  run_validation_suite

  # Phase 4: switch on enhanced monitoring
  log "Phase 4: Enhanced monitoring"
  deploy_enhanced_monitoring

  log "Recovery process completed"
}
# Script entry point: run the recovery workflow, forwarding all command-line
# arguments to main.
main "$@"