Data Retention and Deletion Strategies
Privacy regulations require clear retention policies and reliable deletion mechanisms. Data should only be retained as long as necessary for its stated purpose. This requires automated retention management, as manual tracking becomes impossible at scale. Deletion must be comprehensive, including backups, caches, and derived data.
Implementing effective deletion is technically challenging. Soft deletion (marking records as deleted) might not satisfy regulatory requirements. Hard deletion must consider referential integrity, cascade deletion for related records, and backup retention policies. Some data might need retention for legal obligations even when users request deletion, requiring careful exception handling.
/**
 * Automated retention and deletion system.
 *
 * Coordinates user-data erasure (primary storage, caches, backups, derived
 * data), anonymization of records that must be retained, and periodic
 * enforcement of per-category retention windows.
 *
 * Collaborators (`this.db`, `this.analyticsDb`, `this.jobQueue`) and several
 * helper methods called below (`identifyUserData`, `checkLegalHolds`,
 * `canDeleteData`, `deleteFromCaches`, `deleteDerivedData`, `deleteFiles`,
 * `batchDelete`, the id generators, ...) are not defined in this class body;
 * they must be supplied elsewhere (constructor injection, assignment after
 * construction, or a subclass).
 *
 * NOTE(review): `this.db` is used like a Knex-style query builder (called with
 * a table name, then `.where` / `.whereIn` / `.select` / `.update` /
 * `.delete`, plus `.transaction`) - confirm against the real wiring.
 */
class DataRetentionManager {
  /**
   * @param {object} [deps] - Optional collaborators. Each one is assigned only
   *   when provided, so calling the constructor with no arguments behaves as
   *   before.
   * @param {Function} [deps.db] - Query builder for primary storage.
   * @param {object} [deps.analyticsDb] - Object exposing `deleteUserData(userIds)`.
   * @param {object} [deps.jobQueue] - Object exposing `scheduleJob(job)`.
   */
  constructor(deps = {}) {
    const { db, analyticsDb, jobQueue } = deps || {};

    // Retention window per data category, in days.
    this.retentionPolicies = {
      transactional: { days: 2555 }, // 7 years for financial records
      analytics: { days: 730 }, // 2 years
      security_logs: { days: 180 }, // 6 months
      user_content: { days: 90, afterInactivity: true },
      marketing: { days: 365 },
      session_data: { days: 30 }
    };

    if (db !== undefined) {
      this.db = db;
    }
    if (analyticsDb !== undefined) {
      this.analyticsDb = analyticsDb;
    }
    if (jobQueue !== undefined) {
      this.jobQueue = jobQueue;
    }
  }

  /**
   * Process a deletion request for one user.
   *
   * For every data category returned by `identifyUserData`, either deletes
   * the data (primary storage, caches, scheduled backup purge, derived data)
   * or records an exception; retained categories are then anonymized, a
   * confirmation is sent and the plan is logged.
   *
   * @param {*} userId - Identifier of the user whose data is processed.
   * @param {object} [options]
   * @param {string} [options.category] - When set, only this category is
   *   processed. The retention sweep relies on this so that an expired
   *   category does not erase the user's other data.
   * @param {string} [options.reason] - When set, copied onto the returned plan.
   * @returns {Promise<object>} The deletion plan: `userId`, `requestId`,
   *   `timestamp`, `categories` (each with `status` `'deleted'`, or
   *   `'skipped'` when no primary-storage strategy exists for the category),
   *   `exceptions`, and `reason` when one was given.
   * @throws Rethrows any error from the steps above after logging it.
   */
  async processDataDeletion(userId, options = {}) {
    const { category: onlyCategory, reason } = options || {};

    const deletionPlan = {
      userId,
      requestId: this.generateRequestId(),
      timestamp: new Date().toISOString(),
      categories: [],
      exceptions: []
    };
    if (reason !== undefined) {
      deletionPlan.reason = reason;
    }

    try {
      // Identify all data associated with user
      const userData = await this.identifyUserData(userId);
      // Check for legal holds or obligations
      const legalHolds = await this.checkLegalHolds(userId);

      // Process each data category
      for (const [category, data] of Object.entries(userData || {})) {
        // Honour a category-scoped request.
        if (onlyCategory != null && category !== onlyCategory) {
          continue;
        }

        const canDelete = await this.canDeleteData(category, data, legalHolds);
        if (canDelete) {
          // `false` means no primary-storage strategy exists for the category.
          const primaryHandled = await this.deleteFromPrimary(category, data);
          await this.deleteFromCaches(category, data);
          await this.scheduleBackupDeletion(category, data);
          await this.deleteDerivedData(category, data);

          deletionPlan.categories.push({
            category,
            recordCount: data.length,
            // Do not report 'deleted' when primary storage was never touched.
            status: primaryHandled === false ? 'skipped' : 'deleted'
          });
        } else {
          deletionPlan.exceptions.push({
            category,
            reason: this.getDeletionExceptionReason(category, legalHolds),
            retentionUntil: this.calculateRetentionEnd(category, data)
          });
        }
      }

      // Anonymize data that cannot be deleted
      if (deletionPlan.exceptions.length > 0) {
        await this.anonymizeRetainedData(userId, deletionPlan.exceptions);
      }

      // Send confirmation
      await this.sendDeletionConfirmation(userId, deletionPlan);
      // Log deletion
      await this.logDeletion(deletionPlan);

      return deletionPlan;
    } catch (error) {
      console.error('Deletion failed:', error);
      throw error;
    }
  }

  /**
   * Delete a category's records from primary storage, cascading to related
   * tables.
   *
   * @param {string} category - `user_profile`, `user_content` or `analytics`.
   * @param {Array<object>} data - Records of that category. `user_profile`
   *   records are read via `id`; the other categories via `userId`
   *   (and optionally `fileUrls` for `user_content`).
   * @returns {Promise<boolean>} `true` when a strategy ran, `false` when the
   *   category has no primary-storage strategy and nothing was deleted.
   */
  async deleteFromPrimary(category, data) {
    const deletionStrategy = {
      user_profile: async (records) => {
        const ids = records.map((r) => r.id);
        await this.db.transaction(async (trx) => {
          // Child rows first, so the parent delete cannot trip a
          // foreign-key constraint.
          await trx('user_preferences').whereIn('user_id', ids).delete();
          await trx('user_sessions').whereIn('user_id', ids).delete();
          await trx('users').whereIn('id', ids).delete();
        });
      },
      user_content: async (records) => {
        const userIds = [...new Set(records.map((r) => r.userId))];
        await this.db.transaction(async (trx) => {
          await trx('posts').whereIn('user_id', userIds).delete();
          await trx('comments').whereIn('user_id', userIds).delete();
          await trx('uploads').whereIn('user_id', userIds).delete();
        });
        // Delete files from storage
        for (const record of records) {
          if (record.fileUrls) {
            await this.deleteFiles(record.fileUrls);
          }
        }
      },
      analytics: async (records) => {
        await this.analyticsDb.deleteUserData(records.map((r) => r.userId));
      }
    };

    // Own-property check: category names come from data, so inherited keys
    // such as "constructor" must not be mistaken for a strategy.
    if (!Object.prototype.hasOwnProperty.call(deletionStrategy, category)) {
      return false;
    }
    await deletionStrategy[category](data);
    return true;
  }

  /**
   * Schedule a delayed purge of the given records from backups.
   *
   * @param {string} category - Data category the records belong to.
   * @param {Array<object>} data - Records; only `id` is read.
   * @param {number} [backupRetentionDays=30] - Days to keep backups after the
   *   deletion before the purge job runs.
   * @returns {Promise<object>} The job that was handed to `this.jobQueue`.
   */
  async scheduleBackupDeletion(category, data, backupRetentionDays = 30) {
    const MS_PER_DAY = 24 * 60 * 60 * 1000;
    const deletionJob = {
      jobId: this.generateJobId(),
      category,
      dataIds: data.map((d) => d.id),
      scheduledFor: new Date(Date.now() + backupRetentionDays * MS_PER_DAY),
      status: 'scheduled'
    };

    // Store deletion job
    await this.jobQueue.scheduleJob(deletionJob);
    // Set up monitoring. Not awaited, so monitoring does not delay the caller.
    this.monitorDeletionJob(deletionJob);

    return deletionJob;
  }

  /**
   * Anonymize data that must be retained.
   *
   * Only the `transactional` and `security_logs` categories have a strategy;
   * exceptions for any other category are left untouched.
   *
   * @param {*} userId - User whose retained rows are anonymized.
   * @param {Array<{category: string}>} exceptions - Retained categories.
   * @returns {Promise<void>}
   */
  async anonymizeRetainedData(userId, exceptions) {
    // Built once; the strategies do not depend on the individual exception.
    const anonymizationStrategy = {
      transactional: async () => {
        // Replace user identifiers with anonymous tokens
        await this.db('transactions')
          .where('user_id', userId)
          .update({
            user_id: this.generateAnonymousId(),
            // Placeholder on a reserved example domain.
            email: 'anonymized@example.com',
            name: 'ANONYMIZED',
            ip_address: '0.0.0.0'
          });
      },
      security_logs: async () => {
        // Each row needs its own anonymized IP, so rows are updated one by
        // one; the transaction keeps a failure from leaving the user's logs
        // half-anonymized.
        // assumes anonymizeIP(ip) returns (or resolves to) the value to store
        // - TODO confirm
        await this.db.transaction(async (trx) => {
          const rows = await trx('security_logs')
            .where('user_id', userId)
            .select('id', 'ip_address');
          const hashedUserId = this.hashUserId(userId);

          for (const row of rows) {
            await trx('security_logs')
              .where('id', row.id)
              .update({
                user_id: hashedUserId,
                ip_address: await this.anonymizeIP(row.ip_address),
                user_agent: 'ANONYMIZED'
              });
          }
        });
      }
    };

    for (const exception of exceptions) {
      if (Object.prototype.hasOwnProperty.call(anonymizationStrategy, exception.category)) {
        await anonymizationStrategy[exception.category]();
      }
    }
  }

  /**
   * Enforce every retention policy in `this.retentionPolicies`.
   *
   * The table queried for a category is the category name itself. Policies
   * flagged `afterInactivity` select rows whose `last_activity` is older than
   * the cutoff and run a category-scoped `processDataDeletion` once per
   * distinct user; all other policies select rows whose `created_at` is older
   * than the cutoff and pass them to `this.batchDelete`.
   *
   * @returns {Promise<void>}
   */
  async enforceRetentionPolicies() {
    for (const [category, policy] of Object.entries(this.retentionPolicies)) {
      const cutoffDate = new Date();
      cutoffDate.setDate(cutoffDate.getDate() - policy.days);

      if (policy.afterInactivity) {
        // Delete based on last activity
        const inactiveData = await this.db(category)
          .where('last_activity', '<', cutoffDate)
          .select('id', 'user_id');

        // A user with several stale rows is processed once, not once per row.
        const inactiveUserIds = new Set(inactiveData.map((record) => record.user_id));
        for (const inactiveUserId of inactiveUserIds) {
          await this.processDataDeletion(inactiveUserId, {
            reason: 'retention_policy',
            category
          });
        }
      } else {
        // Delete based on creation date
        const expiredData = await this.db(category)
          .where('created_at', '<', cutoffDate)
          .select('id', 'user_id');
        // Batch delete for efficiency
        await this.batchDelete(category, expiredData);
      }
    }
  }
}