Data Retention and Deletion Strategies

Data Retention and Deletion Strategies

Privacy regulations require clear retention policies and reliable deletion mechanisms. Data should only be retained as long as necessary for its stated purpose. This requires automated retention management, as manual tracking becomes impossible at scale. Deletion must be comprehensive, including backups, caches, and derived data.

Implementing effective deletion is technically challenging. Soft deletion (marking records as deleted) might not satisfy regulatory requirements, because the personal data itself remains stored and recoverable. Hard deletion must consider referential integrity, cascade deletion for related records, and backup retention policies. Some data might need to be retained for legal obligations even when users request deletion, requiring careful exception handling.

// Automated retention and deletion system
/**
 * Coordinates user-data deletion requests and scheduled retention enforcement.
 *
 * Collaborators are NOT created here and must be attached by the surrounding
 * code before use: `this.db` (called like a knex-style query builder),
 * `this.analyticsDb`, `this.jobQueue`, and the helper methods referenced below
 * (`identifyUserData`, `checkLegalHolds`, `canDeleteData`, `deleteFromCaches`,
 * `deleteDerivedData`, `deleteFiles`, `batchDelete`, `hashUserId`,
 * `anonymizeIP`, id generators, notification/logging hooks, ...).
 */
class DataRetentionManager {
  constructor() {
    // Retention window in days per data category. `afterInactivity` switches
    // enforcement from the creation date to the last-activity date.
    this.retentionPolicies = {
      transactional: { days: 2555 }, // 7 years for financial records
      analytics: { days: 730 },       // 2 years
      security_logs: { days: 180 },   // 6 months
      user_content: { days: 90, afterInactivity: true },
      marketing: { days: 365 },
      session_data: { days: 30 }
    };
  }

  /**
   * Process a deletion request for one user.
   *
   * For every data category returned by `identifyUserData` the data is either
   * deleted (primary storage, caches, derived data, plus a scheduled backup
   * purge) or recorded as an exception and anonymized afterwards.
   *
   * @param {*} userId - Identifier of the user whose data is processed.
   * @param {Object} [options]
   * @param {string} [options.category] - When given, only this category is
   *   processed. Retention enforcement uses this so that inactivity in one
   *   category does not wipe the user's other categories.
   * @param {string} [options.reason] - Copied to `deletionPlan.reason` when given.
   * @returns {Promise<Object>} The deletion plan: `userId`, `requestId`,
   *   `timestamp`, `categories` (each with `status` `'deleted'`, or
   *   `'no_primary_strategy'` when `deleteFromPrimary` had nothing to run) and
   *   `exceptions`.
   * @throws Re-throws any error raised by a step after logging it.
   */
  async processDataDeletion(userId, options = {}) {
    const { category: onlyCategory, reason } = options || {};

    const deletionPlan = {
      userId,
      requestId: this.generateRequestId(),
      timestamp: new Date().toISOString(),
      categories: [],
      exceptions: []
    };
    if (reason !== undefined) {
      deletionPlan.reason = reason;
    }

    try {
      // Identify all data associated with user
      const userData = await this.identifyUserData(userId);

      // Check for legal holds or obligations
      const legalHolds = await this.checkLegalHolds(userId);

      // Process each data category
      for (const [category, data] of Object.entries(userData || {})) {
        // Scoped request: leave every other category untouched.
        if (onlyCategory !== undefined && category !== onlyCategory) {
          continue;
        }

        // Check if data can be deleted
        const canDelete = await this.canDeleteData(category, data, legalHolds);

        if (canDelete) {
          // Delete from primary storage; `false` means no strategy exists for
          // this category, so nothing was removed there.
          const primaryResult = await this.deleteFromPrimary(category, data);

          // Delete from caches
          await this.deleteFromCaches(category, data);

          // Schedule backup deletion
          await this.scheduleBackupDeletion(category, data);

          // Delete derived data
          await this.deleteDerivedData(category, data);

          deletionPlan.categories.push({
            category,
            recordCount: data.length,
            // Only an explicit `false` downgrades the status, so overrides of
            // deleteFromPrimary that return nothing keep reporting 'deleted'.
            status: primaryResult === false ? 'no_primary_strategy' : 'deleted'
          });
        } else {
          deletionPlan.exceptions.push({
            category,
            reason: this.getDeletionExceptionReason(category, legalHolds),
            retentionUntil: this.calculateRetentionEnd(category, data)
          });
        }
      }

      // Anonymize data that cannot be deleted
      if (deletionPlan.exceptions.length > 0) {
        await this.anonymizeRetainedData(userId, deletionPlan.exceptions);
      }

      // Send confirmation
      await this.sendDeletionConfirmation(userId, deletionPlan);

      // Log deletion
      await this.logDeletion(deletionPlan);

      return deletionPlan;
    } catch (error) {
      console.error('Deletion failed:', error);
      throw error;
    }
  }

  /**
   * Delete a category's records from primary storage, cascading to the
   * related tables listed in the matching strategy.
   *
   * @param {string} category - Data category; strategies exist for
   *   `user_profile`, `user_content` and `analytics`.
   * @param {Array<Object>} data - Records of that category.
   * @returns {Promise<boolean>} `true` when a strategy ran, `false` when the
   *   category has no strategy and nothing was deleted.
   */
  async deleteFromPrimary(category, data) {
    const deletionStrategy = {
      user_profile: async (records) => {
        // Delete user account data and its dependent rows atomically.
        const ids = records.map(r => r.id);
        await this.db.transaction(async (trx) => {
          await trx('users').whereIn('id', ids).delete();
          await trx('user_preferences').whereIn('user_id', ids).delete();
          await trx('user_sessions').whereIn('user_id', ids).delete();
        });
      },
      user_content: async (records) => {
        // Delete user-generated content
        const userIds = records.map(r => r.userId);
        await this.db.transaction(async (trx) => {
          await trx('posts').whereIn('user_id', userIds).delete();
          await trx('comments').whereIn('user_id', userIds).delete();
          await trx('uploads').whereIn('user_id', userIds).delete();
        });

        // Delete files from storage (outside the DB transaction).
        for (const record of records) {
          if (record.fileUrls) {
            await this.deleteFiles(record.fileUrls);
          }
        }
      },
      analytics: async (records) => {
        // Delete analytics data
        await this.analyticsDb.deleteUserData(records.map(r => r.userId));
      }
    };

    const strategy = deletionStrategy[category];
    if (!strategy) {
      return false;
    }

    await strategy(data);
    return true;
  }

  /**
   * Queue a job that purges the given records from backups after a fixed
   * 30-day delay, then hand the job to `monitorDeletionJob`.
   *
   * @param {string} category - Data category of the records.
   * @param {Array<Object>} data - Records; each record's `id` is stored in the job.
   * @returns {Promise<void>}
   */
  async scheduleBackupDeletion(category, data) {
    const backupRetention = 30; // Keep backups for 30 days after deletion

    const deletionJob = {
      jobId: this.generateJobId(),
      category,
      dataIds: data.map(d => d.id),
      scheduledFor: new Date(Date.now() + backupRetention * 24 * 60 * 60 * 1000),
      status: 'scheduled'
    };

    // Store deletion job
    await this.jobQueue.scheduleJob(deletionJob);

    // Set up monitoring
    this.monitorDeletionJob(deletionJob);
  }

  /**
   * Anonymize data that must be retained. Only the `transactional` and
   * `security_logs` categories have an anonymization strategy; other
   * exception categories are left as they are.
   *
   * @param {*} userId - User whose retained rows are anonymized.
   * @param {Array<{category: string}>} exceptions - Exceptions from a deletion plan.
   * @returns {Promise<void>}
   */
  async anonymizeRetainedData(userId, exceptions) {
    // Built once per call rather than once per exception.
    const anonymizationStrategy = {
      transactional: async () => {
        // Replace user identifiers with anonymous tokens
        await this.db('transactions')
          .where('user_id', userId)
          .update({
            user_id: this.generateAnonymousId(),
            // Syntactically valid placeholder on a reserved, never-resolving TLD.
            email: 'anonymized@example.invalid',
            name: 'ANONYMIZED',
            ip_address: '0.0.0.0'
          });
      },
      security_logs: async () => {
        // Anonymize security logs while preserving patterns. The masked IP
        // depends on each row's own address, so rows are read first and then
        // updated in groups sharing the same source IP.
        // NOTE(review): assumes `anonymizeIP(ip)` returns the masked address
        // and that `security_logs` has an `id` column - confirm.
        const hashedUserId = this.hashUserId(userId);

        await this.db.transaction(async (trx) => {
          const rows = await trx('security_logs')
            .where('user_id', userId)
            .select('id', 'ip_address');

          const idsByIp = new Map();
          for (const row of rows) {
            const ids = idsByIp.get(row.ip_address);
            if (ids) {
              ids.push(row.id);
            } else {
              idsByIp.set(row.ip_address, [row.id]);
            }
          }

          for (const [ip, ids] of idsByIp) {
            await trx('security_logs')
              .whereIn('id', ids)
              .update({
                user_id: hashedUserId,
                // Rows without an address have nothing to mask.
                ip_address: ip == null ? ip : this.anonymizeIP(ip),
                user_agent: 'ANONYMIZED'
              });
          }
        });
      }
    };

    for (const exception of exceptions) {
      const strategy = anonymizationStrategy[exception.category];
      if (strategy) {
        await strategy();
      }
    }
  }

  /**
   * Apply every retention policy: categories flagged `afterInactivity` trigger
   * a category-scoped deletion per inactive user; all others are batch-deleted
   * by creation date.
   *
   * NOTE(review): the category name is used directly as the table name for
   * `this.db(category)` - confirm such tables exist.
   *
   * @returns {Promise<void>}
   */
  async enforceRetentionPolicies() {
    for (const [category, policy] of Object.entries(this.retentionPolicies)) {
      const cutoffDate = new Date();
      cutoffDate.setDate(cutoffDate.getDate() - policy.days);

      if (policy.afterInactivity) {
        // Delete based on last activity
        const inactiveData = await this.db(category)
          .where('last_activity', '<', cutoffDate)
          .select('id', 'user_id');

        // A user may own many expired rows; process each user only once.
        const inactiveUserIds = new Set(inactiveData.map(record => record.user_id));

        for (const inactiveUserId of inactiveUserIds) {
          await this.processDataDeletion(inactiveUserId, {
            reason: 'retention_policy',
            category
          });
        }
      } else {
        // Delete based on creation date
        const expiredData = await this.db(category)
          .where('created_at', '<', cutoffDate)
          .select('id', 'user_id');

        // Batch delete for efficiency
        await this.batchDelete(category, expiredData);
      }
    }
  }
}