Tutorial 3: Building a Privacy-First Analytics System

Tutorial 3: Building a Privacy-First Analytics System

This tutorial demonstrates how to implement analytics that respects user privacy by default, using techniques like differential privacy, on-device processing, and consent-aware data collection.

// Privacy-first analytics implementation
/**
 * Client-side analytics collector that only records data while the user has
 * granted analytics consent, and that generalizes or perturbs values before
 * they leave the device.
 *
 * Events are buffered in `eventQueue`, aggregated on-device and POSTed to
 * `/api/analytics/collect` in batches.
 *
 * NOTE: this class calls `getStoredConsent`, `generateSessionId`,
 * `setupEventListeners`, `sanitizeReferrer`, `getScreenResolution` and
 * `getViewport`, none of which are defined in this listing. They must be
 * supplied (for example by a subclass) before the class is instantiated.
 */
class PrivacyFirstAnalytics {
  /**
   * @param {object} [config] - Overrides for the default settings below.
   */
  constructor(config = {}) {
    this.config = {
      enableDifferentialPrivacy: true,
      epsilon: 1.0, // Privacy budget
      collectIPAddress: false,
      hashUserAgents: true,
      sessionTimeout: 30 * 60 * 1000, // 30 minutes
      ...config
    };

    this.sessionData = new Map();
    this.eventQueue = [];
    // Handle of the periodic flush timer; stays null until init() starts it
    this.flushTimer = null;
    this.init();
  }

  /**
   * Starts the session, the event listeners and the periodic queue flush.
   * Does nothing (apart from logging) when analytics consent is absent.
   */
  init() {
    // Check consent before initializing
    if (!this.hasAnalyticsConsent()) {
      console.log('Analytics disabled - no consent');
      return;
    }

    // Start session
    this.startSession();

    // Setup event listeners
    this.setupEventListeners();

    // Process queued events periodically; keep the handle so destroy() can
    // stop the timer instead of leaking it for the lifetime of the page
    this.flushTimer = setInterval(() => this.processEventQueue(), 5000);
  }

  /**
   * Stops the periodic queue flush started by init(). Safe to call when the
   * timer was never started or has already been stopped.
   */
  destroy() {
    if (this.flushTimer !== null) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
  }

  /**
   * @returns {boolean} true only when the stored consent record has
   *   `analytics` set to exactly `true`.
   */
  hasAnalyticsConsent() {
    const consent = this.getStoredConsent();
    // Boolean() so callers always get true/false, never null/undefined
    return Boolean(consent && consent.analytics === true);
  }

  /**
   * Creates a new session id and records its start time in `sessionData`.
   */
  startSession() {
    this.sessionId = this.generateSessionId();
    this.sessionStart = Date.now();

    // Store minimal session data
    this.sessionData.set(this.sessionId, {
      start: this.sessionStart,
      pageViews: 0,
      events: []
    });
  }

  /**
   * Queues a page-view event for the given path; ignored without consent.
   *
   * @param {string} pagePath - Path or URL of the page that was viewed.
   */
  trackPageView(pagePath) {
    if (!this.hasAnalyticsConsent()) return;

    const pageView = {
      type: 'pageview',
      path: this.sanitizePath(pagePath),
      timestamp: Date.now(),
      sessionId: this.sessionId,
      metadata: {
        referrer: this.sanitizeReferrer(document.referrer),
        screenResolution: this.getScreenResolution(),
        viewport: this.getViewport()
      }
    };

    this.addEvent(pageView);
  }

  /**
   * Queues a custom event; ignored without consent.
   *
   * @param {string} category - Event category.
   * @param {string} action - Event action.
   * @param {string} [label] - Optional label.
   * @param {number} [value] - Optional numeric value; noised when
   *   differential privacy is enabled.
   */
  trackEvent(category, action, label, value) {
    if (!this.hasAnalyticsConsent()) return;

    const event = {
      type: 'event',
      category,
      action,
      label,
      value,
      timestamp: Date.now(),
      sessionId: this.sessionId
    };

    this.addEvent(event);
  }

  /**
   * Privatizes an event, appends it to the queue and triggers a flush once
   * the queue holds 10 or more events.
   *
   * @param {object} event - Raw event built by trackPageView/trackEvent.
   */
  addEvent(event) {
    // Apply privacy transformations
    const privatizedEvent = this.privatizeEvent(event);

    // Add to queue
    this.eventQueue.push(privatizedEvent);

    // Process immediately if queue is large
    if (this.eventQueue.length >= 10) {
      this.processEventQueue();
    }
  }

  /**
   * Returns a privacy-reduced copy of an event. The input event, including
   * its `metadata` object, is left untouched.
   *
   * @param {object} event - Raw event.
   * @returns {object} Copy with generalized metadata and, when differential
   *   privacy is enabled, a noised numeric `value`.
   */
  privatizeEvent(event) {
    const privatized = { ...event };

    // Remove or hash sensitive data
    if (privatized.metadata) {
      // The spread above is shallow: copy metadata too, otherwise the
      // assignments below would rewrite the caller's object
      privatized.metadata = { ...privatized.metadata };

      // Generalize screen resolution
      if (privatized.metadata.screenResolution) {
        privatized.metadata.screenResolution = this.generalizeResolution(
          privatized.metadata.screenResolution
        );
      }

      // Remove detailed referrer
      if (privatized.metadata.referrer) {
        privatized.metadata.referrer = this.generalizeReferrer(
          privatized.metadata.referrer
        );
      }
    }

    // Apply differential privacy to numeric values. A type check is used
    // rather than truthiness so that a value of 0 is noised as well, and
    // non-numeric values are never fed into the arithmetic.
    if (this.config.enableDifferentialPrivacy && Number.isFinite(privatized.value)) {
      privatized.value = this.addDifferentialPrivacy(privatized.value);
    }

    return privatized;
  }

  /**
   * Adds Laplace noise with scale `1 / epsilon` to a value (i.e. a
   * sensitivity of 1 is assumed), then rounds and clamps the result at 0.
   *
   * @param {number} value - Exact value.
   * @returns {number} Non-negative integer with noise applied.
   */
  addDifferentialPrivacy(value) {
    // Add Laplace noise
    const scale = 1 / this.config.epsilon;
    const noise = this.laplaceSample(scale);
    return Math.max(0, Math.round(value + noise));
  }

  /**
   * Draws one sample from a zero-centred Laplace distribution using inverse
   * transform sampling.
   *
   * @param {number} scale - Scale parameter of the distribution.
   * @returns {number} A finite random sample.
   */
  laplaceSample(scale) {
    // Math.random() may return exactly 0, which gives u = -0.5 and
    // log(1 - 2 * 0.5) = -Infinity. Redraw in that case.
    let u = Math.random() - 0.5;
    while (u === -0.5) {
      u = Math.random() - 0.5;
    }
    return -scale * Math.sign(u) * Math.log(1 - 2 * Math.abs(u));
  }

  /**
   * Drains the queue, aggregates the drained events and sends the aggregate.
   * If sending fails the events are put back at the front of the queue. If
   * consent is no longer present the queued events are discarded unsent.
   *
   * @returns {Promise<void>}
   */
  async processEventQueue() {
    if (this.eventQueue.length === 0) return;

    // Consent may have been withdrawn after these events were queued; in
    // that case nothing may leave the device, so drop them
    if (!this.hasAnalyticsConsent()) {
      this.eventQueue.length = 0;
      return;
    }

    // Get events to process
    const events = this.eventQueue.splice(0, this.eventQueue.length);

    // Batch process on device first
    const aggregated = this.aggregateEvents(events);

    // Send aggregated data
    try {
      await this.sendAnalytics(aggregated);
    } catch (error) {
      console.error('Failed to send analytics:', error);
      // Re-queue events
      this.eventQueue.unshift(...events);
    }
  }

  /**
   * Collapses a batch of events into per-path page-view counts and
   * per-`category:action` event counts and value totals.
   *
   * @param {object[]} events - Privatized events.
   * @returns {object} Aggregate with `sessionId`, `timestamp`, `eventCount`,
   *   `pageViews`, `events` and `metrics`.
   */
  aggregateEvents(events) {
    const aggregated = {
      sessionId: this.sessionId,
      timestamp: Date.now(),
      eventCount: events.length,
      pageViews: [],
      events: {},
      metrics: {}
    };

    // Index of the entries already pushed to aggregated.pageViews, so each
    // lookup is O(1) instead of a linear scan per event
    const pageViewsByPath = new Map();

    for (const event of events) {
      if (event.type === 'pageview') {
        // Aggregate page views
        let entry = pageViewsByPath.get(event.path);
        if (!entry) {
          entry = { path: event.path, count: 0 };
          pageViewsByPath.set(event.path, entry);
          aggregated.pageViews.push(entry);
        }
        entry.count++;
      } else if (event.type === 'event') {
        // Aggregate events by category/action
        const key = `${event.category}:${event.action}`;
        if (!aggregated.events[key]) {
          aggregated.events[key] = {
            count: 0,
            totalValue: 0
          };
        }
        aggregated.events[key].count++;
        // Only sum real numbers so a stray string cannot turn the total
        // into a concatenated string
        if (Number.isFinite(event.value)) {
          aggregated.events[key].totalValue += event.value;
        }
      }
    }

    // Calculate session metrics
    aggregated.metrics = {
      duration: Date.now() - this.sessionStart,
      pageViewCount: aggregated.pageViews.reduce((sum, pv) => sum + pv.count, 0),
      eventCount: Object.values(aggregated.events).reduce((sum, e) => sum + e.count, 0)
    };

    return aggregated;
  }

  /**
   * POSTs an aggregate to `/api/analytics/collect` as JSON.
   *
   * @param {object} data - Aggregate produced by aggregateEvents.
   * @returns {Promise<void>}
   * @throws {Error} When the response status is not OK.
   */
  async sendAnalytics(data) {
    const payload = {
      ...data,
      context: {
        consent: true,
        privacyMode: 'strict',
        differentialPrivacy: this.config.enableDifferentialPrivacy
      }
    };

    const response = await fetch('/api/analytics/collect', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload)
    });

    if (!response.ok) {
      throw new Error('Analytics request failed');
    }
  }

  // Privacy-preserving helper methods

  /**
   * Reduces a path or URL to its pathname. The query string and fragment
   * are dropped entirely, so no query parameter is ever reported.
   *
   * @param {string} path - Path or URL, resolved against the page origin.
   * @returns {string} Pathname only.
   */
  sanitizePath(path) {
    const url = new URL(path, window.location.origin);
    return url.pathname;
  }

  /**
   * Maps a `WIDTHxHEIGHT` string to the nearest entry in a short list of
   * common resolutions, measured by the sum of the width and height
   * differences.
   *
   * @param {string} resolution - e.g. `'1900x1000'`.
   * @returns {string} One of the common resolutions.
   */
  generalizeResolution(resolution) {
    // Round to common resolutions
    const commonResolutions = [
      '1920x1080', '1366x768', '1440x900', '1280x720'
    ];

    // Find closest common resolution
    const [width, height] = resolution.split('x').map(Number);
    let closest = commonResolutions[0];
    let minDiff = Infinity;

    for (const common of commonResolutions) {
      const [cw, ch] = common.split('x').map(Number);
      const diff = Math.abs(width - cw) + Math.abs(height - ch);
      if (diff < minDiff) {
        minDiff = diff;
        closest = common;
      }
    }

    return closest;
  }

  /**
   * Reduces a referrer URL to its hostname.
   *
   * @param {string} referrer - Referrer URL, possibly empty.
   * @returns {string} The hostname, `'direct'` for an empty referrer, or
   *   `'invalid'` when the referrer cannot be parsed as a URL.
   */
  generalizeReferrer(referrer) {
    if (!referrer) return 'direct';

    try {
      const url = new URL(referrer);
      // Only keep domain
      return url.hostname;
    } catch {
      return 'invalid';
    }
  }
}

// Backend: Privacy-aware analytics processing
/**
 * Server-side handler for analytics payloads: validates the payload, checks
 * the consent flag, aggregates further and stores only the aggregate with a
 * coarse time bucket.
 *
 * NOTE: `DataAggregator`, `AnalyticsStorage`, `validateAnalyticsData` and
 * `updateRealtimeMetrics` are referenced here but not defined in this
 * listing; they must be provided elsewhere.
 */
class PrivacyAnalyticsProcessor {
  constructor() {
    this.aggregator = new DataAggregator();
    this.storage = new AnalyticsStorage();
  }

  /**
   * Processes one analytics payload.
   *
   * @param {object} data - Payload; must carry `context.consent === true`
   *   and a `timestamp`.
   * @returns {Promise<{success: boolean}>}
   * @throws {Error} 'Invalid analytics data' when validation fails, or
   *   'No consent for analytics' when the consent flag is missing or is not
   *   exactly `true`.
   */
  async processAnalytics(data) {
    // Validate data
    if (!this.validateAnalyticsData(data)) {
      throw new Error('Invalid analytics data');
    }

    // Check consent status. A missing context counts as "no consent" rather
    // than surfacing as a TypeError, and only the boolean `true` is accepted.
    if (!data.context || data.context.consent !== true) {
      throw new Error('No consent for analytics');
    }

    // Further aggregate data
    const aggregated = await this.aggregator.aggregate(data);

    // Omit the session id and any exact timestamp from the stored record.
    // Leaving the keys out (instead of setting them to undefined) keeps them
    // from reaching a storage layer that would persist them as null.
    const { sessionId, timestamp, ...storable } = aggregated || {};

    // Store aggregated data only
    await this.storage.store({
      ...storable,
      // Store time buckets instead of exact timestamps
      timeBucket: this.getTimeBucket(data.timestamp)
    });

    // Update real-time metrics
    await this.updateRealtimeMetrics(aggregated);

    return { success: true };
  }

  /**
   * Rounds a timestamp down to the start of its 5-minute bucket.
   *
   * @param {number|string|Date} timestamp - Anything accepted by `new Date()`.
   * @returns {string} ISO-8601 UTC string of the bucket start.
   * @throws {RangeError} When the timestamp cannot be parsed as a date.
   */
  getTimeBucket(timestamp) {
    const bucketMs = 5 * 60 * 1000;
    // Work on epoch milliseconds so the result does not depend on the
    // server's local timezone
    const ms = new Date(timestamp).getTime();
    return new Date(Math.floor(ms / bucketMs) * bucketMs).toISOString();
  }
}

These tutorials provide practical, working implementations of key privacy features. Each can be adapted and extended based on specific application needs while maintaining privacy compliance. The next chapter will explore common developer mistakes and how to avoid them.

## Common Developer Mistakes and How to Avoid Them

Privacy compliance failures often stem from well-intentioned developers making predictable mistakes. These errors range from fundamental misunderstandings of privacy principles to technical implementation flaws that inadvertently expose user data. This chapter examines the most common privacy-related mistakes developers make, explains why they occur, and provides concrete guidance on avoiding them. By learning from these common pitfalls, developers can build more robust, privacy-compliant applications from the start.