Privacy by Design in Data Architecture
Privacy by Design, codified in Article 25 of the GDPR as "data protection by design and by default", requires considering privacy implications from the earliest stages of system design. This proactive approach contrasts with traditional reactive security measures added after development. For data collection and storage, Privacy by Design means architecting systems that minimize data collection, limit retention, enable user control, and protect data throughout its lifecycle.
Data minimization forms the cornerstone of privacy-compliant collection. Every data field collected must serve a specific, documented purpose. This requires challenging assumptions about what data is "nice to have" versus essential. For example, collecting full birth dates when only age verification is needed violates minimization principles. Similarly, storing complete IP addresses when anonymized versions suffice for analytics creates unnecessary privacy risks.
Purpose limitation restricts how collected data can be used. Data collected for one purpose cannot be repurposed without additional legal basis or consent. This requires careful data governance and access controls. Systems must track not just what data exists, but why it was collected and how it may be used. This metadata becomes crucial for responding to user requests and demonstrating compliance.
// Privacy-first data collection framework
/**
 * Collects user data according to a declarative, per-field privacy schema.
 *
 * Each schema field declares the purposes it may be collected for, a retention
 * policy, whether it must be encrypted, and optional minimization rules
 * (`validate`, `transform`, `maxLength`, `optional`).
 *
 * The class calls the following methods but does not define them; they are
 * expected to be supplied elsewhere (subclass, mixin or prototype assignment):
 *   - isValidEmail(value)            -> boolean
 *   - getActiveConsent(userId, purpose)
 *   - encrypt(value)
 *   - storeData(collectedData, metadata)
 *   - scheduleDataDeletion({ userId, category, field, deleteAfter })
 */
class PrivacyFirstDataCollector {
  constructor() {
    this.dataSchema = {
      user_profile: {
        fields: {
          email: {
            type: 'string',
            purpose: ['authentication', 'communication'],
            retention: '3 years after last activity',
            encryption: 'required',
            pii: true,
            minimization: {
              validate: (value) => this.isValidEmail(value),
              transform: (value) => value.toLowerCase().trim(),
            },
          },
          displayName: {
            type: 'string',
            purpose: ['profile_display'],
            retention: 'until_deletion',
            encryption: 'optional',
            pii: false,
            minimization: {
              maxLength: 50,
              optional: true,
            },
          },
          dateOfBirth: {
            type: 'date',
            purpose: ['age_verification'],
            retention: 'until_deletion',
            encryption: 'required',
            pii: true,
            minimization: {
              // Reject unparseable dates instead of storing { yearOfBirth: NaN }.
              validate: (value) => this.extractBirthYear(value) !== null,
              // Store only year for age verification
              transform: (value) => ({ yearOfBirth: this.extractBirthYear(value) }),
            },
          },
          ipAddress: {
            type: 'string',
            purpose: ['security', 'fraud_prevention'],
            retention: '30 days',
            encryption: 'required',
            pii: true,
            minimization: {
              transform: (value) => this.anonymizeIP(value),
            },
          },
        },
      },
      analytics: {
        fields: {
          sessionId: {
            type: 'string',
            purpose: ['analytics'],
            retention: '26 months',
            encryption: 'optional',
            pii: false,
          },
          pageViews: {
            type: 'array',
            purpose: ['analytics', 'performance'],
            retention: '26 months',
            encryption: 'optional',
            pii: false,
            minimization: {
              // NOTE(review): excludeFields is declared but nothing in this
              // class reads it; sanitizeURL keeps non-sensitive query
              // parameters. Confirm the intended behaviour.
              excludeFields: ['queryParams', 'fragments'],
              // Applied to every element of the array (see processField).
              transform: (url) => this.sanitizeURL(url),
            },
          },
        },
      },
    };
  }

  /**
   * Collect data with privacy controls.
   *
   * Only fields whose schema lists `purpose` are considered. A supplied field
   * that is not allowed for the purpose is skipped with a warning; a missing
   * field is an error only when it is allowed for the purpose and not optional.
   *
   * @param {string} category - Key of `this.dataSchema` (e.g. 'user_profile').
   * @param {object} data - Raw field values keyed by schema field name.
   * @param {*} userId - Passed through to consent lookup, storage and retention.
   * @param {string} purpose - The purpose this collection is performed for.
   * @returns {Promise<{success: boolean, dataId: *, retentionScheduled: (string|undefined)}>}
   *   `dataId` is read from `metadata.dataId` (presumably populated by
   *   `storeData` - confirm). `retentionScheduled` is `metadata.retentionDate`
   *   when set, otherwise the earliest scheduled deletion time (ISO string), or
   *   undefined when nothing was scheduled.
   * @throws {Error} Unknown category, invalid value, or missing required field.
   * @throws {TypeError} When `data` is not an object.
   */
  async collectData(category, data, userId, purpose) {
    const schema = this.dataSchema[category];
    if (!schema) {
      throw new Error(`Unknown data category: ${category}`);
    }
    if (data === null || typeof data !== 'object') {
      throw new TypeError('data must be an object');
    }

    const collectedData = {};
    const metadata = {
      collectionTime: new Date().toISOString(),
      purpose: purpose,
      userId: userId,
      category: category,
      consentId: await this.getActiveConsent(userId, purpose),
    };

    // Process each field according to schema
    for (const [field, config] of Object.entries(schema.fields)) {
      // Safe for payloads created with Object.create(null).
      const supplied = Object.prototype.hasOwnProperty.call(data, field);

      // Purpose limitation comes first: a field outside the purpose is never
      // collected and therefore can never be "missing".
      if (!config.purpose.includes(purpose)) {
        if (supplied) {
          console.warn(`Field ${field} not allowed for purpose ${purpose}`);
        }
        continue;
      }

      if (!supplied) {
        if (!config.minimization?.optional) {
          throw new Error(`Required field missing: ${field}`);
        }
        continue;
      }

      collectedData[field] = await this.processField(field, config, data[field]);
    }

    // Store with metadata
    await this.storeData(collectedData, metadata);

    // Schedule retention only for what was actually collected, and wait for it
    // so that scheduling failures reach the caller instead of being lost.
    const scheduled = await this.scheduleRetention(metadata, Object.keys(collectedData));
    // ISO-8601 timestamps sort chronologically as plain strings.
    const earliestDeletion = scheduled.map((job) => job.deleteAfter).sort()[0];

    return {
      success: true,
      dataId: metadata.dataId,
      retentionScheduled: metadata.retentionDate ?? earliestDeletion,
    };
  }

  /**
   * Apply a field's minimization rules and required encryption to one value.
   *
   * For fields declared `type: 'array'` whose value is an array, `validate`
   * and `transform` run per element; otherwise they run on the value itself.
   *
   * @param {string} field - Field name (used in error messages).
   * @param {object} config - The field's schema entry.
   * @param {*} rawValue - Value supplied by the caller.
   * @returns {Promise<*>} The minimized (and possibly encrypted) value.
   * @throws {Error} When validation or the `maxLength` rule fails.
   */
  async processField(field, config, rawValue) {
    const rules = config.minimization;
    let value = rawValue;

    if (rules) {
      const perElement = config.type === 'array' && Array.isArray(value);
      const items = perElement ? value : [value];

      if (rules.validate && !items.every((item) => rules.validate(item))) {
        throw new Error(`Invalid value for field ${field}`);
      }
      if (rules.transform) {
        const transformed = items.map((item) => rules.transform(item));
        value = perElement ? transformed : transformed[0];
      }
      if (
        typeof rules.maxLength === 'number' &&
        typeof value === 'string' &&
        value.length > rules.maxLength
      ) {
        throw new Error(`Invalid value for field ${field}`);
      }
    }

    // Encrypt if required
    if (config.encryption === 'required') {
      value = await this.encrypt(value);
    }
    return value;
  }

  /**
   * Extract the calendar year from a date of birth.
   *
   * For strings starting with `YYYY-MM-DD` the written year is used directly,
   * because `new Date('1990-01-01')` is parsed as UTC midnight and local-time
   * getters would report 1989 west of UTC. Other inputs fall back to
   * `new Date(value).getFullYear()`.
   *
   * @param {*} value - Date string, timestamp or Date.
   * @returns {number|null} The year, or null when the value is not a valid date.
   */
  extractBirthYear(value) {
    if (typeof value === 'string') {
      const text = value.trim();
      const iso = /^(\d{4})-\d{2}-\d{2}/.exec(text);
      if (iso) {
        return Number.isNaN(Date.parse(text)) ? null : Number(iso[1]);
      }
    }
    const year = new Date(value).getFullYear();
    return Number.isNaN(year) ? null : year;
  }

  /**
   * Anonymize IP addresses.
   *
   * IPv4: the last octet is replaced with 0. IPv6: only the first three
   * 16-bit groups (48 bits) are kept, i.e. the last 80 bits are zeroed.
   * Input that cannot be parsed yields the unspecified address ('0.0.0.0' or
   * '::') so that no part of a malformed value is retained.
   *
   * @param {string} ip - IPv4 or IPv6 address in text form.
   * @returns {string} The truncated address.
   * @throws {TypeError} When `ip` is not a string.
   */
  anonymizeIP(ip) {
    if (typeof ip !== 'string') {
      throw new TypeError('IP address must be a string');
    }
    const address = ip.trim();
    if (address.includes(':')) {
      return this.anonymizeIPv6(address);
    }

    const octets = address.split('.');
    const isValid =
      octets.length === 4 &&
      octets.every((octet) => /^\d{1,3}$/.test(octet) && Number(octet) <= 255);
    if (!isValid) {
      return '0.0.0.0';
    }
    // IPv4: zero out last octet
    octets[3] = '0';
    return octets.join('.');
  }

  /**
   * Keep the first three groups of an IPv6 address, expanding `::` first so
   * compressed and IPv4-mapped forms are handled. Groups are kept as written
   * (no leading-zero normalization).
   *
   * @param {string} address - Trimmed IPv6 text, optionally with a `%zone`.
   * @returns {string} `<g1>:<g2>:<g3>::`, or '::' when the input is malformed.
   */
  anonymizeIPv6(address) {
    const TOTAL_GROUPS = 8;
    const HEX_GROUP = /^[0-9a-f]{1,4}$/i;

    const withoutZone = address.split('%')[0];
    const halves = withoutZone.split('::');
    if (halves.length > 2) {
      return '::'; // "::" may appear at most once
    }

    const toGroups = (text) => (text === '' ? [] : text.split(':'));
    const head = toGroups(halves[0]);
    const tail = halves.length === 2 ? toGroups(halves[1]) : [];

    // A trailing dotted-quad (e.g. ::ffff:192.0.2.1) occupies two groups.
    const width = (group) => (group.includes('.') ? 2 : 1);
    const present = [...head, ...tail].reduce((sum, group) => sum + width(group), 0);

    let groups;
    if (halves.length === 2) {
      const missing = TOTAL_GROUPS - present;
      if (missing < 1) {
        return '::';
      }
      groups = [...head, ...new Array(missing).fill('0'), ...tail];
    } else {
      if (present !== TOTAL_GROUPS) {
        return '::';
      }
      groups = head;
    }

    const lastIndex = groups.length - 1;
    const wellFormed = groups.every(
      (group, index) => HEX_GROUP.test(group) || (index === lastIndex && group.includes('.')),
    );
    const prefix = groups.slice(0, 3);
    if (!wellFormed || !prefix.every((group) => HEX_GROUP.test(group))) {
      return '::';
    }
    // IPv6: zero out last 80 bits
    return `${prefix.join(':')}::`;
  }

  /**
   * Sanitize URLs for privacy.
   *
   * Returns only the path and the query string, with sensitive parameters
   * (matched case-insensitively) removed. Origin and fragment are dropped.
   *
   * @param {string} url - Absolute URL.
   * @returns {string} `pathname[?query]`, or '/invalid-url' when `url` does
   *   not parse as an absolute URL.
   */
  sanitizeURL(url) {
    let parsed;
    try {
      parsed = new URL(url);
    } catch {
      return '/invalid-url';
    }

    // Remove sensitive parameters
    const sensitiveParams = new Set(['email', 'token', 'session', 'user', 'id']);
    // Snapshot the keys first: deleting while iterating searchParams skips entries.
    const toRemove = [...parsed.searchParams.keys()].filter((key) =>
      sensitiveParams.has(key.toLowerCase()),
    );
    for (const key of toRemove) {
      parsed.searchParams.delete(key);
    }

    // The fragment is excluded simply by not being part of the result.
    const query = parsed.searchParams.toString();
    return query ? `${parsed.pathname}?${query}` : parsed.pathname;
  }

  /**
   * Translate a retention policy string into a deletion date.
   *
   * Recognizes "<n> day(s)", "<n> month(s)" and "<n> year(s)" anywhere in the
   * string. Any trailing qualifier such as "after last activity" is NOT
   * interpreted: the period is always counted from `from`. Month and year
   * arithmetic is done in UTC and clamped to the last day of the target month
   * (Jan 31 + 1 month -> Feb 28/29) so data is never kept longer than stated.
   *
   * @param {string} retention - Policy string from the schema.
   * @param {Date} [from=new Date()] - Start of the retention period.
   * @returns {Date|null} Deletion date, or null when no period is recognized.
   */
  computeRetentionDate(retention, from = new Date()) {
    const match = /(\d+)\s*(day|month|year)s?\b/i.exec(String(retention));
    if (!match) {
      return null;
    }
    const amount = Number.parseInt(match[1], 10);
    const unit = match[2].toLowerCase();
    const due = new Date(from.getTime());

    if (unit === 'day') {
      due.setUTCDate(due.getUTCDate() + amount);
      return due;
    }

    const monthsToAdd = unit === 'year' ? amount * 12 : amount;
    const dayOfMonth = due.getUTCDate();
    // Move to the 1st before shifting months so a 31st cannot overflow.
    due.setUTCDate(1);
    due.setUTCMonth(due.getUTCMonth() + monthsToAdd);
    const lastDayOfMonth = new Date(
      Date.UTC(due.getUTCFullYear(), due.getUTCMonth() + 1, 0),
    ).getUTCDate();
    due.setUTCDate(Math.min(dayOfMonth, lastDayOfMonth));
    return due;
  }

  /**
   * Implement retention policies.
   *
   * Calls `scheduleDataDeletion` once per field that has a time-based
   * retention policy. Fields with 'until_deletion' are skipped; fields whose
   * policy string is not recognized are skipped with a warning.
   *
   * @param {{userId: *, category: string}} metadata - Collection metadata.
   * @param {string[]} [fields] - Restrict scheduling to these field names.
   *   When omitted, every field of the category is scheduled.
   * @returns {Promise<Array<{userId: *, category: string, field: string, deleteAfter: string}>>}
   *   The deletion jobs that were scheduled.
   * @throws {Error} When `metadata.category` is not in the schema.
   */
  async scheduleRetention(metadata, fields) {
    const schema = this.dataSchema[metadata.category];
    if (!schema) {
      throw new Error(`Unknown data category: ${metadata.category}`);
    }

    const selected = fields ? new Set(fields) : null;
    const startedAt = new Date();
    const scheduled = [];

    for (const [field, config] of Object.entries(schema.fields)) {
      if (selected && !selected.has(field)) {
        continue;
      }
      if (config.retention === 'until_deletion') {
        continue; // No automatic deletion
      }

      const deleteOn = this.computeRetentionDate(config.retention, startedAt);
      if (!deleteOn) {
        console.warn(`Unrecognized retention policy for field ${field}: ${config.retention}`);
        continue;
      }

      const job = {
        userId: metadata.userId,
        category: metadata.category,
        field: field,
        deleteAfter: deleteOn.toISOString(),
      };
      await this.scheduleDataDeletion(job);
      scheduled.push(job);
    }
    return scheduled;
  }
}