Understanding Performance Characteristics

Modern password hashing algorithms exhibit fundamentally different performance characteristics than traditional cryptographic operations. While SHA-256 processes gigabytes per second, Argon2 deliberately constrains throughput to dozens of operations per second. This 100,000x performance difference isn't a bug—it's the central security feature. Understanding these characteristics enables appropriate optimization strategies.

Memory-hard functions like Argon2 and scrypt create unique scaling challenges. Unlike CPU-bound operations that benefit from faster processors, memory-hard functions require bandwidth and capacity. A system hashing passwords with 64MB of memory per operation needs 64GB of RAM to process 1,000 concurrent authentications. This memory pressure differs qualitatively from typical web application scaling patterns.

import time
import psutil
import threading
import multiprocessing
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from argon2 import PasswordHasher, Type
import matplotlib.pyplot as plt
import numpy as np

class PasswordHashingBenchmark:
    """Comprehensive benchmarking for password hashing performance.

    Measures three cost dimensions of modern password hashing:
      * parameter scaling (Argon2 memory cost, bcrypt work factor),
      * throughput / latency / RSS under concurrent load,
      * Python-level allocation behavior and memory-bandwidth sensitivity.

    All public methods print a human-readable report and also return their
    measurements as plain dicts.
    """

    # Number of hash invocations averaged per measurement point.
    SAMPLE_COUNT = 10

    def __init__(self):
        # Scratch space for callers that want to stash results on the
        # instance; the methods below also return their results directly.
        self.results = {}

    def _time_hashes(self, hash_fn, password):
        """Return mean wall-clock seconds per call of ``hash_fn(password)``.

        Averages over ``SAMPLE_COUNT`` calls using ``time.perf_counter``
        (monotonic, highest available resolution).
        """
        start = time.perf_counter()
        for _ in range(self.SAMPLE_COUNT):
            hash_fn(password)
        return (time.perf_counter() - start) / self.SAMPLE_COUNT

    def benchmark_algorithm_scaling(self):
        """Benchmark how Argon2 and bcrypt scale with their cost parameters.

        Returns:
            dict with keys:
              'argon2': {'memory_costs': [MB, ...], 'times': [sec/hash, ...]}
              'bcrypt': {'costs': [factor, ...], 'times': [sec/hash, ...]}
        """
        password = "BenchmarkPassword123!"

        # --- Argon2: cost is dominated by the memory parameter. ---
        print("Benchmarking Argon2 memory scaling...")
        memory_costs = [16, 32, 64, 128, 256, 512]  # MB
        argon2_times = []

        for memory_mb in memory_costs:
            ph = PasswordHasher(
                memory_cost=memory_mb * 1024,  # argon2 API takes KiB
                time_cost=3,
                parallelism=4,
                type=Type.ID,  # Argon2id variant
            )
            mean = self._time_hashes(ph.hash, password)
            argon2_times.append(mean)
            print(f"  {memory_mb}MB: {mean:.3f}s per hash")

        # --- bcrypt: cost doubles with each +1 of the work factor. ---
        print("\nBenchmarking bcrypt cost scaling...")
        import bcrypt  # local import: only needed for this benchmark

        bcrypt_costs = [10, 11, 12, 13, 14, 15]
        bcrypt_times = []

        for cost in bcrypt_costs:
            mean = self._time_hashes(
                lambda pw: bcrypt.hashpw(pw.encode(), bcrypt.gensalt(rounds=cost)),
                password,
            )
            bcrypt_times.append(mean)
            print(f"  Cost {cost}: {mean:.3f}s per hash")

        return {
            'argon2': {'memory_costs': memory_costs, 'times': argon2_times},
            'bcrypt': {'costs': bcrypt_costs, 'times': bcrypt_times},
        }

    def measure_concurrency_impact(self):
        """Measure throughput, latency, and RSS at increasing thread counts.

        Runs 100 hashes at each concurrency level through a thread pool.

        Returns:
            dict mapping concurrency level -> {'throughput' (ops/sec),
            'latency' (sec/op), 'memory_mb' (process RSS after the burst)}.
        """
        ph = PasswordHasher(memory_cost=65536, time_cost=3, parallelism=4)
        password = "ConcurrencyTest123!"
        total_ops = 100

        def hash_password():
            return ph.hash(password)

        results = {}
        for level in [1, 2, 4, 8, 16, 32]:
            start = time.perf_counter()
            with ThreadPoolExecutor(max_workers=level) as executor:
                futures = [executor.submit(hash_password) for _ in range(total_ops)]
                for future in futures:
                    # Propagate any hashing error instead of silently dropping it.
                    future.result()
            elapsed = time.perf_counter() - start

            # RSS of this whole process after the burst: includes interpreter
            # overhead, so treat it as a trend indicator, not an absolute.
            memory_mb = psutil.Process().memory_info().rss / 1024 / 1024

            throughput = total_ops / elapsed
            results[level] = {
                'throughput': throughput,
                'latency': elapsed / total_ops,
                'memory_mb': memory_mb,
            }
            print(f"Concurrency {level}: {throughput:.1f} ops/sec, "
                  f"{elapsed / total_ops:.3f}s/op, {memory_mb:.0f}MB RAM")

        return results

    def analyze_memory_patterns(self):
        """Profile Python-level allocations made during a single hash.

        NOTE(review): tracemalloc only observes allocations routed through the
        Python allocator; argon2's C-level working buffer may not appear here
        — cross-check against RSS figures if exact numbers matter.

        Returns:
            dict with 'peak_memory_mb' (net Python-level allocation delta, MB)
            and 'allocation_count' (number of distinct allocation sites).
        """
        import tracemalloc

        tracemalloc.start()
        try:
            ph = PasswordHasher(memory_cost=65536, time_cost=3)
            snapshot_before = tracemalloc.take_snapshot()
            ph.hash("MemoryAnalysis123!")
            snapshot_after = tracemalloc.take_snapshot()
        finally:
            # BUG FIX: the profiler was previously left running forever,
            # taxing every subsequent allocation in the process.
            tracemalloc.stop()

        top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

        print("\nMemory allocation analysis:")
        for stat in top_stats[:10]:
            print(f"{stat}")

        # BUG FIX: the old code summed only the top-10 entries while
        # labelling the figure "Total allocated"; sum across all sites.
        total_allocated = sum(stat.size_diff for stat in top_stats)
        print(f"\nTotal allocated: {total_allocated / 1024 / 1024:.1f}MB")

        self._measure_memory_bandwidth_impact()

        return {
            'peak_memory_mb': total_allocated / 1024 / 1024,
            'allocation_count': len(top_stats),
        }

    def _measure_memory_bandwidth_impact(self):
        """Compare hash latency with and without artificial memory pressure."""
        print("\nMemory bandwidth impact test:")

        # ~800MB scratch array used to generate competing memory traffic.
        large_array = np.zeros((1000, 1000, 100), dtype=np.float64)

        ph = PasswordHasher(memory_cost=65536)
        password = "BandwidthTest123!"

        # Baseline without memory pressure.
        baseline = self._time_hashes(ph.hash, password)

        # BUG FIX: the original pressure thread looped forever (daemon=True,
        # no shutdown), so it kept burning CPU and memory bandwidth after this
        # method returned, skewing every later measurement. Use a stop flag
        # and join the thread before leaving.
        stop = threading.Event()

        def create_memory_pressure():
            while not stop.is_set():
                # Random writes defeat caching and force real DRAM traffic.
                large_array[np.random.randint(1000), np.random.randint(1000)] += 1

        pressure_thread = threading.Thread(target=create_memory_pressure)
        pressure_thread.start()
        time.sleep(0.1)  # let pressure build before measuring

        try:
            with_pressure = self._time_hashes(ph.hash, password)
        finally:
            stop.set()
            pressure_thread.join()

        print(f"Baseline: {baseline:.3f}s per hash")
        print(f"With memory pressure: {with_pressure:.3f}s per hash")
        print(f"Performance degradation: {(with_pressure / baseline - 1) * 100:.1f}%")

# Run the full benchmark suite only when executed as a script, so importing
# this module for its class does not trigger minutes of hashing work.
if __name__ == "__main__":
    benchmark = PasswordHashingBenchmark()
    scaling_results = benchmark.benchmark_algorithm_scaling()
    concurrency_results = benchmark.measure_concurrency_impact()
    memory_results = benchmark.analyze_memory_patterns()