Hardware Optimization Strategies

CPU architecture significantly impacts password hashing performance. Modern processors include instructions specifically beneficial for cryptographic operations. AES-NI instructions accelerate algorithms using AES primitives, while AVX2/AVX-512 instructions speed up parallel operations. Ensuring your deployment environment supports and enables these features can improve performance by 2-4x without compromising security.

Memory hierarchy optimization is crucial for memory-hard functions. These algorithms deliberately access memory in patterns that defeat CPU caches, but system-level optimizations still matter. NUMA-aware memory allocation ensures that hashing threads access memory on their local node rather than on remote NUMA nodes. Huge pages reduce TLB misses for the large memory regions these algorithms require. Proper configuration can improve throughput by 20-30%.

import ctypes
import os
import platform
import subprocess
import sys
import time
from typing import Dict, List

class HardwareOptimizer:
    """Inspect the host CPU/memory and derive password-hashing tuning hints.

    Two dictionaries are populated once, at construction time:

    * ``cpu_info`` -- always has ``cores`` and ``architecture``. On Linux it
      may also have ``features`` (boolean flags ``aes``, ``avx2``, ``avx512``,
      ``sse4``) and ``l1_cache`` / ``l2_cache`` / ``l3_cache`` strings copied
      from ``lscpu`` output.
    * ``memory_info`` -- always has ``total_gb`` and ``available_gb``. On
      Linux it also has ``numa_nodes``.
    """

    # Per-hash memory budgets (in MB) suggested by optimize_for_hardware().
    _DEFAULT_MEMORY_MB = 64
    _LARGE_MEMORY_MB = 128

    # Kernel control file holding the number of configured huge pages.
    _NR_HUGEPAGES_PATH = '/proc/sys/vm/nr_hugepages'

    def __init__(self):
        self.cpu_info = self._get_cpu_info()
        self.memory_info = self._get_memory_info()

    def _get_cpu_info(self) -> Dict:
        """Gather CPU capabilities.

        Returns:
            Dict with ``cores`` and ``architecture``; on Linux, additionally
            ``features`` (when ``/proc/cpuinfo`` is readable) and cache-size
            strings (when ``lscpu`` is available). Each optional source is
            best-effort and independent of the other.
        """
        info = {
            # os.cpu_count() returns None when the count cannot be determined.
            'cores': os.cpu_count() or 1,
            'architecture': platform.machine(),
        }

        if platform.system() != 'Linux':
            return info

        try:
            with open('/proc/cpuinfo', 'r') as f:
                cpuinfo = f.read()
        except OSError:
            # Best effort: leave 'features' absent when the file is unreadable.
            cpuinfo = None
        if cpuinfo is not None:
            info['features'] = self._parse_cpu_features(cpuinfo)

        try:
            lscpu_output = subprocess.check_output(['lscpu']).decode(errors='replace')
        except (OSError, subprocess.CalledProcessError):
            # lscpu missing or failing only costs us the cache-size strings.
            lscpu_output = ''
        info.update(self._parse_cache_sizes(lscpu_output))

        return info

    @staticmethod
    def _parse_cpu_features(cpuinfo: str) -> Dict:
        """Extract feature booleans from the text of ``/proc/cpuinfo``.

        Only the ``flags`` / ``Features`` lines are consulted, and they are
        split into whole tokens, so text such as the model name cannot
        produce a false positive.
        """
        flags = set()
        for line in cpuinfo.splitlines():
            key, sep, value = line.partition(':')
            if sep and key.strip().lower() in ('flags', 'features'):
                flags.update(value.lower().split())

        return {
            'aes': 'aes' in flags,
            'avx2': 'avx2' in flags,
            # These are flag families (avx512f, avx512bw, sse4_1, sse4_2, ...).
            'avx512': any(flag.startswith('avx512') for flag in flags),
            'sse4': any(flag.startswith('sse4') for flag in flags),
        }

    @staticmethod
    def _parse_cache_sizes(lscpu_output: str) -> Dict:
        """Map ``lscpu`` cache lines to ``l1_cache``/``l2_cache``/``l3_cache`` strings."""
        labels = (
            ('L1d cache:', 'l1_cache'),
            ('L2 cache:', 'l2_cache'),
            ('L3 cache:', 'l3_cache'),
        )
        sizes = {}
        for line in lscpu_output.splitlines():
            for label, key in labels:
                if label in line:
                    sizes[key] = line.split(':')[1].strip()
                    break
        return sizes

    def _get_memory_info(self) -> Dict:
        """Gather memory configuration.

        Returns:
            Dict with ``total_gb`` and ``available_gb`` (both 0.0 when the
            platform offers no way to determine them); on Linux, additionally
            ``numa_nodes`` (1 when ``numactl`` is unavailable or fails).
        """
        total_bytes, available_bytes = self._read_memory_bytes()
        info = {
            'total_gb': total_bytes / (1024**3),
            'available_gb': available_bytes / (1024**3),
        }

        # Check NUMA configuration
        if platform.system() == 'Linux':
            try:
                numa_output = subprocess.check_output(
                    ['numactl', '--hardware']).decode(errors='replace')
            except (OSError, subprocess.CalledProcessError):
                info['numa_nodes'] = 1
            else:
                # One "node N size: ..." line is printed per NUMA node.
                info['numa_nodes'] = sum(
                    1 for line in numa_output.split('\n')
                    if 'node' in line and 'size:' in line
                )

        return info

    def _read_memory_bytes(self):
        """Return ``(total_bytes, available_bytes)`` of physical memory.

        Uses ``psutil`` when it is installed; otherwise falls back to
        ``/proc/meminfo`` and then to ``os.sysconf``. Unknown values are 0.
        """
        try:
            import psutil
        except ImportError:
            psutil = None

        if psutil is not None:
            vm = psutil.virtual_memory()
            return vm.total, vm.available

        meminfo_kb = self._read_meminfo_kb()
        if 'MemTotal' in meminfo_kb:
            # MemAvailable is absent on very old kernels; MemFree is a
            # conservative stand-in.
            available_kb = meminfo_kb.get(
                'MemAvailable', meminfo_kb.get('MemFree', 0))
            return meminfo_kb['MemTotal'] * 1024, available_kb * 1024

        return (self._sysconf_bytes('SC_PHYS_PAGES'),
                self._sysconf_bytes('SC_AVPHYS_PAGES'))

    @staticmethod
    def _read_meminfo_kb() -> Dict:
        """Parse ``/proc/meminfo`` into ``{field: kB}``; empty when unreadable."""
        values = {}
        try:
            with open('/proc/meminfo', 'r') as f:
                for line in f:
                    key, _, rest = line.partition(':')
                    fields = rest.split()
                    if fields and fields[0].isdigit():
                        values[key.strip()] = int(fields[0])
        except OSError:
            return {}
        return values

    @staticmethod
    def _sysconf_bytes(pages_name: str) -> int:
        """Return ``sysconf(pages_name) * page size`` in bytes, or 0 if unsupported."""
        try:
            size = os.sysconf(pages_name) * os.sysconf('SC_PAGE_SIZE')
        except (AttributeError, ValueError, OSError):
            # os.sysconf is missing on some platforms; unknown names raise.
            return 0
        return max(size, 0)

    def optimize_for_hardware(self) -> Dict:
        """Generate optimization recommendations.

        Returns:
            Dict with ``recommendations`` (human-readable strings), ``config``
            (suggested settings) and ``hardware`` (the raw ``cpu_info`` and
            ``memory_info`` dictionaries).
        """
        recommendations = []
        config = {}
        features = self.cpu_info.get('features', {})

        # CPU optimizations
        if features.get('aes'):
            recommendations.append("AES-NI available: Use AES-based algorithms")
            config['aes_ni'] = True

        if features.get('avx2'):
            recommendations.append("AVX2 available: Enable vectorized implementations")
            config['enable_avx2'] = True

        # Memory optimizations
        if self.memory_info['total_gb'] > 64:
            recommendations.append("Large memory available: Can use higher memory costs")
            config['suggested_memory_mb'] = self._LARGE_MEMORY_MB
        else:
            config['suggested_memory_mb'] = self._DEFAULT_MEMORY_MB

        # NUMA optimizations
        if self.memory_info.get('numa_nodes', 1) > 1:
            recommendations.append("NUMA system detected: Use NUMA-aware allocation")
            config['numa_aware'] = True

        # Threading recommendations
        # Assume hyperthreading, but never report fewer than one core.
        physical_cores = max(1, self.cpu_info['cores'] // 2)
        # Size the memory bound with the same per-hash budget suggested above.
        memory_bound = int(
            self.memory_info['available_gb'] * 1024 / config['suggested_memory_mb'])
        # At least one hash must be allowed, even on tiny or busy hosts.
        config['max_parallel_hashes'] = max(1, min(physical_cores, memory_bound))

        recommendations.append(
            f"Recommended max parallel hashes: {config['max_parallel_hashes']}"
        )

        return {
            'recommendations': recommendations,
            'config': config,
            'hardware': {
                'cpu': self.cpu_info,
                'memory': self.memory_info
            }
        }

    def _read_nr_hugepages(self) -> int:
        """Return the number of huge pages currently configured in the kernel."""
        with open(self._NR_HUGEPAGES_PATH, 'r') as f:
            return int(f.read().strip())

    def configure_huge_pages(self, memory_mb: int) -> bool:
        """Configure huge pages for better TLB performance.

        Args:
            memory_mb: Amount of memory, in MB, that should be backed by huge
                pages. A huge page size of 2 MB is assumed.

        Returns:
            True when at least the required number of huge pages is configured
            (already, or after a successful ``sudo sysctl`` call); False on
            non-Linux systems or on any failure.
        """
        if platform.system() != 'Linux':
            return False

        try:
            # Calculate required huge pages (2MB per page), rounding up.
            huge_pages_needed = (memory_mb + 1) // 2

            if self._read_nr_hugepages() >= huge_pages_needed:
                return True

            # Attempt to allocate (requires root)
            print(f"Attempting to allocate {huge_pages_needed} huge pages...")
            subprocess.run(['sudo', 'sysctl', '-w',
                            f'vm.nr_hugepages={huge_pages_needed}'],
                           check=True)

            # sysctl can succeed while the kernel allocates fewer pages than
            # requested, so confirm the count that is actually in effect.
            return self._read_nr_hugepages() >= huge_pages_needed

        except (OSError, ValueError, TypeError, subprocess.CalledProcessError):
            return False

    @staticmethod
    def _time_hashes(hasher, password: str, iterations: int) -> float:
        """Return the seconds taken to hash ``password`` ``iterations`` times."""
        start = time.perf_counter()
        for _ in range(iterations):
            hasher.hash(password)
        return time.perf_counter() - start

    def benchmark_with_optimizations(self):
        """Benchmark performance with various optimizations.

        Times 50 Argon2 hashes with ``parallelism=1`` and with
        ``parallelism=4``, prints both timings and the speedup, and then runs
        the NUMA affinity probe when more than one NUMA node was detected.
        Requires the ``argon2`` package.
        """
        from argon2 import PasswordHasher

        password = "OptimizationBenchmark123!"
        iterations = 50

        # Baseline
        ph_baseline = PasswordHasher(memory_cost=65536, time_cost=3, parallelism=1)
        baseline_time = self._time_hashes(ph_baseline, password, iterations)
        print(f"Baseline (1 thread): {baseline_time:.2f}s for {iterations} hashes")

        # With parallelism
        ph_parallel = PasswordHasher(memory_cost=65536, time_cost=3, parallelism=4)
        parallel_time = self._time_hashes(ph_parallel, password, iterations)
        print(f"Parallel (4 threads): {parallel_time:.2f}s for {iterations} hashes")
        print(f"Speedup: {baseline_time/parallel_time:.2f}x")

        # Test NUMA affinity if available
        if self.memory_info.get('numa_nodes', 1) > 1:
            self._test_numa_affinity()

    def _test_numa_affinity(self):
        """Test NUMA affinity impact.

        Simplified demonstration: runs a single Argon2 hash in a child
        interpreter bound to NUMA node 0 via ``numactl`` and reports whether
        that succeeded. No timing comparison is performed.
        """
        print("\nTesting NUMA affinity impact...")

        # Use the running interpreter so the child sees the same packages.
        interpreter = sys.executable or 'python'
        try:
            # Run on specific NUMA node
            subprocess.run(['numactl', '--cpunodebind=0', '--membind=0',
                            interpreter, '-c',
                            'from argon2 import PasswordHasher; '
                            'ph = PasswordHasher(); '
                            'ph.hash("test")'],
                           check=True)
            print("NUMA affinity configuration available")
        except (OSError, subprocess.CalledProcessError):
            print("NUMA affinity not available or not configured")