Hardware Optimization Strategies
Hardware Optimization Strategies
CPU architecture significantly impacts password hashing performance. Modern processors include instructions specifically beneficial for cryptographic operations. AES-NI instructions accelerate algorithms using AES primitives, while AVX2/AVX-512 instructions speed up parallel operations. Ensuring your deployment environment supports and enables these features can improve performance by 2-4x without compromising security.
Memory hierarchy optimization proves crucial for memory-hard functions. These algorithms deliberately access memory in patterns that defeat CPU caches, but system-level optimizations still matter. NUMA-aware memory allocation ensures that hashing threads access memory on their local NUMA node rather than on remote nodes. Huge pages reduce TLB misses for the large memory regions these algorithms require. Proper configuration can improve throughput by 20-30%.
import ctypes
import os
import platform
import subprocess
import sys
import time
from typing import Dict, List
class HardwareOptimizer:
    """Hardware-specific optimizations for password hashing.

    The constructor probes the host once (CPU core count, architecture,
    CPU feature flags, cache sizes, memory size and NUMA layout) and stores
    the results in ``self.cpu_info`` and ``self.memory_info``. All probing is
    best effort: a probe that fails simply leaves its keys out (or falls back
    to a default) instead of raising.
    """

    # Huge page size, in MB, assumed by configure_huge_pages().
    _HUGE_PAGE_MB = 2

    # Failures expected from best-effort probing (missing tools, unreadable
    # files, undecodable or unparsable output). Deliberately narrower than a
    # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
    _PROBE_ERRORS = (OSError, subprocess.SubprocessError, ValueError, IndexError)

    def __init__(self):
        self.cpu_info = self._get_cpu_info()
        self.memory_info = self._get_memory_info()

    @staticmethod
    def _parse_cpu_features(cpuinfo: str) -> Dict:
        """Extract the feature booleans from ``/proc/cpuinfo`` text.

        Only the ``flags`` lines (``Features`` on ARM kernels) are inspected
        and matched as whole tokens, so unrelated text such as a model name
        cannot produce a false positive.
        """
        flags = set()
        for line in cpuinfo.splitlines():
            key, sep, value = line.partition(':')
            if sep and key.strip().lower() in ('flags', 'features'):
                flags.update(value.split())
        return {
            'aes': 'aes' in flags,
            'avx2': 'avx2' in flags,
            # AVX-512 and SSE4 are families of flags (avx512f, sse4_1, ...).
            'avx512': any(flag.startswith('avx512') for flag in flags),
            'sse4': any(flag.startswith('sse4') for flag in flags),
        }

    @staticmethod
    def _parse_cache_sizes(lscpu_output: str) -> Dict:
        """Extract L1d/L2/L3 cache size strings from ``lscpu`` output."""
        labels = {
            'L1d cache': 'l1_cache',
            'L2 cache': 'l2_cache',
            'L3 cache': 'l3_cache',
            # NOTE(review): some lscpu versions appear to print the caches as
            # indented "L1d:" / "L2:" / "L3:" lines; accepted here as well.
            'L1d': 'l1_cache',
            'L2': 'l2_cache',
            'L3': 'l3_cache',
        }
        caches = {}
        for line in lscpu_output.splitlines():
            label, sep, value = line.partition(':')
            key = labels.get(label.strip())
            if sep and key:
                caches[key] = value.strip()
        return caches

    @staticmethod
    def _read_memory_bytes():
        """Return ``(total_bytes, available_bytes)`` for system memory.

        Uses psutil when it is installed; otherwise falls back to
        ``/proc/meminfo`` and then ``os.sysconf``. Unknown values are 0,
        except that an unknown "available" falls back to the total (an
        upper bound).
        """
        try:
            import psutil  # optional third-party dependency
        except ImportError:
            psutil = None
        if psutil is not None:
            vm = psutil.virtual_memory()
            return vm.total, vm.available

        total = available = 0
        try:
            with open('/proc/meminfo', 'r') as f:
                for line in f:
                    key, _, rest = line.partition(':')
                    fields = rest.split()
                    if not fields:
                        continue
                    # /proc/meminfo reports these values in kB.
                    if key == 'MemTotal':
                        total = int(fields[0]) * 1024
                    elif key == 'MemAvailable':
                        available = int(fields[0]) * 1024
        except (OSError, ValueError):
            pass  # not Linux, or unexpected format: try sysconf below
        if not total:
            try:
                total = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
            except (AttributeError, ValueError, OSError):
                total = 0
        return total, available or total

    @staticmethod
    def _current_huge_pages() -> int:
        """Return the value currently in ``/proc/sys/vm/nr_hugepages``."""
        with open('/proc/sys/vm/nr_hugepages', 'r') as f:
            return int(f.read().strip())

    def _get_cpu_info(self) -> Dict:
        """Gather CPU capabilities.

        Returns a dict with ``cores`` and ``architecture`` and, on Linux when
        the probes succeed, ``features`` (dict of booleans) and the
        ``l1_cache`` / ``l2_cache`` / ``l3_cache`` size strings.
        """
        info = {
            # os.cpu_count() returns None when the count is undetermined.
            'cores': os.cpu_count() or 1,
            'architecture': platform.machine(),
        }
        if platform.system() != 'Linux':
            return info

        # Check CPU features
        try:
            with open('/proc/cpuinfo', 'r') as f:
                info['features'] = self._parse_cpu_features(f.read())
        except OSError:
            pass  # best effort: leave 'features' unset

        # Get cache sizes (probed separately so a missing lscpu does not
        # affect feature detection)
        try:
            lscpu_output = subprocess.check_output(['lscpu']).decode()
        except self._PROBE_ERRORS:
            pass  # best effort: leave cache keys unset
        else:
            info.update(self._parse_cache_sizes(lscpu_output))
        return info

    def _get_memory_info(self) -> Dict:
        """Gather memory configuration.

        Returns a dict with ``total_gb`` and ``available_gb`` and, on Linux,
        ``numa_nodes`` (1 when ``numactl`` is unavailable or reports none).
        """
        total_bytes, available_bytes = self._read_memory_bytes()
        info = {
            'total_gb': total_bytes / (1024 ** 3),
            'available_gb': available_bytes / (1024 ** 3),
        }
        # Check NUMA configuration
        if platform.system() == 'Linux':
            try:
                numa_output = subprocess.check_output(
                    ['numactl', '--hardware']).decode()
            except self._PROBE_ERRORS:
                info['numa_nodes'] = 1
            else:
                # One "node N size: ..." line is counted per NUMA node.
                node_lines = sum(
                    1 for line in numa_output.split('\n')
                    if 'node' in line and 'size:' in line
                )
                info['numa_nodes'] = max(1, node_lines)
        return info

    def optimize_for_hardware(self) -> Dict:
        """Generate optimization recommendations.

        Returns a dict with three keys: ``recommendations`` (list of
        human-readable strings), ``config`` (suggested settings) and
        ``hardware`` (the probed ``cpu`` and ``memory`` info).
        """
        recommendations = []
        config = {}
        features = self.cpu_info.get('features', {})

        # CPU optimizations
        if features.get('aes'):
            recommendations.append("AES-NI available: Use AES-based algorithms")
            config['aes_ni'] = True
        if features.get('avx2'):
            recommendations.append(
                "AVX2 available: Enable vectorized implementations")
            config['enable_avx2'] = True

        # Memory optimizations
        if self.memory_info['total_gb'] > 64:
            recommendations.append(
                "Large memory available: Can use higher memory costs")
            config['suggested_memory_mb'] = 128
        else:
            config['suggested_memory_mb'] = 64

        # NUMA optimizations
        if self.memory_info.get('numa_nodes', 1) > 1:
            recommendations.append(
                "NUMA system detected: Use NUMA-aware allocation")
            config['numa_aware'] = True

        # Threading recommendations
        # Assume hyperthreading, but never drop below one core.
        physical_cores = max(1, self.cpu_info['cores'] // 2)
        # Bound by how many hashes of the *suggested* size fit in memory.
        memory_bound = int(
            self.memory_info['available_gb'] * 1024
            / config['suggested_memory_mb']
        )
        config['max_parallel_hashes'] = max(1, min(physical_cores, memory_bound))
        recommendations.append(
            f"Recommended max parallel hashes: {config['max_parallel_hashes']}"
        )

        return {
            'recommendations': recommendations,
            'config': config,
            'hardware': {
                'cpu': self.cpu_info,
                'memory': self.memory_info,
            },
        }

    def configure_huge_pages(self, memory_mb: int) -> bool:
        """Configure huge pages for better TLB performance.

        Args:
            memory_mb: Amount of memory, in MB, to back with 2 MB huge pages.

        Returns:
            True if enough huge pages are configured (already, or after a
            ``sudo sysctl`` attempt); False on non-Linux systems or when the
            pages could not be configured.
        """
        if platform.system() != 'Linux':
            return False

        # Calculate required huge pages (2MB per page), rounding up
        page_mb = self._HUGE_PAGE_MB
        huge_pages_needed = (memory_mb + page_mb - 1) // page_mb

        try:
            # Check current configuration
            if self._current_huge_pages() >= huge_pages_needed:
                return True

            # Attempt to allocate (requires root)
            print(f"Attempting to allocate {huge_pages_needed} huge pages...")
            subprocess.run(
                ['sudo', 'sysctl', '-w',
                 f'vm.nr_hugepages={huge_pages_needed}'],
                check=True,
            )
            # The kernel may reserve fewer pages than requested, so confirm
            # the resulting count instead of assuming success.
            return self._current_huge_pages() >= huge_pages_needed
        except (OSError, ValueError, subprocess.SubprocessError):
            return False

    def benchmark_with_optimizations(self):
        """Benchmark performance with various optimizations.

        Times 50 Argon2 hashes with ``parallelism=1`` and with
        ``parallelism=4``, prints both timings and the speedup, and runs the
        NUMA affinity check when more than one NUMA node was detected.
        Requires the third-party ``argon2`` package.
        """
        from argon2 import PasswordHasher

        password = "OptimizationBenchmark123!"
        iterations = 50

        def time_hashes(hasher) -> float:
            """Return the seconds taken to hash the password `iterations` times."""
            start = time.perf_counter()
            for _ in range(iterations):
                hasher.hash(password)
            return time.perf_counter() - start

        # Baseline
        baseline_time = time_hashes(
            PasswordHasher(memory_cost=65536, time_cost=3, parallelism=1))
        print(f"Baseline (1 thread): {baseline_time:.2f}s for {iterations} hashes")

        # With parallelism
        parallel_time = time_hashes(
            PasswordHasher(memory_cost=65536, time_cost=3, parallelism=4))
        print(f"Parallel (4 threads): {parallel_time:.2f}s for {iterations} hashes")
        print(f"Speedup: {baseline_time/parallel_time:.2f}x")

        # Test NUMA affinity if available
        if self.memory_info.get('numa_nodes', 1) > 1:
            self._test_numa_affinity()

    def _test_numa_affinity(self):
        """Test NUMA affinity impact.

        Simplified demonstration: runs a single Argon2 hash in a child
        interpreter bound to NUMA node 0 via ``numactl`` and prints whether
        that succeeded. No timing comparison is made.
        """
        print("\nTesting NUMA affinity impact...")
        try:
            # Run on specific NUMA node, using the interpreter running this
            # process so the child sees the same installed packages.
            subprocess.run(
                ['numactl', '--cpunodebind=0', '--membind=0',
                 sys.executable, '-c',
                 'from argon2 import PasswordHasher; '
                 'ph = PasswordHasher(); '
                 'ph.hash("test")'],
                check=True,
            )
            print("NUMA affinity configuration available")
        except (OSError, subprocess.SubprocessError):
            print("NUMA affinity not available or not configured")