diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 0000000..864eb93
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,17 @@
+# Benchmark
+
+We can run simulations to compare the performance of the native C executable `cachesim` and the Python binding via [simulation.py](./simulation.py).
+
+## Example
+
+```bash
+# run from the root directory of the repository
+bash scripts/install.sh
+python benchmark/simulation.py --trace_path=./src/libCacheSim/data/cloudPhysicsIO.oracleGeneral.bin
+```
+
+## Usage
+
+```
+python benchmark/simulation.py -h
+```
\ No newline at end of file
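For readers skimming the diff, the core of what `simulation.py` (below) compares is: timing the native `cachesim` binary as a subprocess versus driving the libCacheSim Python binding in-process. The following condensed sketch uses only calls that appear in the full script; the binary path, trace path, and cache-size ratio are illustrative, not prescribed by this PR.

```python
import subprocess
from time import perf_counter

import libcachesim as lcs

trace = "./src/libCacheSim/data/cloudPhysicsIO.oracleGeneral.bin"

# Path 1: native C binary, timed as a subprocess (binary path is illustrative).
t0 = perf_counter()
subprocess.run(["./build/bin/cachesim", trace, "oracleGeneral", "LRU", "1",
                "--ignore-obj-size", "1"], capture_output=True, text=True)
native_seconds = perf_counter() - t0

# Path 2: Python binding, replaying the same trace in-process.
reader = lcs.TraceReader(
    trace=trace,
    trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
    reader_init_params=lcs.ReaderInitParam(ignore_obj_size=True),
)
cache = lcs.LRU(cache_size=int(reader.get_working_set_size()[0] * 0.1))
t0 = perf_counter()
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
python_seconds = perf_counter() - t0

print(f"native: {native_seconds:.2f}s, binding: {python_seconds:.2f}s, "
      f"miss ratio: {req_miss_ratio:.4f}")
```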
diff --git a/benchmark/simulation.py b/benchmark/simulation.py
index 5e8e23e..804cd7f 100644
--- a/benchmark/simulation.py
+++ b/benchmark/simulation.py
@@ -3,3 +3,649 @@
 This module contains benchmarks for various components of the library,
 including request processing times, memory usage, and overall throughput.
 """
+
+import libcachesim as lcs
+import os
+import sys
+import tracemalloc
+from time import perf_counter, sleep
+import subprocess
+import matplotlib.pyplot as plt
+import numpy as np
+import statistics
+import psutil
+import logging
+import threading
+from typing import List, Tuple, Dict, Any, Optional
+from dataclasses import dataclass
+
+# Default configuration
+DEFAULT_NUM_ITERATIONS = 20
+DEFAULT_CACHE_SIZE_RATIO = 0.1
+
+@dataclass
+class BenchmarkResult:
+    """Store benchmark results for a single method."""
+    method_name: str
+    execution_times: List[float]
+    memory_usage: List[float]
+    miss_ratios: List[float]
+
+    @property
+    def mean_time(self) -> float:
+        return statistics.mean(self.execution_times)
+
+    @property
+    def std_time(self) -> float:
+        return statistics.stdev(self.execution_times) if len(self.execution_times) > 1 else 0.0
+
+    @property
+    def min_time(self) -> float:
+        return min(self.execution_times)
+
+    @property
+    def max_time(self) -> float:
+        return max(self.execution_times)
+
+    @property
+    def mean_memory(self) -> float:
+        return statistics.mean(self.memory_usage) if self.memory_usage else 0.0
+
+    @property
+    def mean_miss_ratio(self) -> float:
+        return statistics.mean(self.miss_ratios)
+
+class SubprocessMemoryMonitor:
+    """Monitor memory usage of a subprocess."""
+
+    def __init__(self, pid: int):
+        self.pid = pid
+        self.peak_memory = 0.0
+        self.monitoring = False
+        self.monitor_thread = None
+
+    def start_monitoring(self):
+        """Start monitoring memory usage in a separate thread."""
+        self.monitoring = True
+        self.monitor_thread = threading.Thread(target=self._monitor_memory)
+        self.monitor_thread.daemon = True
+        self.monitor_thread.start()
+
+    def stop_monitoring(self) -> float:
+        """Stop monitoring and return peak memory usage in MB."""
+        self.monitoring = False
+        if self.monitor_thread:
+            self.monitor_thread.join(timeout=1.0)
+        return self.peak_memory
+
+    def _monitor_memory(self):
+        """Monitor memory usage of the subprocess."""
+        try:
+            process = psutil.Process(self.pid)
+            while self.monitoring:
+                try:
+                    memory_info = process.memory_info()
+                    current_memory = memory_info.rss / 1024 / 1024  # Convert to MB
+                    self.peak_memory = max(self.peak_memory, current_memory)
+                    sleep(0.01)  # Sample every 10ms
+                except (psutil.NoSuchProcess, psutil.AccessDenied):
+                    # Process ended or access denied
+                    break
+        except psutil.NoSuchProcess:
+            # Process doesn't exist
+            pass
+
+class CacheSimulationBenchmark:
+    """Comprehensive benchmark for cache simulation performance."""
+
+    def __init__(self, trace_path: str, num_iterations: int = DEFAULT_NUM_ITERATIONS,
+                 cache_size_ratio: float = DEFAULT_CACHE_SIZE_RATIO):
+        self.trace_path = trace_path
+        self.num_iterations = num_iterations
+        self.cache_size_ratio = cache_size_ratio
+        self.results: Dict[str, BenchmarkResult] = {}
+        self.logger = self._setup_logging()
+
+        # Validate trace file
+        if not os.path.exists(trace_path):
+            raise FileNotFoundError(f"Trace file not found: {trace_path}")
+
+    def _setup_logging(self) -> logging.Logger:
+        """Setup logging configuration."""
+        logging.basicConfig(
+            level=logging.INFO,
+            format='%(asctime)s - %(levelname)s - %(message)s'
+        )
+        return logging.getLogger(__name__)
+
+    def _get_process_memory(self) -> float:
+        """Get current process memory usage in MB."""
+        process = psutil.Process(os.getpid())
+        return process.memory_info().rss / 1024 / 1024
+
+    def _find_cachesim_binary(self) -> Optional[str]:
+        """Find the cachesim binary in common locations."""
+        possible_paths = [
+            "./src/libCacheSim/build/bin/cachesim",
+            "./build/bin/cachesim",
+            "../build/bin/cachesim",
+            "cachesim"
+        ]
+
+        for path in possible_paths:
+            if os.path.exists(path):
+                return path
+            elif path == "cachesim":
+                # Check if it's in PATH
+                try:
+                    result = subprocess.run(["which", "cachesim"],
+                                            capture_output=True, text=True)
+                    if result.returncode == 0:
+                        return "cachesim"
+                except FileNotFoundError:
+                    # 'which' command not available (e.g., on Windows)
+                    pass
+
+        return None
+
+    def _parse_native_c_output(self, output: str) -> float:
+        """Parse miss ratio from native C binary output."""
+        try:
+            for line in output.split('\n'):
+                line = line.strip()
+                if 'miss ratio' in line.lower():
+                    # Try to extract the last number from the line
+                    parts = line.split()
+                    for part in reversed(parts):
+                        try:
+                            return float(part.rstrip('%,.:'))
+                        except ValueError:
+                            continue
+                # Alternative patterns
+                elif 'miss rate' in line.lower():
+                    parts = line.split()
+                    for part in reversed(parts):
+                        try:
+                            return float(part.rstrip('%,.:'))
+                        except ValueError:
+                            continue
+        except (ValueError, IndexError, AttributeError) as e:
+            self.logger.warning(f"Could not parse miss ratio from native C output: {e}")
+
+        return 0.0  # Default value if parsing fails
+
+    def _benchmark_native_c(self) -> BenchmarkResult:
+        """Benchmark native C binary execution with proper subprocess memory monitoring."""
+        self.logger.info("Benchmarking native C binary...")
+
+        execution_times = []
+        memory_usage = []
+        miss_ratios = []
+
+        cachesim_path = self._find_cachesim_binary()
+        if not cachesim_path:
+            self.logger.warning("Native C binary not found, skipping native benchmark")
+            return BenchmarkResult("Native C", [], [], [])
+
+        for i in range(self.num_iterations):
+            self.logger.info(f"Native C - Iteration {i+1}/{self.num_iterations}")
+
+            try:
+                start_time = perf_counter()
+
+                # Use Popen for better control over the subprocess
+                process = subprocess.Popen([
+                    cachesim_path,
+                    self.trace_path,
+                    "oracleGeneral",
+                    "LRU",
+                    "1",
+                    "--ignore-obj-size", "1"
+                ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+
+                # Start memory monitoring
+                memory_monitor = SubprocessMemoryMonitor(process.pid)
+                memory_monitor.start_monitoring()
+
+                # Wait for process to complete
+                stdout, stderr = process.communicate()
+                end_time = perf_counter()
+
+                # Stop memory monitoring
+                peak_memory = memory_monitor.stop_monitoring()
+
+                if process.returncode != 0:
+                    self.logger.warning(f"Native C execution failed with return code {process.returncode}")
+                    self.logger.warning(f"stderr: {stderr}")
+                    continue
+
+                execution_time = end_time - start_time
+                miss_ratio = self._parse_native_c_output(stdout)
+
+                execution_times.append(execution_time)
+                memory_usage.append(peak_memory)
+                miss_ratios.append(miss_ratio)
+
+            except (subprocess.SubprocessError, OSError) as e:
+                self.logger.warning(f"Native C execution failed: {e}")
+                continue
+
+        return BenchmarkResult("Native C", execution_times, memory_usage, miss_ratios)
+
self.logger.info(f"Trace file: {self.trace_path}") + self.logger.info(f"Cache size ratio: {self.cache_size_ratio}") + + # Run benchmarks + self.results["native_c"] = self._benchmark_native_c() + self.results["c_process_trace"] = self._benchmark_c_process_trace() + self.results["python_loop"] = self._benchmark_python_loop() + + return self.results + + def validate_results(self) -> bool: + """Validate that all methods produce similar miss ratios.""" + self.logger.info("Validating results...") + + miss_ratios = [] + for name, result in self.results.items(): + if result.execution_times and result.miss_ratios: # Only check methods that ran successfully + miss_ratios.append((name, result.mean_miss_ratio)) + + if len(miss_ratios) < 2: + self.logger.warning("Not enough results to validate") + return True + + # Check if all miss ratios are within 1% of each other + base_ratio = miss_ratios[0][1] + validation_passed = True + + for name, ratio in miss_ratios[1:]: + relative_diff = abs(ratio - base_ratio) / max(base_ratio, 1e-10) # Avoid division by zero + if relative_diff > 0.01: # 1% tolerance + self.logger.warning(f"Miss ratio mismatch: {miss_ratios[0][0]}={base_ratio:.4f}, {name}={ratio:.4f} (diff: {relative_diff:.2%})") + validation_passed = False + + if validation_passed: + self.logger.info("All miss ratios match within tolerance") + + return validation_passed + + def print_statistics(self): + """Print detailed performance statistics.""" + print("\n" + "="*80) + print("COMPREHENSIVE PERFORMANCE ANALYSIS") + print("="*80) + print(f"Configuration: {self.num_iterations} iterations, cache size ratio: {self.cache_size_ratio}") + print(f"Trace file: {os.path.basename(self.trace_path)}") + + # Basic statistics + for name, result in self.results.items(): + if not result.execution_times: + print(f"\n{result.method_name}: No valid results") + continue + + print(f"\n{result.method_name} Performance:") + print(f" Execution Time:") + print(f" Mean: {result.mean_time:.4f} ± {result.std_time:.4f} seconds") + print(f" Range: [{result.min_time:.4f}, {result.max_time:.4f}] seconds") + print(f" Memory Usage:") + if result.memory_usage: + print(f" Mean: {result.mean_memory:.2f} MB") + else: + print(f" Mean: N/A") + print(f" Cache Performance:") + print(f" Mean Miss Ratio: {result.mean_miss_ratio:.4f}") + print(f" Successful Iterations: {len(result.execution_times)}/{self.num_iterations}") + + # Comparative analysis + valid_results = [(name, result) for name, result in self.results.items() if result.execution_times] + if len(valid_results) >= 2: + print(f"\n{'Comparative Analysis':=^60}") + + # Find fastest method + fastest_method = min(valid_results, key=lambda x: x[1].mean_time) + + print(f"\nFastest Method: {fastest_method[1].method_name} ({fastest_method[1].mean_time:.4f}s)") + + # Compare all methods to fastest + for name, result in valid_results: + if name == fastest_method[0]: + continue + + speedup_factor = result.mean_time / fastest_method[1].mean_time + overhead_percent = (speedup_factor - 1) * 100 + + print(f" {result.method_name}:") + print(f" {speedup_factor:.2f}x slower ({overhead_percent:.1f}% overhead)") + + # Throughput analysis + print(f"\n{'Throughput Analysis':=^60}") + for name, result in self.results.items(): + if not result.execution_times: + continue + + # Estimate traces per second + throughput = 1 / result.mean_time + print(f"{result.method_name}: ~{throughput:.1f} traces/second") + + def create_visualizations(self, save_path: str = "benchmark_comprehensive_analysis.png"): + """Create 
+    def print_statistics(self):
+        """Print detailed performance statistics."""
+        print("\n" + "="*80)
+        print("COMPREHENSIVE PERFORMANCE ANALYSIS")
+        print("="*80)
+        print(f"Configuration: {self.num_iterations} iterations, cache size ratio: {self.cache_size_ratio}")
+        print(f"Trace file: {os.path.basename(self.trace_path)}")
+
+        # Basic statistics
+        for name, result in self.results.items():
+            if not result.execution_times:
+                print(f"\n{result.method_name}: No valid results")
+                continue
+
+            print(f"\n{result.method_name} Performance:")
+            print(f"  Execution Time:")
+            print(f"    Mean: {result.mean_time:.4f} ± {result.std_time:.4f} seconds")
+            print(f"    Range: [{result.min_time:.4f}, {result.max_time:.4f}] seconds")
+            print(f"  Memory Usage:")
+            if result.memory_usage:
+                print(f"    Mean: {result.mean_memory:.2f} MB")
+            else:
+                print(f"    Mean: N/A")
+            print(f"  Cache Performance:")
+            print(f"    Mean Miss Ratio: {result.mean_miss_ratio:.4f}")
+            print(f"  Successful Iterations: {len(result.execution_times)}/{self.num_iterations}")
+
+        # Comparative analysis
+        valid_results = [(name, result) for name, result in self.results.items() if result.execution_times]
+        if len(valid_results) >= 2:
+            print(f"\n{'Comparative Analysis':=^60}")
+
+            # Find fastest method
+            fastest_method = min(valid_results, key=lambda x: x[1].mean_time)
+
+            print(f"\nFastest Method: {fastest_method[1].method_name} ({fastest_method[1].mean_time:.4f}s)")
+
+            # Compare all methods to fastest
+            for name, result in valid_results:
+                if name == fastest_method[0]:
+                    continue
+
+                speedup_factor = result.mean_time / fastest_method[1].mean_time
+                overhead_percent = (speedup_factor - 1) * 100
+
+                print(f"  {result.method_name}:")
+                print(f"    {speedup_factor:.2f}x slower ({overhead_percent:.1f}% overhead)")
+
+        # Throughput analysis
+        print(f"\n{'Throughput Analysis':=^60}")
+        for name, result in self.results.items():
+            if not result.execution_times:
+                continue
+
+            # Estimate full-trace replays per second
+            throughput = 1 / result.mean_time
+            print(f"{result.method_name}: ~{throughput:.1f} traces/second")
+
+    def create_visualizations(self, save_path: str = "benchmark_comprehensive_analysis.png"):
+        """Create comprehensive visualizations."""
+        # Filter out empty results
+        valid_results = {name: result for name, result in self.results.items()
+                         if result.execution_times}
+
+        if not valid_results:
+            self.logger.warning("No valid results to visualize")
+            return
+
+        fig = plt.figure(figsize=(20, 15))
+
+        # Setup subplots
+        gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)
+
+        # Plot 1: Execution times across iterations
+        ax1 = fig.add_subplot(gs[0, :2])
+        iterations = range(1, self.num_iterations + 1)
+        colors = ['blue', 'red', 'green', 'orange', 'purple']
+
+        for i, (name, result) in enumerate(valid_results.items()):
+            if result.execution_times:
+                ax1.plot(iterations[:len(result.execution_times)], result.execution_times,
+                         color=colors[i % len(colors)], label=result.method_name,
+                         marker='o', markersize=4, alpha=0.7)
+
+        ax1.set_xlabel('Iteration')
+        ax1.set_ylabel('Execution Time (seconds)')
+        ax1.set_title('Execution Times Across Iterations')
+        ax1.legend()
+        ax1.grid(True, alpha=0.3)
+
+        # Plot 2: Box plot of execution times
+        ax2 = fig.add_subplot(gs[0, 2])
+        execution_data = [result.execution_times for result in valid_results.values() if result.execution_times]
+        labels = [result.method_name.replace(' ', '\n') for result in valid_results.values() if result.execution_times]
+
+        if execution_data:
+            ax2.boxplot(execution_data, tick_labels=labels)  # Fixed matplotlib warning
+            ax2.set_ylabel('Execution Time (seconds)')
+            ax2.set_title('Execution Time Distribution')
+            ax2.grid(True, alpha=0.3)
+
+        # Plot 3: Memory usage comparison
+        ax3 = fig.add_subplot(gs[1, 0])
+        methods_with_memory = [(result.method_name, result.mean_memory) for result in valid_results.values() if result.memory_usage]
+
+        if methods_with_memory:
+            methods, memory_means = zip(*methods_with_memory)
+            bars = ax3.bar(methods, memory_means, color=['blue', 'red', 'green'][:len(methods)])
+            ax3.set_ylabel('Memory Usage (MB; Python methods report incremental usage)')
+            ax3.set_title('Average Memory Usage')
+            ax3.tick_params(axis='x', rotation=45)
+
+            # Add value labels on bars
+            for bar, value in zip(bars, memory_means):
+                if value > 0:  # Only add label if we have valid memory data
+                    ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height() + max(memory_means)*0.01,
+                             f'{value:.1f}', ha='center', va='bottom')
+
+        # Plot 4: Performance comparison (relative to fastest)
+        ax4 = fig.add_subplot(gs[1, 1])
+        if len(valid_results) >= 2:
+            fastest_time = min(result.mean_time for result in valid_results.values() if result.execution_times)
+            relative_times = []
+            method_names = []
+
+            for result in valid_results.values():
+                if result.execution_times:
+                    relative_times.append(result.mean_time / fastest_time)
+                    method_names.append(result.method_name)
+
+            bars = ax4.bar(method_names, relative_times, color=['green', 'orange', 'red'][:len(method_names)])
+            ax4.set_ylabel('Relative Performance (1.0 = fastest)')
+            ax4.set_title('Relative Performance Comparison')
+            ax4.tick_params(axis='x', rotation=45)
+            ax4.axhline(y=1, color='black', linestyle='--', alpha=0.5)
+
+            # Add value labels
+            for bar, value in zip(bars, relative_times):
+                ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
+                         f'{value:.2f}x', ha='center', va='bottom')
+
+        # Plot 5: Miss ratio consistency
+        ax5 = fig.add_subplot(gs[1, 2])
+        miss_ratio_data = [result.miss_ratios for result in valid_results.values() if result.miss_ratios]
+        miss_ratio_labels = [result.method_name.replace(' ', '\n') for result in valid_results.values() if result.miss_ratios]
+
+        if miss_ratio_data:
+            ax5.boxplot(miss_ratio_data, tick_labels=miss_ratio_labels)
+            ax5.set_ylabel('Miss Ratio')
+            ax5.set_title('Miss Ratio Consistency')
+            ax5.grid(True, alpha=0.3)
+
+        # Plot 6: Execution time histogram for each method
+        ax6 = fig.add_subplot(gs[2, :])
+        for i, (name, result) in enumerate(valid_results.items()):
+            if result.execution_times:
+                ax6.hist(result.execution_times, alpha=0.6, label=result.method_name,
+                         bins=min(10, len(result.execution_times)),
+                         color=colors[i % len(colors)])
+
+        ax6.set_xlabel('Execution Time (seconds)')
+        ax6.set_ylabel('Frequency')
+        ax6.set_title('Execution Time Distribution by Method')
+        ax6.legend()
+        ax6.grid(True, alpha=0.3)
+
+        plt.suptitle(f'Cache Simulation Performance Benchmark\n'
+                     f'({self.num_iterations} iterations, Cache ratio: {self.cache_size_ratio}, Trace: {os.path.basename(self.trace_path)})',
+                     fontsize=16, y=0.98)
+
+        plt.savefig(save_path, dpi=300, bbox_inches='tight')
+        self.logger.info(f"Visualization saved as '{save_path}'")
+
+        return save_path
+
+    def export_results(self, csv_path: str = "benchmark_results.csv"):
+        """Export results to CSV file."""
+        import csv
+
+        with open(csv_path, 'w', newline='') as csvfile:
+            fieldnames = ['method', 'iteration', 'execution_time', 'memory_usage', 'miss_ratio']
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+
+            for name, result in self.results.items():
+                if not result.execution_times:
+                    continue
+
+                max_len = max(len(result.execution_times),
+                              len(result.memory_usage) if result.memory_usage else 0,
+                              len(result.miss_ratios) if result.miss_ratios else 0)
+
+                for i in range(max_len):
+                    exec_time = result.execution_times[i] if i < len(result.execution_times) else None
+                    mem_usage = result.memory_usage[i] if result.memory_usage and i < len(result.memory_usage) else None
+                    miss_ratio = result.miss_ratios[i] if result.miss_ratios and i < len(result.miss_ratios) else None
+
+                    writer.writerow({
+                        'method': result.method_name,
+                        'iteration': i + 1,
+                        'execution_time': exec_time,
+                        'memory_usage': mem_usage,
+                        'miss_ratio': miss_ratio
+                    })
+
+        self.logger.info(f"Results exported to '{csv_path}'")
+
+
+def main():
+    """Main function to run the benchmark."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Comprehensive Cache Simulation Performance Benchmark")
+    parser.add_argument("--trace_path", type=str, required=True,
+                        help="Path to the trace file")
+    parser.add_argument("--iterations", type=int, default=DEFAULT_NUM_ITERATIONS,
+                        help=f"Number of iterations (default: {DEFAULT_NUM_ITERATIONS})")
+    parser.add_argument("--cache_size_ratio", type=float, default=DEFAULT_CACHE_SIZE_RATIO,
+                        help=f"Cache size as ratio of working set (default: {DEFAULT_CACHE_SIZE_RATIO})")
+    parser.add_argument("--output_dir", type=str, default=".",
+                        help="Output directory for results (default: current directory)")
+    parser.add_argument("--export_csv", action="store_true",
+                        help="Export results to CSV file")
+    parser.add_argument("--no_visualize", action="store_true",
+                        help="Skip visualization generation")
+
+    args = parser.parse_args()
+
+    try:
+        # Create benchmark instance with proper parameters (no more global variables)
+        benchmark = CacheSimulationBenchmark(
+            trace_path=args.trace_path,
+            num_iterations=args.iterations,
+            cache_size_ratio=args.cache_size_ratio
+        )
+
+        # Run benchmark
+        results = benchmark.run_benchmark()
+
+        # Validate results
+        benchmark.validate_results()
+
+        # Print statistics
+        benchmark.print_statistics()
+
+        # Create visualizations
+        if not args.no_visualize:
+            viz_path = os.path.join(args.output_dir, "benchmark_comprehensive_analysis.png")
+            benchmark.create_visualizations(viz_path)
+
+        # Export CSV
+        if args.export_csv:
+            csv_path = os.path.join(args.output_dir, "benchmark_results.csv")
+            benchmark.export_results(csv_path)
+
+        print(f"\n{'='*80}")
+        print("BENCHMARK COMPLETED SUCCESSFULLY")
+        print(f"{'='*80}")
+
+    except Exception as e:
+        logging.error(f"Benchmark failed: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
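When `--export_csv` is passed, `export_results()` above writes one row per iteration with the columns `method`, `iteration`, `execution_time`, `memory_usage`, and `miss_ratio`. A minimal post-processing sketch using only the standard library, assuming the default output name `benchmark_results.csv` in the current directory:

```python
import csv
import statistics
from collections import defaultdict

times = defaultdict(list)
with open("benchmark_results.csv", newline="") as f:
    for row in csv.DictReader(f):
        if row["execution_time"]:  # skip rows where a value was missing
            times[row["method"]].append(float(row["execution_time"]))

for method, values in times.items():
    print(f"{method}: mean {statistics.mean(values):.4f}s over {len(values)} runs")
```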
diff --git a/scripts/install.sh b/scripts/install.sh
index 0c6f9a7..bc52786 100644
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -6,10 +6,12 @@ function usage() {
   echo "Options:"
   echo "  -h, --help          Show this help message"
   echo "  -b, --build-wheels  Build the Python wheels"
+  echo "  -a, --all           Install with optional eviction algorithms enabled (for development)"
   exit 1
 }
 
 # Parse command line arguments
 BUILD_WHEELS=0
+CMAKE_ARGS=""
 while [[ $# -gt 0 ]]; do
     case $1 in
@@ -20,6 +22,11 @@ while [[ $# -gt 0 ]]; do
             BUILD_WHEELS=1
             shift
             ;;
+        -a|--all)
+            CMAKE_ARGS="-DENABLE_LRB=ON -DENABLE_GLCACHE=ON -DENABLE_3L_CACHE=ON"
+            BUILD_WHEELS=0
+            shift
+            ;;
         *)
             echo "Unknown option: $1"
             usage
@@ -36,7 +43,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python scripts/sync_version.py
-python -m pip install -e . -vvv
+CMAKE_ARGS=$CMAKE_ARGS python -m pip install -e . -vvv
 
 # Test that the import works
 echo "Testing import..."
@@ -45,6 +52,10 @@ python -c "import libcachesim"
 # Run tests
 python -m pip install pytest
 python -m pytest tests
+if [[ "$CMAKE_ARGS" == *"-DENABLE_LRB=ON"* && "$CMAKE_ARGS" == *"-DENABLE_GLCACHE=ON"* && "$CMAKE_ARGS" == *"-DENABLE_3L_CACHE=ON"* ]]; then
+    echo "Running tests for optional eviction algorithms..."
+    python -m pytest tests -m "optional"
+fi
 
 # Build wheels if requested
 if [[ $BUILD_WHEELS -eq 1 ]]; then
diff --git a/src/export_reader.cpp b/src/export_reader.cpp
index 7f55cbe..eff5b31 100644
--- a/src/export_reader.cpp
+++ b/src/export_reader.cpp
@@ -271,9 +271,7 @@ void export_reader(py::module& m) {
       "read_one_req",
       [](reader_t& self, request_t& req) {
        int ret = read_one_req(&self, &req);
-        if (ret != 0) {
-          throw std::runtime_error("Failed to read request");
-        }
+        // NOTE: returns 0 on success; a non-zero value means no request was read (e.g., end of trace).
         return ret;
       },
       "req"_a)
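A note on the `export_reader.cpp` change: `read_one_req` no longer raises when a request cannot be read; callers are now expected to check the returned status (0 on success). The sketch below shows what a Python-side loop under that convention might look like. It is only an illustration: whether the high-level `TraceReader` forwards `read_one_req` directly and whether a request object is constructible as `lcs.Request()` are assumptions not confirmed by this diff.

```python
import libcachesim as lcs

reader = lcs.TraceReader(
    trace="./src/libCacheSim/data/cloudPhysicsIO.oracleGeneral.bin",
    trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
)
req = lcs.Request()  # assumed constructor; not part of this diff

n_req = 0
# read_one_req() now returns its status instead of raising:
# 0 means a request was read, non-zero means no more requests (or a read error).
while reader.read_one_req(req) == 0:
    n_req += 1
print(f"read {n_req} requests")
```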