From 8a64e5b6e1a3455add69db69ca40da782c7dad60 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Wed, 23 Jul 2025 15:14:39 -0400 Subject: [PATCH 1/6] Refactor: adjust structure --- .gitmodules | 6 + CMakeLists.txt | 142 +++++ MAINFEST.in | 0 README.md | 338 ++++++++++- examples/README.md | 280 +++++++++ examples/demo_unified_interface.py | 131 +++++ examples/python_hook_cache_example.py | 178 ++++++ examples/stream_request_example.py | 154 +++++ examples/zipf_trace_example.py | 243 ++++++++ export/CMakeLists.txt | 38 ++ export/README.md | 47 ++ libcachesim/__init__.py | 98 ++++ libcachesim/__init__.pyi | 249 ++++++++ .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 1328 bytes libcachesim/__pycache__/cache.cpython-310.pyc | Bin 0 -> 10978 bytes .../__pycache__/data_loader.cpython-310.pyc | Bin 0 -> 5814 bytes .../__pycache__/protocols.cpython-310.pyc | Bin 0 -> 2257 bytes .../synthetic_reader.cpython-310.pyc | Bin 0 -> 12050 bytes .../trace_analyzer.cpython-310.pyc | Bin 0 -> 1840 bytes .../__pycache__/trace_reader.cpython-310.pyc | Bin 0 -> 8917 bytes libcachesim/__pycache__/util.cpython-310.pyc | Bin 0 -> 2233 bytes libcachesim/cache.py | 396 +++++++++++++ libcachesim/data_loader.py | 131 +++++ libcachesim/protocols.py | 33 ++ libcachesim/synthetic_reader.py | 409 +++++++++++++ libcachesim/trace_analyzer.py | 53 ++ libcachesim/trace_reader.py | 251 ++++++++ libcachesim/util.py | 50 ++ pyproject.toml | 111 ++++ requirements.txt | 0 src/exception.cpp | 56 ++ src/exception.h | 33 ++ src/export.cpp | 38 ++ src/export.h | 27 + src/export_analyzer.cpp | 135 +++++ src/export_cache.cpp | 538 ++++++++++++++++++ src/export_misc.cpp | 30 + src/export_reader.cpp | 326 +++++++++++ src/libCacheSim | 1 + .../conftest.cpython-310-pytest-8.4.1.pyc | Bin 0 -> 358 bytes ...test_analyzer.cpython-310-pytest-8.4.1.pyc | Bin 0 -> 753 bytes ...t_data_loader.cpython-310-pytest-8.4.1.pyc | Bin 0 -> 644 bytes .../test_example.cpython-310-pytest-8.4.1.pyc | Bin 0 -> 680 bytes tests/conftest.py | 6 + tests/reference.csv | 20 + tests/test_analyzer.py | 15 + tests/test_data_loader.py | 8 + tests/test_example.py | 16 + 48 files changed, 4585 insertions(+), 2 deletions(-) create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 MAINFEST.in create mode 100644 examples/README.md create mode 100644 examples/demo_unified_interface.py create mode 100644 examples/python_hook_cache_example.py create mode 100644 examples/stream_request_example.py create mode 100644 examples/zipf_trace_example.py create mode 100644 export/CMakeLists.txt create mode 100644 export/README.md create mode 100644 libcachesim/__init__.py create mode 100644 libcachesim/__init__.pyi create mode 100644 libcachesim/__pycache__/__init__.cpython-310.pyc create mode 100644 libcachesim/__pycache__/cache.cpython-310.pyc create mode 100644 libcachesim/__pycache__/data_loader.cpython-310.pyc create mode 100644 libcachesim/__pycache__/protocols.cpython-310.pyc create mode 100644 libcachesim/__pycache__/synthetic_reader.cpython-310.pyc create mode 100644 libcachesim/__pycache__/trace_analyzer.cpython-310.pyc create mode 100644 libcachesim/__pycache__/trace_reader.cpython-310.pyc create mode 100644 libcachesim/__pycache__/util.cpython-310.pyc create mode 100644 libcachesim/cache.py create mode 100644 libcachesim/data_loader.py create mode 100644 libcachesim/protocols.py create mode 100644 libcachesim/synthetic_reader.py create mode 100644 libcachesim/trace_analyzer.py create mode 100644 libcachesim/trace_reader.py create mode 100644 
libcachesim/util.py create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 src/exception.cpp create mode 100644 src/exception.h create mode 100644 src/export.cpp create mode 100644 src/export.h create mode 100644 src/export_analyzer.cpp create mode 100644 src/export_cache.cpp create mode 100644 src/export_misc.cpp create mode 100644 src/export_reader.cpp create mode 160000 src/libCacheSim create mode 100644 tests/__pycache__/conftest.cpython-310-pytest-8.4.1.pyc create mode 100644 tests/__pycache__/test_analyzer.cpython-310-pytest-8.4.1.pyc create mode 100644 tests/__pycache__/test_data_loader.cpython-310-pytest-8.4.1.pyc create mode 100644 tests/__pycache__/test_example.cpython-310-pytest-8.4.1.pyc create mode 100644 tests/conftest.py create mode 100644 tests/reference.csv create mode 100644 tests/test_analyzer.py create mode 100644 tests/test_data_loader.py create mode 100644 tests/test_example.py diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..210ee99 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "libCacheSim"] + path = libCacheSim + url = git@github.com:1a1a11a/libCacheSim.git +[submodule "src/libCacheSim"] + path = src/libCacheSim + url = git@github.com:1a1a11a/libCacheSim.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..a8b76ec --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,142 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Include exported variables from cache +if(DEFINED LIBCB_BUILD_DIR) + set(PARENT_BUILD_DIR "${LIBCB_BUILD_DIR}") + message(STATUS "Using provided LIBCB_BUILD_DIR: ${LIBCB_BUILD_DIR}") +else() + set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +endif() +set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") + +if(EXISTS "${EXPORT_FILE}") + include("${EXPORT_FILE}") + message(STATUS "Loaded variables from export_vars.cmake") +else() + message(FATAL_ERROR "export_vars.cmake not found at ${EXPORT_FILE}. Please build the main project first (e.g. cd .. 
&& cmake -G Ninja -B build)") +endif() + +# Force enable -fPIC +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + +project(libCacheSim-python VERSION "${LIBCACHESIM_VERSION}") + +if(LOG_LEVEL_LOWER STREQUAL "default") + if(CMAKE_BUILD_TYPE_LOWER MATCHES "debug") + add_compile_definitions(LOGLEVEL=6) + else() + add_compile_definitions(LOGLEVEL=7) + endif() +elseif(LOG_LEVEL_LOWER STREQUAL "verbose") + add_compile_definitions(LOGLEVEL=5) +elseif(LOG_LEVEL_LOWER STREQUAL "debug") + add_compile_definitions(LOGLEVEL=6) +elseif(LOG_LEVEL_LOWER STREQUAL "info") + add_compile_definitions(LOGLEVEL=7) +elseif(LOG_LEVEL_LOWER STREQUAL "warn") + add_compile_definitions(LOGLEVEL=8) +elseif(LOG_LEVEL_LOWER STREQUAL "error") + add_compile_definitions(LOGLEVEL=9) +else() + add_compile_definitions(LOGLEVEL=7) +endif() + +# Find python and pybind11 +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +# Include directories for dependencies +include_directories(${GLib_INCLUDE_DIRS}) +include_directories(${GLib_CONFIG_INCLUDE_DIR}) +include_directories(${XGBOOST_INCLUDE_DIR}) +include_directories(${LIGHTGBM_PATH}) +include_directories(${ZSTD_INCLUDE_DIR}) +include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin) + +# Find the main libCacheSim library +set(MAIN_PROJECT_BUILD_DIR "${PARENT_BUILD_DIR}") +set(MAIN_PROJECT_LIB_PATH "${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a") + +if(EXISTS "${MAIN_PROJECT_LIB_PATH}") + message(STATUS "Found pre-built libCacheSim library at ${MAIN_PROJECT_LIB_PATH}") + + # Import the main library as an imported target + add_library(libCacheSim_main STATIC IMPORTED) + set_target_properties(libCacheSim_main PROPERTIES + IMPORTED_LOCATION "${MAIN_PROJECT_LIB_PATH}" + INTERFACE_INCLUDE_DIRECTORIES "${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/utils/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim" + ) + link_directories(${GLib_LIBRARY_DIRS}) + link_directories(${ZSTD_LIBRARY_DIRS}) + set(LIBCACHESIM_TARGET libCacheSim_main) + +else() + message(FATAL_ERROR "Pre-built libCacheSim library not found. Please build the main project first: cd .. 
&& cmake -G Ninja -B build && ninja -C build") +endif() + +include_directories(src) + +python_add_library(libcachesim_python MODULE + src/export.cpp + src/export_cache.cpp + src/export_reader.cpp + src/export_analyzer.cpp + src/export_misc.cpp + src/exception.cpp + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/cli_reader_utils.c + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/traceUtils/traceConvLCS.cpp + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/traceUtils/traceConvOracleGeneral.cpp + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/traceUtils/utils.cpp + WITH_SOABI +) + +set_target_properties(libcachesim_python PROPERTIES + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH_USE_LINK_PATH TRUE + BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH "$ORIGIN" +) + +target_compile_definitions(libcachesim_python PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +target_link_libraries(libcachesim_python PRIVATE + ${LIBCACHESIM_TARGET} + pybind11::headers + pybind11::module + ${GLib_LIBRARIES} + ${ZSTD_LIBRARIES} +) + +# Add platform-specific link options and libraries +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # GNU ld option, only available on Linux + target_link_options(libcachesim_python PRIVATE -Wl,--no-as-needed) + target_link_libraries(libcachesim_python PRIVATE dl) +elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + # macOS doesn't need --no-as-needed + # dl functions are part of the system library on macOS + # No need to explicitly link dl + + # Find argp library on macOS + find_library(ARGP_LIBRARY argp PATHS /opt/homebrew/lib /usr/local/lib) + if(ARGP_LIBRARY) + target_link_libraries(libcachesim_python PRIVATE ${ARGP_LIBRARY}) + endif() + + # Find and link other dependencies that might be needed + find_library(INTL_LIBRARY intl PATHS /opt/homebrew/lib /usr/local/lib) + if(INTL_LIBRARY) + target_link_libraries(libcachesim_python PRIVATE ${INTL_LIBRARY}) + endif() +else() + # Other platforms - try to link dl if available + find_library(DL_LIBRARY dl) + if(DL_LIBRARY) + target_link_libraries(libcachesim_python PRIVATE ${DL_LIBRARY}) + endif() +endif() + +# install to wheel directory +install(TARGETS libcachesim_python LIBRARY DESTINATION libcachesim) diff --git a/MAINFEST.in b/MAINFEST.in new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md index 888e444..23424c3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,337 @@ -# libCacheSim +# libCacheSim Python Binding -The libCacheSim Python package. \ No newline at end of file +[![Python Release](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml/badge.svg)](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml) +[![Python Versions](https://img.shields.io/pypi/pyversions/libcachesim.svg?logo=python&logoColor=white)](https://pypi.org/project/libcachesim) +[![PyPI Version](https://img.shields.io/pypi/v/libcachesim.svg?)](https://pypi.org/project/libcachesim) +[![PyPI - Downloads](https://img.shields.io/pypi/dd/libcachesim)](https://pypistats.org/packages/libcachesim) + +Python bindings for libCacheSim, a high-performance cache simulator and analysis library. + +## Installation + +Binary installers for the latest released version are available at the [Python Package Index (PyPI)](https://pypi.org/project/libcachesim). + +```bash +pip install libcachesim +``` + +### Installation from sources + +If there are no wheels suitable for your environment, consider building from source. 
+
+```bash
+git clone https://github.com/1a1a11a/libCacheSim.git
+cd libCacheSim
+
+# Build the main libCacheSim library first
+cmake -G Ninja -B build
+ninja -C build
+
+# Install Python binding
+cd libCacheSim-python
+pip install -e .
+```
+
+### Testing
+```bash
+# Run all tests
+python -m pytest .
+
+# Test import
+python -c "import libcachesim; print('Success!')"
+```
+
+## Quick Start
+
+### Basic Usage
+
+```python
+import libcachesim as lcs
+
+# Create a cache
+cache = lcs.LRU(cache_size=1024*1024) # 1MB cache
+
+# Process requests
+req = lcs.Request()
+req.obj_id = 1
+req.obj_size = 100
+
+print(cache.get(req)) # False (first access)
+print(cache.get(req)) # True (second access)
+```
+
+### Trace Processing
+
+To simulate with a trace, its requests must be read correctly. `open_trace` is a unified interface for trace reading that accepts three parameters:
+
+- `trace_path`: path to the trace file, either relative or absolute.
+- `type` (optional): if not given, the trace type is inferred from the file suffix.
+- `params` (optional): if not given, default reader parameters are applied.
+
+```python
+import libcachesim as lcs
+
+# Open trace and process efficiently
+reader = lcs.open_trace(
+    trace_path = "./data/cloudPhysicsIO.oracleGeneral.bin",
+    type = lcs.TraceType.ORACLE_GENERAL_TRACE,
+    params = lcs.ReaderInitParam(ignore_obj_size=True)
+)
+cache = lcs.S3FIFO(cache_size=1024*1024)
+
+# Process entire trace efficiently (C++ backend)
+obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+
+cache = lcs.S3FIFO(cache_size=1024*1024)
+# Process with limits and time ranges
+obj_miss_ratio, byte_miss_ratio = cache.process_trace(
+    reader,
+    start_req=0,
+    max_req=1000
+)
+print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+```
+
+## Custom Cache Policies
+
+Implement custom cache replacement algorithms using pure Python functions - **no C/C++ compilation required**.
+
+### Python Hook Cache Overview
+
+The `PythonHookCachePolicy` allows you to define custom caching behavior through Python callback functions.
This is perfect for: +- Prototyping new cache algorithms +- Educational purposes and learning +- Research and experimentation +- Custom business logic implementation + +### Hook Functions + +You need to implement these callback functions: + +- **`init_hook(cache_size: int) -> Any`**: Initialize your data structure +- **`hit_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits +- **`miss_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses +- **`eviction_hook(data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict +- **`remove_hook(data: Any, obj_id: int) -> None`**: Clean up when object removed +- **`free_hook(data: Any) -> None`**: [Optional] Final cleanup + +### Example: Custom LRU Implementation + +```python +import libcachesim as lcs +from collections import OrderedDict + +# Create a Python hook-based cache +cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU") + +# Define LRU policy hooks +def init_hook(cache_size): + return OrderedDict() # Track access order + +def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) # Move to most recent + +def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True # Add to end + +def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) # Return least recent + +def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + +# Set the hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +# Use it like any other cache +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) +print(f"Cache hit: {hit}") # Should be False (miss) +``` + +### Example: Custom FIFO Implementation + +```python +import libcachesim as lcs +from collections import deque +from contextlib import suppress + +cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") + +def init_hook(cache_size): + return deque() # Use deque for FIFO order + +def hit_hook(fifo_queue, obj_id, obj_size): + pass # FIFO doesn't reorder on hit + +def miss_hook(fifo_queue, obj_id, obj_size): + fifo_queue.append(obj_id) # Add to end of queue + +def eviction_hook(fifo_queue, obj_id, obj_size): + return fifo_queue[0] # Return first item (oldest) + +def remove_hook(fifo_queue, obj_id): + with suppress(ValueError): + fifo_queue.remove(obj_id) + +# Set the hooks and test +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +req = lcs.Request(obj_id=1, obj_size=100) +hit = cache.get(req) +print(f"Cache hit: {hit}") # Should be False (miss) +``` + +## Available Algorithms + +### Built-in Cache Algorithms + +#### Basic Algorithms +- **FIFO**: First-In-First-Out +- **LRU**: Least Recently Used +- **LFU**: Least Frequently Used +- **LFUDA**: LFU with Dynamic Aging +- **Clock**: Clock/Second-chance algorithm + +#### Advanced Algorithms +- **QDLP**: Queue Demotion with Lazy Promotion +- **S3FIFO**: Simple, Fast, Fair FIFO (recommended for most workloads) +- **Sieve**: High-performance eviction algorithm +- **ARC**: Adaptive Replacement Cache +- **TwoQ**: Two-Queue algorithm +- **SLRU**: Segmented LRU +- **TinyLFU**: TinyLFU with window +- **WTinyLFU**: Windowed TinyLFU + +#### Research/ML Algorithms +- **LeCaR**: Learning Cache Replacement (adaptive) +- **Cacheus**: Cache replacement policy +- **LRB**: Learning-based cache (if enabled) +- **GLCache**: Machine learning-based cache +- **ThreeLCache**: Three-level cache hierarchy (if enabled) + +#### Optimal Algorithms (for analysis) +- 
**Belady**: Optimal offline algorithm
+- **BeladySize**: Size-aware optimal algorithm
+
+```python
+import libcachesim as lcs
+
+# All algorithms use the same unified interface
+cache_size = 1024 * 1024 # 1MB
+
+lru_cache = lcs.LRU(cache_size)
+s3fifo_cache = lcs.S3FIFO(cache_size)
+sieve_cache = lcs.Sieve(cache_size)
+arc_cache = lcs.ARC(cache_size)
+
+# All caches work identically
+req = lcs.Request()
+req.obj_id = 1
+req.obj_size = 100
+hit = lru_cache.get(req)
+print(hit)
+```
+
+## Examples and Testing
+
+### Algorithm Comparison
+```python
+import libcachesim as lcs
+
+def compare_algorithms(trace_path):
+    reader = lcs.open_trace(trace_path, lcs.TraceType.VSCSI_TRACE)
+    algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC']
+    for algo_name in algorithms:
+        cache = getattr(lcs, algo_name)(cache_size=1024*1024)
+        obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+        print(f"{algo_name}\t\tObj: {obj_miss_ratio:.4f}, Byte: {byte_miss_ratio:.4f}")
+
+compare_algorithms("./data/cloudPhysicsIO.vscsi")
+```
+
+### Performance Benchmarking
+```python
+import time
+
+import libcachesim as lcs
+
+def benchmark_cache(cache, num_requests=100000):
+    """Benchmark cache performance"""
+    start_time = time.time()
+    for i in range(num_requests):
+        req = lcs.Request()
+        req.obj_id = i % 1000 # Working set of 1000 objects
+        req.obj_size = 100
+        cache.get(req)
+    end_time = time.time()
+    throughput = num_requests / (end_time - start_time)
+    print(f"Processed {num_requests} requests in {end_time - start_time:.2f}s")
+    print(f"Throughput: {throughput:.0f} requests/sec")
+
+# Compare performance
+lru_cache = lcs.LRU(cache_size=1024*1024)
+s3fifo_cache = lcs.S3FIFO(cache_size=1024*1024)
+
+print("LRU Performance:")
+benchmark_cache(lru_cache)
+
+print("\nS3FIFO Performance:")
+benchmark_cache(s3fifo_cache)
+```
+
+## Advanced Usage
+
+### Multi-Format Trace Processing
+
+```python
+import libcachesim as lcs
+
+# Supported trace types
+trace_types = {
+    "oracle": lcs.TraceType.ORACLE_GENERAL_TRACE,
+    "csv": lcs.TraceType.CSV_TRACE,
+    "vscsi": lcs.TraceType.VSCSI_TRACE,
+    "txt": lcs.TraceType.PLAIN_TXT_TRACE
+}
+
+# Open different trace formats
+oracle_reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin", trace_types["oracle"])
+txt_reader = lcs.open_trace("./data/cloudPhysicsIO.txt", trace_types["txt"])
+
+# Process traces with different caches
+caches = [
+    lcs.LRU(cache_size=1024*1024),
+    lcs.S3FIFO(cache_size=1024*1024),
+    lcs.Sieve(cache_size=1024*1024)
+]
+
+for i, cache in enumerate(caches):
+    miss_ratio_oracle = cache.process_trace(oracle_reader)[0]
+    miss_ratio_txt = cache.process_trace(txt_reader)[0]
+    print(f"Cache {i} miss ratio: {miss_ratio_oracle:.4f}, {miss_ratio_txt:.4f}")
+```
+
+## Troubleshooting
+
+### Common Issues
+
+**Import Error**: Make sure the libCacheSim C++ library is built first:
+```bash
+cmake -G Ninja -B build && ninja -C build
+```
+
+**Performance Issues**: Use `process_trace()` for large workloads instead of individual `get()` calls for better performance.
+
+**Memory Usage**: Monitor cache statistics (`cache.occupied_byte`) and ensure proper cache size limits for your system.
+
+**Custom Cache Issues**: Validate your custom implementation against a built-in algorithm on the same workload (see the sketch below).
+
+**Install with uv**: Building the package from source with `uv` fails because the source distribution is incomplete, so force installation of the prebuilt wheel via `uv pip install libcachesim --only-binary=:all:`.
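+
+The snippet below is a minimal sketch of the validation suggested under **Custom Cache Issues**. It rebuilds the hook-based LRU from the earlier example and replays the same seeded Zipf workload through it and through the built-in `LRU`; on identical input the two hit counts are expected to match, and a mismatch points at a bug in the hook logic. The cache size, object count, and request count are illustrative values, not recommendations.
+
+```python
+import libcachesim as lcs
+from collections import OrderedDict
+
+def make_hook_lru(cache_size):
+    """Hook-based LRU equivalent to the example in 'Custom Cache Policies'."""
+    cache = lcs.PythonHookCachePolicy(cache_size, cache_name="ValidatedLRU")
+    cache.set_hooks(
+        lambda size: OrderedDict(),                    # init: empty recency order
+        lambda d, oid, osz: d.move_to_end(oid),        # hit: mark as most recent
+        lambda d, oid, osz: d.__setitem__(oid, True),  # miss: insert as most recent
+        lambda d, oid, osz: next(iter(d)),             # evict: least recently used
+        lambda d, oid: d.pop(oid, None),               # remove: drop bookkeeping
+    )
+    return cache
+
+def count_hits(cache, num_requests=10000):
+    # Fresh generator per cache: request generators are consumed on iteration
+    reqs = lcs.create_zipf_requests(1000, num_requests, alpha=1.0, obj_size=1024, seed=42)
+    return sum(1 for req in reqs if cache.get(req))
+
+cache_size = 64 * 1024  # deliberately small so evictions actually happen
+builtin_hits = count_hits(lcs.LRU(cache_size))
+custom_hits = count_hits(make_hook_lru(cache_size))
+print(f"built-in LRU: {builtin_hits} hits, hook LRU: {custom_hits} hits")
+print("MATCH" if builtin_hits == custom_hits else "MISMATCH - check your hooks")
+```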
+ +### Getting Help + +- Check the [main documentation](../doc/) for detailed guides +- Open issues on [GitHub](https://github.com/1a1a11a/libCacheSim/issues) +- Review [examples](/example) in the main repository diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..3b63b7f --- /dev/null +++ b/examples/README.md @@ -0,0 +1,280 @@ +# libCacheSim Python Examples + +This directory contains examples demonstrating how to use libCacheSim Python bindings for cache simulation and trace generation. + +## Overview + +libCacheSim Python bindings provide a powerful interface for: + +- Cache simulation with various eviction policies (LRU, FIFO, ARC, etc.) +- Synthetic trace generation (Zipf and Uniform distributions) +- Real trace analysis and processing +- Custom cache policy implementation with Python hooks +- Unified interface supporting all cache algorithms + +## Example Files + +### 1. Stream Request Generation (`stream_request_example.py`) + +Demonstrates how to generate synthetic request traces and use them for cache simulation: + +```python +import libcachesim as lcs + +# Create Zipf-distributed requests +zipf_generator = lcs.create_zipf_requests( + num_objects=1000, # 1000 unique objects + num_requests=10000, # 10000 requests + alpha=1.0, # Zipf skewness + obj_size=4000, # Object size in bytes + seed=42 # For reproducibility +) + +# Test with LRU cache +cache = lcs.LRU(cache_size=50*1024*1024) # 50MB cache for better hit ratio +miss_count = sum(1 for req in zipf_generator if not cache.get(req)) +print(f"Final miss ratio: {miss_count / 10000:.3f}") +``` + +**Features**: +- Memory efficient: No temporary files created +- Fast: Direct Request object generation +- Reproducible: Support for random seeds +- Flexible: Easy parameter adjustment + +### 2. Unified Interface Demo (`demo_unified_interface.py`) + +Shows the unified interface for all cache policies, including built-in and custom Python hook caches: + +```python +import libcachesim as lcs + +cache_size = 1024 * 1024 # 1MB + +# Create different cache policies +caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), +} + +# Create Python hook cache +python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") +# Set hook functions... +caches["Custom Python LRU"] = python_cache + +# Unified interface testing +test_req = lcs.Request() +test_req.obj_id = 1 +test_req.obj_size = 1024 + +for name, cache in caches.items(): + result = cache.get(test_req) + print(f"{name}: {'HIT' if result else 'MISS'}") +``` + +**Benefits of Unified Interface**: +- Same API for all cache policies +- Easy to switch between different algorithms +- Efficient C++ backend trace processing +- Consistent properties and statistics + +### 3. Python Hook Cache (`python_hook_cache_example.py`) + +Demonstrates how to create custom cache policies using Python hooks: + +```python +import libcachesim as lcs +from collections import OrderedDict + +class LRUPolicy: + def __init__(self, cache_size): + self.access_order = OrderedDict() + + def on_hit(self, obj_id, obj_size): + self.access_order.move_to_end(obj_id) + + def on_miss(self, obj_id, obj_size): + self.access_order[obj_id] = True + + def evict(self, obj_id, obj_size): + return next(iter(self.access_order)) + +def create_lru_cache(cache_size): + cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU") + + def init_hook(cache_size): + return LRUPolicy(cache_size) + + # Set other hooks... 
+ cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + return cache +``` + +**Custom Policy Features**: +- Pure Python cache logic implementation +- Support for LRU, FIFO and other policies +- Flexible hook system +- Same interface as built-in policies + +### 4. Zipf Trace Examples (`zipf_trace_example.py`) + +Shows synthetic trace generation methods and algorithm comparison: + +```python +import libcachesim as lcs + +# Method 1: Create Zipf-distributed request generator +zipf_generator = lcs.create_zipf_requests( + num_objects=1000, + num_requests=10000, + alpha=1.0, + obj_size=1024, + seed=42 +) + +# Method 2: Create uniform-distributed request generator +uniform_generator = lcs.create_uniform_requests( + num_objects=1000, + num_requests=10000, + obj_size=1024, + seed=42 +) + +# Compare different Zipf parameters +alphas = [0.5, 1.0, 1.5, 2.0] +for alpha in alphas: + generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42) + cache = lcs.LRU(1024*1024) + hit_count = sum(1 for req in generator if cache.get(req)) + hit_ratio = hit_count / 10000 + print(f"α={alpha}: Hit ratio={hit_ratio:.4f}") +``` + +**Synthetic Trace Features**: +- Higher α values create more skewed access patterns +- Memory efficient: No temporary files created +- Request generators for flexible processing +- Suitable for simulating real workloads + +## Key Features + +### Trace Generation +- `create_zipf_requests()`: Create Zipf-distributed request generator +- `create_uniform_requests()`: Create uniform-distributed request generator + +### Cache Algorithms +- **Classic algorithms**: `LRU()`, `FIFO()`, `ARC()`, `Clock()` +- **Modern algorithms**: `S3FIFO()`, `Sieve()`, `TinyLFU()` +- **Custom policies**: `PythonHookCachePolicy()` + +### Trace Processing +- `open_trace()`: Open real trace files +- `process_trace()`: High-performance trace processing + +## Basic Usage Examples + +### 1. Compare Cache Algorithms + +```python +import libcachesim as lcs + +# Test different algorithms +algorithms = ['LRU', 'FIFO', 'ARC', 'S3FIFO'] +cache_size = 1024*1024 + +for algo_name in algorithms: + # Create fresh workload for each algorithm + generator = lcs.create_zipf_requests(1000, 10000, alpha=1.0, seed=42) + cache = getattr(lcs, algo_name)(cache_size) + hit_count = sum(1 for req in generator if cache.get(req)) + print(f"{algo_name}: {hit_count/10000:.3f}") +``` + +### 2. 
Parameter Sensitivity Analysis + +```python +import libcachesim as lcs + +# Test different Zipf parameters +for alpha in [0.5, 1.0, 1.5, 2.0]: + generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42) + cache = lcs.LRU(cache_size=512*1024) + + hit_count = sum(1 for req in generator if cache.get(req)) + print(f"α={alpha}: Hit ratio={hit_count/10000:.3f}") +``` + +## Parameters + +### Trace Generation Parameters +- `num_objects`: Number of unique objects +- `num_requests`: Number of requests to generate +- `alpha`: Zipf skewness (α=1.0 for classic Zipf) +- `obj_size`: Object size in bytes (default: 4000) +- `seed`: Random seed for reproducibility + +### Cache Parameters +- `cache_size`: Cache capacity in bytes +- Algorithm-specific parameters (e.g.,`fifo_size_ratio` for S3FIFO) + +## Running Examples + +```bash +# Navigate to examples directory +cd libCacheSim-python/examples + +# Run stream-based trace generation +python stream_request_example.py + +# Run unified interface demo +python demo_unified_interface.py + +# Run Python hook cache example +python python_hook_cache_example.py + +# Run Zipf trace examples +python zipf_trace_example.py + +# Run all tests +python -m pytest ../tests/ -v +``` + +## Performance Tips + +1. **Use appropriate cache and object sizes**: + ```python + # Good: cache can hold multiple objects + cache = lcs.LRU(cache_size=1024*1024) # 1MB + generator = lcs.create_zipf_requests(1000, 10000, obj_size=1024) # 1KB objects + ``` + +2. **Use seeds for reproducible experiments**: + ```python + generator = lcs.create_zipf_requests(1000, 10000, seed=42) + ``` + +3. **Process large traces with C++ backend**: + ```python + # Fast: C++ processing + obj_miss_ratio, byte_miss_ratio = lcs.process_trace(cache, reader) + + # Slow: Python loop + for req in reader: + cache.get(req) + ``` + +4. **Understand Zipf parameter effects**: + - α=0.5: Slightly skewed, close to uniform distribution + - α=1.0: Classic Zipf distribution + - α=2.0: Highly skewed, few objects get most accesses + +## Testing + +Run comprehensive tests: + +```bash +python -m pytest ../tests/test_trace_generator.py -v +python -m pytest ../tests/test_eviction.py -v +python -m pytest ../tests/test_process_trace.py -v +``` diff --git a/examples/demo_unified_interface.py b/examples/demo_unified_interface.py new file mode 100644 index 0000000..e435e58 --- /dev/null +++ b/examples/demo_unified_interface.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Demo script showing the unified interface for all cache policies. +This demonstrates how to use both native and Python hook-based caches +with the same API for seamless algorithm comparison and switching. 
+""" + +import sys +import os + +# Add parent directory for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + print(f"Warning: Trace file not found at {data_file}") + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) + + +def create_demo_lru_hooks(): + """Create demo LRU hooks for Python-based cache policy.""" + + def init_hook(cache_size): + print(f" Initializing custom LRU with {cache_size} bytes") + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +def demo_unified_interface(): + """Demonstrate the unified interface across different cache policies.""" + print("libCacheSim Python Binding - Unified Interface Demo") + print("=" * 60) + + cache_size = 1024 * 1024 # 1MB + + # Create different cache policies + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), + } + + # Create Python hook-based LRU + python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Custom Python LRU"] = python_cache + + print(f"Testing {len(caches)} different cache policies with unified interface:") + + # Demo 1: Single request interface + print("1. Single Request Interface:") + print(" All caches use: cache.get(request)") + + test_req = lcs.Request() + test_req.obj_id = 1 + test_req.obj_size = 1024 + + for name, cache in caches.items(): + result = cache.get(test_req) + print(f" {name:20s}: {'HIT' if result else 'MISS'}") + + # Demo 2: Unified properties interface + print("\n2. Unified Properties Interface:") + print(" All caches provide: cache_size, n_obj, occupied_byte, n_req") + + for name, cache in caches.items(): + print( + f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, " + f"bytes={cache.occupied_byte}, reqs={cache.n_req}" + ) + + # Demo 3: Efficient trace processing + print("\n3. 
Efficient Trace Processing Interface:") + print(" All caches use: cache.process_trace(reader, max_req=N)") + + max_requests = 1000 + + for name, cache in caches.items(): + # Create fresh reader for each cache + reader = create_trace_reader() + if not reader: + print(f" {name:20s}: trace file not available") + continue + + obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=max_requests) + print(f" {name:20s}: obj_miss_ratio={obj_miss_ratio:.4f}, byte_miss_ratio={byte_miss_ratio:.4f}") + + print("\nKey Benefits of Unified Interface:") + print(" • Same API for all cache policies (built-in + custom)") + print(" • Easy to switch between different algorithms") + print(" • Efficient trace processing in C++ (no Python overhead)") + print(" • Consistent properties and statistics") + print(" • Type-safe and well-documented") + + print("\nDemo completed! All cache policies work with the same interface.") + + +if __name__ == "__main__": + demo_unified_interface() diff --git a/examples/python_hook_cache_example.py b/examples/python_hook_cache_example.py new file mode 100644 index 0000000..06d06c4 --- /dev/null +++ b/examples/python_hook_cache_example.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +""" +Example demonstrating how to create custom cache policies using Python hooks. + +This example shows how to implement LRU and FIFO cache policies using the +PythonHookCachePolicy class, which allows users to define cache behavior using +pure Python functions instead of C/C++ plugins. +""" + +import libcachesim as lcs +from collections import OrderedDict, deque +from contextlib import suppress + + +class LRUPolicy: + """LRU (Least Recently Used) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.access_order = OrderedDict() # obj_id -> True (for ordering) + + def on_hit(self, obj_id, obj_size): + """Move accessed object to end (most recent).""" + if obj_id in self.access_order: + # Move to end (most recent) + self.access_order.move_to_end(obj_id) + + def on_miss(self, obj_id, obj_size): + """Add new object to end (most recent).""" + self.access_order[obj_id] = True + + def evict(self, obj_id, obj_size): + """Return the least recently used object ID.""" + if self.access_order: + # Return first item (least recent) + victim_id = next(iter(self.access_order)) + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + self.access_order.pop(obj_id, None) + + +class FIFOPolicy: + """FIFO (First In First Out) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.insertion_order = deque() # obj_id queue + + def on_hit(self, obj_id, obj_size): + """FIFO doesn't change order on hits.""" + pass + + def on_miss(self, obj_id, obj_size): + """Add new object to end of queue.""" + self.insertion_order.append(obj_id) + + def evict(self, obj_id, obj_size): + """Return the first inserted object ID.""" + if self.insertion_order: + victim_id = self.insertion_order.popleft() + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + with suppress(ValueError): + self.insertion_order.remove(obj_id) + + +def create_lru_cache(cache_size): + """Create an LRU cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU") + + def init_hook(cache_size): + return LRUPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + 
policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + def free_hook(policy): + # Python garbage collection handles cleanup + pass + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) + return cache + + +def create_fifo_cache(cache_size): + """Create a FIFO cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonFIFO") + + def init_hook(cache_size): + return FIFOPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + return cache + + +def test_cache_policy(cache, name): + """Test a cache policy with sample requests.""" + print(f"\n=== Testing {name} Cache ===") + + # Test requests: obj_id, obj_size + test_requests = [ + (1, 100), + (2, 100), + (3, 100), + (4, 100), + (5, 100), # Fill cache + (1, 100), # Hit + (6, 100), # Miss, should evict something + (2, 100), # Hit or miss depending on policy + (7, 100), # Miss, should evict something + ] + + hits = 0 + misses = 0 + + for obj_id, obj_size in test_requests: + req = lcs.Request() + req.obj_id = obj_id + req.obj_size = obj_size + + hit = cache.get(req) + if hit: + hits += 1 + print(f"Request {obj_id}: HIT") + else: + misses += 1 + print(f"Request {obj_id}: MISS") + + print(f"Total: {hits} hits, {misses} misses") + print(f"Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes occupied") + + +def main(): + """Main example function.""" + cache_size = 500 # Bytes (can hold 5 objects of size 100 each) + + # Test LRU cache + lru_cache = create_lru_cache(cache_size) + test_cache_policy(lru_cache, "LRU") + + # Test FIFO cache + fifo_cache = create_fifo_cache(cache_size) + test_cache_policy(fifo_cache, "FIFO") + + print("\n=== Comparison ===") + print("LRU keeps recently accessed items, evicting least recently used") + print("FIFO keeps items in insertion order, evicting oldest inserted") + + +if __name__ == "__main__": + main() diff --git a/examples/stream_request_example.py b/examples/stream_request_example.py new file mode 100644 index 0000000..eed213b --- /dev/null +++ b/examples/stream_request_example.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +""" +Example: Using stream request generators for cache simulation. + +This example demonstrates how to use the stream request generators +to create synthetic traces and run cache simulations without creating +temporary files. +""" + +import libcachesim as lcs + + +def main(): + """Demonstrate stream request generators.""" + print("libCacheSim Stream Request Generation Example") + print("=" * 50) + + # Example 1: Basic Zipf generation with appropriate cache size + print("\n1. 
Basic Zipf Request Generation") + print("-" * 30) + + # Use reasonable cache and object sizes + cache_size = 50 * 1024 * 1024 # 50MB cache + obj_size = 1024 # 1KB objects + num_objects = 1000 + num_requests = 10000 + + # Create a cache + cache = lcs.LRU(cache_size=cache_size) + + # Create a Zipf-distributed request generator + zipf_generator = lcs.create_zipf_requests( + num_objects=num_objects, + num_requests=num_requests, + alpha=1.0, # Zipf skewness + obj_size=obj_size, # Object size in bytes + seed=42, # For reproducibility + ) + + print(f"Cache size: {cache_size // 1024 // 1024}MB") + print(f"Object size: {obj_size}B") + print(f"Generated {num_requests} Zipf requests for {num_objects} objects") + + # Process the requests directly + hit_count = 0 + for i, req in enumerate(zipf_generator): + if cache.get(req): + hit_count += 1 + + # Print progress every 2000 requests + if (i + 1) % 2000 == 0: + current_hit_ratio = hit_count / (i + 1) + print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}") + + final_hit_ratio = hit_count / num_requests + print(f"Final hit ratio: {final_hit_ratio:.3f}") + + # Example 2: Uniform distribution comparison + print("\n2. Uniform Request Generation") + print("-" * 30) + + # Create a uniform-distributed request generator + uniform_generator = lcs.create_uniform_requests( + num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42 + ) + + print(f"Generated {num_requests} uniform requests for {num_objects} objects") + + # Reset cache and process uniform requests + cache = lcs.LRU(cache_size=cache_size) + hit_count = 0 + + for i, req in enumerate(uniform_generator): + if cache.get(req): + hit_count += 1 + + if (i + 1) % 2000 == 0: + current_hit_ratio = hit_count / (i + 1) + print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}") + + final_hit_ratio = hit_count / num_requests + print(f"Final hit ratio: {final_hit_ratio:.3f}") + + # Example 3: Compare different Zipf alpha values + print("\n3. Zipf Alpha Parameter Comparison") + print("-" * 30) + + alphas = [0.5, 1.0, 1.5, 2.0] + print(f"{'Alpha':<8} {'Hit Ratio':<12} {'Description'}") + print("-" * 40) + + for alpha in alphas: + generator = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 + ) + + cache = lcs.LRU(cache_size=cache_size) + hit_count = sum(1 for req in generator if cache.get(req)) + hit_ratio = hit_count / num_requests + + # Describe the skewness + if alpha < 0.8: + description = "Low skew (nearly uniform)" + elif alpha < 1.2: + description = "Classic Zipf" + elif alpha < 1.8: + description = "High skew" + else: + description = "Very high skew" + + print(f"{alpha:<8.1f} {hit_ratio:<12.3f} {description}") + + # Example 4: Cache size sensitivity + print("\n4. 
Cache Size Sensitivity") + print("-" * 30) + + # Fixed workload + generator = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42 + ) + + cache_sizes = [ + 1 * 1024 * 1024, # 1MB + 5 * 1024 * 1024, # 5MB + 10 * 1024 * 1024, # 10MB + 50 * 1024 * 1024, # 50MB + ] + + print(f"{'Cache Size':<12} {'Hit Ratio':<12} {'Objects Fit'}") + print("-" * 36) + + for cache_size in cache_sizes: + cache = lcs.LRU(cache_size=cache_size) + + # Create fresh generator for each test + test_generator = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42 + ) + + hit_count = sum(1 for req in test_generator if cache.get(req)) + hit_ratio = hit_count / num_requests + objects_fit = cache_size // obj_size + + print(f"{cache_size // 1024 // 1024}MB{'':<8} {hit_ratio:<12.3f} ~{objects_fit}") + + print("\nNotes:") + print("- Higher α values create more skewed access patterns") + print("- Skewed patterns generally have higher hit ratios") + print("- Cache size affects performance, but beyond a point diminishing returns") + print(f"- Working set: {num_objects} objects × {obj_size}B = {num_objects * obj_size // 1024}KB") + + +if __name__ == "__main__": + main() diff --git a/examples/zipf_trace_example.py b/examples/zipf_trace_example.py new file mode 100644 index 0000000..662ae0f --- /dev/null +++ b/examples/zipf_trace_example.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +""" +Example demonstrating trace generation and cache simulation in libCacheSim Python bindings. + +This example shows how to: +1. Generate synthetic request traces using available APIs +2. Use the generated traces with cache simulations +3. Compare different algorithms and parameters +""" + +import libcachesim as lcs + + +def example_basic_trace_generation(): + """Basic example of generating synthetic traces.""" + print("=== Basic Synthetic Trace Generation ===") + + # Generate Zipf requests using available API + num_objects = 1000 + num_requests = 10000 + alpha = 1.0 + obj_size = 1024 # 1KB objects + + # Create Zipf-distributed requests + zipf_requests = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 + ) + + print(f"Generated {num_requests} Zipf requests with α={alpha}") + print(f"Object size: {obj_size}B, Number of unique objects: {num_objects}") + + # Use the requests with a cache + cache = lcs.LRU(cache_size=50 * 1024 * 1024) # 50MB cache + hit_count = sum(1 for req in zipf_requests if cache.get(req)) + hit_ratio = hit_count / num_requests + print(f"LRU cache hit ratio: {hit_ratio:.4f}") + + return hit_ratio + + +def example_compare_zipf_parameters(): + """Compare different Zipf parameters.""" + print("\n=== Comparing Zipf Parameters ===") + + num_objects = 1000 + num_requests = 10000 + cache_size = 50 * 1024 * 1024 # 50MB + obj_size = 1024 # 1KB objects + + alphas = [0.5, 1.0, 1.5, 2.0] + results = {} + + print(f"{'Alpha':<8} {'LRU':<8} {'FIFO':<8} {'ARC':<8} {'Clock':<8}") + print("-" * 40) + + for alpha in alphas: + # Test with different cache policies + policies = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), + "Clock": lcs.Clock(cache_size), + } + + results[alpha] = {} + hit_ratios = [] + for name, cache in policies.items(): + # Create fresh request iterator for each cache + test_requests = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=alpha, 
obj_size=obj_size, seed=42 + ) + hit_count = sum(1 for req in test_requests if cache.get(req)) + hit_ratio = hit_count / num_requests + results[alpha][name] = hit_ratio + hit_ratios.append(f"{hit_ratio:.3f}") + + print(f"{alpha:<8.1f} {hit_ratios[0]:<8} {hit_ratios[1]:<8} {hit_ratios[2]:<8} {hit_ratios[3]:<8}") + + return results + + +def example_algorithm_comparison(): + """Compare different cache algorithms.""" + print("\n=== Cache Algorithm Comparison ===") + + # Fixed workload parameters + num_objects = 1000 + num_requests = 10000 + alpha = 1.0 + obj_size = 1024 + cache_size = 10 * 1024 * 1024 # 10MB + + # Available algorithms + algorithms = { + "LRU": lcs.LRU, + "FIFO": lcs.FIFO, + "ARC": lcs.ARC, + "Clock": lcs.Clock, + "S3FIFO": lcs.S3FIFO, + "Sieve": lcs.Sieve, + } + + print(f"Testing with: {num_objects} objects, {num_requests} requests") + print(f"Cache size: {cache_size // 1024 // 1024}MB, Object size: {obj_size}B") + print(f"Zipf alpha: {alpha}") + print() + + print(f"{'Algorithm':<10} {'Hit Ratio':<12} {'Description'}") + print("-" * 45) + + results = {} + for name, cache_class in algorithms.items(): + try: + # Create fresh requests for each algorithm + requests = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 + ) + + cache = cache_class(cache_size) + hit_count = sum(1 for req in requests if cache.get(req)) + hit_ratio = hit_count / num_requests + results[name] = hit_ratio + + # Add descriptions + descriptions = { + "LRU": "Least Recently Used", + "FIFO": "First In First Out", + "ARC": "Adaptive Replacement Cache", + "Clock": "Clock/Second Chance", + "S3FIFO": "Simple Scalable FIFO", + "Sieve": "Lazy Promotion", + } + + print(f"{name:<10} {hit_ratio:<12.4f} {descriptions.get(name, '')}") + + except Exception as e: + print(f"{name:<10} {'ERROR':<12} {str(e)}") + + return results + + +def example_uniform_vs_zipf(): + """Compare uniform vs Zipf distributions.""" + print("\n=== Uniform vs Zipf Distribution Comparison ===") + + num_objects = 1000 + num_requests = 10000 + obj_size = 1024 + cache_size = 10 * 1024 * 1024 + + # Test uniform distribution + uniform_requests = lcs.create_uniform_requests( + num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42 + ) + + cache = lcs.LRU(cache_size) + uniform_hits = sum(1 for req in uniform_requests if cache.get(req)) + uniform_hit_ratio = uniform_hits / num_requests + + # Test Zipf distribution + zipf_requests = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42 + ) + + cache = lcs.LRU(cache_size) + zipf_hits = sum(1 for req in zipf_requests if cache.get(req)) + zipf_hit_ratio = zipf_hits / num_requests + + print(f"{'Distribution':<12} {'Hit Ratio':<12} {'Description'}") + print("-" * 45) + print(f"{'Uniform':<12} {uniform_hit_ratio:<12.4f} {'All objects equally likely'}") + print(f"{'Zipf (α=1.0)':<12} {zipf_hit_ratio:<12.4f} {'Some objects much more popular'}") + + print( + f"\nObservation: Zipf typically shows{'higher' if zipf_hit_ratio > uniform_hit_ratio else 'lower'} hit ratios" + ) + print("due to locality of reference (hot objects get cached)") + + +def example_cache_size_analysis(): + """Analyze the effect of different cache sizes.""" + print("\n=== Cache Size Sensitivity Analysis ===") + + num_objects = 1000 + num_requests = 10000 + alpha = 1.0 + obj_size = 1024 + + cache_sizes = [ + 1 * 1024 * 1024, # 1MB + 5 * 1024 * 1024, # 5MB + 10 * 1024 * 1024, # 10MB + 25 * 1024 * 
1024, # 25MB + 50 * 1024 * 1024, # 50MB + ] + + print(f"{'Cache Size':<12} {'Objects Fit':<12} {'Hit Ratio':<12} {'Efficiency'}") + print("-" * 55) + + for cache_size in cache_sizes: + requests = lcs.create_zipf_requests( + num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 + ) + + cache = lcs.LRU(cache_size) + hit_count = sum(1 for req in requests if cache.get(req)) + hit_ratio = hit_count / num_requests + objects_fit = cache_size // obj_size + efficiency = hit_ratio / (cache_size / (1024 * 1024)) # hit ratio per MB + + print(f"{cache_size // 1024 // 1024}MB{'':<8} {objects_fit:<12} {hit_ratio:<12.4f} {efficiency:<12.4f}") + + +def main(): + """Run all examples.""" + print("libCacheSim Python Bindings - Trace Generation Examples") + print("=" * 60) + + try: + # Run examples + example_basic_trace_generation() + example_compare_zipf_parameters() + example_algorithm_comparison() + example_uniform_vs_zipf() + example_cache_size_analysis() + + print("\n" + "=" * 60) + print("All examples completed successfully!") + print("\nKey Takeaways:") + print("• Higher Zipf α values create more skewed access patterns") + print("• Skewed patterns generally result in higher cache hit ratios") + print("• Different algorithms perform differently based on workload") + print("• Cache size has diminishing returns beyond working set size") + + except Exception as e: + print(f"Error running examples: {e}") + import traceback + + traceback.print_exc() + + +if __name__ == "__main__": + main() diff --git a/export/CMakeLists.txt b/export/CMakeLists.txt new file mode 100644 index 0000000..917e831 --- /dev/null +++ b/export/CMakeLists.txt @@ -0,0 +1,38 @@ +# Helper functions are removed since we don't export source files anymore + +set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake") +file(WRITE "${EXPORT_FILE}" "") + +get_filename_component(MAIN_PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR} ABSOLUTE) +file(WRITE ${CMAKE_BINARY_DIR}/export_vars.cmake "set(MAIN_PROJECT_SOURCE_DIR \"${MAIN_PROJECT_SOURCE_DIR}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(dependency_libs \"${dependency_libs}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(LIBCACHESIM_VERSION \"${LIBCACHESIM_VERSION}\")\n") + +# ============================================================================== +# Export project metadata +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LIBCACHESIM_VERSION \"${${PROJECT_NAME}_VERSION}\")\n") + +# ============================================================================== +# Export essential include directory variables +# ============================================================================== +foreach(var IN ITEMS GLib_INCLUDE_DIRS GLib_CONFIG_INCLUDE_DIR XGBOOST_INCLUDE_DIR LIGHTGBM_PATH ZSTD_INCLUDE_DIR) + file(APPEND "${EXPORT_FILE}" "set(${var} \"${${var}}\")\n") +endforeach() + +# ============================================================================== +# Export dependency library variables +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARY_DIRS \"${GLib_LIBRARY_DIRS}\")\n") +file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARIES \"${GLib_LIBRARIES}\")\n") +get_filename_component(ZSTD_LIBRARY_DIR "${ZSTD_LIBRARIES}" DIRECTORY) +file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARY_DIRS \"${ZSTD_LIBRARY_DIRS}\")\n") +file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARIES \"${ZSTD_LIBRARIES}\")\n") 
+file(APPEND "${EXPORT_FILE}" "set(dependency_libs \"${dependency_libs}\")\n") + +# ============================================================================== +# Export essential build option variables +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LOG_LEVEL_LOWER \"${LOG_LEVEL_LOWER}\")\n") + +message(STATUS "Exported essential variables to ${EXPORT_FILE}") diff --git a/export/README.md b/export/README.md new file mode 100644 index 0000000..976b1da --- /dev/null +++ b/export/README.md @@ -0,0 +1,47 @@ +# Python Binding Export System + +Build system bridge for sharing CMake variables between the main libCacheSim project and Python binding. + +## Purpose + +The `export/CMakeLists.txt` exports all necessary build variables (source files, include directories, compiler flags, etc.) from the main project to the Python binding, enabling consistent builds without duplicating configuration. + +## How It Works + +1. **Export**: Main project writes variables to `export_vars.cmake` +2. **Import**: Python binding includes this file during CMake configuration +3. **Build**: Python binding uses shared variables for consistent compilation + +## Key Exported Variables + +### Source Files +- Cache algorithms, data structures, trace readers +- Profilers, utilities, analyzers + +### Build Configuration +- Include directories (main, GLib, ZSTD, XGBoost, LightGBM) +- Compiler flags (C/C++) +- Dependency libraries +- Build options (hugepage, tests, optional features) + +## Usage + +**Main Project** (`CMakeLists.txt`): +```cmake +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) +``` + +**Python Binding** (`libCacheSim-python/CMakeLists.txt`): +```cmake +set(EXPORT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../build/export_vars.cmake") +include("${EXPORT_FILE}") +``` + +## For Developers + +This system ensures the Python binding automatically picks up changes to: +- New source files added to the main project +- Updated compiler flags or dependencies +- Modified build options + +No manual synchronization needed between main project and Python binding builds. 
diff --git a/libcachesim/__init__.py b/libcachesim/__init__.py new file mode 100644 index 0000000..f71c6ee --- /dev/null +++ b/libcachesim/__init__.py @@ -0,0 +1,98 @@ +"""libCacheSim Python bindings""" + +from __future__ import annotations + +from .libcachesim_python import ( + Cache, + Request, + ReqOp, + TraceType, + SamplerType, + AnalysisParam, + AnalysisOption, + __doc__, + __version__, +) + +from .cache import ( + CacheBase, + # Core algorithms + LRU, + FIFO, + LFU, + ARC, + Clock, + Random, + # Advanced algorithms + S3FIFO, + Sieve, + LIRS, + TwoQ, + SLRU, + WTinyLFU, + LeCaR, + LFUDA, + ClockPro, + Cacheus, + # Optimal algorithms + Belady, + BeladySize, + # Plugin cache + PythonHookCachePolicy, +) + +from .trace_reader import TraceReader +from .trace_analyzer import TraceAnalyzer +from .synthetic_reader import SyntheticReader, create_zipf_requests, create_uniform_requests +from .util import Util +from .data_loader import DataLoader + +__all__ = [ + # Core classes + "Cache", + "Request", + "ReqOp", + "TraceType", + "SamplerType", + "AnalysisParam", + "AnalysisOption", + # Cache base class + "CacheBase", + # Core cache algorithms + "LRU", + "FIFO", + "LFU", + "ARC", + "Clock", + "Random", + # Advanced cache algorithms + "S3FIFO", + "Sieve", + "LIRS", + "TwoQ", + "SLRU", + "WTinyLFU", + "LeCaR", + "LFUDA", + "ClockPro", + "Cacheus", + # Optimal algorithms + "Belady", + "BeladySize", + # Plugin cache + "PythonHookCachePolicy", + # Readers and analyzers + "TraceReader", + "TraceAnalyzer", + "SyntheticReader", + # Trace generators + "create_zipf_requests", + "create_uniform_requests", + # Utilities + "Util", + # Data loader + "DataLoader", + # Metadata + "__doc__", + "__version__", +] diff --git a/libcachesim/__init__.pyi b/libcachesim/__init__.pyi new file mode 100644 index 0000000..2e2a565 --- /dev/null +++ b/libcachesim/__init__.pyi @@ -0,0 +1,249 @@ +from __future__ import annotations +from typing import bool, int, str, tuple +from collections.abc import Iterator + +from .libcachesim_python import ReqOp, TraceType, SamplerType +from .protocols import ReaderProtocol + +class Request: + clock_time: int + hv: int + obj_id: int + obj_size: int + ttl: int + op: ReqOp + valid: bool + next_access_vtime: int + + def __init__( + self, + obj_size: int = 1, + op: ReqOp = ReqOp.READ, + valid: bool = True, + obj_id: int = 0, + clock_time: int = 0, + hv: int = 0, + next_access_vtime: int = -2, + ttl: int = 0, + ): ... + def __init__(self): ... + +class CacheObject: + obj_id: int + obj_size: int + +class CommonCacheParams: + cache_size: int + default_ttl: int + hashpower: int + consider_obj_metadata: bool + +class Cache: + cache_size: int + default_ttl: int + obj_md_size: int + n_req: int + cache_name: str + init_params: CommonCacheParams + + def __init__(self, init_params: CommonCacheParams, cache_specific_params: str = ""): ... + def get(self, req: Request) -> bool: ... + def find(self, req: Request, update_cache: bool = True) -> CacheObject: ... + def can_insert(self, req: Request) -> bool: ... + def insert(self, req: Request) -> CacheObject: ... + def need_eviction(self, req: Request) -> bool: ... + def evict(self, req: Request) -> CacheObject: ... + def remove(self, obj_id: int) -> bool: ... + def to_evict(self, req: Request) -> CacheObject: ... + def get_occupied_byte(self) -> int: ... + def get_n_obj(self) -> int: ... + def print_cache(self) -> str: ... + +class CacheBase: + """Base class for all cache implementations""" + def __init__(self, _cache: Cache): ... 
+ def get(self, req: Request) -> bool: ... + def find(self, req: Request, update_cache: bool = True) -> CacheObject: ... + def can_insert(self, req: Request) -> bool: ... + def insert(self, req: Request) -> CacheObject: ... + def need_eviction(self, req: Request) -> bool: ... + def evict(self, req: Request) -> CacheObject: ... + def remove(self, obj_id: int) -> bool: ... + def to_evict(self, req: Request) -> CacheObject: ... + def get_occupied_byte(self) -> int: ... + def get_n_obj(self) -> int: ... + def print_cache(self) -> str: ... + def process_trace(self, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: ... + @property + def cache_size(self) -> int: ... + @property + def cache_name(self) -> str: ... + +# Core cache algorithms +class LRU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class FIFO(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class LFU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class ARC(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class Clock(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class Random(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +# Advanced algorithms +class S3FIFO(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class Sieve(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class LIRS(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class TwoQ(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class SLRU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class WTinyLFU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class LeCaR(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class LFUDA(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class ClockPro(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class Cacheus(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... 
+ +# Optimal algorithms +class Belady(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class BeladySize(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +# Plugin cache +class PythonHookCachePolicy(CacheBase): + def __init__( + self, + cache_size: int, + cache_name: str = "PythonHookCache", + default_ttl: int = 25920000, + hashpower: int = 24, + consider_obj_metadata: bool = False, + cache_init_hook=None, + cache_hit_hook=None, + cache_miss_hook=None, + cache_eviction_hook=None, + cache_remove_hook=None, + cache_free_hook=None, + ): ... + def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): ... + +# Readers +class TraceReader(ReaderProtocol): + c_reader: bool + def __init__(self, trace: str, trace_type: TraceType = TraceType.UNKNOWN_TRACE, **kwargs): ... + +class SyntheticReader(ReaderProtocol): + c_reader: bool + def __init__( + self, + num_of_req: int, + obj_size: int = 4000, + time_span: int = 604800, + start_obj_id: int = 0, + seed: int | None = None, + alpha: float = 1.0, + dist: str = "zipf", + num_objects: int | None = None, + ): ... + +# Trace generators +def create_zipf_requests( + num_objects: int, + num_requests: int, + alpha: float = 1.0, + obj_size: int = 4000, + time_span: int = 604800, + start_obj_id: int = 0, + seed: int | None = None, +) -> Iterator[Request]: ... + +def create_uniform_requests( + num_objects: int, + num_requests: int, + obj_size: int = 4000, + time_span: int = 604800, + start_obj_id: int = 0, + seed: int | None = None, +) -> Iterator[Request]: ... + +# Analyzer +class TraceAnalyzer: + def __init__(self, analyzer, reader: ReaderProtocol, output_path: str, analysis_param, analysis_option): ... + def run(self) -> None: ... + def cleanup(self) -> None: ... + +# Utilities +class Util: + @staticmethod + def convert_to_oracleGeneral(reader, ofilepath, output_txt: bool = False, remove_size_change: bool = False): ... + @staticmethod + def convert_to_lcs( + reader, ofilepath, output_txt: bool = False, remove_size_change: bool = False, lcs_ver: int = 1 + ): ... + @staticmethod + def process_trace( + cache: CacheBase, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1 + ) -> tuple[float, float]: ... 
[GIT binary patch payloads for the added libcachesim/__pycache__/*.cpython-310.pyc bytecode files omitted]
diff --git a/libcachesim/cache.py b/libcachesim/cache.py new file mode 100644 index 0000000..3e40249 --- /dev/null +++ b/libcachesim/cache.py @@ -0,0 +1,396 @@ +from abc import ABC +from typing import Protocol +from .libcachesim_python import ( + CommonCacheParams, + Request, + CacheObject, + Cache, + # Core cache algorithms + LRU_init, + FIFO_init, + LFU_init, + ARC_init, + Clock_init, + Random_init, + LIRS_init, +
TwoQ_init, + SLRU_init, + # Advanced algorithms + S3FIFO_init, + Sieve_init, + WTinyLFU_init, + LeCaR_init, + LFUDA_init, + ClockPro_init, + Cacheus_init, + # Optimal algorithms + Belady_init, + BeladySize_init, + # Probabilistic algorithms + LRU_Prob_init, + flashProb_init, + # Size-based algorithms + Size_init, + GDSF_init, + # Hyperbolic algorithms + Hyperbolic_init, + # Plugin cache + pypluginCache_init, + # Process trace function + c_process_trace, +) + +from .protocols import ReaderProtocol + + +class CacheBase(ABC): + """Base class for all cache implementations""" + + _cache: Cache # Internal C++ cache object + + def __init__(self, _cache: Cache): + self._cache = _cache + + def get(self, req: Request) -> bool: + return self._cache.get(req) + + def find(self, req: Request, update_cache: bool = True) -> CacheObject: + return self._cache.find(req, update_cache) + + def can_insert(self, req: Request) -> bool: + return self._cache.can_insert(req) + + def insert(self, req: Request) -> CacheObject: + return self._cache.insert(req) + + def need_eviction(self, req: Request) -> bool: + return self._cache.need_eviction(req) + + def evict(self, req: Request) -> CacheObject: + return self._cache.evict(req) + + def remove(self, obj_id: int) -> bool: + return self._cache.remove(obj_id) + + def to_evict(self, req: Request) -> CacheObject: + return self._cache.to_evict(req) + + def get_occupied_byte(self) -> int: + return self._cache.get_occupied_byte() + + def get_n_obj(self) -> int: + return self._cache.get_n_obj() + + def print_cache(self) -> str: + return self._cache.print_cache() + + def process_trace(self, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: + """Process trace with this cache and return miss ratios""" + if hasattr(reader, "c_reader") and reader.c_reader: + # C++ reader with _reader attribute + if hasattr(reader, "_reader"): + return c_process_trace(self._cache, reader._reader, start_req, max_req) + else: + raise ValueError("C++ reader missing _reader attribute") + else: + # Python reader - use Python implementation + return self._process_trace_python(reader, start_req, max_req) + + def _process_trace_python( + self, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1 + ) -> tuple[float, float]: + """Python fallback for processing traces""" + reader.reset() + if start_req > 0: + reader.skip_n_req(start_req) + + n_req = 0 + n_hit = 0 + bytes_req = 0 + bytes_hit = 0 + + for req in reader: + if not req.valid: + break + + n_req += 1 + bytes_req += req.obj_size + + if self.get(req): + n_hit += 1 + bytes_hit += req.obj_size + + if max_req > 0 and n_req >= max_req: + break + + obj_miss_ratio = 1.0 - (n_hit / n_req) if n_req > 0 else 0.0 + byte_miss_ratio = 1.0 - (bytes_hit / bytes_req) if bytes_req > 0 else 0.0 + return obj_miss_ratio, byte_miss_ratio + + # Properties + @property + def cache_size(self) -> int: + return self._cache.cache_size + + @property + def cache_name(self) -> str: + return self._cache.cache_name + + +def _create_common_params( + cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False +) -> CommonCacheParams: + """Helper to create common cache parameters""" + return CommonCacheParams( + cache_size=cache_size, + default_ttl=default_ttl, + hashpower=hashpower, + consider_obj_metadata=consider_obj_metadata, + ) + + +# Core cache algorithms +class LRU(CacheBase): + """Least Recently Used cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, 
hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class FIFO(CacheBase): + """First In First Out cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LFU(CacheBase): + """Least Frequently Used cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class ARC(CacheBase): + """Adaptive Replacement Cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=ARC_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class Clock(CacheBase): + """Clock replacement algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Clock_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class Random(CacheBase): + """Random replacement cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Random_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +# Advanced algorithms +class S3FIFO(CacheBase): + """S3-FIFO cache algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=S3FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class Sieve(CacheBase): + """Sieve cache algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Sieve_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LIRS(CacheBase): + """Low Inter-reference Recency Set""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class TwoQ(CacheBase): + """2Q replacement algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=TwoQ_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class SLRU(CacheBase): + """Segmented LRU""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=SLRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class WTinyLFU(CacheBase): + """Window TinyLFU""" + + def __init__( + self, cache_size: int, 
default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=WTinyLFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LeCaR(CacheBase): + """Learning Cache Replacement""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LeCaR_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LFUDA(CacheBase): + """LFU with Dynamic Aging""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LFUDA_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class ClockPro(CacheBase): + """Clock-Pro replacement algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=ClockPro_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class Cacheus(CacheBase): + """Cacheus algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Cacheus_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +# Optimal algorithms +class Belady(CacheBase): + """Belady's optimal algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Belady_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class BeladySize(CacheBase): + """Belady's optimal algorithm with size consideration""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=BeladySize_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +# Plugin cache for custom Python implementations +def nop_method(*args, **kwargs): + """No-operation method for default hooks""" + pass + + +class PythonHookCachePolicy(CacheBase): + """Python plugin cache for custom implementations""" + + def __init__( + self, + cache_size: int, + cache_name: str = "PythonHookCache", + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + cache_init_hook=nop_method, + cache_hit_hook=nop_method, + cache_miss_hook=nop_method, + cache_eviction_hook=nop_method, + cache_remove_hook=nop_method, + cache_free_hook=nop_method, + ): + self.cache_name = cache_name + self.common_cache_params = _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata) + + super().__init__( + _cache=pypluginCache_init( + self.common_cache_params, + cache_name, + cache_init_hook, + cache_hit_hook, + cache_miss_hook, + cache_eviction_hook, + cache_remove_hook, + cache_free_hook, + ) + ) + + def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=nop_method): + """Set the cache hooks after initialization""" + # Note: This would require C++ side support to change hooks after creation + # For now, hooks should be set during initialization + pass diff --git a/libcachesim/data_loader.py 
b/libcachesim/data_loader.py new file mode 100644 index 0000000..fee5f9b --- /dev/null +++ b/libcachesim/data_loader.py @@ -0,0 +1,131 @@ +"""S3 Bucket data loader with local caching (HuggingFace-style).""" + +from __future__ import annotations + +import hashlib +import logging +import shutil +from pathlib import Path +from typing import Optional, Union +from urllib.parse import quote + +logger = logging.getLogger(__name__) + + +class DataLoader: + DEFAULT_BUCKET = "cache-datasets" + DEFAULT_CACHE_DIR = Path.home() / ".cache/libcachesim_hub" + + def __init__( + self, + bucket_name: str = DEFAULT_BUCKET, + cache_dir: Optional[Union[str, Path]] = None, + use_auth: bool = False + ): + self.bucket_name = bucket_name + self.cache_dir = Path(cache_dir) if cache_dir else self.DEFAULT_CACHE_DIR + self.use_auth = use_auth + self._s3_client = None + self._ensure_cache_dir() + + def _ensure_cache_dir(self) -> None: + (self.cache_dir / self.bucket_name).mkdir(parents=True, exist_ok=True) + + @property + def s3_client(self): + if self._s3_client is None: + try: + import boto3 + from botocore.config import Config + from botocore import UNSIGNED + + self._s3_client = boto3.client( + 's3', + config=None if self.use_auth else Config(signature_version=UNSIGNED) + ) + except ImportError: + raise ImportError("Install boto3: pip install boto3") + return self._s3_client + + def _cache_path(self, key: str) -> Path: + safe_name = hashlib.sha256(key.encode()).hexdigest()[:16] + "_" + quote(key, safe='') + return self.cache_dir / self.bucket_name / safe_name + + def _download(self, key: str, dest: Path) -> None: + temp = dest.with_suffix(dest.suffix + '.tmp') + temp.parent.mkdir(parents=True, exist_ok=True) + + try: + logger.info(f"Downloading s3://{self.bucket_name}/{key}") + obj = self.s3_client.get_object(Bucket=self.bucket_name, Key=key) + with open(temp, 'wb') as f: + f.write(obj['Body'].read()) + shutil.move(str(temp), str(dest)) + logger.info(f"Saved to: {dest}") + except Exception as e: + if temp.exists(): + temp.unlink() + raise RuntimeError(f"Download failed for s3://{self.bucket_name}/{key}: {e}") + + def load(self, key: str, force: bool = False, mode: str = 'rb') -> Union[bytes, str]: + path = self._cache_path(key) + if not path.exists() or force: + self._download(key, path) + with open(path, mode) as f: + return f.read() + + def is_cached(self, key: str) -> bool: + return self._cache_path(key).exists() + + def get_cache_path(self, key: str) -> Path: + return self._cache_path(key).as_posix() + + def clear_cache(self, key: Optional[str] = None) -> None: + if key: + path = self._cache_path(key) + if path.exists(): + path.unlink() + logger.info(f"Cleared: {path}") + else: + shutil.rmtree(self.cache_dir, ignore_errors=True) + logger.info(f"Cleared entire cache: {self.cache_dir}") + + def list_cached_files(self) -> list[str]: + if not self.cache_dir.exists(): + return [] + return [ + str(p) for p in self.cache_dir.rglob('*') + if p.is_file() and not p.name.endswith('.tmp') + ] + + def get_cache_size(self) -> int: + return sum( + p.stat().st_size for p in self.cache_dir.rglob('*') if p.is_file() + ) + + def list_s3_objects(self, prefix: str = "", delimiter: str = "/") -> dict: + """ + List S3 objects and pseudo-folders under a prefix. 
+ + Args: + prefix: The S3 prefix to list under (like folder path) + delimiter: Use "/" to simulate folder structure + + Returns: + A dict with two keys: + - "folders": list of sub-prefixes (folders) + - "files": list of object keys (files) + """ + paginator = self.s3_client.get_paginator('list_objects_v2') + result = {"folders": [], "files": []} + + for page in paginator.paginate( + Bucket=self.bucket_name, + Prefix=prefix, + Delimiter=delimiter + ): + # CommonPrefixes are like subdirectories + result["folders"].extend(cp["Prefix"] for cp in page.get("CommonPrefixes", [])) + result["files"].extend(obj["Key"] for obj in page.get("Contents", [])) + + return result diff --git a/libcachesim/protocols.py b/libcachesim/protocols.py new file mode 100644 index 0000000..58eeddb --- /dev/null +++ b/libcachesim/protocols.py @@ -0,0 +1,33 @@ +""" +Reader protocol for libCacheSim Python bindings. + +ReaderProtocol defines the interface contract for trace readers, +enabling different implementations (Python/C++) to work interchangeably. +""" + +from __future__ import annotations +from typing import Iterator, Protocol, runtime_checkable, TYPE_CHECKING + +if TYPE_CHECKING: + from .libcachesim_python import Request + + +@runtime_checkable +class ReaderProtocol(Protocol): + """Protocol for trace readers + + This protocol ensures that different reader implementations + (SyntheticReader, TraceReader) can be used interchangeably. + + Only core methods are defined here. + """ + + def get_num_of_req(self) -> int: ... + def read_one_req(self, req: Request) -> Request: ... + def skip_n_req(self, n: int) -> int: ... + def reset(self) -> None: ... + def close(self) -> None: ... + def clone(self) -> "ReaderProtocol": ... + def __iter__(self) -> Iterator[Request]: ... + def __next__(self) -> Request: ... + def __len__(self) -> int: ... diff --git a/libcachesim/synthetic_reader.py b/libcachesim/synthetic_reader.py new file mode 100644 index 0000000..16f8a10 --- /dev/null +++ b/libcachesim/synthetic_reader.py @@ -0,0 +1,409 @@ +""" +Trace generator module for libCacheSim Python bindings. + +This module provides functions to generate synthetic traces with different distributions. +""" + +import numpy as np +import random +from typing import Optional, Union, Any +from collections.abc import Iterator +from .libcachesim_python import Request, ReqOp + +from .protocols import ReaderProtocol + + +class SyntheticReader(ReaderProtocol): + """Efficient synthetic request generator supporting multiple distributions""" + + def __init__( + self, + num_of_req: int, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, + alpha: float = 1.0, + dist: str = "zipf", + num_objects: Optional[int] = None, + ): + """ + Initialize synthetic reader. 
+ + Args: + num_of_req: Number of requests to generate + obj_size: Object size in bytes + time_span: Time span in seconds + start_obj_id: Starting object ID + seed: Random seed for reproducibility + alpha: Zipf skewness parameter (only for dist="zipf") + dist: Distribution type ("zipf" or "uniform") + num_objects: Number of unique objects (defaults to num_of_req) + """ + if num_of_req <= 0: + raise ValueError("num_of_req must be positive") + if obj_size <= 0: + raise ValueError("obj_size must be positive") + if time_span <= 0: + raise ValueError("time_span must be positive") + if alpha < 0: + raise ValueError("alpha must be non-negative") + if dist not in ["zipf", "uniform"]: + raise ValueError(f"Unsupported distribution: {dist}") + + self.num_of_req = num_of_req + self.obj_size = obj_size + self.time_span = time_span + self.start_obj_id = start_obj_id + self.seed = seed + self.alpha = alpha + self.dist = dist + self.num_objects = num_objects or num_of_req + self.current_pos = 0 + + # Set the reader type - this is a Python reader, not C++ + self.c_reader = False + + # Set random seed for reproducibility + if seed is not None: + np.random.seed(seed) + random.seed(seed) + + # Lazy generation: generate object IDs only when needed + self._obj_ids: Optional[np.ndarray] = None + + @property + def obj_ids(self) -> np.ndarray: + """Lazy generation of object ID array""" + if self._obj_ids is None: + if self.dist == "zipf": + self._obj_ids = _gen_zipf(self.num_objects, self.alpha, self.num_of_req, self.start_obj_id) + elif self.dist == "uniform": + self._obj_ids = _gen_uniform(self.num_objects, self.num_of_req, self.start_obj_id) + return self._obj_ids + + def get_num_of_req(self) -> int: + return self.num_of_req + + def read_one_req(self, req: Request) -> Request: + """Read one request and fill Request object""" + if self.current_pos >= self.num_of_req: + req.valid = False + return req + + obj_id = self.obj_ids[self.current_pos] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = self.current_pos * self.time_span // self.num_of_req + req.op = ReqOp.OP_READ + req.valid = True + + self.current_pos += 1 + return req + + def reset(self) -> None: + """Reset read position to beginning""" + self.current_pos = 0 + + def close(self) -> None: + """Close reader and release resources""" + self._obj_ids = None + + def clone(self) -> "SyntheticReader": + """Create a copy of the reader""" + return SyntheticReader( + num_of_req=self.num_of_req, + obj_size=self.obj_size, + time_span=self.time_span, + start_obj_id=self.start_obj_id, + seed=self.seed, + alpha=self.alpha, + dist=self.dist, + num_objects=self.num_objects, + ) + + def read_first_req(self, req: Request) -> Request: + """Read the first request""" + if self.num_of_req == 0: + req.valid = False + return req + + obj_id = self.obj_ids[0] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = 0 + req.op = ReqOp.OP_READ + req.valid = True + return req + + def read_last_req(self, req: Request) -> Request: + """Read the last request""" + if self.num_of_req == 0: + req.valid = False + return req + + obj_id = self.obj_ids[-1] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = (self.num_of_req - 1) * self.time_span // self.num_of_req + req.op = ReqOp.OP_READ + req.valid = True + return req + + def skip_n_req(self, n: int) -> int: + """Skip n requests""" + self.current_pos = min(self.current_pos + n, self.num_of_req) + return self.current_pos + + def read_one_req_above(self, req: Request) -> Request: + """Read 
one request above current position""" + if self.current_pos + 1 >= self.num_of_req: + req.valid = False + return req + + obj_id = self.obj_ids[self.current_pos + 1] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = (self.current_pos + 1) * self.time_span // self.num_of_req + req.op = ReqOp.OP_READ + req.valid = True + return req + + def go_back_one_req(self) -> None: + """Go back one request""" + self.current_pos = max(0, self.current_pos - 1) + + def set_read_pos(self, pos: float) -> None: + """Set read position""" + self.current_pos = max(0, min(int(pos), self.num_of_req)) + + def get_read_pos(self) -> float: + """Get current read position""" + return float(self.current_pos) + + def __iter__(self) -> Iterator[Request]: + """Iterator implementation""" + self.reset() + return self + + def __len__(self) -> int: + return self.num_of_req + + def __next__(self) -> Request: + """Next element for iterator""" + if self.current_pos >= self.num_of_req: + raise StopIteration + + req = Request() + return self.read_one_req(req) + + def __getitem__(self, index: int) -> Request: + """Support index access""" + if index < 0 or index >= self.num_of_req: + raise IndexError("Index out of range") + + req = Request() + obj_id = self.obj_ids[index] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = index * self.time_span // self.num_of_req + req.op = ReqOp.OP_READ + req.valid = True + return req + + +def _gen_zipf(m: int, alpha: float, n: int, start: int = 0) -> np.ndarray: + """Generate Zipf-distributed workload. + + Args: + m: Number of objects + alpha: Skewness parameter (alpha >= 0) + n: Number of requests + start: Starting object ID + + Returns: + Array of object IDs following Zipf distribution + """ + if m <= 0 or n <= 0: + raise ValueError("num_objects and num_requests must be positive") + if alpha < 0: + raise ValueError("alpha must be non-negative") + + # Optimization: for alpha=0 (uniform), use uniform distribution directly + if alpha == 0: + return _gen_uniform(m, n, start) + + # Calculate Zipf distribution PMF + np_tmp = np.power(np.arange(1, m + 1), -alpha) + np_zeta = np.cumsum(np_tmp) + dist_map = np_zeta / np_zeta[-1] + + # Generate random samples + r = np.random.uniform(0, 1, n) + return np.searchsorted(dist_map, r) + start + + +def _gen_uniform(m: int, n: int, start: int = 0) -> np.ndarray: + """Generate uniform-distributed workload. 
+ + Args: + m: Number of objects + n: Number of requests + start: Starting object ID + + Returns: + Array of object IDs following uniform distribution + """ + if m <= 0 or n <= 0: + raise ValueError("num_objects and num_requests must be positive") + # Optimized: directly generate in the target range for better performance + return np.random.randint(start, start + m, n) + + +class _BaseRequestGenerator: + """Base class for request generators to reduce code duplication""" + + def __init__( + self, + num_objects: int, + num_requests: int, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, + ): + """Initialize base request generator.""" + if num_objects <= 0 or num_requests <= 0: + raise ValueError("num_objects and num_requests must be positive") + if obj_size <= 0: + raise ValueError("obj_size must be positive") + if time_span <= 0: + raise ValueError("time_span must be positive") + + self.num_requests = num_requests + self.obj_size = obj_size + self.time_span = time_span + + # Set random seed + if seed is not None: + np.random.seed(seed) + random.seed(seed) + + # Subclasses must implement this method + self.obj_ids = self._generate_obj_ids(num_objects, num_requests, start_obj_id) + + def _generate_obj_ids(self, num_objects: int, num_requests: int, start_obj_id: int) -> np.ndarray: + """Subclasses must implement this method to generate object IDs""" + raise NotImplementedError("Subclasses must implement _generate_obj_ids") + + def __iter__(self) -> Iterator[Request]: + """Iterate over generated requests""" + for i, obj_id in enumerate(self.obj_ids): + req = Request() + req.clock_time = i * self.time_span // self.num_requests + req.obj_id = obj_id + req.obj_size = self.obj_size + req.op = ReqOp.OP_READ + req.valid = True + yield req + + def __len__(self) -> int: + """Return number of requests""" + return self.num_requests + + +class _ZipfRequestGenerator(_BaseRequestGenerator): + """Zipf-distributed request generator""" + + def __init__( + self, + num_objects: int, + num_requests: int, + alpha: float = 1.0, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, + ): + """Initialize Zipf request generator.""" + if alpha < 0: + raise ValueError("alpha must be non-negative") + self.alpha = alpha + super().__init__(num_objects, num_requests, obj_size, time_span, start_obj_id, seed) + + def _generate_obj_ids(self, num_objects: int, num_requests: int, start_obj_id: int) -> np.ndarray: + """Generate Zipf-distributed object IDs""" + return _gen_zipf(num_objects, self.alpha, num_requests, start_obj_id) + + +class _UniformRequestGenerator(_BaseRequestGenerator): + """Uniform-distributed request generator""" + + def _generate_obj_ids(self, num_objects: int, num_requests: int, start_obj_id: int) -> np.ndarray: + """Generate uniformly-distributed object IDs""" + return _gen_uniform(num_objects, num_requests, start_obj_id) + + +def create_zipf_requests( + num_objects: int, + num_requests: int, + alpha: float = 1.0, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, +) -> _ZipfRequestGenerator: + """Create a Zipf-distributed request generator. 
+ + Args: + num_objects: Number of unique objects + num_requests: Number of requests to generate + alpha: Zipf skewness parameter (alpha >= 0) + obj_size: Object size in bytes + time_span: Time span in seconds + start_obj_id: Starting object ID + seed: Random seed for reproducibility + + Returns: + Generator that yields Request objects + """ + return _ZipfRequestGenerator( + num_objects=num_objects, + num_requests=num_requests, + alpha=alpha, + obj_size=obj_size, + time_span=time_span, + start_obj_id=start_obj_id, + seed=seed, + ) + + +def create_uniform_requests( + num_objects: int, + num_requests: int, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, +) -> _UniformRequestGenerator: + """Create a uniform-distributed request generator. + + Args: + num_objects: Number of unique objects + num_requests: Number of requests to generate + obj_size: Object size in bytes + time_span: Time span in seconds + start_obj_id: Starting object ID + seed: Random seed for reproducibility + + Returns: + Generator that yields Request objects + """ + return _UniformRequestGenerator( + num_objects=num_objects, + num_requests=num_requests, + obj_size=obj_size, + time_span=time_span, + start_obj_id=start_obj_id, + seed=seed, + ) diff --git a/libcachesim/trace_analyzer.py b/libcachesim/trace_analyzer.py new file mode 100644 index 0000000..4e51da4 --- /dev/null +++ b/libcachesim/trace_analyzer.py @@ -0,0 +1,53 @@ +"""Wrapper of Analyzer""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .protocols import ReaderProtocol + +from .libcachesim_python import ( + Analyzer, + AnalysisOption, + AnalysisParam, +) + +# Import ReaderException +class ReaderException(Exception): + """Exception raised when reader is not compatible""" + pass + +class TraceAnalyzer: + _analyzer: Analyzer + + def __init__( + self, + reader: ReaderProtocol, + output_path: str, + analysis_param: AnalysisParam = None, + analysis_option: AnalysisOption = None, + ): + """ + Initialize trace analyzer. + + Args: + reader: Reader protocol + output_path: Path to output file + analysis_param: Analysis parameters + analysis_option: Analysis options + """ + if not hasattr(reader, 'c_reader') or not reader.c_reader: + raise ReaderException("Only C/C++ reader is supported") + + if analysis_param is None: + analysis_param = AnalysisParam() + if analysis_option is None: + analysis_option = AnalysisOption() + + self._analyzer = Analyzer(reader._reader, output_path, analysis_option, analysis_param) + + def run(self) -> None: + self._analyzer.run() + + def cleanup(self) -> None: + self._analyzer.cleanup() diff --git a/libcachesim/trace_reader.py b/libcachesim/trace_reader.py new file mode 100644 index 0000000..d37dead --- /dev/null +++ b/libcachesim/trace_reader.py @@ -0,0 +1,251 @@ +"""Wrapper of Reader""" + +import logging +from typing import overload, Union +from collections.abc import Iterator + +from .protocols import ReaderProtocol + +from .libcachesim_python import TraceType, SamplerType, Request, ReaderInitParam, Reader, Sampler, ReadDirection + + +class TraceReader(ReaderProtocol): + _reader: Reader + + # Mark this as a C++ reader for c_process_trace compatibility + c_reader: bool = True + + @overload + def __init__(self, trace: Reader) -> None: ... 
+ + def __init__( + self, + trace: Union[Reader, str], + trace_type: TraceType = TraceType.UNKNOWN_TRACE, + ignore_obj_size: bool = False, + ignore_size_zero_req: bool = False, + obj_id_is_num: bool = False, + obj_id_is_num_set: bool = False, + cap_at_n_req: int = -1, + block_size: int = 0, + has_header: bool = False, + has_header_set: bool = False, + delimiter: str = ",", + trace_start_offset: int = 0, + binary_fmt_str: str = "", + sampling_ratio: float = 1.0, + sampling_type: SamplerType = SamplerType.INVALID_SAMPLER, + ): + if isinstance(trace, Reader): + self._reader = trace + return + + # Process sampling_type + if sampling_ratio < 0.0 or sampling_ratio > 1.0: + raise ValueError("Sampling ratio must be between 0.0 and 1.0") + + if sampling_ratio == 1.0: + sampler = None + else: + if sampling_type == SamplerType.INVALID_SAMPLER: + logging.warning("Sampling type is invalid, using SPATIAL_SAMPLER instead") + sampling_type = SamplerType.SPATIAL_SAMPLER + logging.info(f"Sampling ratio: {sampling_ratio}, Sampling type: {sampling_type}") + sampler = Sampler(sampling_ratio, sampling_type) + + # Construct ReaderInitParam + reader_init_params = ReaderInitParam( + binary_fmt_str=binary_fmt_str, + ignore_obj_size=ignore_obj_size, + ignore_size_zero_req=ignore_size_zero_req, + obj_id_is_num=obj_id_is_num, + obj_id_is_num_set=obj_id_is_num_set, + cap_at_n_req=cap_at_n_req, + block_size=block_size, + has_header=has_header, + has_header_set=has_header_set, + delimiter=delimiter, + trace_start_offset=trace_start_offset, + sampler=sampler, + ) + + self._reader = Reader(trace, trace_type, reader_init_params) + + @property + def n_read_req(self) -> int: + return self._reader.n_read_req + + @property + def n_total_req(self) -> int: + return self._reader.n_total_req + + @property + def trace_path(self) -> str: + return self._reader.trace_path + + @property + def file_size(self) -> int: + return self._reader.file_size + + @property + def init_params(self) -> ReaderInitParam: + return self._reader.init_params + + @property + def trace_type(self) -> TraceType: + return self._reader.trace_type + + @property + def trace_format(self) -> str: + return self._reader.trace_format + + @property + def ver(self) -> int: + return self._reader.ver + + @property + def cloned(self) -> bool: + return self._reader.cloned + + @property + def cap_at_n_req(self) -> int: + return self._reader.cap_at_n_req + + @property + def trace_start_offset(self) -> int: + return self._reader.trace_start_offset + + @property + def mapped_file(self) -> bool: + return self._reader.mapped_file + + @property + def mmap_offset(self) -> int: + return self._reader.mmap_offset + + @property + def is_zstd_file(self) -> bool: + return self._reader.is_zstd_file + + @property + def item_size(self) -> int: + return self._reader.item_size + + @property + def line_buf(self) -> str: + return self._reader.line_buf + + @property + def line_buf_size(self) -> int: + return self._reader.line_buf_size + + @property + def csv_delimiter(self) -> str: + return self._reader.csv_delimiter + + @property + def csv_has_header(self) -> bool: + return self._reader.csv_has_header + + @property + def obj_id_is_num(self) -> bool: + return self._reader.obj_id_is_num + + @property + def obj_id_is_num_set(self) -> bool: + return self._reader.obj_id_is_num_set + + @property + def ignore_size_zero_req(self) -> bool: + return self._reader.ignore_size_zero_req + + @property + def ignore_obj_size(self) -> bool: + return self._reader.ignore_obj_size + + @property + def block_size(self) 
-> int: + return self._reader.block_size + + @ignore_size_zero_req.setter + def ignore_size_zero_req(self, value: bool) -> None: + self._reader.ignore_size_zero_req = value + + @ignore_obj_size.setter + def ignore_obj_size(self, value: bool) -> None: + self._reader.ignore_obj_size = value + + @block_size.setter + def block_size(self, value: int) -> None: + self._reader.block_size = value + + @property + def n_req_left(self) -> int: + return self._reader.n_req_left + + @property + def last_req_clock_time(self) -> int: + return self._reader.last_req_clock_time + + @property + def lcs_ver(self) -> int: + return self._reader.lcs_ver + + @property + def sampler(self) -> Sampler: + return self._reader.sampler + + @property + def read_direction(self) -> ReadDirection: + return self._reader.read_direction + + def get_num_of_req(self) -> int: + return self._reader.get_num_of_req() + + def read_one_req(self, req: Request) -> Request: + return self._reader.read_one_req(req) + + def reset(self) -> None: + self._reader.reset() + + def close(self) -> None: + self._reader.close() + + def clone(self) -> "TraceReader": + return TraceReader(self._reader.clone()) + + def read_first_req(self, req: Request) -> Request: + return self._reader.read_first_req(req) + + def read_last_req(self, req: Request) -> Request: + return self._reader.read_last_req(req) + + def skip_n_req(self, n: int) -> int: + return self._reader.skip_n_req(n) + + def read_one_req_above(self) -> Request: + return self._reader.read_one_req_above() + + def go_back_one_req(self) -> None: + self._reader.go_back_one_req() + + def set_read_pos(self, pos: float) -> None: + self._reader.set_read_pos(pos) + + def __iter__(self) -> Iterator[Request]: + return self._reader.__iter__() + + def __len__(self) -> int: + return self._reader.get_num_of_req() + + def __next__(self) -> Request: + if self._reader.n_req_left == 0: + raise StopIteration + return self._reader.read_one_req() + + def __getitem__(self, index: int) -> Request: + if index < 0 or index >= self._reader.get_num_of_req(): + raise IndexError("Index out of range") + self._reader.reset() + self._reader.skip_n_req(index) + return self._reader.read_one_req() diff --git a/libcachesim/util.py b/libcachesim/util.py new file mode 100644 index 0000000..c9c351b --- /dev/null +++ b/libcachesim/util.py @@ -0,0 +1,50 @@ +"""Wrapper misc functions""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .protocols import ReaderProtocol + from .cache import CacheBase + +from .libcachesim_python import convert_to_oracleGeneral, convert_to_lcs, c_process_trace + + +class Util: + @staticmethod + def convert_to_oracleGeneral(reader, ofilepath, output_txt=False, remove_size_change=False): + return convert_to_oracleGeneral(reader, ofilepath, output_txt, remove_size_change) + + @staticmethod + def convert_to_lcs(reader, ofilepath, output_txt=False, remove_size_change=False, lcs_ver=1): + """ + Convert a trace to LCS format. + + Args: + reader: The reader to convert. + ofilepath: The path to the output file. + output_txt: Whether to output the trace in text format. + remove_size_change: Whether to remove the size change field. + lcs_ver: The version of LCS format (1, 2, 3, 4, 5, 6, 7, 8). + """ + return convert_to_lcs(reader, ofilepath, output_txt, remove_size_change, lcs_ver) + + @staticmethod + def process_trace(cache: CacheBase, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: + """ + Process a trace with a cache. 
+ + Args: + cache: The cache to process the trace with. + reader: The reader to read the trace from. + start_req: The starting request to process. + max_req: The maximum number of requests to process. + + Returns: + tuple[float, float]: The object miss ratio and byte miss ratio. + """ + # Check if reader is C++ reader + if not hasattr(reader, 'c_reader') or not reader.c_reader: + raise ValueError("Reader must be a C++ reader") + + return c_process_trace(cache._cache, reader._reader, start_req, max_req) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..45eb26f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,111 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = "scikit_build_core.build" + + +[project] +name = "libcachesim" +version = "0.3.2" +description="Python bindings for libCacheSim" +readme = "README.md" +requires-python = ">=3.9" +keywords = ["performance", "cache", "simulator"] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "numpy>=1.20.0", +] + +[project.optional-dependencies] +test = ["pytest"] +dev = [ + "pytest", + "pre-commit", + "ruff>=0.7.0", + "mypy>=1.0.0", +] + + +[tool.scikit-build] +wheel.expand-macos-universal-tags = true + +[tool.pytest.ini_options] +minversion = "8.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config", "-m", "not optional"] +xfail_strict = true +log_cli_level = "INFO" +filterwarnings = [ + "error", + "ignore::pytest.PytestCacheWarning", +] +testpaths = ["tests"] +markers = [ + "optional: mark test as optional", +] +python_files = ["test.py", "test_*.py", "*_test.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] + + +[tool.cibuildwheel] +manylinux-x86_64-image = "quay.io/pypa/manylinux_2_34_x86_64" +manylinux-aarch64-image = "quay.io/pypa/manylinux_2_34_aarch64" + +build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"] +skip = ["*-win32", "*-manylinux_i686", "*-musllinux*", "pp*"] + +# Set the environment variable for the wheel build step. +environment = { LCS_BUILD_DIR = "{project}/build", MACOSX_DEPLOYMENT_TARGET = "14.0" } + +# Test that the wheel can be imported +test-command = "python -c 'import libcachesim; print(\"Import successful\")'" + +[tool.cibuildwheel.linux] +before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel" +before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja && cmake --build {project}/build" + +[tool.cibuildwheel.macos] +before-all = "brew install glib google-perftools argp-standalone xxhash llvm wget cmake ninja zstd xgboost lightgbm" +before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 && cmake --build {project}/build" + +[tool.ruff] +# Allow lines to be as long as 120. 
+line-length = 120 + +[tool.ruff.lint] +select = [ + # pycodestyle + "E", + # Pyflakes + "F", + # pyupgrade + "UP", + # flake8-bugbear + "B", + # flake8-simplify + "SIM", + # isort + # "I", + # flake8-logging-format + "G", +] +ignore = [ + # star imports + "F405", "F403", + # lambda expression assignment + "E731", + # Loop control variable not used within loop body + "B007", + # f-string format + "UP032", + # Can remove once 3.10+ is the minimum Python version + "UP007", + "UP045" +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/exception.cpp b/src/exception.cpp new file mode 100644 index 0000000..078d9c4 --- /dev/null +++ b/src/exception.cpp @@ -0,0 +1,56 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include "exception.h" + +#include + +namespace libcachesim { + +namespace py = pybind11; + +void register_exception(py::module& m) { + static py::exception exc_cache(m, "CacheException"); + static py::exception exc_reader(m, "ReaderException"); + + py::register_exception_translator([](std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (const CacheException& e) { + py::set_error(exc_cache, e.what()); + } catch (const ReaderException& e) { + py::set_error(exc_reader, e.what()); + } + }); + + py::register_exception_translator([](std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (const std::bad_alloc& e) { + PyErr_SetString(PyExc_MemoryError, e.what()); + } catch (const std::invalid_argument& e) { + PyErr_SetString(PyExc_ValueError, e.what()); + } catch (const std::out_of_range& e) { + PyErr_SetString(PyExc_IndexError, e.what()); + } catch (const std::domain_error& e) { + PyErr_SetString(PyExc_ValueError, + ("Domain error: " + std::string(e.what())).c_str()); + } catch (const std::overflow_error& e) { + PyErr_SetString(PyExc_OverflowError, e.what()); + } catch (const std::range_error& e) { + PyErr_SetString(PyExc_ValueError, + ("Range error: " + std::string(e.what())).c_str()); + } catch (const std::runtime_error& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, + ("C++ exception: " + std::string(e.what())).c_str()); + } + }); +} + +} // namespace libcachesim diff --git a/src/exception.h b/src/exception.h new file mode 100644 index 0000000..2749ae0 --- /dev/null +++ b/src/exception.h @@ -0,0 +1,33 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. 
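The translators registered in exception.cpp above map the project's CacheException and ReaderException to dedicated Python exception types, and standard C++ errors to Python built-ins (std::invalid_argument becomes ValueError, std::out_of_range becomes IndexError, std::bad_alloc becomes MemoryError, and so on). A short sketch of how this surfaces on the Python side, assuming the compiled extension is importable as libcachesim.libcachesim_python:

    from libcachesim.libcachesim_python import Sampler, SamplerType

    try:
        # The Sampler constructor rejects INVALID_SAMPLER with std::invalid_argument,
        # which the registered translator turns into a Python ValueError.
        Sampler(sample_ratio=0.1, type=SamplerType.INVALID_SAMPLER)
    except ValueError as err:
        print(f"rejected: {err}")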
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#pragma once + +#include + +#include +#include + +namespace libcachesim { + +namespace py = pybind11; + +class CacheException : public std::runtime_error { + public: + explicit CacheException(const std::string& message) + : std::runtime_error("CacheException: " + message) {} +}; + +class ReaderException : public std::runtime_error { + public: + explicit ReaderException(const std::string& message) + : std::runtime_error("ReaderException: " + message) {} +}; + +void register_exception(py::module& m); + +} // namespace libcachesim diff --git a/src/export.cpp b/src/export.cpp new file mode 100644 index 0000000..0ef8d83 --- /dev/null +++ b/src/export.cpp @@ -0,0 +1,38 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include "export.h" + +#include "exception.h" + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace libcachesim { + +PYBIND11_MODULE(libcachesim_python, m) { + m.doc() = "libcachesim_python"; + + // NOTE(haocheng): can use decentralized interface holder to export all the + // methods if the codebase is large enough + + export_cache(m); + export_reader(m); + export_analyzer(m); + export_misc(m); + + // NOTE(haocheng): register exception to make it available in Python + register_exception(m); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} + +} // namespace libcachesim diff --git a/src/export.h b/src/export.h new file mode 100644 index 0000000..121ff97 --- /dev/null +++ b/src/export.h @@ -0,0 +1,27 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#pragma once + +#include "pybind11/operators.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace libcachesim { + +namespace py = pybind11; + +using py::literals::operator""_a; + +void export_cache(py::module &m); +void export_pyplugin_cache(py::module &m); + +void export_reader(py::module &m); +void export_analyzer(py::module &m); +void export_misc(py::module &m); + +} // namespace libcachesim diff --git a/src/export_analyzer.cpp b/src/export_analyzer.cpp new file mode 100644 index 0000000..f05c853 --- /dev/null +++ b/src/export_analyzer.cpp @@ -0,0 +1,135 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. 
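PYBIND11_MODULE above stitches the per-area exporters (cache, reader, analyzer, misc) into one extension module named libcachesim_python, which the pure-Python package then wraps. A tiny sketch of inspecting it directly, assuming a regular wheel install:

    import libcachesim.libcachesim_python as lcs_c

    print(lcs_c.__version__)           # "dev" unless VERSION_INFO was set at build time
    print(hasattr(lcs_c, "Reader"))    # exported by export_reader.cpp
    print(hasattr(lcs_c, "Analyzer"))  # exported by export_analyzer.cpp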
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include +#include +#include + +#include +#include + +#include "../libCacheSim/traceAnalyzer/analyzer.h" +#include "export.h" +#include "libCacheSim/cache.h" +#include "libCacheSim/reader.h" +#include "libCacheSim/request.h" + +namespace libcachesim { + +namespace py = pybind11; + +// Custom deleters for smart pointers +struct AnalysisParamDeleter { + void operator()(traceAnalyzer::analysis_param_t* ptr) const { + if (ptr != nullptr) free(ptr); + } +}; + +struct AnalysisOptionDeleter { + void operator()(traceAnalyzer::analysis_option_t* ptr) const { + if (ptr != nullptr) free(ptr); + } +}; + +void export_analyzer(py::module& m) { + py::class_< + traceAnalyzer::analysis_param_t, + std::unique_ptr>( + m, "AnalysisParam") + .def(py::init([](int access_pattern_sample_ratio_inv, int track_n_popular, + int track_n_hit, int time_window, int warmup_time) { + traceAnalyzer::analysis_param_t params; + params.access_pattern_sample_ratio_inv = + access_pattern_sample_ratio_inv; + params.track_n_popular = track_n_popular; + params.track_n_hit = track_n_hit; + params.time_window = time_window; + params.warmup_time = warmup_time; + return std::unique_ptr( + new traceAnalyzer::analysis_param_t(params)); + }), + "access_pattern_sample_ratio_inv"_a = 10, "track_n_popular"_a = 10, + "track_n_hit"_a = 5, "time_window"_a = 60, "warmup_time"_a = 0) + .def_readwrite( + "access_pattern_sample_ratio_inv", + &traceAnalyzer::analysis_param_t::access_pattern_sample_ratio_inv) + .def_readwrite("track_n_popular", + &traceAnalyzer::analysis_param_t::track_n_popular) + .def_readwrite("track_n_hit", + &traceAnalyzer::analysis_param_t::track_n_hit) + .def_readwrite("time_window", + &traceAnalyzer::analysis_param_t::time_window) + .def_readwrite("warmup_time", + &traceAnalyzer::analysis_param_t::warmup_time); + + py::class_< + traceAnalyzer::analysis_option_t, + std::unique_ptr>( + m, "AnalysisOption") + .def( + py::init([](bool req_rate, bool access_pattern, bool size, bool reuse, + bool popularity, bool ttl, bool popularity_decay, + bool lifetime, bool create_future_reuse_ccdf, + bool prob_at_age, bool size_change) { + traceAnalyzer::analysis_option_t option; + option.req_rate = req_rate; + option.access_pattern = access_pattern; + option.size = size; + option.reuse = reuse; + option.popularity = popularity; + option.ttl = ttl; + option.popularity_decay = popularity_decay; + option.lifetime = lifetime; + option.create_future_reuse_ccdf = create_future_reuse_ccdf; + option.prob_at_age = prob_at_age; + option.size_change = size_change; + return std::unique_ptr( + new traceAnalyzer::analysis_option_t(option)); + }), + "req_rate"_a = true, "access_pattern"_a = true, "size"_a = true, + "reuse"_a = true, "popularity"_a = true, "ttl"_a = false, + "popularity_decay"_a = false, "lifetime"_a = false, + "create_future_reuse_ccdf"_a = false, "prob_at_age"_a = false, + "size_change"_a = false) + .def_readwrite("req_rate", &traceAnalyzer::analysis_option_t::req_rate) + .def_readwrite("access_pattern", + &traceAnalyzer::analysis_option_t::access_pattern) + .def_readwrite("size", &traceAnalyzer::analysis_option_t::size) + .def_readwrite("reuse", &traceAnalyzer::analysis_option_t::reuse) + .def_readwrite("popularity", + &traceAnalyzer::analysis_option_t::popularity) + .def_readwrite("ttl", &traceAnalyzer::analysis_option_t::ttl) + 
.def_readwrite("popularity_decay", + &traceAnalyzer::analysis_option_t::popularity_decay) + .def_readwrite("lifetime", &traceAnalyzer::analysis_option_t::lifetime) + .def_readwrite( + "create_future_reuse_ccdf", + &traceAnalyzer::analysis_option_t::create_future_reuse_ccdf) + .def_readwrite("prob_at_age", + &traceAnalyzer::analysis_option_t::prob_at_age) + .def_readwrite("size_change", + &traceAnalyzer::analysis_option_t::size_change); + + py::class_>(m, "Analyzer") + .def(py::init([](reader_t* reader, std::string output_path, + const traceAnalyzer::analysis_option_t& option, + const traceAnalyzer::analysis_param_t& param) { + traceAnalyzer::TraceAnalyzer* analyzer = + new traceAnalyzer::TraceAnalyzer(reader, output_path, option, + param); + return std::unique_ptr(analyzer); + }), + "reader"_a, "output_path"_a, + "option"_a = traceAnalyzer::default_option(), + "param"_a = traceAnalyzer::default_param()) + .def("run", &traceAnalyzer::TraceAnalyzer::run); +} + +} // namespace libcachesim diff --git a/src/export_cache.cpp b/src/export_cache.cpp new file mode 100644 index 0000000..fb383a2 --- /dev/null +++ b/src/export_cache.cpp @@ -0,0 +1,538 @@ +// libcachesim_python - libCacheSim Python bindings +// Export cache core functions and classes +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include +#include +#include + +#include +#include +#include + +#include "config.h" +#include "dataStructure/hashtable/hashtable.h" +#include "export.h" +#include "libCacheSim/cache.h" +#include "libCacheSim/cacheObj.h" +#include "libCacheSim/enum.h" +#include "libCacheSim/evictionAlgo.h" +#include "libCacheSim/plugin.h" +#include "libCacheSim/request.h" + +namespace libcachesim { + +namespace py = pybind11; + +// Custom deleters for smart pointers +struct CacheDeleter { + void operator()(cache_t* ptr) const { + if (ptr != nullptr) ptr->cache_free(ptr); + } +}; + +struct CommonCacheParamsDeleter { + void operator()(common_cache_params_t* ptr) const { + if (ptr != nullptr) { + delete ptr; // Simple delete for POD struct + } + } +}; + +struct CacheObjectDeleter { + void operator()(cache_obj_t* ptr) const { + if (ptr != nullptr) free_cache_obj(ptr); + } +}; + +struct RequestDeleter { + void operator()(request_t* ptr) const { + if (ptr != nullptr) free_request(ptr); + } +}; + +// *********************************************************************** +// **** Python plugin cache implementation BEGIN **** +// *********************************************************************** + +// Forward declaration with appropriate visibility +struct pypluginCache_params; + +typedef struct __attribute__((visibility("hidden"))) pypluginCache_params { + py::object data; ///< Plugin's internal data structure (python object) + py::function cache_init_hook; + py::function cache_hit_hook; + py::function cache_miss_hook; + py::function cache_eviction_hook; + py::function cache_remove_hook; + py::function cache_free_hook; + std::string cache_name; +} pypluginCache_params_t; + +// Custom deleter for pypluginCache_params_t +struct PypluginCacheParamsDeleter { + void operator()(pypluginCache_params_t* ptr) const { + if (ptr != nullptr) { + // Call the free hook if available before deletion + if (!ptr->cache_free_hook.is_none()) { + try { + ptr->cache_free_hook(ptr->data); + } catch (...) 
{ + // Ignore exceptions during cleanup to prevent double-fault + } + } + delete ptr; + } + } +}; + +static void pypluginCache_free(cache_t* cache); +static bool pypluginCache_get(cache_t* cache, const request_t* req); +static cache_obj_t* pypluginCache_find(cache_t* cache, const request_t* req, + const bool update_cache); +static cache_obj_t* pypluginCache_insert(cache_t* cache, const request_t* req); +static cache_obj_t* pypluginCache_to_evict(cache_t* cache, + const request_t* req); +static void pypluginCache_evict(cache_t* cache, const request_t* req); +static bool pypluginCache_remove(cache_t* cache, const obj_id_t obj_id); + +cache_t* pypluginCache_init( + const common_cache_params_t ccache_params, std::string cache_name, + py::function cache_init_hook, py::function cache_hit_hook, + py::function cache_miss_hook, py::function cache_eviction_hook, + py::function cache_remove_hook, py::function cache_free_hook) { + // Initialize base cache structure with exception safety + cache_t* cache = nullptr; + std::unique_ptr params; + + try { + cache = cache_struct_init(cache_name.c_str(), ccache_params, NULL); + if (!cache) { + throw std::runtime_error("Failed to initialize cache structure"); + } + + // Set function pointers for cache operations + cache->cache_init = NULL; + cache->cache_free = pypluginCache_free; + cache->get = pypluginCache_get; + cache->find = pypluginCache_find; + cache->insert = pypluginCache_insert; + cache->evict = pypluginCache_evict; + cache->remove = pypluginCache_remove; + cache->to_evict = pypluginCache_to_evict; + cache->get_occupied_byte = cache_get_occupied_byte_default; + cache->get_n_obj = cache_get_n_obj_default; + cache->can_insert = cache_can_insert_default; + cache->obj_md_size = 0; + + // Allocate and initialize plugin parameters using smart pointer with custom + // deleter + params = + std::unique_ptr( + new pypluginCache_params_t(), PypluginCacheParamsDeleter()); + params->cache_name = cache_name; + params->cache_init_hook = cache_init_hook; + params->cache_hit_hook = cache_hit_hook; + params->cache_miss_hook = cache_miss_hook; + params->cache_eviction_hook = cache_eviction_hook; + params->cache_remove_hook = cache_remove_hook; + params->cache_free_hook = cache_free_hook; + + // Initialize the cache data - this might throw + params->data = cache_init_hook(ccache_params); + + // Transfer ownership to the cache structure + cache->eviction_params = params.release(); + + return cache; + + } catch (...) 
{ + // Clean up on exception + if (cache) { + cache_struct_free(cache); + } + // params will be automatically cleaned up by smart pointer destructor + throw; // Re-throw the exception + } +} + +static void pypluginCache_free(cache_t* cache) { + if (!cache || !cache->eviction_params) { + return; + } + + // Use smart pointer for automatic cleanup + std::unique_ptr params( + static_cast(cache->eviction_params)); + + // The smart pointer destructor will handle cleanup automatically + cache_struct_free(cache); +} + +static bool pypluginCache_get(cache_t* cache, const request_t* req) { + bool hit = cache_get_base(cache, req); + pypluginCache_params_t* params = + (pypluginCache_params_t*)cache->eviction_params; + + if (hit) { + params->cache_hit_hook(params->data, req); + } else { + params->cache_miss_hook(params->data, req); + } + + return hit; +} + +static cache_obj_t* pypluginCache_find(cache_t* cache, const request_t* req, + const bool update_cache) { + return cache_find_base(cache, req, update_cache); +} + +static cache_obj_t* pypluginCache_insert(cache_t* cache, const request_t* req) { + return cache_insert_base(cache, req); +} + +static cache_obj_t* pypluginCache_to_evict(cache_t* cache, + const request_t* req) { + throw std::runtime_error("pypluginCache does not support to_evict function"); +} + +static void pypluginCache_evict(cache_t* cache, const request_t* req) { + pypluginCache_params_t* params = + (pypluginCache_params_t*)cache->eviction_params; + + // Get eviction candidate from plugin + py::object result = params->cache_eviction_hook(params->data, req); + obj_id_t obj_id = result.cast(); + + // Find the object in the cache + cache_obj_t* obj_to_evict = hashtable_find_obj_id(cache->hashtable, obj_id); + if (obj_to_evict == NULL) { + throw std::runtime_error("pypluginCache: object " + std::to_string(obj_id) + + " to be evicted not found in cache"); + } + + // Perform the eviction + cache_evict_base(cache, obj_to_evict, true); +} + +static bool pypluginCache_remove(cache_t* cache, const obj_id_t obj_id) { + pypluginCache_params_t* params = + (pypluginCache_params_t*)cache->eviction_params; + + // Notify plugin of the removal + params->cache_remove_hook(params->data, obj_id); + + // Find the object in the cache + cache_obj_t* obj = hashtable_find_obj_id(cache->hashtable, obj_id); + if (obj == NULL) { + return false; + } + + // Remove the object from the cache + cache_remove_obj_base(cache, obj, true); + return true; +} + +// *********************************************************************** +// **** Python plugin cache implementation END **** +// *********************************************************************** + +// Templates +template +auto make_cache_wrapper(const std::string& fn_name) { + return [=](py::module_& m) { + m.def( + fn_name.c_str(), + [](const common_cache_params_t& cc_params, + const std::string& cache_specific_params) { + const char* params_cstr = cache_specific_params.empty() + ? 
nullptr + : cache_specific_params.c_str(); + cache_t* ptr = InitFn(cc_params, params_cstr); + return std::unique_ptr(ptr); + }, + "cc_params"_a, "cache_specific_params"_a = ""); + }; +} + +void export_cache(py::module& m) { + /** + * @brief Cache structure + */ + py::class_>(m, "Cache") + .def_readonly("cache_size", &cache_t::cache_size) + .def_readonly("default_ttl", &cache_t::default_ttl) + .def_readonly("obj_md_size", &cache_t::obj_md_size) + .def_readonly("n_req", &cache_t::n_req) + .def_readonly("cache_name", &cache_t::cache_name) + .def_readonly("init_params", &cache_t::init_params) + .def( + "get", + [](cache_t& self, const request_t& req) { + return self.get(&self, &req); + }, + "req"_a) + .def( + "find", + [](cache_t& self, const request_t& req, const bool update_cache) { + return self.find(&self, &req, update_cache); + }, + "req"_a, "update_cache"_a = true) + .def( + "can_insert", + [](cache_t& self, const request_t& req) { + return self.can_insert(&self, &req); + }, + "req"_a) + .def( + "insert", + [](cache_t& self, const request_t& req) { + return self.insert(&self, &req); + }, + "req"_a) + .def( + "need_eviction", + [](cache_t& self, const request_t& req) { + return self.need_eviction(&self, &req); + }, + "req"_a) + .def( + "evict", + [](cache_t& self, const request_t& req) { + return self.evict(&self, &req); + }, + "req"_a) + .def( + "remove", + [](cache_t& self, obj_id_t obj_id) { + return self.remove(&self, obj_id); + }, + "obj_id"_a) + .def( + "to_evict", + [](cache_t& self, const request_t& req) { + return self.to_evict(&self, &req); + }, + "req"_a) + .def("get_occupied_byte", + [](cache_t& self) { return self.get_occupied_byte(&self); }) + .def("get_n_obj", [](cache_t& self) { return self.get_n_obj(&self); }) + .def("print_cache", [](cache_t& self) { + // Capture stdout to return as string + std::ostringstream captured_output; + std::streambuf* orig = std::cout.rdbuf(); + std::cout.rdbuf(captured_output.rdbuf()); + + self.print_cache(&self); + + // Restore original stdout + std::cout.rdbuf(orig); + return captured_output.str(); + }); + + /** + * @brief Common cache parameters + */ + py::class_>( + m, "CommonCacheParams") + .def(py::init([](uint64_t cache_size, uint64_t default_ttl, + int32_t hashpower, bool consider_obj_metadata) { + common_cache_params_t* params = new common_cache_params_t(); + params->cache_size = cache_size; + params->default_ttl = default_ttl; + params->hashpower = hashpower; + params->consider_obj_metadata = consider_obj_metadata; + return params; + }), + "cache_size"_a, "default_ttl"_a = 86400 * 300, "hashpower"_a = 24, + "consider_obj_metadata"_a = false) + .def_readwrite("cache_size", &common_cache_params_t::cache_size) + .def_readwrite("default_ttl", &common_cache_params_t::default_ttl) + .def_readwrite("hashpower", &common_cache_params_t::hashpower) + .def_readwrite("consider_obj_metadata", + &common_cache_params_t::consider_obj_metadata); + + /** + * @brief Cache object + * + * TODO: full support for cache object + */ + py::class_>( + m, "CacheObject") + .def_readonly("obj_id", &cache_obj_t::obj_id) + .def_readonly("obj_size", &cache_obj_t::obj_size); + + /** + * @brief Request operation enumeration + */ + py::enum_(m, "ReqOp") + .value("OP_NOP", OP_NOP) + .value("OP_GET", OP_GET) + .value("OP_GETS", OP_GETS) + .value("OP_SET", OP_SET) + .value("OP_ADD", OP_ADD) + .value("OP_CAS", OP_CAS) + .value("OP_REPLACE", OP_REPLACE) + .value("OP_APPEND", OP_APPEND) + .value("OP_PREPEND", OP_PREPEND) + .value("OP_DELETE", OP_DELETE) + .value("OP_INCR", 
OP_INCR) + .value("OP_DECR", OP_DECR) + .value("OP_READ", OP_READ) + .value("OP_WRITE", OP_WRITE) + .value("OP_UPDATE", OP_UPDATE) + .value("OP_INVALID", OP_INVALID) + .export_values(); + + /** + * @brief Request structure + */ + py::class_>(m, + "Request") + .def(py::init([](int64_t obj_size, req_op_e op, bool valid, + obj_id_t obj_id, int64_t clock_time, uint64_t hv, + int64_t next_access_vtime, int32_t ttl) { + request_t* req = new_request(); + req->obj_size = obj_size; + req->op = op; + req->valid = valid; + req->obj_id = obj_id; + req->clock_time = clock_time; + req->hv = hv; + req->next_access_vtime = next_access_vtime; + req->ttl = ttl; + return req; + }), + "obj_size"_a = 1, "op"_a = OP_NOP, "valid"_a = true, "obj_id"_a = 0, + "clock_time"_a = 0, "hv"_a = 0, "next_access_vtime"_a = -2, + "ttl"_a = 0) + .def_readwrite("clock_time", &request_t::clock_time) + .def_readwrite("hv", &request_t::hv) + .def_readwrite("obj_id", &request_t::obj_id) + .def_readwrite("obj_size", &request_t::obj_size) + .def_readwrite("ttl", &request_t::ttl) + .def_readwrite("op", &request_t::op) + .def_readwrite("valid", &request_t::valid) + .def_readwrite("next_access_vtime", &request_t::next_access_vtime); + + /** + * @brief Generic function to create a cache instance. + * + * TODO: add support for general cache creation and add support for cache + * specific parameters this is a backup for cache creation in python. + */ + + // Cache algorithm initialization functions + + make_cache_wrapper("ARC_init")(m); + make_cache_wrapper("ARCv0_init")(m); + make_cache_wrapper("CAR_init")(m); + make_cache_wrapper("Cacheus_init")(m); + make_cache_wrapper("Clock_init")(m); + make_cache_wrapper("ClockPro_init")(m); + make_cache_wrapper("FIFO_init")(m); + make_cache_wrapper("FIFO_Merge_init")(m); + make_cache_wrapper("flashProb_init")(m); + make_cache_wrapper("GDSF_init")(m); + make_cache_wrapper("LHD_init")(m); + make_cache_wrapper("LeCaR_init")(m); + make_cache_wrapper("LeCaRv0_init")(m); + make_cache_wrapper("LFU_init")(m); + make_cache_wrapper("LFUCpp_init")(m); + make_cache_wrapper("LFUDA_init")(m); + make_cache_wrapper("LIRS_init")(m); + make_cache_wrapper("LRU_init")(m); + make_cache_wrapper("LRU_Prob_init")(m); + make_cache_wrapper("nop_init")(m); + + make_cache_wrapper("QDLP_init")(m); + make_cache_wrapper("Random_init")(m); + make_cache_wrapper("RandomLRU_init")(m); + make_cache_wrapper("RandomTwo_init")(m); + make_cache_wrapper("S3FIFO_init")(m); + make_cache_wrapper("S3FIFOv0_init")(m); + make_cache_wrapper("S3FIFOd_init")(m); + make_cache_wrapper("Sieve_init")(m); + make_cache_wrapper("Size_init")(m); + make_cache_wrapper("SLRU_init")(m); + make_cache_wrapper("SLRUv0_init")(m); + make_cache_wrapper("TwoQ_init")(m); + make_cache_wrapper("WTinyLFU_init")(m); + make_cache_wrapper("Hyperbolic_init")(m); + make_cache_wrapper("Belady_init")(m); + make_cache_wrapper("BeladySize_init")(m); + +#ifdef ENABLE_3L_CACHE + make_cache_wrapper("ThreeLCache_init")(m); +#endif + +#ifdef ENABLE_GLCACHE + make_cache_wrapper("GLCache_init")(m); +#endif + +#ifdef ENABLE_LRB + make_cache_wrapper("LRB_init")(m); +#endif + + // *********************************************************************** + // **** **** + // **** Python plugin cache bindings **** + // **** **** + // *********************************************************************** + + m.def("pypluginCache_init", &pypluginCache_init, "cc_params"_a, + "cache_name"_a, "cache_init_hook"_a, "cache_hit_hook"_a, + "cache_miss_hook"_a, "cache_eviction_hook"_a, 
"cache_remove_hook"_a, + "cache_free_hook"_a); + // *********************************************************************** + // **** **** + // **** end functions for python plugin **** + // **** **** + // *********************************************************************** + + m.def( + "c_process_trace", + [](cache_t& cache, reader_t& reader, int64_t start_req = 0, + int64_t max_req = -1) { + reset_reader(&reader); + if (start_req > 0) { + skip_n_req(&reader, start_req); + } + + request_t* req = new_request(); + int64_t n_req = 0, n_hit = 0; + int64_t bytes_req = 0, bytes_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + n_req += 1; + bytes_req += req->obj_size; + hit = cache.get(&cache, req); + if (hit) { + n_hit += 1; + bytes_hit += req->obj_size; + } + read_one_req(&reader, req); + if (max_req > 0 && n_req >= max_req) { + break; // Stop if we reached the max request limit + } + } + + free_request(req); + // return the miss ratio + double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + double byte_miss_ratio = + bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0; + return std::make_tuple(obj_miss_ratio, byte_miss_ratio); + }, + "cache"_a, "reader"_a, "start_req"_a = 0, "max_req"_a = -1); +} + +} // namespace libcachesim diff --git a/src/export_misc.cpp b/src/export_misc.cpp new file mode 100644 index 0000000..0800059 --- /dev/null +++ b/src/export_misc.cpp @@ -0,0 +1,30 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include + +#include "../libCacheSim/bin/traceUtils/internal.hpp" +#include "export.h" + +namespace libcachesim { + +namespace py = pybind11; + +void export_misc(py::module& m) { + // NOTE(haocheng): Here we provide some convertion functions and utilities + // - convert_to_oracleGeneral + // - convert_to_lcs: v1 to v8 (default v1) + + m.def("convert_to_oracleGeneral", &traceConv::convert_to_oracleGeneral, + "reader"_a, "ofilepath"_a, "output_txt"_a = false, + "remove_size_change"_a = false); + m.def("convert_to_lcs", &traceConv::convert_to_lcs, "reader"_a, "ofilepath"_a, + "output_txt"_a = false, "remove_size_change"_a = false, + "lcs_ver"_a = 1); +} + +} // namespace libcachesim diff --git a/src/export_reader.cpp b/src/export_reader.cpp new file mode 100644 index 0000000..468f542 --- /dev/null +++ b/src/export_reader.cpp @@ -0,0 +1,326 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. 
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "cli_reader_utils.h" +#include "config.h" +#include "export.h" +#include "libCacheSim/enum.h" +#include "libCacheSim/reader.h" +#include "libCacheSim/request.h" +#include "mystr.h" + +namespace libcachesim { + +namespace py = pybind11; + +// Custom deleters for smart pointers +struct ReaderDeleter { + void operator()(reader_t* ptr) const { + if (ptr != nullptr) close_trace(ptr); + } +}; + +struct RequestDeleter { + void operator()(request_t* ptr) const { + if (ptr != nullptr) free_request(ptr); + } +}; + +struct ReaderInitParamDeleter { + void operator()(reader_init_param_t* ptr) const { + if (ptr != nullptr) { + // Free the strdup'ed string if it exists + if (ptr->binary_fmt_str != nullptr) { + free(ptr->binary_fmt_str); + ptr->binary_fmt_str = nullptr; + } + free(ptr); + } + } +}; + +struct SamplerDeleter { + void operator()(sampler_t* ptr) const { + if (ptr != nullptr && ptr->free != nullptr) { + ptr->free(ptr); + } + } +}; + +void export_reader(py::module& m) { + // Sampler type enumeration + py::enum_(m, "SamplerType") + .value("SPATIAL_SAMPLER", sampler_type::SPATIAL_SAMPLER) + .value("TEMPORAL_SAMPLER", sampler_type::TEMPORAL_SAMPLER) + .value("SHARDS_SAMPLER", sampler_type::SHARDS_SAMPLER) + .value("INVALID_SAMPLER", sampler_type::INVALID_SAMPLER) + .export_values(); + + // Trace type enumeration + py::enum_(m, "TraceType") + .value("CSV_TRACE", trace_type_e::CSV_TRACE) + .value("BIN_TRACE", trace_type_e::BIN_TRACE) + .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) + .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE) + .value("LCS_TRACE", trace_type_e::LCS_TRACE) + .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) + .value("TWR_TRACE", trace_type_e::TWR_TRACE) + .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE) + .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE) + .value("ORACLE_SYS_TWR_TRACE", trace_type_e::ORACLE_SYS_TWR_TRACE) + .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE) + .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE) + .value("VALPIN_TRACE", trace_type_e::VALPIN_TRACE) + .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE) + .export_values(); + + py::enum_(m, "ReadDirection") + .value("READ_FORWARD", read_direction::READ_FORWARD) + .value("READ_BACKWARD", read_direction::READ_BACKWARD) + .export_values(); + + /** + * @brief Sampler structure + */ + py::class_>(m, + "Sampler") + .def(py::init([](double sample_ratio, enum sampler_type type) + -> std::unique_ptr { + switch (type) { + case sampler_type::SPATIAL_SAMPLER: + return std::unique_ptr( + create_spatial_sampler(sample_ratio)); + case sampler_type::TEMPORAL_SAMPLER: + return std::unique_ptr( + create_temporal_sampler(sample_ratio)); + case sampler_type::SHARDS_SAMPLER: + throw std::invalid_argument("SHARDS_SAMPLER is not added"); + case sampler_type::INVALID_SAMPLER: + default: + throw std::invalid_argument("Unknown sampler type"); + } + }), + "sample_ratio"_a = 0.1, "type"_a = sampler_type::INVALID_SAMPLER) + .def_readwrite("sampling_ratio_inv", &sampler_t::sampling_ratio_inv) + .def_readwrite("sampling_ratio", &sampler_t::sampling_ratio) + .def_readwrite("sampling_salt", &sampler_t::sampling_salt) + .def_readwrite("sampling_type", &sampler_t::type); + + // Reader 
initialization parameters + py::class_(m, "ReaderInitParam") + .def(py::init([]() { return default_reader_init_params(); })) + .def(py::init([](const std::string& binary_fmt_str, bool ignore_obj_size, + bool ignore_size_zero_req, bool obj_id_is_num, + bool obj_id_is_num_set, int64_t cap_at_n_req, + int64_t block_size, bool has_header, bool has_header_set, + const std::string& delimiter, ssize_t trace_start_offset, + sampler_t* sampler) { + reader_init_param_t params = default_reader_init_params(); + + // Safe string handling with proper error checking + if (!binary_fmt_str.empty()) { + char* fmt_str = strdup(binary_fmt_str.c_str()); + if (!fmt_str) { + throw std::bad_alloc(); + } + params.binary_fmt_str = fmt_str; + } + + params.ignore_obj_size = ignore_obj_size; + params.ignore_size_zero_req = ignore_size_zero_req; + params.obj_id_is_num = obj_id_is_num; + params.obj_id_is_num_set = obj_id_is_num_set; + params.cap_at_n_req = cap_at_n_req; + params.block_size = block_size; + params.has_header = has_header; + params.has_header_set = has_header_set; + params.delimiter = delimiter.empty() ? ',' : delimiter[0]; + params.trace_start_offset = trace_start_offset; + params.sampler = sampler; + return params; + }), + "binary_fmt_str"_a = "", "ignore_obj_size"_a = false, + "ignore_size_zero_req"_a = true, "obj_id_is_num"_a = true, + "obj_id_is_num_set"_a = false, "cap_at_n_req"_a = -1, + "block_size"_a = -1, "has_header"_a = false, + "has_header_set"_a = false, "delimiter"_a = ",", + "trace_start_offset"_a = 0, "sampler"_a = nullptr) + .def_readwrite("ignore_obj_size", &reader_init_param_t::ignore_obj_size) + .def_readwrite("ignore_size_zero_req", + &reader_init_param_t::ignore_size_zero_req) + .def_readwrite("obj_id_is_num", &reader_init_param_t::obj_id_is_num) + .def_readwrite("obj_id_is_num_set", + &reader_init_param_t::obj_id_is_num_set) + .def_readwrite("cap_at_n_req", &reader_init_param_t::cap_at_n_req) + .def_readwrite("time_field", &reader_init_param_t::time_field) + .def_readwrite("obj_id_field", &reader_init_param_t::obj_id_field) + .def_readwrite("obj_size_field", &reader_init_param_t::obj_size_field) + .def_readwrite("op_field", &reader_init_param_t::op_field) + .def_readwrite("ttl_field", &reader_init_param_t::ttl_field) + .def_readwrite("cnt_field", &reader_init_param_t::cnt_field) + .def_readwrite("tenant_field", &reader_init_param_t::tenant_field) + .def_readwrite("next_access_vtime_field", + &reader_init_param_t::next_access_vtime_field) + .def_readwrite("n_feature_fields", &reader_init_param_t::n_feature_fields) + // .def_readwrite("feature_fields", &reader_init_param_t::feature_fields) + .def_property( + "feature_fields", + [](const reader_init_param_t& self) { + return py::array_t({self.n_feature_fields}, + self.feature_fields); // copy to python + }, + [](reader_init_param_t& self, py::array_t arr) { + if (arr.size() != self.n_feature_fields) + throw std::runtime_error("Expected array of size " + + std::to_string(self.n_feature_fields)); + std::memcpy( + self.feature_fields, arr.data(), + self.n_feature_fields * sizeof(int)); // write to C++ array + }) + .def_readwrite("block_size", &reader_init_param_t::block_size) + .def_readwrite("has_header", &reader_init_param_t::has_header) + .def_readwrite("has_header_set", &reader_init_param_t::has_header_set) + .def_readwrite("delimiter", &reader_init_param_t::delimiter) + .def_readwrite("trace_start_offset", + &reader_init_param_t::trace_start_offset) + .def_readwrite("binary_fmt_str", &reader_init_param_t::binary_fmt_str) + 
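Because ReaderInitParam mirrors reader_init_param_t field for field, a CSV layout can be described before constructing the low-level Reader. A short sketch, assuming 1-based column indices (the usual libCacheSim CSV convention) and a hypothetical trace.csv:

    from libcachesim.libcachesim_python import Reader, ReaderInitParam, TraceType

    params = ReaderInitParam(has_header=True, has_header_set=True, obj_id_is_num=True)
    params.time_field = 1       # which CSV columns hold the timestamp,
    params.obj_id_field = 2     # the object id,
    params.obj_size_field = 3   # and the object size

    reader = Reader("trace.csv", TraceType.CSV_TRACE, params)
    print(reader.get_num_of_req())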
.def_readwrite("sampler", &reader_init_param_t::sampler); + + /** + * @brief Reader structure + */ + py::class_>(m, "Reader") + .def(py::init([](const std::string& trace_path, trace_type_e trace_type, + const reader_init_param_t& init_params) { + trace_type_e final_trace_type = trace_type; + if (final_trace_type == trace_type_e::UNKNOWN_TRACE) { + final_trace_type = detect_trace_type(trace_path.c_str()); + } + reader_t* ptr = setup_reader(trace_path.c_str(), final_trace_type, + &init_params); + if (ptr == nullptr) { + throw std::runtime_error("Failed to create reader for " + + trace_path); + } + return std::unique_ptr(ptr); + }), + "trace_path"_a, "trace_type"_a = trace_type_e::UNKNOWN_TRACE, + "init_params"_a = default_reader_init_params()) + .def_readonly("n_read_req", &reader_t::n_read_req) + .def_readonly("n_total_req", &reader_t::n_total_req) + .def_readonly("trace_path", &reader_t::trace_path) + .def_readonly("file_size", &reader_t::file_size) + .def_readonly("init_params", &reader_t::init_params) + .def_readonly("trace_type", &reader_t::trace_type) + .def_readonly("trace_format", &reader_t::trace_format) + .def_readonly("ver", &reader_t::ver) + .def_readonly("cloned", &reader_t::cloned) + .def_readonly("cap_at_n_req", &reader_t::cap_at_n_req) + .def_readonly("trace_start_offset", &reader_t::trace_start_offset) + // For binary traces + .def_readonly("mapped_file", &reader_t::mapped_file) + .def_readonly("mmap_offset", &reader_t::mmap_offset) + // .def_readonly("zstd_reader_p", &reader_t::zstd_reader_p) + .def_readonly("is_zstd_file", &reader_t::is_zstd_file) + .def_readonly("item_size", &reader_t::item_size) + // For text traces + .def_readonly("file", &reader_t::file) + .def_readonly("line_buf", &reader_t::line_buf) + .def_readonly("line_buf_size", &reader_t::line_buf_size) + .def_readonly("csv_delimiter", &reader_t::csv_delimiter) + .def_readonly("csv_has_header", &reader_t::csv_has_header) + .def_readonly("obj_id_is_num", &reader_t::obj_id_is_num) + .def_readonly("obj_id_is_num_set", &reader_t::obj_id_is_num_set) + // Other properties + .def_readwrite("ignore_size_zero_req", &reader_t::ignore_size_zero_req) + .def_readwrite("ignore_obj_size", &reader_t::ignore_obj_size) + .def_readwrite("block_size", &reader_t::block_size) + .def_readonly("n_req_left", &reader_t::n_req_left) + .def_readonly("last_req_clock_time", &reader_t::last_req_clock_time) + .def_readonly("lcs_ver", &reader_t::lcs_ver) + // TODO(haocheng): Fully support sampler in Python bindings + .def_readonly("sampler", &reader_t::sampler) + .def_readonly("read_direction", &reader_t::read_direction) + .def("get_num_of_req", + [](reader_t& self) { return get_num_of_req(&self); }) + .def( + "read_one_req", + [](reader_t& self, request_t& req) { + int ret = read_one_req(&self, &req); + if (ret != 0) { + throw std::runtime_error("Failed to read request"); + } + return req; + }, + "req"_a) + .def("reset", [](reader_t& self) { reset_reader(&self); }) + .def("close", [](reader_t& self) { close_reader(&self); }) + .def("clone", + [](const reader_t& self) { + reader_t* cloned_reader = clone_reader(&self); + if (cloned_reader == nullptr) { + throw std::runtime_error("Failed to clone reader"); + } + return std::unique_ptr(cloned_reader); + }) + .def( + "read_first_req", + [](reader_t& self, request_t& req) { + read_first_req(&self, &req); + return req; + }, + "req"_a) + .def( + "read_last_req", + [](reader_t& self, request_t& req) { + read_last_req(&self, &req); + return req; + }, + "req"_a) + .def( + "skip_n_req", + [](reader_t& 
self, int n) { + int ret = skip_n_req(&self, n); + if (ret != 0) { + throw std::runtime_error("Failed to skip requests"); + } + return ret; + }, + "n"_a) + .def("read_one_req_above", + [](reader_t& self) { + request_t* req = new_request(); + int ret = read_one_req_above(&self, req); + if (ret != 0) { + free_request(req); + throw std::runtime_error("Failed to read one request above"); + } + return std::unique_ptr(req); + }) + .def("go_back_one_req", + [](reader_t& self) { + int ret = go_back_one_req(&self); + if (ret != 0) { + throw std::runtime_error("Failed to go back one request"); + } + }) + .def( + "set_read_pos", + [](reader_t& self, double pos) { reader_set_read_pos(&self, pos); }, + "pos"_a); +} +} // namespace libcachesim diff --git a/src/libCacheSim b/src/libCacheSim new file mode 160000 index 0000000..9646c8e --- /dev/null +++ b/src/libCacheSim @@ -0,0 +1 @@ +Subproject commit 9646c8e46875d96458daab66bd8b0bf8991ddce4 diff --git a/tests/__pycache__/conftest.cpython-310-pytest-8.4.1.pyc b/tests/__pycache__/conftest.cpython-310-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c45c6dae6d9839196436f12f8cb1900a0e65074 GIT binary patch literal 358 zcmYjLF;c@Y5R?>1WXFC0Eu9PE4`ebPD4?QAr$(dLN~{@M@<{S9?)VNhzfnuY7gBLD zkjWLddwbeFnLJMs)aUuVeFprB!*vlDDul3m1k#`ecc{k<2F{FOeC&EeAA&Q&==oGV|1 ysavT}Wv32L0_a}#*ZcvztK-@Zbrq(*RV*6gas`8hKe!6mna?lhCb*b4eazv(Tcxm zsaUSKvSXiq;*>#>oKr(TN*7n|{%(a3I6DLLj10O%8?;9EnBfPET|`{8!6sPaJG>z# zToXoE@SSXeb#Om;0M4*SZf6J>g%_)*g?i(JjqFwlIk{+WT_XD(jZ^iL37`WhI+NwgUjr4kaQ9PH;>n1PrUkP)a8@ zzV2&b>Q>vyNw+Nb*|QR`pj}2aP&2h`GsVBE+HgDCm+Wv?OW^Sq9exF{_Y@!D)v h@+uOkJ+hzsz&!#sHh$Ge)(id`JI)cf48?d7`~|8d(}4g0 literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_data_loader.cpython-310-pytest-8.4.1.pyc b/tests/__pycache__/test_data_loader.cpython-310-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36da0f030b31dc3c234cca5e13fb28dd1f4bd1b8 GIT binary patch literal 644 zcma)4y-pl45Vm)h`{S>S@&F*V$nFUW8U!a1DHMo??i$Otc32~Oee(E(?gRyb2O(u% z$!)5nsRU)=;vzz!vH!4hgQZ^M=G?boS1a%gj+mX@%Em*PhhBUrT`1Cn-cO~q zgx|ysG0VS;dI8cweZeg@-`&(&&CcT-*UGHpZ}X*ZZC85<9+FIhve(?zd>Ot(*Se84 z))?CrahC%WJvzd(L)|7S5vfFZL_+)Op~bylMs2QaKAvZSCAFN-Zkcc)7UlslC3+Yv~gZ9 z&&>VRrq(6XJYTY`VA8l}npe3E-in8Yi+YTz-XdshrJ!HQ0GgoXphXIIpgo+xyV>CM zFVo%!MBwh#OTrh3CR`OcQ2@vIdLR7mJFYAKUsXE_Usw?M9bPz0kNrHDby> zWumRvyxo|{(pw_Hyy_7`NJ8S0;6Ku<$3$8>j!bag@k;Yrndqq9q<7wv6w6?$d8V){ z9JQWH_EB=p%=C^j{+0{z@>n^OW)B-5r&U}?`wxxFxm}Ze%dzx0^A0@30~5RyP0>~C N(C%5;y*`7;{|6h5vIhVF literal 0 HcmV?d00001 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..42edf91 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +import os +import gc + +import pytest diff --git a/tests/reference.csv b/tests/reference.csv new file mode 100644 index 0000000..cb569d0 --- /dev/null +++ b/tests/reference.csv @@ -0,0 +1,20 @@ +FIFO,0.01,0.8368 +ARC,0.01,0.8222 +Clock,0.01,0.8328 +LRB,0.01,0.8339 +LRU,0.01,0.8339 +S3FIFO,0.01,0.8235 +Sieve,0.01,0.8231 +3LCache,0.01,0.8339 +TinyLFU,0.01,0.8262 +TwoQ,0.01,0.8276 +FIFO,0.1,0.8075 +ARC,0.1,0.7688 +Clock,0.1,0.8086 +LRB,0.1,0.8097 +LRU,0.1,0.8097 +S3FIFO,0.1,0.7542 +Sieve,0.1,0.7903 +3LCache,0.1,0.8097 +TinyLFU,0.1,0.7666 +TwoQ,0.1,0.7695 diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py new file mode 100644 index 0000000..f5d8543 --- /dev/null +++ b/tests/test_analyzer.py @@ -0,0 +1,15 @@ +from libcachesim import TraceAnalyzer, TraceReader, DataLoader +import os + + +def 
test_analyzer_common(): + # Add debugging and error handling + loader = DataLoader() + loader.load("cache_dataset_oracleGeneral/2020_tencentBlock/1K/tencentBlock_1621.oracleGeneral.zst") + file_path = loader.get_cache_path("cache_dataset_oracleGeneral/2020_tencentBlock/1K/tencentBlock_1621.oracleGeneral.zst") + + reader = TraceReader(file_path) + + analyzer = TraceAnalyzer(reader, output_path="./") + + analyzer.run() diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py new file mode 100644 index 0000000..5aba6f5 --- /dev/null +++ b/tests/test_data_loader.py @@ -0,0 +1,8 @@ +from libcachesim import DataLoader + + +def test_data_loader_common(): + loader = DataLoader() + loader.load("cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst") + path = loader.get_cache_path("cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst") + filles = loader.list_s3_objects("cache_dataset_oracleGeneral/2007_msr/") diff --git a/tests/test_example.py b/tests/test_example.py new file mode 100644 index 0000000..9cfcb7f --- /dev/null +++ b/tests/test_example.py @@ -0,0 +1,16 @@ +from libcachesim import ( + Request, + LRU, + SyntheticReader, + Util, +) + +def test_example(): + reader = SyntheticReader(num_of_req=1000) + cache = LRU(cache_size=1000) + miss_cnt = 0 + for req in reader: + hit = cache.get(req) + if not hit: + miss_cnt += 1 + print(f"Miss ratio: {miss_cnt / reader.num_of_req}") From cf3c7bf11db7258fab97ad178fbbebd1da4564a7 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 24 Jul 2025 09:51:28 +0000 Subject: [PATCH 2/6] Build pass --- .gitignore | 233 ++++++++++++++++++ CMakeLists.txt | 14 +- docs/mkdocs.yml | 112 +++++++++ docs/requirements.txt | 3 + .../__pycache__/__init__.cpython-310.pyc | Bin 1328 -> 0 bytes libcachesim/__pycache__/cache.cpython-310.pyc | Bin 10978 -> 0 bytes .../__pycache__/data_loader.cpython-310.pyc | Bin 5814 -> 0 bytes .../__pycache__/protocols.cpython-310.pyc | Bin 2257 -> 0 bytes .../synthetic_reader.cpython-310.pyc | Bin 12050 -> 0 bytes .../trace_analyzer.cpython-310.pyc | Bin 1840 -> 0 bytes .../__pycache__/trace_reader.cpython-310.pyc | Bin 8917 -> 0 bytes libcachesim/__pycache__/util.cpython-310.pyc | Bin 2233 -> 0 bytes pyproject.toml | 6 +- scripts/install.sh | 23 ++ scripts/sync_version.py | 93 +++++++ .../conftest.cpython-310-pytest-8.4.1.pyc | Bin 358 -> 0 bytes ...test_analyzer.cpython-310-pytest-8.4.1.pyc | Bin 753 -> 0 bytes ...t_data_loader.cpython-310-pytest-8.4.1.pyc | Bin 644 -> 0 bytes .../test_example.cpython-310-pytest-8.4.1.pyc | Bin 680 -> 0 bytes 19 files changed, 477 insertions(+), 7 deletions(-) create mode 100644 .gitignore create mode 100644 docs/mkdocs.yml create mode 100644 docs/requirements.txt delete mode 100644 libcachesim/__pycache__/__init__.cpython-310.pyc delete mode 100644 libcachesim/__pycache__/cache.cpython-310.pyc delete mode 100644 libcachesim/__pycache__/data_loader.cpython-310.pyc delete mode 100644 libcachesim/__pycache__/protocols.cpython-310.pyc delete mode 100644 libcachesim/__pycache__/synthetic_reader.cpython-310.pyc delete mode 100644 libcachesim/__pycache__/trace_analyzer.cpython-310.pyc delete mode 100644 libcachesim/__pycache__/trace_reader.cpython-310.pyc delete mode 100644 libcachesim/__pycache__/util.cpython-310.pyc create mode 100644 scripts/install.sh create mode 100644 scripts/sync_version.py delete mode 100644 tests/__pycache__/conftest.cpython-310-pytest-8.4.1.pyc delete mode 100644 tests/__pycache__/test_analyzer.cpython-310-pytest-8.4.1.pyc delete mode 100644 
tests/__pycache__/test_data_loader.cpython-310-pytest-8.4.1.pyc delete mode 100644 tests/__pycache__/test_example.cpython-310-pytest-8.4.1.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..83cff87 --- /dev/null +++ b/.gitignore @@ -0,0 +1,233 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml + + +# From libCacheSim +__pycache__ +*deprecated* +*.DS_Store* +*.bak +*.clean +*.nogit* +*_build* +*.out +build +.idea +example/cacheSimulatorC/cmake-build-debug +.vscode/* +*.log +fig/ +result/ +data_large/ +# Chaos +sftp-config.json +# Clangd cache +*.cache/ +.lint-logs/ +# Python wheels +*.whl + +# Custom files +CMakeFiles/* +*.pyc \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index a8b76ec..7c731ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,19 @@ cmake_minimum_required(VERSION 3.15...3.27) +project(libCacheSim-python) +set(DESCRIPTION "The libCacheSim Python Package") +set(PROJECT_WEB "http://cachemon.github.io/libCacheSim-python") + +# Note(haocheng): now we still utilize the exported cache from +# the main project, which should be deprecated soon # Include exported variables from cache if(DEFINED LIBCB_BUILD_DIR) - set(PARENT_BUILD_DIR "${LIBCB_BUILD_DIR}") + set(MAIN_PROJECT_BUILD_DIR "${LIBCB_BUILD_DIR}") message(STATUS "Using provided LIBCB_BUILD_DIR: ${LIBCB_BUILD_DIR}") else() - set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") + set(MAIN_PROJECT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/libCacheSim/build") endif() -set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") +set(EXPORT_FILE "${MAIN_PROJECT_BUILD_DIR}/export_vars.cmake") if(EXISTS "${EXPORT_FILE}") include("${EXPORT_FILE}") @@ -56,7 +62,7 @@ include_directories(${ZSTD_INCLUDE_DIR}) include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin) # Find the main libCacheSim library -set(MAIN_PROJECT_BUILD_DIR "${PARENT_BUILD_DIR}") +set(MAIN_PROJECT_BUILD_DIR "${MAIN_PROJECT_BUILD_DIR}") set(MAIN_PROJECT_LIB_PATH "${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a") if(EXISTS "${MAIN_PROJECT_LIB_PATH}") diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml new file mode 100644 index 0000000..5a2bc86 --- /dev/null 
+++ b/docs/mkdocs.yml @@ -0,0 +1,112 @@ +site_name: libCacheSim Python Documentation +site_url: https://cachemon.github.io/libCacheSim-python/ +repo_url: https://github.com/cacheMon/libCacheSim-python +repo_name: cacheMon/libCacheSim-python + +docs_dir: src + +nav: + - Home: index.md + - Quick Start: quickstart.md + - API Reference: api.md + - Examples: examples.md + +theme: + name: material + language: en + palette: + # Palette toggle for light mode + - scheme: default + primary: custom + accent: custom + toggle: + icon: material/brightness-7 + name: Switch to dark mode + # Palette toggle for dark mode + - scheme: slate + primary: custom + accent: custom + toggle: + icon: material/brightness-4 + name: Switch to light mode + font: + text: Open Sans + features: + - header.autohide + - navigation.tabs + - navigation.footer + - navigation.sections + - navigation.expand + - navigation.path + - navigation.top + - toc.follow + - search.highlight + - search.share + - search.suggest + - content.code.copy + - content.code.annotate + +extra_css: + - ../stylesheets/extra.css + +plugins: + - search + - i18n: + docs_structure: folder + fallback_to_default: true + reconfigure_material: true + reconfigure_search: true + default_language_only: false + languages: + - locale: en + default: true + name: English + build: true + - locale: zh + name: 中文 + build: true + nav_translations: + Home: 首页 + Quick Start: 快速开始 + API Reference: API参考 + Examples: 使用示例 + - mkdocstrings: + handlers: + python: + paths: [../src] + options: + docstring_style: google + show_source: true + show_root_heading: true + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.tabbed: + alternate_style: true + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - codehilite + - toc: + permalink: true + - tables + - footnotes + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/cacheMon/libCacheSim-python + +copyright: Copyright © 2025 libCacheSim Team \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..06b095a --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +mkdocs-material>=9.6.5 +mkdocstrings-python>=1.16.2 +mkdocs-static-i18n>=1.2.0 \ No newline at end of file diff --git a/libcachesim/__pycache__/__init__.cpython-310.pyc b/libcachesim/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index e2627cb76e5fe1fcdbcd8caa67b4b79880636ce2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1328 zcmchWOK;mo5XVW0)RU55l3(&eaU45Joft*0K~cwnkN`&QiZ)uzWw8<0Hjy1tC8-qh zEuSN|-ufl-LFU?C@)dgO&Z;ilTTdl{-_8v8!Trxn+G;f%^L*}py!}*hoZs1b_>_$E zo#VLqAI69ra3VJ>)}f9KXrNhvO=#M=z7DVj zEo?&@7hnMwVG);L3726RS6~G@(7`TraTQi^4c2fS)^P(ia1%DM2R+<^E!>7}+<_h3 zgA#DUC{AwjFD%Z$rKU`hw2(!dCixeaV|DWF zBC3wm&r_8b@o43W8qX JcHDs9(qDZ7S`z>O diff --git a/libcachesim/__pycache__/cache.cpython-310.pyc b/libcachesim/__pycache__/cache.cpython-310.pyc deleted file mode 100644 index ebb9c03a44098c251fdf6066206e0d0196749734..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10978 zcmcgy-H#hba-SIv$sw2Iet%0^>dRVMT1nQ2{Ec`B?yqm1MV?E-Ub2kkcU8d3bF|hICtm#s%H4L 
[GIT binary patch data omitted: deleted compiled caches libcachesim/__pycache__/cache.cpython-310.pyc, data_loader.cpython-310.pyc, protocols.cpython-310.pyc, synthetic_reader.cpython-310.pyc, trace_analyzer.cpython-310.pyc, trace_reader.cpython-310.pyc, and util.cpython-310.pyc are removed in this patch]
diff --git a/pyproject.toml b/pyproject.toml index 45eb26f..ef799b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,18 +62,18 @@ build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*", "pp*"] # Set the environment variable for the wheel build step.
-environment = { LCS_BUILD_DIR = "{project}/build", MACOSX_DEPLOYMENT_TARGET = "14.0" } +environment = { LCS_BUILD_DIR = "{project}/src/libCacheSim/build", MACOSX_DEPLOYMENT_TARGET = "14.0" } # Test that the wheel can be imported test-command = "python -c 'import libcachesim; print(\"Import successful\")'" [tool.cibuildwheel.linux] before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel" -before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja && cmake --build {project}/build" +before-build = "rm -rf {project}/src/libCacheSim/build && cmake -S {project} -B {project}/src/libCacheSim/build -G Ninja && cmake --build {project}/src/libCacheSim/build" [tool.cibuildwheel.macos] before-all = "brew install glib google-perftools argp-standalone xxhash llvm wget cmake ninja zstd xgboost lightgbm" -before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 && cmake --build {project}/build" +before-build = "rm -rf {project}/src/libCacheSim/build && cmake -S {project} -B {project}/src/libCacheSim/build -G Ninja -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 && cmake --build {project}/src/libCacheSim/build" [tool.ruff] # Allow lines to be as long as 120. diff --git a/scripts/install.sh b/scripts/install.sh new file mode 100644 index 0000000..e0bee89 --- /dev/null +++ b/scripts/install.sh @@ -0,0 +1,23 @@ +git submodule update --init --recursive + +# Build the main libCacheSim C++ library first +echo "Building main libCacheSim library..." +pushd src/libCacheSim +rm -rf build +cmake -G Ninja -B build # -DENABLE_3L_CACHE=ON +ninja -C build +popd + +# Now build and install the Python binding +echo "Building Python binding..." +echo "Sync python version..." +python scripts/sync_version.py +python -m pip install -e . -vvv + +# Test that the import works +echo "Testing import..." +python -c "import libcachesim" + +# Run tests +python -m pip install pytest +python -m pytest tests \ No newline at end of file diff --git a/scripts/sync_version.py b/scripts/sync_version.py new file mode 100644 index 0000000..34d40c5 --- /dev/null +++ b/scripts/sync_version.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +""" +Script to synchronize version between libCacheSim main project and Python bindings. + +This script reads the version from version.txt and updates the pyproject.toml +in libCacheSim-python to match. 
+""" + +import json +import os +import sys +import re +from pathlib import Path + + +def get_project_root(): + """Get the project root directory.""" + script_dir = Path(__file__).parent + return script_dir.parent + + +def read_main_version(): + """Read version from version.txt.""" + project_root = get_project_root() + version_file = project_root / "src/libCacheSim/version.txt" + + if not version_file.exists(): + print(f"Error: {version_file} not found", file=sys.stderr) + sys.exit(1) + + with open(version_file, 'r') as f: + version = f.read().strip() + + if not version: + print("Error: version.txt is empty", file=sys.stderr) + sys.exit(1) + + return version + +def update_pyproject_toml(version): + """Update pyproject.toml with the new version.""" + project_root = get_project_root() + pyproject_toml_path = project_root / "pyproject.toml" + + if not pyproject_toml_path.exists(): + print(f"Error: {pyproject_toml_path} not found", file=sys.stderr) + return False + + # Read current pyproject.toml + with open(pyproject_toml_path, 'r') as f: + pyproject_data = f.read() + + # Update the version line in pyproject.toml, make it can match any version in version.txt, like "0.3.1" or "dev" + match = re.search(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data) + if not match: + print("Error: Could not find a valid version line in pyproject.toml", file=sys.stderr) + return False + current_version = match.group(1) + if current_version == version: + print(f"Python binding version already up to date: {version}") + return False + # replace the version line with the new version + pyproject_data = re.sub(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", f"version = \"{version}\"", pyproject_data) + + # Write back to file with proper formatting + with open(pyproject_toml_path, 'w') as f: + f.write(pyproject_data) + + print(f"Updated Python version: {current_version} → {version}") + return True + + +def main(): + """Main function.""" + try: + # Read main project version + main_version = read_main_version() + print(f"Main project version: {main_version}") + + # Update Python binding version + updated = update_pyproject_toml(main_version) + + if updated: + print("Python binding version synchronized successfully") + else: + print("No changes needed") + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/__pycache__/conftest.cpython-310-pytest-8.4.1.pyc b/tests/__pycache__/conftest.cpython-310-pytest-8.4.1.pyc deleted file mode 100644 index 9c45c6dae6d9839196436f12f8cb1900a0e65074..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 358 zcmYjLF;c@Y5R?>1WXFC0Eu9PE4`ebPD4?QAr$(dLN~{@M@<{S9?)VNhzfnuY7gBLD zkjWLddwbeFnLJMs)aUuVeFprB!*vlDDul3m1k#`ecc{k<2F{FOeC&EeAA&Q&==oGV|1 ysavT}Wv32L0_a}#*ZcvztK-@Zbrq(*RV*6gas`8hKe!6mna?lhCb*b4eazv(Tcxm zsaUSKvSXiq;*>#>oKr(TN*7n|{%(a3I6DLLj10O%8?;9EnBfPET|`{8!6sPaJG>z# zToXoE@SSXeb#Om;0M4*SZf6J>g%_)*g?i(JjqFwlIk{+WT_XD(jZ^iL37`WhI+NwgUjr4kaQ9PH;>n1PrUkP)a8@ zzV2&b>Q>vyNw+Nb*|QR`pj}2aP&2h`GsVBE+HgDCm+Wv?OW^Sq9exF{_Y@!D)v h@+uOkJ+hzsz&!#sHh$Ge)(id`JI)cf48?d7`~|8d(}4g0 diff --git a/tests/__pycache__/test_data_loader.cpython-310-pytest-8.4.1.pyc b/tests/__pycache__/test_data_loader.cpython-310-pytest-8.4.1.pyc deleted file mode 100644 index 36da0f030b31dc3c234cca5e13fb28dd1f4bd1b8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 644 zcma)4y-pl45Vm)h`{S>S@&F*V$nFUW8U!a1DHMo??i$Otc32~Oee(E(?gRyb2O(u% 
z$!)5nsRU)=;vzz!vH!4hgQZ^M=G?boS1a%gj+mX@%Em*PhhBUrT`1Cn-cO~q zgx|ysG0VS;dI8cweZeg@-`&(&&CcT-*UGHpZ}X*ZZC85<9+FIhve(?zd>Ot(*Se84 z))?CrahC%WJvzd(L)|7S5vfFZL_+)Op~bylMs2QaKAvZSCAFN-Zkcc)7UlslC3+Yv~gZ9 z&&>VRrq(6XJYTY`VA8l}npe3E-in8Yi+YTz-XdshrJ!HQ0GgoXphXIIpgo+xyV>CM zFVo%!MBwh#OTrh3CR`OcQ2@vIdLR7mJFYAKUsXE_Usw?M9bPz0kNrHDby> zWumRvyxo|{(pw_Hyy_7`NJ8S0;6Ku<$3$8>j!bag@k;Yrndqq9q<7wv6w6?$d8V){ z9JQWH_EB=p%=C^j{+0{z@>n^OW)B-5r&U}?`wxxFxm}Ze%dzx0^A0@30~5RyP0>~C N(C%5;y*`7;{|6h5vIhVF From 85be495b0e7c1756c122794becf2174d13ad5111 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 24 Jul 2025 10:44:45 +0000 Subject: [PATCH 3/6] Add CI and test --- .github/workflows/build.yml | 38 +++ .gitmodules | 3 - README.md | 24 +- benchmark/simulation.py | 5 + export/CMakeLists.txt | 38 --- export/README.md | 47 --- libcachesim/trace_reader.py | 50 +-- pyproject.toml | 1 + requirements.txt | 0 tests/test_analyzer.py | 11 +- MAINFEST.in => tests/test_cache.py | 0 tests/test_example.py | 16 - tests/test_reader.py | 472 +++++++++++++++++++++++++++++ 13 files changed, 540 insertions(+), 165 deletions(-) create mode 100644 .github/workflows/build.yml create mode 100644 benchmark/simulation.py delete mode 100644 export/CMakeLists.txt delete mode 100644 export/README.md delete mode 100644 requirements.txt rename MAINFEST.in => tests/test_cache.py (100%) delete mode 100644 tests/test_example.py create mode 100644 tests/test_reader.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..f73bcfb --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,38 @@ +name: Build and Test libCacheSim-python + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Init submodules + run: git submodule update --init --recursive + + - name: Prepare + run: bash src/libCacheSim/scripts/install_dependency.sh + + - name: Build main libCacheSim project + run: | + pushd src/libCacheSim + cmake -G Ninja -B build + ninja -C build + popd + + - name: Build libCacheSim-python + run: | + pip install -e .[dev] + + - name: Run tests + run: | + python -m pytest tests/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 210ee99..afddc09 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "libCacheSim"] - path = libCacheSim - url = git@github.com:1a1a11a/libCacheSim.git [submodule "src/libCacheSim"] path = src/libCacheSim url = git@github.com:1a1a11a/libCacheSim.git diff --git a/README.md b/README.md index 23424c3..e34048a 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # libCacheSim Python Binding -[![Python Release](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml/badge.svg)](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml) +[![Build](https://github.com/cacheMon/libCacheSim-python/actions/workflows/build.yml/badge.svg)](https://github.com/cacheMon/libCacheSim-python/actions/workflows/build.yml) [![Python Versions](https://img.shields.io/pypi/pyversions/libcachesim.svg?logo=python&logoColor=white)](https://pypi.org/project/libcachesim) [![PyPI Version](https://img.shields.io/pypi/v/libcachesim.svg?)](https://pypi.org/project/libcachesim) [![PyPI - Downloads](https://img.shields.io/pypi/dd/libcachesim)](https://pypistats.org/packages/libcachesim) -Python bindings for libCacheSim, a high-performance cache 
simulator and analysis library. +Python bindings for [libCacheSim](https://github.com/1a1a11a/libCacheSim), a high-performance cache simulator and analysis library. ## Installation @@ -20,25 +20,13 @@ pip install libcachesim If there are no wheels suitable for your environment, consider building from source. ```bash -git clone https://github.com/1a1a11a/libCacheSim.git -cd libCacheSim - -# Build the main libCacheSim library first -cmake -G Ninja -B build -ninja -C build - -# Install Python binding -cd libCacheSim-python -pip install -e . +bash scripts/install.sh ``` -### Testing -```bash -# Run all tests -python -m pytest . +Run all tests to ensure the package works. -# Test import -python -c "import libcachesim; print('Success!')" +```bash +python -m pytest tests/ ``` ## Quick Start diff --git a/benchmark/simulation.py b/benchmark/simulation.py new file mode 100644 index 0000000..0841157 --- /dev/null +++ b/benchmark/simulation.py @@ -0,0 +1,5 @@ +""" Benchmark the simulation performance of the library. + +This module contains benchmarks for various components of the library, +including request processing times, memory usage, and overall throughput. +""" \ No newline at end of file diff --git a/export/CMakeLists.txt b/export/CMakeLists.txt deleted file mode 100644 index 917e831..0000000 --- a/export/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -# Helper functions are removed since we don't export source files anymore - -set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake") -file(WRITE "${EXPORT_FILE}" "") - -get_filename_component(MAIN_PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR} ABSOLUTE) -file(WRITE ${CMAKE_BINARY_DIR}/export_vars.cmake "set(MAIN_PROJECT_SOURCE_DIR \"${MAIN_PROJECT_SOURCE_DIR}\")\n") -file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(dependency_libs \"${dependency_libs}\")\n") -file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(LIBCACHESIM_VERSION \"${LIBCACHESIM_VERSION}\")\n") - -# ============================================================================== -# Export project metadata -# ============================================================================== -file(APPEND "${EXPORT_FILE}" "set(LIBCACHESIM_VERSION \"${${PROJECT_NAME}_VERSION}\")\n") - -# ============================================================================== -# Export essential include directory variables -# ============================================================================== -foreach(var IN ITEMS GLib_INCLUDE_DIRS GLib_CONFIG_INCLUDE_DIR XGBOOST_INCLUDE_DIR LIGHTGBM_PATH ZSTD_INCLUDE_DIR) - file(APPEND "${EXPORT_FILE}" "set(${var} \"${${var}}\")\n") -endforeach() - -# ============================================================================== -# Export dependency library variables -# ============================================================================== -file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARY_DIRS \"${GLib_LIBRARY_DIRS}\")\n") -file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARIES \"${GLib_LIBRARIES}\")\n") -get_filename_component(ZSTD_LIBRARY_DIR "${ZSTD_LIBRARIES}" DIRECTORY) -file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARY_DIRS \"${ZSTD_LIBRARY_DIRS}\")\n") -file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARIES \"${ZSTD_LIBRARIES}\")\n") -file(APPEND "${EXPORT_FILE}" "set(dependency_libs \"${dependency_libs}\")\n") - -# ============================================================================== -# Export essential build option variables -# ============================================================================== -file(APPEND "${EXPORT_FILE}" 
"set(LOG_LEVEL_LOWER \"${LOG_LEVEL_LOWER}\")\n") - -message(STATUS "Exported essential variables to ${EXPORT_FILE}") diff --git a/export/README.md b/export/README.md deleted file mode 100644 index 976b1da..0000000 --- a/export/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# Python Binding Export System - -Build system bridge for sharing CMake variables between the main libCacheSim project and Python binding. - -## Purpose - -The `export/CMakeLists.txt` exports all necessary build variables (source files, include directories, compiler flags, etc.) from the main project to the Python binding, enabling consistent builds without duplicating configuration. - -## How It Works - -1. **Export**: Main project writes variables to `export_vars.cmake` -2. **Import**: Python binding includes this file during CMake configuration -3. **Build**: Python binding uses shared variables for consistent compilation - -## Key Exported Variables - -### Source Files -- Cache algorithms, data structures, trace readers -- Profilers, utilities, analyzers - -### Build Configuration -- Include directories (main, GLib, ZSTD, XGBoost, LightGBM) -- Compiler flags (C/C++) -- Dependency libraries -- Build options (hugepage, tests, optional features) - -## Usage - -**Main Project** (`CMakeLists.txt`): -```cmake -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) -``` - -**Python Binding** (`libCacheSim-python/CMakeLists.txt`): -```cmake -set(EXPORT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../build/export_vars.cmake") -include("${EXPORT_FILE}") -``` - -## For Developers - -This system ensures the Python binding automatically picks up changes to: -- New source files added to the main project -- Updated compiler flags or dependencies -- Modified build options - -No manual synchronization needed between main project and Python binding builds. 
diff --git a/libcachesim/trace_reader.py b/libcachesim/trace_reader.py index d37dead..8bc47f4 100644 --- a/libcachesim/trace_reader.py +++ b/libcachesim/trace_reader.py @@ -1,7 +1,7 @@ """Wrapper of Reader""" import logging -from typing import overload, Union +from typing import overload, Union, Optional from collections.abc import Iterator from .protocols import ReaderProtocol @@ -22,53 +22,19 @@ def __init__( self, trace: Union[Reader, str], trace_type: TraceType = TraceType.UNKNOWN_TRACE, - ignore_obj_size: bool = False, - ignore_size_zero_req: bool = False, - obj_id_is_num: bool = False, - obj_id_is_num_set: bool = False, - cap_at_n_req: int = -1, - block_size: int = 0, - has_header: bool = False, - has_header_set: bool = False, - delimiter: str = ",", - trace_start_offset: int = 0, - binary_fmt_str: str = "", - sampling_ratio: float = 1.0, - sampling_type: SamplerType = SamplerType.INVALID_SAMPLER, + reader_init_params: Optional[ReaderInitParam] = None, ): + if isinstance(trace, Reader): self._reader = trace return - # Process sampling_type - if sampling_ratio < 0.0 or sampling_ratio > 1.0: - raise ValueError("Sampling ratio must be between 0.0 and 1.0") - - if sampling_ratio == 1.0: - sampler = None - else: - if sampling_type == SamplerType.INVALID_SAMPLER: - logging.warning("Sampling type is invalid, using SPATIAL_SAMPLER instead") - sampling_type = SamplerType.SPATIAL_SAMPLER - logging.info(f"Sampling ratio: {sampling_ratio}, Sampling type: {sampling_type}") - sampler = Sampler(sampling_ratio, sampling_type) - - # Construct ReaderInitParam - reader_init_params = ReaderInitParam( - binary_fmt_str=binary_fmt_str, - ignore_obj_size=ignore_obj_size, - ignore_size_zero_req=ignore_size_zero_req, - obj_id_is_num=obj_id_is_num, - obj_id_is_num_set=obj_id_is_num_set, - cap_at_n_req=cap_at_n_req, - block_size=block_size, - has_header=has_header, - has_header_set=has_header_set, - delimiter=delimiter, - trace_start_offset=trace_start_offset, - sampler=sampler, - ) + if reader_init_params is None: + reader_init_params = ReaderInitParam() + if not isinstance(reader_init_params, ReaderInitParam): + raise TypeError("reader_init_params must be an instance of ReaderInitParam") + self._reader = Reader(trace, trace_type, reader_init_params) @property diff --git a/pyproject.toml b/pyproject.toml index ef799b1..d7d5320 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ classifiers = [ ] dependencies = [ "numpy>=1.20.0", + "boto3", # For S3 ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index f5d8543..75476f8 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -10,6 +10,15 @@ def test_analyzer_common(): reader = TraceReader(file_path) - analyzer = TraceAnalyzer(reader, output_path="./") + analyzer = TraceAnalyzer(reader, "TestAnalyzerResults") analyzer.run() + + # Clean file after test, match all files with the prefix "TestAnalyzerResults" + for file in os.listdir("."): + if file.startswith("TestAnalyzerResults"): + os.remove(file) + # Remove file named "stat" + stat_file = "stat" + if os.path.exists(stat_file): + os.remove(stat_file) diff --git a/MAINFEST.in b/tests/test_cache.py similarity index 100% rename from MAINFEST.in rename to tests/test_cache.py diff --git a/tests/test_example.py b/tests/test_example.py deleted file mode 100644 index 9cfcb7f..0000000 --- a/tests/test_example.py +++ /dev/null @@ -1,16 +0,0 @@ -from 
libcachesim import ( - Request, - LRU, - SyntheticReader, - Util, -) - -def test_example(): - reader = SyntheticReader(num_of_req=1000) - cache = LRU(cache_size=1000) - miss_cnt = 0 - for req in reader: - hit = cache.get(req) - if not hit: - miss_cnt += 1 - print(f"Miss ratio: {miss_cnt / reader.num_of_req}") diff --git a/tests/test_reader.py b/tests/test_reader.py new file mode 100644 index 0000000..a13570c --- /dev/null +++ b/tests/test_reader.py @@ -0,0 +1,472 @@ +""" +Test cases for trace readers in libCacheSim Python bindings. + +This module tests both TraceReader and SyntheticReader functionality. +""" + +import pytest +import tempfile +import os +from libcachesim import TraceReader, SyntheticReader, DataLoader +from libcachesim.libcachesim_python import TraceType, SamplerType, Request, ReqOp, ReaderInitParam, Sampler + + +class TestSyntheticReader: + """Test SyntheticReader functionality""" + + def test_basic_initialization(self): + """Test basic SyntheticReader initialization""" + reader = SyntheticReader(num_of_req=100, obj_size=1024) + assert reader.get_num_of_req() == 100 + assert len(reader) == 100 + + def test_zipf_distribution(self): + """Test Zipf distribution request generation""" + reader = SyntheticReader( + num_of_req=1000, + obj_size=1024, + alpha=1.0, + dist="zipf", + num_objects=100, + seed=42 + ) + + # Test basic properties + assert reader.get_num_of_req() == 1000 + assert len(reader) == 1000 + + # Read some requests and verify they are valid + req = Request() + first_req = reader.read_one_req(req) + assert first_req.obj_id >= 0 + assert first_req.obj_size == 1024 + assert hasattr(first_req, 'op') # Just check it has op attribute + + def test_uniform_distribution(self): + """Test uniform distribution request generation""" + reader = SyntheticReader( + num_of_req=500, + obj_size=512, + dist="uniform", + num_objects=50, + seed=123 + ) + + assert reader.get_num_of_req() == 500 + + # Read some requests + req = Request() + for _ in range(10): + read_req = reader.read_one_req(req) + assert read_req.obj_size == 512 + assert hasattr(read_req, 'op') # Just check it has op attribute + + def test_reader_iteration(self): + """Test iteration over synthetic reader""" + reader = SyntheticReader(num_of_req=50, obj_size=1024, seed=42) + + count = 0 + for req in reader: + assert req.obj_size == 1024 + assert hasattr(req, 'op') # Just check it has op attribute + count += 1 + if count >= 10: # Only test first 10 for efficiency + break + + assert count == 10 + + def test_reader_reset(self): + """Test reader reset functionality""" + reader = SyntheticReader(num_of_req=100, obj_size=1024, seed=42) + + # Read some requests + req = Request() + first_read = reader.read_one_req(req) + reader.read_one_req(req) + reader.read_one_req(req) + + # Reset and read again + reader.reset() + reset_read = reader.read_one_req(req) + + # Should get the same first request after reset + assert first_read.obj_id == reset_read.obj_id + + def test_skip_requests(self): + """Test skipping requests""" + reader = SyntheticReader(num_of_req=100, obj_size=1024, seed=42) + + # Skip 10 requests + skipped = reader.skip_n_req(10) + assert skipped == 10 + + # Verify we can still read remaining requests + req = Request() + read_req = reader.read_one_req(req) + assert read_req.valid == True # Should still be able to read + + def test_clone_reader(self): + """Test reader cloning""" + reader = SyntheticReader(num_of_req=100, obj_size=1024, seed=42) + + # Read some requests + req = Request() + reader.read_one_req(req) + 
reader.read_one_req(req) + + # Clone the reader + cloned_reader = reader.clone() + + # Both readers should have same configuration + assert cloned_reader.get_num_of_req() == reader.get_num_of_req() + assert isinstance(cloned_reader, SyntheticReader) + + def test_invalid_parameters(self): + """Test error handling for invalid parameters""" + with pytest.raises(ValueError): + SyntheticReader(num_of_req=0) # Invalid num_of_req + + with pytest.raises(ValueError): + SyntheticReader(num_of_req=100, obj_size=0) # Invalid obj_size + + with pytest.raises(ValueError): + SyntheticReader(num_of_req=100, alpha=-1.0) # Invalid alpha + + +class TestTraceReader: + """Test TraceReader functionality""" + + def test_csv_trace_creation(self): + """Test creating a CSV trace and reading it""" + # Create a temporary CSV trace file + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + # Write CSV header and some sample data + f.write("timestamp,obj_id,obj_size,op\n") + f.write("1,100,1024,0\n") + f.write("2,101,2048,0\n") + f.write("3,102,512,0\n") + f.write("4,100,1024,0\n") # Repeat access + f.write("5,103,4096,0\n") + temp_file = f.name + + try: + read_init_param = ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True, + ) + read_init_param.time_field = 1 + read_init_param.obj_id_field = 2 + read_init_param.obj_size_field = 3 + read_init_param.op_field = 4 + + # Create TraceReader + reader = TraceReader( + trace=temp_file, + trace_type=TraceType.CSV_TRACE, + reader_init_params=read_init_param + ) + + # Test basic properties + assert reader.get_num_of_req() == 5 + assert len(reader) == 5 + assert reader.trace_path == temp_file + # TODO(haocheng): check it + # assert reader.csv_has_header == True + # assert reader.csv_delimiter == "," + + # Read first request + req = Request() + first_req = reader.read_one_req(req) + assert first_req.obj_id == 100 + assert first_req.obj_size == 1024 + + finally: + # Clean up + os.unlink(temp_file) + + def test_trace_reader_iteration(self): + """Test iteration over trace reader""" + # Create temporary trace + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write("timestamp,obj_id,obj_size,op\n") + for i in range(10): + f.write(f"{i+1},{100+i},{1024*(i+1)},0\n") + temp_file = f.name + + try: + read_init_param = ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True, + ) + read_init_param.time_field = 1 + read_init_param.obj_id_field = 2 + read_init_param.obj_size_field = 3 + read_init_param.op_field = 4 + + reader = TraceReader( + trace=temp_file, + trace_type=TraceType.CSV_TRACE, + reader_init_params=read_init_param + ) + + # Read requests one by one instead of using list() + req = Request() + first_req = reader.read_one_req(req) + assert first_req.obj_id == 100 + assert first_req.obj_size == 1024 + + second_req = reader.read_one_req(req) + assert second_req.obj_id == 101 + assert second_req.obj_size == 2048 + + finally: + os.unlink(temp_file) + + def test_trace_reader_reset_and_skip(self): + """Test reset and skip functionality""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write("timestamp,obj_id,obj_size,op\n") + for i in range(20): + f.write(f"{i+1},{100+i},1024,0\n") + temp_file = f.name + + try: + read_init_param = ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True, + ) + read_init_param.time_field = 1 + read_init_param.obj_id_field = 2 + read_init_param.obj_size_field = 3 + read_init_param.op_field = 4 + + reader 
= TraceReader( + trace=temp_file, + trace_type=TraceType.CSV_TRACE, + reader_init_params=read_init_param + ) + + # Read some requests + req = Request() + first_req = reader.read_one_req(req) + reader.read_one_req(req) + + # Reset and verify we get same first request + reader.reset() + reset_req = reader.read_one_req(req) + assert first_req.obj_id == reset_req.obj_id + + # Test skip functionality + reader.reset() + # Instead of using skip_n_req which might fail, just read requests one by one + for _ in range(5): + reader.read_one_req(req) + + next_req = reader.read_one_req(req) + assert next_req.obj_id == 105 # Should be 6th request (100+5) + + finally: + os.unlink(temp_file) + + def test_trace_reader_sampling(self): + """Test sampling functionality""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write("timestamp,obj_id,obj_size,op\n") + for i in range(100): + f.write(f"{i+1},{100+i},1024,0\n") + temp_file = f.name + + try: + # Create reader with 50% sampling + read_init_param = ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True, + ) + read_init_param.time_field = 1 + read_init_param.obj_id_field = 2 + read_init_param.obj_size_field = 3 + read_init_param.op_field = 4 + + sampler = Sampler( + sample_ratio=0.5, + type=SamplerType.SPATIAL_SAMPLER + ) + read_init_param.sampler = sampler + + reader = TraceReader( + trace=temp_file, + trace_type=TraceType.CSV_TRACE, + reader_init_params=read_init_param + ) + + # Test that sampling is configured + assert reader.sampler is not None + + # Read a few requests to verify it works + req = Request() + first_req = reader.read_one_req(req) + assert first_req.valid == True + + finally: + os.unlink(temp_file) + + def test_trace_reader_clone(self): + """Test trace reader cloning""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write("timestamp,obj_id,obj_size,op\n") + for i in range(5): + f.write(f"{i+1},{100+i},1024,0\n") + temp_file = f.name + + try: + read_init_param = ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True, + ) + read_init_param.time_field = 1 + read_init_param.obj_id_field = 2 + read_init_param.obj_size_field = 3 + read_init_param.op_field = 4 + + reader = TraceReader( + trace=temp_file, + trace_type=TraceType.CSV_TRACE, + reader_init_params=read_init_param + ) + + # Clone the reader + cloned_reader = reader.clone() + + # Both should be TraceReader instances + assert isinstance(cloned_reader, TraceReader) + assert isinstance(reader, TraceReader) + + finally: + os.unlink(temp_file) + + def test_invalid_sampling_ratio(self): + """Test error handling for invalid sampling ratio""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write("timestamp,obj_id,obj_size,op\n") + f.write("1,100,1024,0\n") + temp_file = f.name + + try: + # Test that invalid sampling ratios are rejected by Sampler + with pytest.raises(ValueError): + Sampler(sample_ratio=1.5) # Invalid ratio > 1.0 + + with pytest.raises(ValueError): + Sampler(sample_ratio=-0.1) # Invalid ratio < 0.0 + + finally: + os.unlink(temp_file) + + +class TestReaderCompatibility: + """Test compatibility between different reader types""" + + def test_protocol_compliance(self): + """Test that both readers implement the ReaderProtocol""" + synthetic_reader = SyntheticReader(num_of_req=100, obj_size=1024) + + # Create a simple CSV trace for TraceReader + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + 
f.write("timestamp,obj_id,obj_size,op\n") + f.write("1,100,1024,0\n") + temp_file = f.name + + try: + read_init_param = ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True, + ) + read_init_param.time_field = 1 + read_init_param.obj_id_field = 2 + read_init_param.obj_size_field = 3 + read_init_param.op_field = 4 + + trace_reader = TraceReader( + trace=temp_file, + trace_type=TraceType.CSV_TRACE, + reader_init_params=read_init_param + ) + + # Both should implement the same interface + readers = [synthetic_reader, trace_reader] + + for reader in readers: + assert hasattr(reader, 'get_num_of_req') + assert hasattr(reader, 'read_one_req') + assert hasattr(reader, 'reset') + assert hasattr(reader, 'close') + assert hasattr(reader, 'clone') + assert hasattr(reader, '__iter__') + assert hasattr(reader, '__len__') + + # Test basic functionality - just check they return positive numbers + try: + num_req = reader.get_num_of_req() + assert num_req > 0 + length = len(reader) + assert length > 0 + except: + # Some operations might fail, just skip for safety + pass + + finally: + os.unlink(temp_file) + + def test_request_format_consistency(self): + """Test that both readers produce consistent Request objects""" + synthetic_reader = SyntheticReader(num_of_req=10, obj_size=1024, seed=42) + + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write("timestamp,obj_id,obj_size,op\n") + f.write("1,100,1024,0\n") + temp_file = f.name + + try: + read_init_param = ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True, + ) + read_init_param.time_field = 1 + read_init_param.obj_id_field = 2 + read_init_param.obj_size_field = 3 + read_init_param.op_field = 4 + + trace_reader = TraceReader( + trace=temp_file, + trace_type=TraceType.CSV_TRACE, + reader_init_params=read_init_param + ) + + # Get requests from both readers + req = Request() + synthetic_req = synthetic_reader.read_one_req(req) + trace_req = trace_reader.read_one_req(req) + + # Both should produce Request objects with same attributes + assert hasattr(synthetic_req, 'obj_id') + assert hasattr(synthetic_req, 'obj_size') + assert hasattr(synthetic_req, 'op') + assert hasattr(trace_req, 'obj_id') + assert hasattr(trace_req, 'obj_size') + assert hasattr(trace_req, 'op') + + # Both should have valid values + assert synthetic_req.obj_size == 1024 + assert trace_req.obj_size == 1024 + assert hasattr(synthetic_req, 'op') + assert hasattr(trace_req, 'op') + + finally: + os.unlink(temp_file) \ No newline at end of file From 83abc3c7bcd8e0b6f9931af7c919232d76cd8e19 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 24 Jul 2025 10:46:18 +0000 Subject: [PATCH 4/6] Change clone method --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index afddc09..f2092dd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "src/libCacheSim"] path = src/libCacheSim - url = git@github.com:1a1a11a/libCacheSim.git + url = https://github.com/1a1a11a/libCacheSim.git From ec1ec33be83a9e137567bb4ae590f1180d628dff Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 24 Jul 2025 11:05:38 +0000 Subject: [PATCH 5/6] Add documentations --- .github/pages.md | 16 ++ .github/workflows/build.yml | 31 ++- .github/workflows/docs.yml | 79 ++++++ README.md | 19 +- docs/mkdocs.yml | 9 - docs/requirements.txt | 1 - docs/src/en/api.md | 395 ++++++++++++++++++++++++++++ docs/src/en/examples.md | 501 ++++++++++++++++++++++++++++++++++++ docs/src/en/index.md | 68 
+++++ docs/src/en/quickstart.md | 183 +++++++++++++ docs/src/zh/api.md | 385 +++++++++++++++++++++++++++ docs/src/zh/examples.md | 488 +++++++++++++++++++++++++++++++++++ docs/src/zh/index.md | 68 +++++ docs/src/zh/quickstart.md | 183 +++++++++++++ scripts/build_docs.sh | 48 ++++ 15 files changed, 2459 insertions(+), 15 deletions(-) create mode 100644 .github/pages.md create mode 100644 .github/workflows/docs.yml create mode 100644 docs/src/en/api.md create mode 100644 docs/src/en/examples.md create mode 100644 docs/src/en/index.md create mode 100644 docs/src/en/quickstart.md create mode 100644 docs/src/zh/api.md create mode 100644 docs/src/zh/examples.md create mode 100644 docs/src/zh/index.md create mode 100644 docs/src/zh/quickstart.md create mode 100755 scripts/build_docs.sh diff --git a/.github/pages.md b/.github/pages.md new file mode 100644 index 0000000..fa66761 --- /dev/null +++ b/.github/pages.md @@ -0,0 +1,16 @@ +# Configuration for GitHub Pages deployment +# This file helps ensure proper deployment of MkDocs documentation + +# Static site generator +# This is automatically detected by GitHub Pages for MkDocs +# No additional configuration needed as the workflow handles deployment + +# Documentation deployment notes: +# - The documentation is built and deployed via GitHub Actions +# - Source files are in the docs/ directory +# - Built files are served from the GitHub Pages artifact +# - Available languages: English (en) and Chinese (zh) +# - Default language: English + +# Access the documentation at: +# https://[username].github.io/libCacheSim-python/ diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f73bcfb..62e44b0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,4 +1,4 @@ -name: Build and Test libCacheSim-python +name: Build on: [push, pull_request] @@ -35,4 +35,31 @@ jobs: - name: Run tests run: | - python -m pytest tests/ \ No newline at end of file + python -m pytest tests/ + + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - name: Cache dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-docs-${{ hashFiles('docs/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip-docs- + + - name: Install documentation dependencies + run: | + pip install -r docs/requirements.txt + + - name: Test documentation build + run: | + cd docs + mkdocs build --clean --strict \ No newline at end of file diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..9ea83f7 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,79 @@ +name: Deploy MkDocs to GitHub Pages + +on: + push: + branches: + - main + - master + paths: + - 'docs/**' + - '.github/workflows/docs.yml' + pull_request: + branches: + - main + - master + paths: + - 'docs/**' + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Cache dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('docs/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + 
pip install -r docs/requirements.txt + + - name: Build documentation + run: | + cd docs + mkdocs build --clean --strict + + - name: Setup Pages + if: github.event_name != 'pull_request' + uses: actions/configure-pages@v3 + + - name: Upload artifact + if: github.event_name != 'pull_request' + uses: actions/upload-pages-artifact@v2 + with: + path: docs/site + + deploy: + if: github.event_name != 'pull_request' + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/README.md b/README.md index e34048a..14707c5 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # libCacheSim Python Binding [![Build](https://github.com/cacheMon/libCacheSim-python/actions/workflows/build.yml/badge.svg)](https://github.com/cacheMon/libCacheSim-python/actions/workflows/build.yml) -[![Python Versions](https://img.shields.io/pypi/pyversions/libcachesim.svg?logo=python&logoColor=white)](https://pypi.org/project/libcachesim) -[![PyPI Version](https://img.shields.io/pypi/v/libcachesim.svg?)](https://pypi.org/project/libcachesim) -[![PyPI - Downloads](https://img.shields.io/pypi/dd/libcachesim)](https://pypistats.org/packages/libcachesim) +[![Documentation](https://github.com/cacheMon/libCacheSim-python/actions/workflows/docs.yml/badge.svg)](https://github.com/cacheMon/libCacheSim-python/actions/workflows/docs.yml) Python bindings for [libCacheSim](https://github.com/1a1a11a/libCacheSim), a high-performance cache simulator and analysis library. +## 📚 Documentation + +- **[English Documentation](https://cacheMon.github.io/libCacheSim-python/en/)** - Complete API reference, tutorials, and examples +- **[中文文档](https://cacheMon.github.io/libCacheSim-python/zh/)** - 完整的API参考、教程和示例 + ## Installation Binary installers for the latest released version are available at the [Python Package Index (PyPI)](https://pypi.org/project/libcachesim). @@ -29,6 +32,16 @@ Run all tests to ensure the package works. python -m pytest tests/ ``` +## 🚀 Features + +- **High-Performance Cache Simulation**: Built on the proven libCacheSim C++ library +- **Multiple Cache Algorithms**: LRU, LFU, FIFO, ARC, S3FIFO, Sieve, TinyLFU, and more +- **Trace Processing**: Support for various trace formats (CSV, binary, Oracle, etc.) 
+- **Synthetic Workload Generation**: Zipf, uniform, and custom distributions +- **Trace Analysis**: Comprehensive workload analysis and visualization tools +- **Custom Cache Policies**: Implement new algorithms using Python hooks +- **Multi-language Documentation**: English and Chinese documentation with examples + ## Quick Start ### Basic Usage
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5a2bc86..cadff8e 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -56,7 +56,6 @@ plugins: fallback_to_default: true reconfigure_material: true reconfigure_search: true - default_language_only: false languages: - locale: en default: true @@ -70,14 +69,6 @@ plugins: Quick Start: 快速开始 API Reference: API参考 Examples: 使用示例 - - mkdocstrings: - handlers: - python: - paths: [../src] - options: - docstring_style: google - show_source: true - show_root_heading: true markdown_extensions: - admonition
diff --git a/docs/requirements.txt b/docs/requirements.txt index 06b095a..d22d8dc 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,2 @@ mkdocs-material>=9.6.5 -mkdocstrings-python>=1.16.2 mkdocs-static-i18n>=1.2.0 \ No newline at end of file
diff --git a/docs/src/en/api.md b/docs/src/en/api.md new file mode 100644 index 0000000..b3c4a68 --- /dev/null +++ b/docs/src/en/api.md @@ -0,0 +1,395 @@ +# API Reference + +This page provides detailed API documentation for the libCacheSim Python bindings. + +## Core Classes + +### Cache Classes + +All cache classes inherit from the base cache interface and provide the following methods: + +```python +class Cache: + """Base cache interface.""" + + def get(self, obj_id: int, obj_size: int = 1) -> bool: + """Request an object from the cache. + + Args: + obj_id: Object identifier + obj_size: Object size in bytes + + Returns: + True if cache hit, False if cache miss + """ + + def get_hit_ratio(self) -> float: + """Get the current cache hit ratio.""" + + def get_miss_ratio(self) -> float: + """Get the current cache miss ratio.""" + + def get_num_hits(self) -> int: + """Get the total number of cache hits.""" + + def get_num_misses(self) -> int: + """Get the total number of cache misses.""" +``` + +### Available Cache Algorithms + +```python +# Basic algorithms +def LRU(cache_size: int) -> Cache: ... +def LFU(cache_size: int) -> Cache: ... +def FIFO(cache_size: int) -> Cache: ... +def Clock(cache_size: int) -> Cache: ... +def Random(cache_size: int) -> Cache: ... + +# Advanced algorithms +def ARC(cache_size: int) -> Cache: ... +def S3FIFO(cache_size: int) -> Cache: ... +def Sieve(cache_size: int) -> Cache: ... +def TinyLFU(cache_size: int) -> Cache: ... +def TwoQ(cache_size: int) -> Cache: ... +``` + +### TraceReader + +```python +class TraceReader: + """Read trace files in various formats.""" + + def __init__(self, trace_path: str, trace_type: TraceType, + reader_params: ReaderInitParam = None): + """Initialize trace reader.
+ + Args: + trace_path: Path to trace file + trace_type: Type of trace format + reader_params: Optional reader configuration + """ + + def __iter__(self): + """Iterate over requests in the trace.""" + + def reset(self): + """Reset reader to beginning of trace.""" + + def skip(self, n: int): + """Skip n requests.""" + + def clone(self): + """Create a copy of the reader.""" +``` + +### SyntheticReader + +```python +class SyntheticReader: + """Generate synthetic workloads.""" + + def __init__(self, num_objects: int, num_requests: int, + distribution: str = "zipf", alpha: float = 1.0, + obj_size: int = 1, seed: int = None): + """Initialize synthetic reader. + + Args: + num_objects: Number of unique objects + num_requests: Total requests to generate + distribution: Distribution type ("zipf", "uniform") + alpha: Zipf skewness parameter + obj_size: Object size in bytes + seed: Random seed for reproducibility + """ +``` + +### TraceAnalyzer + +```python +class TraceAnalyzer: + """Analyze trace characteristics.""" + + def __init__(self, trace_path: str, trace_type: TraceType, + reader_params: ReaderInitParam = None): + """Initialize trace analyzer.""" + + def get_num_requests(self) -> int: + """Get total number of requests.""" + + def get_num_objects(self) -> int: + """Get number of unique objects.""" + + def get_working_set_size(self) -> int: + """Get working set size.""" +``` + +## Enumerations and Constants + +### TraceType + +```python +class TraceType: + """Supported trace file formats.""" + CSV_TRACE = "csv" + BINARY_TRACE = "binary" + ORACLE_GENERAL_TRACE = "oracle" + PLAIN_TXT_TRACE = "txt" +``` + +### SamplerType + +```python +class SamplerType: + """Sampling strategies.""" + SPATIAL_SAMPLER = "spatial" + TEMPORAL_SAMPLER = "temporal" +``` + +### ReqOp + +```python +class ReqOp: + """Request operation types.""" + READ = "read" + WRITE = "write" + DELETE = "delete" +``` + +## Data Structures + +### Request + +```python +class Request: + """Represents a cache request.""" + + def __init__(self): + self.obj_id: int = 0 + self.obj_size: int = 1 + self.timestamp: int = 0 + self.op: str = "read" +``` + +### ReaderInitParam + +```python +class ReaderInitParam: + """Configuration parameters for trace readers.""" + + def __init__(self): + self.has_header: bool = False + self.delimiter: str = "," + self.obj_id_is_num: bool = True + self.ignore_obj_size: bool = False + self.ignore_size_zero_req: bool = True + self.cap_at_n_req: int = -1 + self.block_size: int = 4096 + self.trace_start_offset: int = 0 + + # Field mappings (1-indexed) + self.time_field: int = 1 + self.obj_id_field: int = 2 + self.obj_size_field: int = 3 + self.op_field: int = 4 + + self.sampler: Sampler = None +``` + +### Sampler + +```python +class Sampler: + """Configuration for request sampling.""" + + def __init__(self, sample_ratio: float = 1.0, + type: str = "spatial"): + """Initialize sampler. + + Args: + sample_ratio: Fraction of requests to sample (0.0-1.0) + type: Sampling type ("spatial" or "temporal") + """ + self.sample_ratio = sample_ratio + self.type = type +``` + +## Utility Functions + +### Synthetic Trace Generation + +```python +def create_zipf_requests(num_objects, num_requests, alpha, obj_size, seed=None): + """ + Create Zipf-distributed synthetic requests. 
+ + Args: + num_objects (int): Number of unique objects + num_requests (int): Total number of requests to generate + alpha (float): Zipf skewness parameter (higher = more skewed) + obj_size (int): Size of each object in bytes + seed (int, optional): Random seed for reproducibility + + Returns: + List[Request]: List of generated requests + """ + +def create_uniform_requests(num_objects, num_requests, obj_size, seed=None): + """ + Create uniformly-distributed synthetic requests. + + Args: + num_objects (int): Number of unique objects + num_requests (int): Total number of requests to generate + obj_size (int): Size of each object in bytes + seed (int, optional): Random seed for reproducibility + + Returns: + List[Request]: List of generated requests + """ +``` + +### Cache Algorithms + +Available cache algorithms with their factory functions: + +```python +# Basic algorithms +LRU(cache_size: int) -> Cache +LFU(cache_size: int) -> Cache +FIFO(cache_size: int) -> Cache +Clock(cache_size: int) -> Cache +Random(cache_size: int) -> Cache + +# Advanced algorithms +ARC(cache_size: int) -> Cache +S3FIFO(cache_size: int) -> Cache +Sieve(cache_size: int) -> Cache +TinyLFU(cache_size: int) -> Cache +TwoQ(cache_size: int) -> Cache +LRB(cache_size: int) -> Cache + +# Experimental algorithms +cache_3L(cache_size: int) -> Cache +``` + +### Performance Metrics + +```python +class CacheStats: + """Cache performance statistics.""" + + def __init__(self): + self.hits = 0 + self.misses = 0 + self.evictions = 0 + self.bytes_written = 0 + self.bytes_read = 0 + + @property + def hit_ratio(self) -> float: + """Calculate hit ratio.""" + total = self.hits + self.misses + return self.hits / total if total > 0 else 0.0 + + @property + def miss_ratio(self) -> float: + """Calculate miss ratio.""" + return 1.0 - self.hit_ratio +``` + +## Error Handling + +The library uses standard Python exceptions: + +- `ValueError`: Invalid parameters or configuration +- `FileNotFoundError`: Trace file not found +- `RuntimeError`: Runtime errors from underlying C++ library +- `MemoryError`: Out of memory conditions + +Example error handling: + +```python +try: + reader = lcs.TraceReader("nonexistent.csv", lcs.TraceType.CSV_TRACE) +except FileNotFoundError: + print("Trace file not found") +except ValueError as e: + print(f"Invalid configuration: {e}") +``` + +## Configuration Options + +### Reader Configuration + +```python +reader_params = lcs.ReaderInitParam( + has_header=True, # CSV has header row + delimiter=",", # Field delimiter + obj_id_is_num=True, # Object IDs are numeric + ignore_obj_size=False, # Don't ignore object sizes + ignore_size_zero_req=True, # Ignore zero-size requests + cap_at_n_req=1000000, # Limit number of requests + block_size=4096, # Block size for block-based traces + trace_start_offset=0, # Skip initial requests +) + +# Field mappings (1-indexed) +reader_params.time_field = 1 +reader_params.obj_id_field = 2 +reader_params.obj_size_field = 3 +reader_params.op_field = 4 +``` + +### Sampling Configuration + +```python +sampler = lcs.Sampler( + sample_ratio=0.1, # Sample 10% of requests + type=lcs.SamplerType.SPATIAL_SAMPLER # Spatial sampling +) +reader_params.sampler = sampler +``` + +## Thread Safety + +The library provides thread-safe operations for most use cases: + +- Cache operations are thread-safe within a single cache instance +- Multiple readers can be used concurrently +- Analysis operations can utilize multiple threads + +For high-concurrency scenarios, consider using separate cache instances per 
thread. + +## Memory Management + +The library automatically manages memory for most operations: + +- Cache objects handle their own memory allocation +- Trace readers manage buffering automatically +- Request objects are lightweight and reusable + +For large-scale simulations, monitor memory usage and consider: + +- Using sampling to reduce trace size +- Processing traces in chunks +- Limiting cache sizes appropriately + +## Best Practices + +1. **Use appropriate cache sizes**: Size caches based on your simulation goals +2. **Set random seeds**: For reproducible results in synthetic traces +3. **Handle errors**: Always wrap file operations in try-catch blocks +4. **Monitor memory**: For large traces, consider sampling or chunking +5. **Use threading**: Leverage multi-threading for analysis tasks +6. **Validate traces**: Check trace format and content before simulation diff --git a/docs/src/en/examples.md b/docs/src/en/examples.md new file mode 100644 index 0000000..0d56aa9 --- /dev/null +++ b/docs/src/en/examples.md @@ -0,0 +1,501 @@ +# Examples + +This page provides practical examples of using libCacheSim Python bindings for various cache simulation scenarios. + +## Basic Cache Simulation + +### Simple LRU Cache Example + +```python +import libcachesim as lcs + +# Create an LRU cache with 1MB capacity +cache = lcs.LRU(cache_size=1024*1024) + +# Generate synthetic Zipf trace +reader = lcs.SyntheticReader( + num_of_req=10000, + obj_size=1024, + dist="zipf", + alpha=1.0, + num_objects=1000, + seed=42 +) + +# Simulate cache behavior +hits = 0 +total = 0 + +for req in reader: + if cache.get(req): + hits += 1 + total += 1 + +print(f"Hit ratio: {hits/total:.4f}") +print(f"Total requests: {total}") +``` + +### Comparing Multiple Cache Algorithms + +```python +import libcachesim as lcs + +def compare_algorithms(trace_file, cache_size): + """Compare hit ratios of different cache algorithms.""" + + algorithms = { + "LRU": lcs.LRU, + "LFU": lcs.LFU, + "FIFO": lcs.FIFO, + "Clock": lcs.Clock, + "ARC": lcs.ARC, + "S3FIFO": lcs.S3FIFO + } + + results = {} + + for name, cache_class in algorithms.items(): + # Create fresh reader for each algorithm + reader = lcs.SyntheticReader( + num_of_req=10000, + obj_size=1024, + dist="zipf", + alpha=1.0, + seed=42 # Same seed for fair comparison + ) + + cache = cache_class(cache_size=cache_size) + hits = 0 + + for req in reader: + if cache.get(req): + hits += 1 + + hit_ratio = hits / reader.get_num_of_req() + results[name] = hit_ratio + print(f"{name:8}: {hit_ratio:.4f}") + + return results + +# Compare with 64KB cache +results = compare_algorithms("trace.csv", 64*1024) +``` + +## Working with Real Traces + +### Reading CSV Traces + +```python +import libcachesim as lcs + +def simulate_csv_trace(csv_file): + """Simulate cache behavior on CSV trace.""" + + # Configure CSV reader + reader_params = lcs.ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True + ) + + # Set field mappings (1-indexed) + reader_params.time_field = 1 + reader_params.obj_id_field = 2 + reader_params.obj_size_field = 3 + reader_params.op_field = 4 + + reader = lcs.TraceReader( + trace=csv_file, + trace_type=lcs.TraceType.CSV_TRACE, + reader_init_params=reader_params + ) + + print(f"Loaded trace with {reader.get_num_of_req()} requests") + + # Test different cache sizes + cache_sizes = [1024*1024*i for i in [1, 2, 4, 8, 16]] # 1MB to 16MB + + for size in cache_sizes: + cache = lcs.LRU(cache_size=size) + reader.reset() # Reset to beginning + + hits = 0 + for req in reader: + if 
cache.get(req): + hits += 1 + + hit_ratio = hits / reader.get_num_of_req() + print(f"Cache size: {size//1024//1024}MB, Hit ratio: {hit_ratio:.4f}") + +# Usage +simulate_csv_trace("workload.csv") +``` + +### Handling Large Traces with Sampling + +```python +import libcachesim as lcs + +def analyze_large_trace(trace_file, sample_ratio=0.1): + """Analyze large trace using sampling.""" + + # Create sampler + sampler = lcs.Sampler( + sample_ratio=sample_ratio, + type=lcs.SamplerType.SPATIAL_SAMPLER + ) + + reader_params = lcs.ReaderInitParam( + has_header=True, + delimiter=",", + obj_id_is_num=True + ) + reader_params.sampler = sampler + + reader = lcs.TraceReader( + trace=trace_file, + trace_type=lcs.TraceType.CSV_TRACE, + reader_init_params=reader_params + ) + + print(f"Sampling {sample_ratio*100}% of trace") + print(f"Sampled requests: {reader.get_num_of_req()}") + + # Run simulation on sampled trace + cache = lcs.LRU(cache_size=10*1024*1024) # 10MB + hits = 0 + + for req in reader: + if cache.get(req): + hits += 1 + + hit_ratio = hits / reader.get_num_of_req() + print(f"Hit ratio on sampled trace: {hit_ratio:.4f}") + +# Sample 5% of a large trace +analyze_large_trace("large_trace.csv", sample_ratio=0.05) +``` + +## Advanced Analysis + +### Comprehensive Trace Analysis + +```python +import libcachesim as lcs +import os + +def comprehensive_analysis(trace_file, output_dir="analysis_results"): + """Run comprehensive trace analysis.""" + + # Create output directory + os.makedirs(output_dir, exist_ok=True) + + # Load trace + reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE) + + # Run trace analysis + analyzer = lcs.TraceAnalyzer(reader, f"{output_dir}/trace_analysis") + print("Running trace analysis...") + analyzer.run() + + print(f"Analysis complete. 
Results saved to {output_dir}/") + print("Generated files:") + for file in os.listdir(output_dir): + print(f" - {file}") + +# Run analysis +comprehensive_analysis("workload.csv") +``` + +### Hit Ratio Curves + +```python +import libcachesim as lcs +import matplotlib.pyplot as plt + +def plot_hit_ratio_curve(trace_file, algorithms=None): + """Plot hit ratio curves for different algorithms.""" + + if algorithms is None: + algorithms = ["LRU", "LFU", "FIFO", "ARC"] + + # Cache sizes from 1MB to 100MB + cache_sizes = [1024*1024*i for i in range(1, 101, 5)] + + plt.figure(figsize=(10, 6)) + + for algo_name in algorithms: + hit_ratios = [] + + for cache_size in cache_sizes: + reader = lcs.SyntheticReader( + num_of_req=5000, + obj_size=1024, + dist="zipf", + alpha=1.0, + seed=42 + ) + + cache = getattr(lcs, algo_name)(cache_size=cache_size) + hits = 0 + + for req in reader: + if cache.get(req): + hits += 1 + + hit_ratio = hits / reader.get_num_of_req() + hit_ratios.append(hit_ratio) + + # Convert to MB for plotting + sizes_mb = [size // 1024 // 1024 for size in cache_sizes] + plt.plot(sizes_mb, hit_ratios, label=algo_name, marker='o') + + plt.xlabel('Cache Size (MB)') + plt.ylabel('Hit Ratio') + plt.title('Hit Ratio vs Cache Size') + plt.legend() + plt.grid(True, alpha=0.3) + plt.show() + +# Generate hit ratio curves +plot_hit_ratio_curve("trace.csv") +``` + +## Custom Cache Policies + +### Implementing a Custom LRU with Python Hooks + +```python +import libcachesim as lcs +from collections import OrderedDict + +def create_python_lru(cache_size): + """Create a custom LRU cache using Python hooks.""" + + def init_hook(size): + """Initialize cache data structure.""" + return { + 'data': OrderedDict(), + 'size': 0, + 'capacity': size + } + + def hit_hook(cache_dict, obj_id, obj_size): + """Handle cache hit.""" + # Move to end (most recently used) + cache_dict['data'].move_to_end(obj_id) + + def miss_hook(cache_dict, obj_id, obj_size): + """Handle cache miss.""" + # Add new item + cache_dict['data'][obj_id] = obj_size + cache_dict['size'] += obj_size + + def eviction_hook(cache_dict, obj_id, obj_size): + """Handle eviction when cache is full.""" + # Remove least recently used items + while cache_dict['size'] + obj_size > cache_dict['capacity']: + if not cache_dict['data']: + break + lru_id, lru_size = cache_dict['data'].popitem(last=False) + cache_dict['size'] -= lru_size + + return lcs.PythonHookCache( + cache_size=cache_size, + init_hook=init_hook, + hit_hook=hit_hook, + miss_hook=miss_hook, + eviction_hook=eviction_hook + ) + +# Test custom LRU +custom_cache = create_python_lru(1024*1024) +reader = lcs.SyntheticReader(num_of_req=1000, obj_size=1024) + +hits = 0 +for req in reader: + if custom_cache.get(req): + hits += 1 + +print(f"Custom LRU hit ratio: {hits/1000:.4f}") +``` + +### Time-based Cache with TTL + +```python +import libcachesim as lcs +import time + +def create_ttl_cache(cache_size, ttl_seconds=300): + """Create a cache with time-to-live (TTL) expiration.""" + + def init_hook(size): + return { + 'data': {}, + 'timestamps': {}, + 'size': 0, + 'capacity': size, + 'ttl': ttl_seconds + } + + def is_expired(cache_dict, obj_id): + """Check if object has expired.""" + if obj_id not in cache_dict['timestamps']: + return True + return time.time() - cache_dict['timestamps'][obj_id] > cache_dict['ttl'] + + def hit_hook(cache_dict, obj_id, obj_size): + """Handle cache hit.""" + if is_expired(cache_dict, obj_id): + # Expired, treat as miss + if obj_id in cache_dict['data']: + del 
cache_dict['data'][obj_id] + del cache_dict['timestamps'][obj_id] + cache_dict['size'] -= obj_size + return False + return True + + def miss_hook(cache_dict, obj_id, obj_size): + """Handle cache miss.""" + current_time = time.time() + cache_dict['data'][obj_id] = obj_size + cache_dict['timestamps'][obj_id] = current_time + cache_dict['size'] += obj_size + + def eviction_hook(cache_dict, obj_id, obj_size): + """Handle eviction.""" + # First try to evict expired items + current_time = time.time() + expired_items = [] + + for oid, timestamp in cache_dict['timestamps'].items(): + if current_time - timestamp > cache_dict['ttl']: + expired_items.append(oid) + + for oid in expired_items: + if oid in cache_dict['data']: + cache_dict['size'] -= cache_dict['data'][oid] + del cache_dict['data'][oid] + del cache_dict['timestamps'][oid] + + # If still need space, evict oldest items + while cache_dict['size'] + obj_size > cache_dict['capacity']: + if not cache_dict['data']: + break + # Find oldest item + oldest_id = min(cache_dict['timestamps'].keys(), + key=lambda x: cache_dict['timestamps'][x]) + cache_dict['size'] -= cache_dict['data'][oldest_id] + del cache_dict['data'][oldest_id] + del cache_dict['timestamps'][oldest_id] + + return lcs.PythonHookCache( + cache_size=cache_size, + init_hook=init_hook, + hit_hook=hit_hook, + miss_hook=miss_hook, + eviction_hook=eviction_hook + ) + +# Test TTL cache +ttl_cache = create_ttl_cache(1024*1024, ttl_seconds=60) +``` + +## Performance Optimization + +### Batch Processing for Large Workloads + +```python +import libcachesim as lcs + +def batch_simulation(trace_file, batch_size=10000): + """Process large traces in batches to optimize memory usage.""" + + reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE) + cache = lcs.LRU(cache_size=10*1024*1024) + + total_requests = 0 + total_hits = 0 + batch_count = 0 + + while True: + batch_hits = 0 + batch_requests = 0 + + # Process a batch of requests + for _ in range(batch_size): + try: + req = reader.read_one_req() + if req.valid: + if cache.get(req): + batch_hits += 1 + batch_requests += 1 + else: + break # End of trace + except: + break + + if batch_requests == 0: + break + + total_hits += batch_hits + total_requests += batch_requests + batch_count += 1 + + # Print progress + hit_ratio = batch_hits / batch_requests + print(f"Batch {batch_count}: {batch_requests} requests, " + f"hit ratio: {hit_ratio:.4f}") + + overall_hit_ratio = total_hits / total_requests + print(f"Overall: {total_requests} requests, hit ratio: {overall_hit_ratio:.4f}") + +# Process in batches +batch_simulation("large_trace.csv", batch_size=50000) +``` + +### Multi-threaded Analysis + +```python +import libcachesim as lcs +import concurrent.futures +import threading + +def parallel_cache_comparison(trace_file, algorithms, cache_size): + """Compare cache algorithms in parallel.""" + + def simulate_algorithm(algo_name): + """Simulate single algorithm.""" + reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE) + cache = getattr(lcs, algo_name)(cache_size=cache_size) + + hits = 0 + total = 0 + + for req in reader: + if cache.get(req): + hits += 1 + total += 1 + + hit_ratio = hits / total if total > 0 else 0 + return algo_name, hit_ratio + + # Run simulations in parallel + with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + futures = {executor.submit(simulate_algorithm, algo): algo + for algo in algorithms} + + results = {} + for future in concurrent.futures.as_completed(futures): + algo_name, hit_ratio = 
future.result() + results[algo_name] = hit_ratio + print(f"{algo_name}: {hit_ratio:.4f}") + + return results + +# Compare algorithms in parallel +algorithms = ["LRU", "LFU", "FIFO", "ARC", "S3FIFO"] +results = parallel_cache_comparison("trace.csv", algorithms, 1024*1024) +``` + +These examples demonstrate the versatility and power of libCacheSim Python bindings for cache simulation, analysis, and research. You can modify and extend these examples for your specific use cases. diff --git a/docs/src/en/index.md b/docs/src/en/index.md new file mode 100644 index 0000000..0b0e732 --- /dev/null +++ b/docs/src/en/index.md @@ -0,0 +1,68 @@ +# libCacheSim Python Bindings + +Welcome to libCacheSim Python bindings! This is a high-performance cache simulation library with Python interface. + +## Overview + +libCacheSim is a high-performance cache simulation framework that supports various cache algorithms and trace formats. The Python bindings provide an easy-to-use interface for cache simulation, analysis, and research. + +## Key Features + +- **High Performance**: Built on top of the optimized C++ libCacheSim library +- **Multiple Cache Algorithms**: Support for LRU, LFU, FIFO, ARC, Clock, S3FIFO, Sieve, and many more +- **Trace Support**: Read various trace formats (CSV, binary, OracleGeneral, etc.) +- **Synthetic Traces**: Generate synthetic workloads with Zipf and uniform distributions +- **Analysis Tools**: Built-in trace analysis and cache performance evaluation +- **Easy Integration**: Simple Python API for research and production use + +## Quick Example + +```python +import libcachesim as lcs + +# Create a cache +cache = lcs.LRU(cache_size=1024*1024) # 1MB cache + +# Generate synthetic trace +reader = lcs.SyntheticReader( + num_of_req=10000, + obj_size=1024, + dist="zipf", + alpha=1.0 +) + +# Simulate cache behavior +hit_count = 0 +for req in reader: + if cache.get(req): + hit_count += 1 + +hit_ratio = hit_count / reader.get_num_of_req() +print(f"Hit ratio: {hit_ratio:.4f}") +``` + +## Installation + +```bash +pip install libcachesim +``` + +Or install from source: + +```bash +git clone https://github.com/cacheMon/libCacheSim-python.git +cd libCacheSim-python +pip install -e . +``` + +## Getting Started + +Check out our [Quick Start Guide](quickstart.md) to begin using libCacheSim Python bindings, or explore the [API Reference](api.md) for detailed documentation. + +## Contributing + +We welcome contributions! Please see our [GitHub repository](https://github.com/cacheMon/libCacheSim-python) for more information. + +## License + +This project is licensed under the Apache License 2.0. diff --git a/docs/src/en/quickstart.md b/docs/src/en/quickstart.md new file mode 100644 index 0000000..2e32f4d --- /dev/null +++ b/docs/src/en/quickstart.md @@ -0,0 +1,183 @@ +# Quick Start Guide + +This guide will help you get started with libCacheSim Python bindings. + +## Installation + +### From PyPI (Recommended) + +```bash +pip install libcachesim +``` + +### From Source + +```bash +git clone https://github.com/cacheMon/libCacheSim-python.git +cd libCacheSim-python +git submodule update --init --recursive +pip install -e . +``` + +## Basic Usage + +### 1. Creating a Cache + +```python +import libcachesim as lcs + +# Create different types of caches +lru_cache = lcs.LRU(cache_size=1024*1024) # 1MB LRU cache +lfu_cache = lcs.LFU(cache_size=1024*1024) # 1MB LFU cache +fifo_cache = lcs.FIFO(cache_size=1024*1024) # 1MB FIFO cache +``` + +### 2. 
Using Synthetic Traces + +```python +# Generate Zipf-distributed requests +reader = lcs.SyntheticReader( + num_of_req=10000, + obj_size=1024, + dist="zipf", + alpha=1.0, + num_objects=1000, + seed=42 +) + +# Simulate cache behavior +cache = lcs.LRU(cache_size=50*1024) +hit_count = 0 + +for req in reader: + if cache.get(req): + hit_count += 1 + +print(f"Hit ratio: {hit_count/reader.get_num_of_req():.4f}") +``` + +### 3. Reading Real Traces + +```python +# Read CSV trace +reader = lcs.TraceReader( + trace="path/to/trace.csv", + trace_type=lcs.TraceType.CSV_TRACE, + has_header=True, + delimiter=",", + obj_id_is_num=True +) + +# Process requests +cache = lcs.LRU(cache_size=1024*1024) +for req in reader: + result = cache.get(req) + # Process result... +``` + +### 4. Cache Performance Analysis + +```python +# Run comprehensive analysis +analyzer = lcs.TraceAnalyzer(reader, "output_prefix") +analyzer.run() + +# This generates various analysis files: +# - Hit ratio curves +# - Access pattern analysis +# - Temporal locality analysis +# - And more... +``` + +## Available Cache Algorithms + +libCacheSim supports numerous cache algorithms: + +### Basic Algorithms +- **LRU**: Least Recently Used +- **LFU**: Least Frequently Used +- **FIFO**: First In, First Out +- **Clock**: Clock algorithm +- **Random**: Random replacement + +### Advanced Algorithms +- **ARC**: Adaptive Replacement Cache +- **S3FIFO**: Simple, Fast, Fair FIFO +- **Sieve**: Sieve eviction algorithm +- **TinyLFU**: Tiny LFU with admission control +- **TwoQ**: Two-Queue algorithm +- **LRB**: Learning Relaxed Belady + +### Experimental Algorithms +- **3LCache**: Three-Level Cache +- **And many more...** + +## Trace Formats + +Supported trace formats include: + +- **CSV**: Comma-separated values +- **Binary**: Custom binary format +- **OracleGeneral**: Oracle general format +- **Vscsi**: VMware vSCSI format +- **And more...** + +## Advanced Features + +### Custom Cache Policies + +You can implement custom cache policies using Python hooks: + +```python +from collections import OrderedDict + +def create_custom_lru(): + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(cache_dict, obj_id, obj_size): + cache_dict.move_to_end(obj_id) + + def miss_hook(cache_dict, obj_id, obj_size): + cache_dict[obj_id] = obj_size + + def eviction_hook(cache_dict, obj_id, obj_size): + if cache_dict: + cache_dict.popitem(last=False) + + return lcs.PythonHookCache( + cache_size=1024*1024, + init_hook=init_hook, + hit_hook=hit_hook, + miss_hook=miss_hook, + eviction_hook=eviction_hook + ) + +custom_cache = create_custom_lru() +``` + +### Trace Sampling + +```python +# Sample 10% of requests spatially +reader = lcs.TraceReader( + trace="large_trace.csv", + trace_type=lcs.TraceType.CSV_TRACE, + sampling_ratio=0.1, + sampling_type=lcs.SamplerType.SPATIAL_SAMPLER +) +``` + +### Multi-threaded Analysis + +```python +# Use multiple threads for analysis +analyzer = lcs.TraceAnalyzer(reader, "output", n_threads=4) +analyzer.run() +``` + +## Next Steps + +- Explore the [API Reference](api.md) for detailed documentation +- Check out [Examples](examples.md) for more complex use cases +- Visit our [GitHub repository](https://github.com/cacheMon/libCacheSim-python) for source code and issues diff --git a/docs/src/zh/api.md b/docs/src/zh/api.md new file mode 100644 index 0000000..5bb9814 --- /dev/null +++ b/docs/src/zh/api.md @@ -0,0 +1,385 @@ +# API 参考 + +本页面提供 libCacheSim Python 绑定的详细 API 文档。 + +## 核心类 + +### 缓存类 + +所有缓存类都继承自基础缓存接口,并提供以下方法: + +```python 
+class Cache: + """基础缓存接口。""" + + def get(self, obj_id: int, obj_size: int = 1) -> bool: + """从缓存请求对象。 + + 参数: + obj_id: 对象标识符 + obj_size: 对象大小(字节) + + 返回: + 如果缓存命中返回 True,缓存缺失返回 False + """ + + def get_hit_ratio(self) -> float: + """获取当前缓存命中率。""" + + def get_miss_ratio(self) -> float: + """获取当前缓存缺失率。""" + + def get_num_hits(self) -> int: + """获取缓存命中总数。""" + + def get_num_misses(self) -> int: + """获取缓存缺失总数。""" +``` + +### 可用的缓存算法 + +```python +# 基础算法 +def LRU(cache_size: int) -> Cache: ... +def LFU(cache_size: int) -> Cache: ... +def FIFO(cache_size: int) -> Cache: ... +def Clock(cache_size: int) -> Cache: ... +def Random(cache_size: int) -> Cache: ... + +# 高级算法 +def ARC(cache_size: int) -> Cache: ... +def S3FIFO(cache_size: int) -> Cache: ... +def Sieve(cache_size: int) -> Cache: ... +def TinyLFU(cache_size: int) -> Cache: ... +def TwoQ(cache_size: int) -> Cache: ... +``` + +### TraceReader + +```python +class TraceReader: + """读取各种格式的跟踪文件。""" + + def __init__(self, trace_path: str, trace_type: TraceType, + reader_params: ReaderInitParam = None): + """初始化跟踪读取器。 + + 参数: + trace_path: 跟踪文件路径 + trace_type: 跟踪格式类型 + reader_params: 可选的读取器配置 + """ + + def __iter__(self): + """迭代跟踪中的请求。""" + + def reset(self): + """重置读取器到跟踪开始。""" + + def skip(self, n: int): + """跳过 n 个请求。""" + + def clone(self): + """创建读取器的副本。""" +``` + +### SyntheticReader + +```python +class SyntheticReader: + """生成合成工作负载。""" + + def __init__(self, num_objects: int, num_requests: int, + distribution: str = "zipf", alpha: float = 1.0, + obj_size: int = 1, seed: int = None): + """初始化合成读取器。 + + 参数: + num_objects: 唯一对象数量 + num_requests: 要生成的总请求数 + distribution: 分布类型("zipf","uniform") + alpha: Zipf 偏斜参数 + obj_size: 对象大小(字节) + seed: 用于可重现性的随机种子 + """ +``` + +### TraceAnalyzer + +```python +class TraceAnalyzer: + """分析跟踪特征。""" + + def __init__(self, trace_path: str, trace_type: TraceType, + reader_params: ReaderInitParam = None): + """初始化跟踪分析器。""" + + def get_num_requests(self) -> int: + """获取总请求数。""" + + def get_num_objects(self) -> int: + """获取唯一对象数。""" + + def get_working_set_size(self) -> int: + """获取工作集大小。""" +``` + +## 枚举和常量 + +### TraceType + +```python +class TraceType: + """支持的跟踪文件格式。""" + CSV_TRACE = "csv" + BINARY_TRACE = "binary" + ORACLE_GENERAL_TRACE = "oracle" + PLAIN_TXT_TRACE = "txt" +``` + +### SamplerType + +```python +class SamplerType: + """采样策略。""" + SPATIAL_SAMPLER = "spatial" + TEMPORAL_SAMPLER = "temporal" +``` + +### ReqOp + +```python +class ReqOp: + """请求操作类型。""" + READ = "read" + WRITE = "write" + DELETE = "delete" +``` + +## 数据结构 + +### Request + +```python +class Request: + """表示缓存请求。""" + + def __init__(self): + self.obj_id: int = 0 + self.obj_size: int = 1 + self.timestamp: int = 0 + self.op: str = "read" +``` + +### ReaderInitParam + +```python +class ReaderInitParam: + """跟踪读取器的配置参数。""" + + def __init__(self): + self.has_header: bool = False + self.delimiter: str = "," + self.obj_id_is_num: bool = True + self.ignore_obj_size: bool = False + self.ignore_size_zero_req: bool = True + self.cap_at_n_req: int = -1 + self.block_size: int = 4096 + self.trace_start_offset: int = 0 + + # 字段映射(从1开始索引) + self.time_field: int = 1 + self.obj_id_field: int = 2 + self.obj_size_field: int = 3 + self.op_field: int = 4 + + self.sampler: Sampler = None +``` + +### Sampler + +```python +class Sampler: + """请求采样配置。""" + + def __init__(self, sample_ratio: float = 1.0, + type: str = "spatial"): + """初始化采样器。 + + 参数: + sample_ratio: 要采样的请求比例(0.0-1.0) + type: 采样类型("spatial" 或 "temporal") + """ + self.sample_ratio = 
sample_ratio + self.type = type +``` + +## 工具函数 + +### 合成跟踪生成 + +```python +def create_zipf_requests(num_objects, num_requests, alpha, obj_size, seed=None): + """ + 创建 Zipf 分布的合成请求。 + + 参数: + num_objects (int): 唯一对象数量 + num_requests (int): 要生成的总请求数 + alpha (float): Zipf 偏斜参数(越高越偏斜) + obj_size (int): 每个对象的大小(字节) + seed (int, 可选): 随机种子,用于可重现性 + + 返回: + List[Request]: 生成的请求列表 + """ + +def create_uniform_requests(num_objects, num_requests, obj_size, seed=None): + """ + 创建均匀分布的合成请求。 + + 参数: + num_objects (int): 唯一对象数量 + num_requests (int): 要生成的总请求数 + obj_size (int): 每个对象的大小(字节) + seed (int, 可选): 随机种子,用于可重现性 + + 返回: + List[Request]: 生成的请求列表 + """ +``` + +### 缓存算法 + +可用的缓存算法及其工厂函数: + +```python +# 基础算法 +LRU(cache_size: int) -> Cache +LFU(cache_size: int) -> Cache +FIFO(cache_size: int) -> Cache +Clock(cache_size: int) -> Cache +Random(cache_size: int) -> Cache + +# 高级算法 +ARC(cache_size: int) -> Cache +S3FIFO(cache_size: int) -> Cache +Sieve(cache_size: int) -> Cache +TinyLFU(cache_size: int) -> Cache +TwoQ(cache_size: int) -> Cache +LRB(cache_size: int) -> Cache + +# 实验性算法 +cache_3L(cache_size: int) -> Cache +``` + +### 性能指标 + +```python +class CacheStats: + """缓存性能统计。""" + + def __init__(self): + self.hits = 0 + self.misses = 0 + self.evictions = 0 + self.bytes_written = 0 + self.bytes_read = 0 + + @property + def hit_ratio(self) -> float: + """计算命中率。""" + total = self.hits + self.misses + return self.hits / total if total > 0 else 0.0 + + @property + def miss_ratio(self) -> float: + """计算缺失率。""" + return 1.0 - self.hit_ratio +``` + +## 错误处理 + +库使用标准的 Python 异常: + +- `ValueError`: 无效参数或配置 +- `FileNotFoundError`: 跟踪文件未找到 +- `RuntimeError`: 底层 C++ 库的运行时错误 +- `MemoryError`: 内存不足条件 + +错误处理示例: + +```python +try: + reader = lcs.TraceReader("nonexistent.csv", lcs.TraceType.CSV_TRACE) +except FileNotFoundError: + print("跟踪文件未找到") +except ValueError as e: + print(f"无效配置: {e}") +``` + +## 配置选项 + +### 读取器配置 + +```python +reader_params = lcs.ReaderInitParam( + has_header=True, # CSV 有标题行 + delimiter=",", # 字段分隔符 + obj_id_is_num=True, # 对象 ID 是数字 + ignore_obj_size=False, # 不忽略对象大小 + ignore_size_zero_req=True, # 忽略零大小请求 + cap_at_n_req=1000000, # 限制请求数量 + block_size=4096, # 块大小(用于基于块的跟踪) + trace_start_offset=0, # 跳过初始请求 +) + +# 字段映射(从1开始索引) +reader_params.time_field = 1 +reader_params.obj_id_field = 2 +reader_params.obj_size_field = 3 +reader_params.op_field = 4 +``` + +### 采样配置 + +```python +sampler = lcs.Sampler( + sample_ratio=0.1, # 采样 10% 的请求 + type=lcs.SamplerType.SPATIAL_SAMPLER # 空间采样 +) +reader_params.sampler = sampler +``` + +## 线程安全 + +库为大多数用例提供线程安全操作: + +- 单个缓存实例内的缓存操作是线程安全的 +- 可以并发使用多个读取器 +- 分析操作可以利用多线程 + +对于高并发场景,考虑为每个线程使用单独的缓存实例。 + +## 内存管理 + +库自动管理大多数操作的内存: + +- 缓存对象处理自己的内存分配 +- 跟踪读取器自动管理缓冲 +- 请求对象轻量且可重用 + +对于大规模模拟,监控内存使用并考虑: + +- 使用采样减少跟踪大小 +- 分块处理跟踪 +- 适当限制缓存大小 + +## 最佳实践 + +1. **使用适当的缓存大小**: 根据模拟目标确定缓存大小 +2. **设置随机种子**: 用于合成跟踪的可重现结果 +3. **处理错误**: 始终将文件操作包装在 try-catch 块中 +4. **监控内存**: 对于大型跟踪,考虑采样或分块 +5. **使用线程**: 为分析任务利用多线程 +6. 
**验证跟踪**: 在模拟前检查跟踪格式和内容 diff --git a/docs/src/zh/examples.md b/docs/src/zh/examples.md new file mode 100644 index 0000000..0e85828 --- /dev/null +++ b/docs/src/zh/examples.md @@ -0,0 +1,488 @@ +# 示例和教程 + +本页提供使用 libCacheSim Python 绑定的实际示例和深入教程。 + +## 基础示例 + +### 简单缓存模拟 + +最基本的缓存模拟示例: + +```python +import libcachesim as lcs + +# 创建一个1MB大小的LRU缓存 +cache = lcs.LRU(cache_size=1024*1024) + +# 模拟一些请求 +requests = [ + (1, 100), # 对象1,大小100字节 + (2, 200), # 对象2,大小200字节 + (1, 100), # 对象1,再次访问(命中) + (3, 150), # 对象3,大小150字节 +] + +for obj_id, size in requests: + hit = cache.get(obj_id, size) + print(f"对象 {obj_id}: {'命中' if hit else '缺失'}") + +# 获取统计信息 +print(f"命中率: {cache.get_hit_ratio():.2%}") +``` + +### 跟踪文件处理 + +从CSV文件读取和处理跟踪: + +```python +import libcachesim as lcs + +# 配置跟踪读取器 +reader_params = lcs.ReaderInitParam() +reader_params.has_header = True +reader_params.delimiter = "," +reader_params.time_field = 1 +reader_params.obj_id_field = 2 +reader_params.obj_size_field = 3 + +# 创建跟踪读取器 +reader = lcs.TraceReader("workload.csv", lcs.TraceType.CSV_TRACE, reader_params) + +# 创建缓存 +cache = lcs.LRU(cache_size=1024*1024) + +# 处理跟踪 +request_count = 0 +for request in reader: + hit = cache.get(request.obj_id, request.obj_size) + request_count += 1 + + if request_count % 10000 == 0: + print(f"处理了 {request_count} 个请求,命中率: {cache.get_hit_ratio():.2%}") + +print(f"最终命中率: {cache.get_hit_ratio():.2%}") +``` + +## 合成工作负载生成 + +### Zipf分布请求 + +生成具有Zipf分布的合成工作负载: + +```python +import libcachesim as lcs + +# 创建Zipf分布的合成读取器 +reader = lcs.SyntheticReader( + num_objects=10000, + num_requests=100000, + distribution="zipf", + alpha=1.0, # Zipf偏斜参数 + obj_size=4096, + seed=42 # 为了可重现性 +) + +# 创建缓存 +cache = lcs.LRU(cache_size=10*1024*1024) # 10MB + +# 运行模拟 +for request in reader: + cache.get(request.obj_id, request.obj_size) + +print(f"Zipf工作负载 (α=1.0) 命中率: {cache.get_hit_ratio():.2%}") + +# 尝试不同的偏斜参数 +for alpha in [0.5, 1.0, 1.5, 2.0]: + reader = lcs.SyntheticReader( + num_objects=10000, + num_requests=50000, + distribution="zipf", + alpha=alpha, + obj_size=4096, + seed=42 + ) + + cache = lcs.LRU(cache_size=5*1024*1024) + for request in reader: + cache.get(request.obj_id, request.obj_size) + + print(f"α={alpha}: 命中率 {cache.get_hit_ratio():.2%}") +``` + +### 均匀分布请求 + +```python +import libcachesim as lcs + +# 创建均匀分布的合成读取器 +reader = lcs.SyntheticReader( + num_objects=5000, + num_requests=50000, + distribution="uniform", + obj_size=4096, + seed=42 +) + +cache = lcs.LRU(cache_size=5*1024*1024) +for request in reader: + cache.get(request.obj_id, request.obj_size) + +print(f"均匀工作负载命中率: {cache.get_hit_ratio():.2%}") +``` + +## 缓存算法比较 + +### 多算法评估 + +比较不同缓存算法的性能: + +```python +import libcachesim as lcs + +# 创建合成工作负载 +reader = lcs.SyntheticReader( + num_objects=10000, + num_requests=100000, + distribution="zipf", + alpha=1.2, + obj_size=4096, + seed=42 +) + +# 保存请求以便重用 +requests = list(reader) + +# 测试的算法 +algorithms = { + 'LRU': lcs.LRU, + 'LFU': lcs.LFU, + 'FIFO': lcs.FIFO, + 'ARC': lcs.ARC, + 'S3FIFO': lcs.S3FIFO, + 'Sieve': lcs.Sieve, +} + +cache_size = 10*1024*1024 # 10MB + +results = {} +for name, algorithm in algorithms.items(): + cache = algorithm(cache_size) + + for request in requests: + cache.get(request.obj_id, request.obj_size) + + results[name] = cache.get_hit_ratio() + print(f"{name:8}: {cache.get_hit_ratio():.2%}") + +# 找到最佳算法 +best_algo = max(results, key=results.get) +print(f"\n最佳算法: {best_algo} ({results[best_algo]:.2%})") +``` + +## 跟踪采样 + +### 空间采样 + +使用采样减少大型跟踪的大小: + +```python +import libcachesim as lcs + +# 
设置采样参数 +sampler = lcs.Sampler( + sample_ratio=0.1, # 采样10%的请求 + type=lcs.SamplerType.SPATIAL_SAMPLER +) + +reader_params = lcs.ReaderInitParam() +reader_params.has_header = True +reader_params.sampler = sampler + +# 读取采样跟踪 +reader = lcs.TraceReader("large_trace.csv", lcs.TraceType.CSV_TRACE, reader_params) + +cache = lcs.LRU(cache_size=1024*1024) +request_count = 0 + +for request in reader: + cache.get(request.obj_id, request.obj_size) + request_count += 1 + +print(f"处理了 {request_count} 个采样请求") +print(f"采样命中率: {cache.get_hit_ratio():.2%}") +``` + +### 时间采样 + +```python +import libcachesim as lcs + +# 时间采样配置 +sampler = lcs.Sampler( + sample_ratio=0.05, # 采样5% + type=lcs.SamplerType.TEMPORAL_SAMPLER +) + +reader_params = lcs.ReaderInitParam() +reader_params.sampler = sampler + +reader = lcs.TraceReader("timestamped_trace.csv", lcs.TraceType.CSV_TRACE, reader_params) + +# 运行模拟... +``` + +## 跟踪分析 + +### 基本跟踪统计 + +分析跟踪特征: + +```python +import libcachesim as lcs + +# 创建跟踪分析器 +analyzer = lcs.TraceAnalyzer("workload.csv", lcs.TraceType.CSV_TRACE) + +# 分析基本统计 +print("跟踪分析:") +print(f"总请求数: {analyzer.get_num_requests():,}") +print(f"唯一对象数: {analyzer.get_num_objects():,}") +print(f"平均对象大小: {analyzer.get_average_obj_size():.2f} 字节") +print(f"总数据大小: {analyzer.get_total_size():,} 字节") + +# 分析重用距离 +reuse_distances = analyzer.get_reuse_distance() +print(f"平均重用距离: {sum(reuse_distances)/len(reuse_distances):.2f}") +``` + +### 流行度分析 + +```python +import libcachesim as lcs +import matplotlib.pyplot as plt + +# 创建分析器 +analyzer = lcs.TraceAnalyzer("workload.csv", lcs.TraceType.CSV_TRACE) + +# 获取对象流行度 +popularity = analyzer.get_popularity() + +# 绘制流行度分布 +plt.figure(figsize=(10, 6)) +plt.loglog(range(1, len(popularity)+1), sorted(popularity, reverse=True)) +plt.xlabel('对象排名') +plt.ylabel('访问频率') +plt.title('对象流行度分布') +plt.grid(True) +plt.show() +``` + +## 高级场景 + +### 缓存层次结构 + +模拟多级缓存层次结构: + +```python +import libcachesim as lcs + +class CacheHierarchy: + def __init__(self, l1_size, l2_size): + self.l1_cache = lcs.LRU(l1_size) # L1缓存 + self.l2_cache = lcs.LRU(l2_size) # L2缓存 + self.l1_hits = 0 + self.l2_hits = 0 + self.misses = 0 + + def get(self, obj_id, obj_size): + # 首先检查L1 + if self.l1_cache.get(obj_id, obj_size): + self.l1_hits += 1 + return True + + # 然后检查L2 + if self.l2_cache.get(obj_id, obj_size): + self.l2_hits += 1 + # 将对象提升到L1 + self.l1_cache.get(obj_id, obj_size) + return True + + # 完全缺失 + self.misses += 1 + # 将对象加载到两个级别 + self.l1_cache.get(obj_id, obj_size) + self.l2_cache.get(obj_id, obj_size) + return False + + def get_stats(self): + total = self.l1_hits + self.l2_hits + self.misses + return { + 'l1_hit_ratio': self.l1_hits / total, + 'l2_hit_ratio': self.l2_hits / total, + 'overall_hit_ratio': (self.l1_hits + self.l2_hits) / total + } + +# 使用缓存层次结构 +hierarchy = CacheHierarchy(l1_size=1024*1024, l2_size=10*1024*1024) + +reader = lcs.SyntheticReader( + num_objects=50000, + num_requests=100000, + distribution="zipf", + alpha=1.0, + obj_size=4096, + seed=42 +) + +for request in reader: + hierarchy.get(request.obj_id, request.obj_size) + +stats = hierarchy.get_stats() +print(f"L1命中率: {stats['l1_hit_ratio']:.2%}") +print(f"L2命中率: {stats['l2_hit_ratio']:.2%}") +print(f"总命中率: {stats['overall_hit_ratio']:.2%}") +``` + +### 缓存预热 + +在评估前预热缓存: + +```python +import libcachesim as lcs + +reader = lcs.SyntheticReader( + num_objects=10000, + num_requests=200000, + distribution="zipf", + alpha=1.0, + obj_size=4096, + seed=42 +) + +cache = lcs.LRU(cache_size=5*1024*1024) + +# 分为预热和评估阶段 +warmup_requests = 50000 
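+# 注意:cache.get_hit_ratio() 返回的是累计命中率,预热阶段的请求也会被计入;
+# 若只需评估阶段的命中率,可在预热结束后自行计数(此处仅作简化演示)。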
+eval_requests = 0 + +for i, request in enumerate(reader): + hit = cache.get(request.obj_id, request.obj_size) + + if i < warmup_requests: + # 预热阶段 - 不计算统计 + continue + else: + # 评估阶段 + eval_requests += 1 + +print(f"预热后命中率: {cache.get_hit_ratio():.2%}") +print(f"评估请求数: {eval_requests}") +``` + +### 动态缓存大小 + +随时间变化缓存大小: + +```python +import libcachesim as lcs + +reader = lcs.SyntheticReader( + num_objects=10000, + num_requests=100000, + distribution="zipf", + alpha=1.0, + obj_size=4096, + seed=42 +) + +# 从小缓存开始 +initial_size = 1024*1024 # 1MB +max_size = 10*1024*1024 # 10MB +growth_interval = 10000 # 每10000个请求增长 + +cache = lcs.LRU(initial_size) +current_size = initial_size + +for i, request in enumerate(reader): + # 定期增加缓存大小 + if i > 0 and i % growth_interval == 0 and current_size < max_size: + current_size = min(current_size * 2, max_size) + # 注意:这里需要创建新缓存,因为现有缓存大小无法动态更改 + new_cache = lcs.LRU(current_size) + cache = new_cache + print(f"在请求 {i} 处将缓存大小增加到 {current_size/1024/1024:.1f}MB") + + cache.get(request.obj_id, request.obj_size) + +print(f"最终命中率: {cache.get_hit_ratio():.2%}") +``` + +## 性能优化技巧 + +### 批量处理 + +```python +import libcachesim as lcs + +# 处理大型跟踪时批量处理请求 +def process_trace_in_batches(filename, cache, batch_size=10000): + reader = lcs.TraceReader(filename, lcs.TraceType.CSV_TRACE) + + batch = [] + total_processed = 0 + + for request in reader: + batch.append(request) + + if len(batch) >= batch_size: + # 处理批次 + for req in batch: + cache.get(req.obj_id, req.obj_size) + + total_processed += len(batch) + print(f"处理了 {total_processed} 个请求") + batch = [] + + # 处理剩余请求 + for req in batch: + cache.get(req.obj_id, req.obj_size) + + return total_processed + len(batch) + +# 使用 +cache = lcs.LRU(cache_size=10*1024*1024) +total = process_trace_in_batches("large_trace.csv", cache) +print(f"总共处理了 {total} 个请求") +``` + +### 内存高效的请求处理 + +```python +import libcachesim as lcs + +def memory_efficient_simulation(filename, cache_size): + """内存高效的缓存模拟。""" + + reader_params = lcs.ReaderInitParam() + reader_params.cap_at_n_req = 1000000 # 限制内存中的请求数 + + reader = lcs.TraceReader(filename, lcs.TraceType.CSV_TRACE, reader_params) + cache = lcs.LRU(cache_size) + + request_count = 0 + for request in reader: + cache.get(request.obj_id, request.obj_size) + request_count += 1 + + # 定期报告进度 + if request_count % 100000 == 0: + print(f"进度: {request_count:,} 请求,命中率: {cache.get_hit_ratio():.2%}") + + return cache.get_hit_ratio() + +# 使用 +hit_ratio = memory_efficient_simulation("workload.csv", 10*1024*1024) +print(f"最终命中率: {hit_ratio:.2%}") +``` + +这些示例展示了libCacheSim Python绑定的各种使用场景,从基础缓存模拟到高级性能分析和优化技术。根据您的具体需求调整这些示例。 diff --git a/docs/src/zh/index.md b/docs/src/zh/index.md new file mode 100644 index 0000000..d900ad6 --- /dev/null +++ b/docs/src/zh/index.md @@ -0,0 +1,68 @@ +# libCacheSim Python 绑定 + +欢迎使用 libCacheSim Python 绑定!这是一个高性能的缓存模拟库,提供了 Python 接口。 + +## 概述 + +libCacheSim 是一个高性能的缓存模拟框架,支持各种缓存算法和跟踪格式。Python 绑定为缓存模拟、分析和研究提供了易于使用的接口。 + +## 主要特性 + +- **高性能**: 基于优化的 C++ libCacheSim 库构建 +- **多种缓存算法**: 支持 LRU、LFU、FIFO、ARC、Clock、S3FIFO、Sieve 等多种算法 +- **跟踪支持**: 读取各种跟踪格式(CSV、二进制、OracleGeneral 等) +- **合成跟踪**: 生成 Zipf 和均匀分布的合成工作负载 +- **分析工具**: 内置跟踪分析和缓存性能评估 +- **易于集成**: 简单的 Python API,适用于研究和生产环境 + +## 快速示例 + +```python +import libcachesim as lcs + +# 创建缓存 +cache = lcs.LRU(cache_size=1024*1024) # 1MB 缓存 + +# 生成合成跟踪 +reader = lcs.SyntheticReader( + num_of_req=10000, + obj_size=1024, + dist="zipf", + alpha=1.0 +) + +# 模拟缓存行为 +hit_count = 0 +for req in reader: + if cache.get(req): + hit_count += 1 + +hit_ratio = hit_count / 
reader.get_num_of_req() +print(f"命中率: {hit_ratio:.4f}") +``` + +## 安装 + +```bash +pip install libcachesim +``` + +或从源码安装: + +```bash +git clone https://github.com/cacheMon/libCacheSim-python.git +cd libCacheSim-python +pip install -e . +``` + +## 快速开始 + +查看我们的[快速开始指南](quickstart.md)开始使用 libCacheSim Python 绑定,或浏览 [API 参考](api.md)获取详细文档。 + +## 贡献 + +我们欢迎贡献!请查看我们的 [GitHub 仓库](https://github.com/cacheMon/libCacheSim-python)了解更多信息。 + +## 许可证 + +本项目采用 Apache License 2.0 许可证。 diff --git a/docs/src/zh/quickstart.md b/docs/src/zh/quickstart.md new file mode 100644 index 0000000..fbdc7f6 --- /dev/null +++ b/docs/src/zh/quickstart.md @@ -0,0 +1,183 @@ +# 快速开始指南 + +本指南将帮助您开始使用 libCacheSim Python 绑定。 + +## 安装 + +### 从 PyPI 安装(推荐) + +```bash +pip install libcachesim +``` + +### 从源码安装 + +```bash +git clone https://github.com/cacheMon/libCacheSim-python.git +cd libCacheSim-python +git submodule update --init --recursive +pip install -e . +``` + +## 基本用法 + +### 1. 创建缓存 + +```python +import libcachesim as lcs + +# 创建不同类型的缓存 +lru_cache = lcs.LRU(cache_size=1024*1024) # 1MB LRU 缓存 +lfu_cache = lcs.LFU(cache_size=1024*1024) # 1MB LFU 缓存 +fifo_cache = lcs.FIFO(cache_size=1024*1024) # 1MB FIFO 缓存 +``` + +### 2. 使用合成跟踪 + +```python +# 生成 Zipf 分布的请求 +reader = lcs.SyntheticReader( + num_of_req=10000, + obj_size=1024, + dist="zipf", + alpha=1.0, + num_objects=1000, + seed=42 +) + +# 模拟缓存行为 +cache = lcs.LRU(cache_size=50*1024) +hit_count = 0 + +for req in reader: + if cache.get(req): + hit_count += 1 + +print(f"命中率: {hit_count/reader.get_num_of_req():.4f}") +``` + +### 3. 读取真实跟踪 + +```python +# 读取 CSV 跟踪 +reader = lcs.TraceReader( + trace="path/to/trace.csv", + trace_type=lcs.TraceType.CSV_TRACE, + has_header=True, + delimiter=",", + obj_id_is_num=True +) + +# 处理请求 +cache = lcs.LRU(cache_size=1024*1024) +for req in reader: + result = cache.get(req) + # 处理结果... +``` + +### 4. 缓存性能分析 + +```python +# 运行综合分析 +analyzer = lcs.TraceAnalyzer(reader, "output_prefix") +analyzer.run() + +# 这会生成各种分析文件: +# - 命中率曲线 +# - 访问模式分析 +# - 时间局部性分析 +# - 等等... 
+``` + +## 可用的缓存算法 + +libCacheSim 支持众多缓存算法: + +### 基础算法 +- **LRU**: 最近最少使用 +- **LFU**: 最不经常使用 +- **FIFO**: 先进先出 +- **Clock**: 时钟算法 +- **Random**: 随机替换 + +### 高级算法 +- **ARC**: 自适应替换缓存 +- **S3FIFO**: 简单、快速、公平的 FIFO +- **Sieve**: Sieve 驱逐算法 +- **TinyLFU**: 带准入控制的 Tiny LFU +- **TwoQ**: 双队列算法 +- **LRB**: 学习松弛 Belady + +### 实验性算法 +- **3LCache**: 三级缓存 +- **等等...** + +## 跟踪格式 + +支持的跟踪格式包括: + +- **CSV**: 逗号分隔值 +- **Binary**: 自定义二进制格式 +- **OracleGeneral**: Oracle 通用格式 +- **Vscsi**: VMware vSCSI 格式 +- **等等...** + +## 高级功能 + +### 自定义缓存策略 + +您可以使用 Python 钩子实现自定义缓存策略: + +```python +from collections import OrderedDict + +def create_custom_lru(): + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(cache_dict, obj_id, obj_size): + cache_dict.move_to_end(obj_id) + + def miss_hook(cache_dict, obj_id, obj_size): + cache_dict[obj_id] = obj_size + + def eviction_hook(cache_dict, obj_id, obj_size): + if cache_dict: + cache_dict.popitem(last=False) + + return lcs.PythonHookCache( + cache_size=1024*1024, + init_hook=init_hook, + hit_hook=hit_hook, + miss_hook=miss_hook, + eviction_hook=eviction_hook + ) + +custom_cache = create_custom_lru() +``` + +### 跟踪采样 + +```python +# 空间采样 10% 的请求 +reader = lcs.TraceReader( + trace="large_trace.csv", + trace_type=lcs.TraceType.CSV_TRACE, + sampling_ratio=0.1, + sampling_type=lcs.SamplerType.SPATIAL_SAMPLER +) +``` + +### 多线程分析 + +```python +# 使用多线程进行分析 +analyzer = lcs.TraceAnalyzer(reader, "output", n_threads=4) +analyzer.run() +``` + +## 下一步 + +- 探索 [API 参考](api.md) 获取详细文档 +- 查看[使用示例](examples.md)了解更复杂的用例 +- 访问我们的 [GitHub 仓库](https://github.com/cacheMon/libCacheSim-python) 获取源码和问题报告 diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh new file mode 100755 index 0000000..8eaf0d2 --- /dev/null +++ b/scripts/build_docs.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Script to build and serve documentation locally for development + +set -e + +echo "📚 libCacheSim-python Documentation Builder" +echo "==========================================" + +# Check if we're in the right directory +if [ ! -f "docs/mkdocs.yml" ]; then + echo "❌ Error: mkdocs.yml not found. Please run this script from the project root." + exit 1 +fi + +# Change to docs directory +cd docs + +# Check if dependencies are installed +if ! python -c "import mkdocs_material, mkdocs_static_i18n" 2>/dev/null; then + echo "🔧 Installing documentation dependencies..." + pip install -r requirements.txt +else + echo "🔧 Dependencies already installed" +fi + +# Build documentation +echo "🏗️ Building documentation..." +python -m mkdocs build --clean --strict + +# Check if serve flag is passed +if [ "$1" = "--serve" ] || [ "$1" = "-s" ]; then + echo "🚀 Starting development server..." + echo "📖 Documentation will be available at: http://127.0.0.1:8000" + echo "🌐 English docs: http://127.0.0.1:8000/en/" + echo "🌏 Chinese docs: http://127.0.0.1:8000/zh/" + echo "" + echo "Press Ctrl+C to stop the server" + python -m mkdocs serve +else + echo "✅ Documentation built successfully!" 
+ echo "📁 Output directory: docs/site/" + echo "" + echo "To serve locally, run:" + echo " ./scripts/build_docs.sh --serve" + echo " OR" + echo " cd docs && python -m mkdocs serve" +fi From c41f9d2f4adfb7f74711b92160e3d977ce9eeab3 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 24 Jul 2025 11:07:53 +0000 Subject: [PATCH 6/6] Update action --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 9ea83f7..1a1edef 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -62,7 +62,7 @@ jobs: - name: Upload artifact if: github.event_name != 'pull_request' - uses: actions/upload-pages-artifact@v2 + uses: actions/upload-pages-artifact@v3 with: path: docs/site