diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8b67b2b..bc97a64 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,6 +1,16 @@
name: Build
-on: [push, pull_request]
+on:
+ push:
+ paths:
+ - 'src/**'
+ - 'libcachesim/**'
+ - 'tests/**'
+ pull_request:
+ paths:
+ - 'src/**'
+ - 'libcachesim/**'
+ - 'tests/**'
permissions:
contents: read
diff --git a/README.md b/README.md
index 6a04cdb..462bb65 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,10 @@
# libCacheSim Python Binding
[](https://github.com/cacheMon/libCacheSim-python/actions/workflows/build.yml)
-[](https://github.com/cacheMon/libCacheSim-python/actions/workflows/docs.yml)
+[](https://docs.libcachesim.com/python)
Python bindings for [libCacheSim](https://github.com/1a1a11a/libCacheSim), a high-performance cache simulator and analysis library.
-## 📚 Documentation
-
-- **[English Documentation](https://cacheMon.github.io/libCacheSim-python/en/)** - Complete API reference, tutorials, and examples
-- **[中文文档](https://cacheMon.github.io/libCacheSim-python/zh/)** - 完整的API参考、教程和示例
-
## Installation
Binary installers for the latest released version are available at the [Python Package Index (PyPI)](https://pypi.org/project/libcachesim).
@@ -32,16 +27,6 @@ Run all tests to ensure the package works.
python -m pytest tests/
```
-## 🚀 Features
-
-- **High-Performance Cache Simulation**: Built on the proven libCacheSim C++ library
-- **Multiple Cache Algorithms**: LRU, LFU, FIFO, ARC, S3FIFO, Sieve, TinyLFU, and more
-- **Trace Processing**: Support for various trace formats (CSV, binary, Oracle, etc.)
-- **Synthetic Workload Generation**: Zipf, uniform, and custom distributions
-- **Trace Analysis**: Comprehensive workload analysis and visualization tools
-- **Custom Cache Policies**: Implement new algorithms using Python hooks
-- **Multi-language Documentation**: English and Chinese documentation with examples
-
## Quick Start
### Basic Usage
@@ -63,276 +48,168 @@ print(cache.get(req)) # True (second access)
### Trace Processing
-To simulate with traces, we need to read the request of traces correctly. `open_trace` is an unified interface for trace reading, which accepet three parameters:
-
-- `trace_path`: trace path, can be relative or absolutive path.
-- `type` (optional): if not given, we will automatically infer the type of trace according to the suffix of the trace file.
-- `params` (optional): if not given, default params are applied.
-
```python
import libcachesim as lcs
-# Open trace and process efficiently
-reader = lcs.open_trace(
- trace_path = "./data/cloudPhysicsIO.oracleGeneral.bin",
- type = lcs.TraceType.ORACLE_GENERAL_TRACE,
- params = lcs.ReaderInitParam(ignore_obj_size=True)
-)
-cache = lcs.S3FIFO(cache_size=1024*1024)
+# Step 1: Get one trace from S3 bucket
+URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
+dl = lcs.DataLoader()
+dl.load(URI)
-# Process entire trace efficiently (C++ backend)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+# Step 2: Open trace and process efficiently
+reader = lcs.TraceReader(dl.get_cache_path(URI))
+# Step 3: Initialize cache
cache = lcs.S3FIFO(cache_size=1024*1024)
-# Process with limits and time ranges
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(
- reader,
- start_req=0,
- max_req=1000
-)
+
+# Step 4: Process entire trace efficiently (C++ backend)
+obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
```
+> [!NOTE]
+> We DO NOT ignore the object size by default; you can add `reader_init_params = lcs.ReaderInitParam(ignore_obj_size=True)` to the initialization of `TraceReader` if you need to ignore it.
+
## Custom Cache Policies
Implement custom cache replacement algorithms using pure Python functions - **no C/C++ compilation required**.
### Python Hook Cache Overview
-The `PluginCache` allows you to define custom caching behavior through Python callback functions. This is perfect for:
-- Prototyping new cache algorithms
-- Educational purposes and learning
-- Research and experimentation
-- Custom business logic implementation
+The `PluginCache` allows you to define custom caching behavior through Python callback functions. You need to implement these callback functions:
-### Hook Functions
+| Function | Signature | Description |
+|----------|-----------|-------------|
+| `init_hook` | `(common_cache_params: CommonCacheParams) -> Any` | Initialize your data structure |
+| `hit_hook` | `(data: Any, request: Request) -> None` | Handle cache hits |
+| `miss_hook` | `(data: Any, request: Request) -> None` | Handle cache misses |
+| `eviction_hook` | `(data: Any, request: Request) -> int` | Return object ID to evict |
+| `remove_hook` | `(data: Any, obj_id: int) -> None` | Clean up when object removed |
+| `free_hook` | `(data: Any) -> None` | [Optional] Final cleanup |
-You need to implement these callback functions:
-
-- **`init_hook(cache_size: int) -> Any`**: Initialize your data structure
-- **`hit_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits
-- **`miss_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses
-- **`eviction_hook(data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict
-- **`remove_hook(data: Any, obj_id: int) -> None`**: Clean up when object removed
-- **`free_hook(data: Any) -> None`**: [Optional] Final cleanup
-
-### Example: Custom LRU Implementation
+
+An example for LRU:
```python
-import libcachesim as lcs
from collections import OrderedDict
+from libcachesim import PluginCache, CommonCacheParams, Request, SyntheticReader, LRU
-# Create a Python hook-based cache
-cache = lcs.PluginCache(cache_size=1024*1024, cache_name="MyLRU")
-
-# Define LRU policy hooks
-def init_hook(cache_size):
- return OrderedDict() # Track access order
-def hit_hook(lru_dict, obj_id, obj_size):
- lru_dict.move_to_end(obj_id) # Move to most recent
+class StandaloneLRU:
+ def __init__(self):
+ self.cache_data = OrderedDict()
-def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = True # Add to end
+ def cache_hit(self, obj_id):
+ if obj_id in self.cache_data:
+ obj_size = self.cache_data.pop(obj_id)
+ self.cache_data[obj_id] = obj_size
-def eviction_hook(lru_dict, obj_id, obj_size):
- return next(iter(lru_dict)) # Return least recent
+ def cache_miss(self, obj_id, obj_size):
+ self.cache_data[obj_id] = obj_size
-def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
+ def cache_eviction(self):
+ evicted_id, _ = self.cache_data.popitem(last=False)
+ return evicted_id
-# Set the hooks
-cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
-# Use it like any other cache
-req = lcs.Request()
-req.obj_id = 1
-req.obj_size = 100
-hit = cache.get(req)
-print(f"Cache hit: {hit}") # Should be False (miss)
-```
+ def cache_remove(self, obj_id):
+ if obj_id in self.cache_data:
+ del self.cache_data[obj_id]
-### Example: Custom FIFO Implementation
-```python
-import libcachesim as lcs
-from collections import deque
-from contextlib import suppress
+def cache_init_hook(common_cache_params: CommonCacheParams):
+ return StandaloneLRU()
-cache = lcs.PluginCache(cache_size=1024, cache_name="CustomFIFO")
-def init_hook(cache_size):
- return deque() # Use deque for FIFO order
+def cache_hit_hook(cache, request: Request):
+ cache.cache_hit(request.obj_id)
-def hit_hook(fifo_queue, obj_id, obj_size):
- pass # FIFO doesn't reorder on hit
-def miss_hook(fifo_queue, obj_id, obj_size):
- fifo_queue.append(obj_id) # Add to end of queue
+def cache_miss_hook(cache, request: Request):
+ cache.cache_miss(request.obj_id, request.obj_size)
-def eviction_hook(fifo_queue, obj_id, obj_size):
- return fifo_queue[0] # Return first item (oldest)
-def remove_hook(fifo_queue, obj_id):
- with suppress(ValueError):
- fifo_queue.remove(obj_id)
+def cache_eviction_hook(cache, request: Request):
+ return cache.cache_eviction()
-# Set the hooks and test
-cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-req = lcs.Request(obj_id=1, obj_size=100)
-hit = cache.get(req)
-print(f"Cache hit: {hit}") # Should be False (miss)
-```
+def cache_remove_hook(cache, obj_id):
+ cache.cache_remove(obj_id)
-## Available Algorithms
-
-### Built-in Cache Algorithms
-
-#### Basic Algorithms
-- **FIFO**: First-In-First-Out
-- **LRU**: Least Recently Used
-- **LFU**: Least Frequently Used
-- **LFUDA**: LFU with Dynamic Aging
-- **Clock**: Clock/Second-chance algorithm
-
-#### Advanced Algorithms
-- **QDLP**: Queue Demotion with Lazy Promotion
-- **S3FIFO**: Simple, Fast, Fair FIFO (recommended for most workloads)
-- **Sieve**: High-performance eviction algorithm
-- **ARC**: Adaptive Replacement Cache
-- **TwoQ**: Two-Queue algorithm
-- **SLRU**: Segmented LRU
-- **TinyLFU**: TinyLFU with window
-- **WTinyLFU**: Windowed TinyLFU
-
-#### Research/ML Algorithms
-- **LeCaR**: Learning Cache Replacement (adaptive)
-- **Cacheus**: Cache replacement policy
-- **LRB**: Learning-based cache (if enabled)
-- **GLCache**: Machine learning-based cache
-- **ThreeLCache**: Three-level cache hierarchy (if enabled)
-
-#### Optimal Algorithms (for analysis)
-- **Belady**: Optimal offline algorithm
-- **BeladySize**: Size-aware optimal algorithm
-```python
-import libcachesim as lcs
+def cache_free_hook(cache):
+ cache.cache_data.clear()
-# All algorithms use the same unified interface
-cache_size = 1024 * 1024 # 1MB
-lru_cache = lcs.LRU(cache_size)
-s3fifo_cache = lcs.S3FIFO(cache_size)
-sieve_cache = lcs.Sieve(cache_size)
-arc_cache = lcs.ARC(cache_size)
-
-# All caches work identically
-req = lcs.Request()
-req.obj_id = 1
-req.obj_size = 100
-hit = lru_cache.get(req)
-print(hit)
-```
-
-## Examples and Testing
-
-### Algorithm Comparison
-```python
-import libcachesim as lcs
-
-def compare_algorithms(trace_path):
- reader = lcs.open_trace(trace_path, lcs.TraceType.VSCSI_TRACE)
- algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC']
- for algo_name in algorithms:
- cache = getattr(lcs, algo_name)(cache_size=1024*1024)
- obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
- print(f"{algo_name}\t\tObj: {obj_miss_ratio:.4f}, Byte: {byte_miss_ratio:.4f}")
-
-compare_algorithms("./data/cloudPhysicsIO.vscsi")
+plugin_lru_cache = PluginCache(
+ cache_size=1024,
+ cache_init_hook=cache_init_hook,
+ cache_hit_hook=cache_hit_hook,
+ cache_miss_hook=cache_miss_hook,
+ cache_eviction_hook=cache_eviction_hook,
+ cache_remove_hook=cache_remove_hook,
+ cache_free_hook=cache_free_hook,
+ cache_name="CustomizedLRU",
+)
```
+
-### Performance Benchmarking
-```python
-import time
-
-def benchmark_cache(cache, num_requests=100000):
- """Benchmark cache performance"""
- start_time = time.time()
- for i in range(num_requests):
- req = lcs.Request()
- req.obj_id = i % 1000 # Working set of 1000 objects
- req.obj_size = 100
- cache.get(req)
- end_time = time.time()
- throughput = num_requests / (end_time - start_time)
- print(f"Processed {num_requests} requests in {end_time - start_time:.2f}s")
- print(f"Throughput: {throughput:.0f} requests/sec")
-
-# Compare performance
-lru_cache = lcs.LRU(cache_size=1024*1024)
-s3fifo_cache = lcs.S3FIFO(cache_size=1024*1024)
-
-print("LRU Performance:")
-benchmark_cache(lru_cache)
-
-print("\nS3FIFO Performance:")
-benchmark_cache(s3fifo_cache)
-```
-## Advanced Usage
+A similar implementation of S3FIFO via hook functions is given in [examples](examples/plugin_cache/s3fifo.py).
-### Multi-Format Trace Processing
+### Getting Help
-```python
-import libcachesim as lcs
+- Check the [project documentation](https://docs.libcachesim.com/python) for detailed guides
+- Open issues on [GitHub](https://github.com/cacheMon/libCacheSim-python/issues)
+- Review [examples](examples/) in the main repository
-# Supported trace types
-trace_types = {
- "oracle": lcs.TraceType.ORACLE_GENERAL_TRACE,
- "csv": lcs.TraceType.CSV_TRACE,
- "vscsi": lcs.TraceType.VSCSI_TRACE,
- "txt": lcs.TraceType.PLAIN_TXT_TRACE
-}
+---
+## Reference
+
+ Please cite the following papers if you use libCacheSim.
-# Open different trace formats
-oracle_reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin", trace_types["oracle"])
-csv_reader = lcs.open_trace("./data/cloudPhysicsIO.txt", trace_types["txt"])
-
-# Process traces with different caches
-caches = [
- lcs.LRU(cache_size=1024*1024),
- lcs.S3FIFO(cache_size=1024*1024),
- lcs.Sieve(cache_size=1024*1024)
-]
-
-for i, cache in enumerate(caches):
- miss_ratio_oracle = cache.process_trace(oracle_reader)[0]
- miss_ratio_csv = cache.process_trace(csv_reader)[0]
- print(f"Cache {i} miss ratio: {miss_ratio_oracle:.4f}, {miss_ratio_csv:.4f}")
```
+@inproceedings{yang2020-workload,
+ author = {Juncheng Yang and Yao Yue and K. V. Rashmi},
+ title = {A large-scale analysis of hundreds of in-memory cache clusters at Twitter},
+ booktitle = {14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)},
+ year = {2020},
+ isbn = {978-1-939133-19-9},
+ pages = {191--208},
+ url = {https://www.usenix.org/conference/osdi20/presentation/yang},
+ publisher = {USENIX Association},
+}
-## Troubleshooting
-
-### Common Issues
+@inproceedings{yang2023-s3fifo,
+ title = {FIFO Queues Are All You Need for Cache Eviction},
+ author = {Juncheng Yang and Yazhuo Zhang and Ziyue Qiu and Yao Yue and K.V. Rashmi},
+ isbn = {9798400702297},
+ publisher = {Association for Computing Machinery},
+ booktitle = {Symposium on Operating Systems Principles (SOSP'23)},
+ pages = {130–149},
+ numpages = {20},
+ year={2023}
+}
-**Import Error**: Make sure libCacheSim C++ library is built first:
-```bash
-cmake -G Ninja -B build && ninja -C build
+@inproceedings{yang2023-qdlp,
+ author = {Juncheng Yang and Ziyue Qiu and Yazhuo Zhang and Yao Yue and K.V. Rashmi},
+ title = {FIFO Can Be Better than LRU: The Power of Lazy Promotion and Quick Demotion},
+ year = {2023},
+ isbn = {9798400701955},
+ publisher = {Association for Computing Machinery},
+ doi = {10.1145/3593856.3595887},
+ booktitle = {Proceedings of the 19th Workshop on Hot Topics in Operating Systems (HotOS23)},
+ pages = {70–79},
+ numpages = {10},
+}
```
+If you used libCacheSim in your research, please cite the above papers.
-**Performance Issues**: Use `process_trace()` for large workloads instead of individual `get()` calls for better performance.
+
-**Memory Usage**: Monitor cache statistics (`cache.occupied_byte`) and ensure proper cache size limits for your system.
+---
-**Custom Cache Issues**: Validate your custom implementation against built-in algorithms using the test functions above.
-**Install with uv**: Since automatically building with `uv` will fail due to incomplete source code, please force install the binary file via `uv pip install libcachesim --only-binary=:all:`.
+## License
+See [LICENSE](LICENSE) for details.
-### Getting Help
-
-- Check the [main documentation](../doc/) for detailed guides
-- Open issues on [GitHub](https://github.com/1a1a11a/libCacheSim/issues)
-- Review [examples](/example) in the main repository
+---
\ No newline at end of file
diff --git a/docs/src/en/index.md b/docs/src/en/index.md
index 0b0e732..2eba51f 100644
--- a/docs/src/en/index.md
+++ b/docs/src/en/index.md
@@ -65,4 +65,4 @@ We welcome contributions! Please see our [GitHub repository](https://github.com/
## License
-This project is licensed under the Apache License 2.0.
+This project is licensed under the GPL-3.0 License.
diff --git a/docs/src/zh/index.md b/docs/src/zh/index.md
index d900ad6..997399a 100644
--- a/docs/src/zh/index.md
+++ b/docs/src/zh/index.md
@@ -65,4 +65,4 @@ pip install -e .
## 许可证
-本项目采用 Apache License 2.0 许可证。
+本项目采用 GPL-3.0 许可证。
diff --git a/examples/README.md b/examples/README.md
deleted file mode 100644
index 9f9f2e0..0000000
--- a/examples/README.md
+++ /dev/null
@@ -1,280 +0,0 @@
-# libCacheSim Python Examples
-
-This directory contains examples demonstrating how to use libCacheSim Python bindings for cache simulation and trace generation.
-
-## Overview
-
-libCacheSim Python bindings provide a powerful interface for:
-
-- Cache simulation with various eviction policies (LRU, FIFO, ARC, etc.)
-- Synthetic trace generation (Zipf and Uniform distributions)
-- Real trace analysis and processing
-- Custom cache policy implementation with Python hooks
-- Unified interface supporting all cache algorithms
-
-## Example Files
-
-### 1. Stream Request Generation (`stream_request_example.py`)
-
-Demonstrates how to generate synthetic request traces and use them for cache simulation:
-
-```python
-import libcachesim as lcs
-
-# Create Zipf-distributed requests
-zipf_generator = lcs.create_zipf_requests(
- num_objects=1000, # 1000 unique objects
- num_requests=10000, # 10000 requests
- alpha=1.0, # Zipf skewness
- obj_size=4000, # Object size in bytes
- seed=42 # For reproducibility
-)
-
-# Test with LRU cache
-cache = lcs.LRU(cache_size=50*1024*1024) # 50MB cache for better hit ratio
-miss_count = sum(1 for req in zipf_generator if not cache.get(req))
-print(f"Final miss ratio: {miss_count / 10000:.3f}")
-```
-
-**Features**:
-- Memory efficient: No temporary files created
-- Fast: Direct Request object generation
-- Reproducible: Support for random seeds
-- Flexible: Easy parameter adjustment
-
-### 2. Unified Interface Demo (`demo_unified_interface.py`)
-
-Shows the unified interface for all cache policies, including built-in and custom Python hook caches:
-
-```python
-import libcachesim as lcs
-
-cache_size = 1024 * 1024 # 1MB
-
-# Create different cache policies
-caches = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "ARC": lcs.ARC(cache_size),
-}
-
-# Create Python hook cache
-python_cache = lcs.PluginCache(cache_size, "CustomLRU")
-# Set hook functions...
-caches["Custom Python LRU"] = python_cache
-
-# Unified interface testing
-test_req = lcs.Request()
-test_req.obj_id = 1
-test_req.obj_size = 1024
-
-for name, cache in caches.items():
- result = cache.get(test_req)
- print(f"{name}: {'HIT' if result else 'MISS'}")
-```
-
-**Benefits of Unified Interface**:
-- Same API for all cache policies
-- Easy to switch between different algorithms
-- Efficient C++ backend trace processing
-- Consistent properties and statistics
-
-### 3. Python Hook Cache (`python_hook_cache_example.py`)
-
-Demonstrates how to create custom cache policies using Python hooks:
-
-```python
-import libcachesim as lcs
-from collections import OrderedDict
-
-class LRUPolicy:
- def __init__(self, cache_size):
- self.access_order = OrderedDict()
-
- def on_hit(self, obj_id, obj_size):
- self.access_order.move_to_end(obj_id)
-
- def on_miss(self, obj_id, obj_size):
- self.access_order[obj_id] = True
-
- def evict(self, obj_id, obj_size):
- return next(iter(self.access_order))
-
-def create_lru_cache(cache_size):
- cache = lcs.PluginCache(cache_size, "PythonLRU")
-
- def init_hook(cache_size):
- return LRUPolicy(cache_size)
-
- # Set other hooks...
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- return cache
-```
-
-**Custom Policy Features**:
-- Pure Python cache logic implementation
-- Support for LRU, FIFO and other policies
-- Flexible hook system
-- Same interface as built-in policies
-
-### 4. Zipf Trace Examples (`zipf_trace_example.py`)
-
-Shows synthetic trace generation methods and algorithm comparison:
-
-```python
-import libcachesim as lcs
-
-# Method 1: Create Zipf-distributed request generator
-zipf_generator = lcs.create_zipf_requests(
- num_objects=1000,
- num_requests=10000,
- alpha=1.0,
- obj_size=1024,
- seed=42
-)
-
-# Method 2: Create uniform-distributed request generator
-uniform_generator = lcs.create_uniform_requests(
- num_objects=1000,
- num_requests=10000,
- obj_size=1024,
- seed=42
-)
-
-# Compare different Zipf parameters
-alphas = [0.5, 1.0, 1.5, 2.0]
-for alpha in alphas:
- generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42)
- cache = lcs.LRU(1024*1024)
- hit_count = sum(1 for req in generator if cache.get(req))
- hit_ratio = hit_count / 10000
- print(f"α={alpha}: Hit ratio={hit_ratio:.4f}")
-```
-
-**Synthetic Trace Features**:
-- Higher α values create more skewed access patterns
-- Memory efficient: No temporary files created
-- Request generators for flexible processing
-- Suitable for simulating real workloads
-
-## Key Features
-
-### Trace Generation
-- `create_zipf_requests()`: Create Zipf-distributed request generator
-- `create_uniform_requests()`: Create uniform-distributed request generator
-
-### Cache Algorithms
-- **Classic algorithms**: `LRU()`, `FIFO()`, `ARC()`, `Clock()`
-- **Modern algorithms**: `S3FIFO()`, `Sieve()`, `TinyLFU()`
-- **Custom policies**: `PluginCache()`
-
-### Trace Processing
-- `open_trace()`: Open real trace files
-- `process_trace()`: High-performance trace processing
-
-## Basic Usage Examples
-
-### 1. Compare Cache Algorithms
-
-```python
-import libcachesim as lcs
-
-# Test different algorithms
-algorithms = ['LRU', 'FIFO', 'ARC', 'S3FIFO']
-cache_size = 1024*1024
-
-for algo_name in algorithms:
- # Create fresh workload for each algorithm
- generator = lcs.create_zipf_requests(1000, 10000, alpha=1.0, seed=42)
- cache = getattr(lcs, algo_name)(cache_size)
- hit_count = sum(1 for req in generator if cache.get(req))
- print(f"{algo_name}: {hit_count/10000:.3f}")
-```
-
-### 2. Parameter Sensitivity Analysis
-
-```python
-import libcachesim as lcs
-
-# Test different Zipf parameters
-for alpha in [0.5, 1.0, 1.5, 2.0]:
- generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42)
- cache = lcs.LRU(cache_size=512*1024)
-
- hit_count = sum(1 for req in generator if cache.get(req))
- print(f"α={alpha}: Hit ratio={hit_count/10000:.3f}")
-```
-
-## Parameters
-
-### Trace Generation Parameters
-- `num_objects`: Number of unique objects
-- `num_requests`: Number of requests to generate
-- `alpha`: Zipf skewness (α=1.0 for classic Zipf)
-- `obj_size`: Object size in bytes (default: 4000)
-- `seed`: Random seed for reproducibility
-
-### Cache Parameters
-- `cache_size`: Cache capacity in bytes
-- Algorithm-specific parameters (e.g.,`fifo_size_ratio` for S3FIFO)
-
-## Running Examples
-
-```bash
-# Navigate to examples directory
-cd libCacheSim-python/examples
-
-# Run stream-based trace generation
-python stream_request_example.py
-
-# Run unified interface demo
-python demo_unified_interface.py
-
-# Run Python hook cache example
-python python_hook_cache_example.py
-
-# Run Zipf trace examples
-python zipf_trace_example.py
-
-# Run all tests
-python -m pytest ../tests/ -v
-```
-
-## Performance Tips
-
-1. **Use appropriate cache and object sizes**:
- ```python
- # Good: cache can hold multiple objects
- cache = lcs.LRU(cache_size=1024*1024) # 1MB
- generator = lcs.create_zipf_requests(1000, 10000, obj_size=1024) # 1KB objects
- ```
-
-2. **Use seeds for reproducible experiments**:
- ```python
- generator = lcs.create_zipf_requests(1000, 10000, seed=42)
- ```
-
-3. **Process large traces with C++ backend**:
- ```python
- # Fast: C++ processing
- obj_miss_ratio, byte_miss_ratio = lcs.process_trace(cache, reader)
-
- # Slow: Python loop
- for req in reader:
- cache.get(req)
- ```
-
-4. **Understand Zipf parameter effects**:
- - α=0.5: Slightly skewed, close to uniform distribution
- - α=1.0: Classic Zipf distribution
- - α=2.0: Highly skewed, few objects get most accesses
-
-## Testing
-
-Run comprehensive tests:
-
-```bash
-python -m pytest ../tests/test_trace_generator.py -v
-python -m pytest ../tests/test_eviction.py -v
-python -m pytest ../tests/test_process_trace.py -v
-```
diff --git a/examples/basic_usage.py b/examples/basic_usage.py
new file mode 100644
index 0000000..e8dd208
--- /dev/null
+++ b/examples/basic_usage.py
@@ -0,0 +1,29 @@
+import libcachesim as lcs
+
+# Step 1: Get one trace from S3 bucket
+URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
+dl = lcs.DataLoader()
+dl.load(URI)
+
+# Step 2: Open trace and process efficiently
+reader = lcs.TraceReader(
+ trace = dl.get_cache_path(URI),
+ trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
+ reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
+)
+
+# Step 3: Initialize cache
+cache = lcs.S3FIFO(cache_size=1024*1024)
+
+# Step 4: Process entire trace efficiently (C++ backend)
+obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+
+# Step 4.1: Process with limited number of requests
+cache = lcs.S3FIFO(cache_size=1024*1024)
+obj_miss_ratio, byte_miss_ratio = cache.process_trace(
+ reader,
+ start_req=0,
+ max_req=1000
+)
+print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
\ No newline at end of file
diff --git a/examples/demo_unified_interface.py b/examples/demo_unified_interface.py
deleted file mode 100644
index 0cb629f..0000000
--- a/examples/demo_unified_interface.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo script showing the unified interface for all cache policies.
-This demonstrates how to use both native and Python hook-based caches
-with the same API for seamless algorithm comparison and switching.
-"""
-
-import sys
-import os
-
-# Add parent directory for development testing
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-try:
- import libcachesim as lcs
-except ImportError as e:
- print(f"Error importing libcachesim: {e}")
- print("Make sure the Python binding is built and installed")
- sys.exit(1)
-
-from collections import OrderedDict
-
-
-def create_trace_reader():
- """Helper function to create a trace reader."""
- data_file = os.path.join(
- os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin"
- )
- if not os.path.exists(data_file):
- print(f"Warning: Trace file not found at {data_file}")
- return None
- return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE)
-
-
-def create_demo_lru_hooks():
- """Create demo LRU hooks for Python-based cache policy."""
-
- def init_hook(cache_size):
- print(f" Initializing custom LRU with {cache_size} bytes")
- return OrderedDict()
-
- def hit_hook(lru_dict, obj_id, obj_size):
- if obj_id in lru_dict:
- lru_dict.move_to_end(obj_id)
-
- def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = obj_size
-
- def eviction_hook(lru_dict, obj_id, obj_size):
- if lru_dict:
- return next(iter(lru_dict))
- return obj_id
-
- def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
-
- return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook
-
-
-def demo_unified_interface():
- """Demonstrate the unified interface across different cache policies."""
- print("libCacheSim Python Binding - Unified Interface Demo")
- print("=" * 60)
-
- cache_size = 1024 * 1024 # 1MB
-
- # Create different cache policies
- caches = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "ARC": lcs.ARC(cache_size),
- }
-
- # Create Python hook-based LRU
- python_cache = lcs.PluginCache(cache_size, "CustomLRU")
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks()
- python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- caches["Custom Python LRU"] = python_cache
-
- print(f"Testing {len(caches)} different cache policies with unified interface:")
-
- # Demo 1: Single request interface
- print("1. Single Request Interface:")
- print(" All caches use: cache.get(request)")
-
- test_req = lcs.Request()
- test_req.obj_id = 1
- test_req.obj_size = 1024
-
- for name, cache in caches.items():
- result = cache.get(test_req)
- print(f" {name:20s}: {'HIT' if result else 'MISS'}")
-
- # Demo 2: Unified properties interface
- print("\n2. Unified Properties Interface:")
- print(" All caches provide: cache_size, n_obj, occupied_byte, n_req")
-
- for name, cache in caches.items():
- print(
- f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, "
- f"bytes={cache.occupied_byte}, reqs={cache.n_req}"
- )
-
- # Demo 3: Efficient trace processing
- print("\n3. Efficient Trace Processing Interface:")
- print(" All caches use: cache.process_trace(reader, max_req=N)")
-
- max_requests = 1000
-
- for name, cache in caches.items():
- # Create fresh reader for each cache
- reader = create_trace_reader()
- if not reader:
- print(f" {name:20s}: trace file not available")
- continue
-
- obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=max_requests)
- print(f" {name:20s}: obj_miss_ratio={obj_miss_ratio:.4f}, byte_miss_ratio={byte_miss_ratio:.4f}")
-
- print("\nKey Benefits of Unified Interface:")
- print(" • Same API for all cache policies (built-in + custom)")
- print(" • Easy to switch between different algorithms")
- print(" • Efficient trace processing in C++ (no Python overhead)")
- print(" • Consistent properties and statistics")
- print(" • Type-safe and well-documented")
-
- print("\nDemo completed! All cache policies work with the same interface.")
-
-
-if __name__ == "__main__":
- demo_unified_interface()
diff --git a/examples/plugin_cache.py b/examples/plugin_cache/lru.py
similarity index 98%
rename from examples/plugin_cache.py
rename to examples/plugin_cache/lru.py
index 04940b3..da17836 100644
--- a/examples/plugin_cache.py
+++ b/examples/plugin_cache/lru.py
@@ -62,7 +62,7 @@ def cache_free_hook(cache):
reader = SyntheticReader(
num_of_req=100000,
- num_objects=100,
+ num_objects=10000,
obj_size=100,
seed=42,
alpha=0.8,
diff --git a/examples/plugin_cache/s3fifo.py b/examples/plugin_cache/s3fifo.py
new file mode 100644
index 0000000..829710d
--- /dev/null
+++ b/examples/plugin_cache/s3fifo.py
@@ -0,0 +1,216 @@
+# An example plugin implementing S3FIFO
+
+# NOTE(haocheng): this example shows that, with the plugin system, caches can be composed like Lego blocks
+# Happy caching!
+
+import libcachesim as lcs
+from collections import OrderedDict
+from collections import deque
+from libcachesim import PluginCache, CommonCacheParams, Request, S3FIFO, FIFO, SyntheticReader
+
+# NOTE(haocheng): we only support ignoring object size for now
+class StandaloneS3FIFO:
+ def __init__(self,
+ small_size_ratio: float = 0.1,
+ ghost_size_ratio: float = 0.9,
+ move_to_main_threshold: int = 2,
+ cache_size: int = 1024):
+ self.cache_size = cache_size
+ small_fifo_size = int(small_size_ratio * cache_size)
+ main_fifo_size = cache_size - small_fifo_size
+ ghost_fifo_size = int(ghost_size_ratio * cache_size)
+
+ self.small_set = set()
+ self.main_set = set()
+ self.ghost_set = deque(maxlen=ghost_fifo_size)
+
+ self.small_fifo = FIFO(small_fifo_size)
+ self.main_fifo = FIFO(main_fifo_size)
+ self.ghost_fifo = FIFO(ghost_fifo_size)
+
+ # Frequency tracking
+ self.freq = {}
+
+ # Other parameters
+ self.max_freq = 3
+ self.move_to_main_threshold = move_to_main_threshold
+
+        self.has_evicted = False # Marks whether eviction has started; we only begin evicting once the cache is full
+ self.hit_on_ghost = False
+
+ def cache_hit(self, req: Request):
+ hit_small = False
+ hit_main = False
+ if self.small_fifo.find(req, update_cache=False):
+ self.freq[req.obj_id] += 1
+
+ if self.main_fifo.find(req, update_cache=False):
+ self.freq[req.obj_id] += 1
+
+ def cache_miss(self, req: Request):
+ if not self.hit_on_ghost:
+ obj = self.ghost_fifo.find(req, update_cache=False)
+ if obj is not None:
+ self.hit_on_ghost = True
+ # remove from ghost set
+ self.ghost_fifo.remove(req.obj_id)
+ self.ghost_set.remove(req.obj_id)
+
+
+        # NOTE(haocheng): first check whether this missed object has a record in the ghost queue
+ if not self.hit_on_ghost:
+ if req.obj_size >= self.small_fifo.cache_size:
+ # If object is too large, we do not process it
+ return
+
+            # During the initial fill phase (before any eviction), objects go to the small FIFO;
+            # once the small FIFO is full, new objects are inserted into the main FIFO instead
+ if not self.has_evicted and self.small_fifo.get_occupied_byte() >= self.small_fifo.cache_size:
+ obj = self.main_fifo.insert(req)
+ self.main_set.add(obj.obj_id)
+ else:
+ obj = self.small_fifo.insert(req)
+ self.small_set.add(obj.obj_id)
+ else:
+ obj = self.main_fifo.insert(req)
+ self.main_set.add(req.obj_id)
+ self.hit_on_ghost = False
+ self.freq[obj.obj_id] = 0
+
+ def cache_evict_small(self, req: Request):
+ has_evicted = False
+ evicted_id = None
+ real_evicted_id = None
+ while not has_evicted and self.small_fifo.get_occupied_byte() > 0:
+ obj_to_evict = self.small_fifo.to_evict(req)
+ evicted_id = obj_to_evict.obj_id # Store the ID before any operations
+ if self.freq[obj_to_evict.obj_id] >= self.move_to_main_threshold:
+ new_req = Request(obj_id=evicted_id, obj_size=1)
+ self.main_fifo.insert(new_req)
+ self.main_set.add(evicted_id)
+ # Reset frequency
+ self.freq[evicted_id] = 0
+ else:
+ new_req = Request(obj_id=evicted_id, obj_size=1)
+ self.ghost_fifo.get(new_req)
+ self.ghost_set.append(evicted_id)
+ has_evicted = True
+ real_evicted_id = evicted_id
+ flag = self.small_fifo.remove(evicted_id)
+ self.small_set.remove(evicted_id)
+ assert flag, "Should be able to remove"
+ return real_evicted_id
+
+ def cache_evict_main(self, req: Request):
+ has_evicted = False
+ evicted_id = None
+ while not has_evicted and self.main_fifo.get_occupied_byte() > 0:
+ obj_to_evict = self.main_fifo.to_evict(req)
+ assert obj_to_evict is not None
+ evicted_id = obj_to_evict.obj_id # Store the ID before any operations
+ freq = self.freq[evicted_id]
+ if freq >= 1:
+ # Reinsert with decremented frequency
+ self.main_fifo.remove(evicted_id)
+ self.main_set.remove(evicted_id)
+ new_req = Request(obj_id=evicted_id, obj_size=1)
+ self.main_fifo.insert(new_req)
+ self.main_set.add(evicted_id)
+ self.freq[evicted_id] = min(freq, self.max_freq) - 1
+ else:
+ flag = self.main_fifo.remove(evicted_id)
+ self.main_set.remove(evicted_id)
+ has_evicted = True
+ # print(f"Evicted {evicted_id}")
+ return evicted_id
+
+ def cache_evict(self, req: Request):
+ if not self.hit_on_ghost:
+ obj = self.ghost_fifo.find(req, update_cache=False)
+ if obj is not None:
+ self.hit_on_ghost = True
+ # remove from ghost set
+ self.ghost_fifo.remove(req.obj_id)
+ self.ghost_set.remove(req.obj_id)
+
+ self.has_evicted = True
+ cond = (self.main_fifo.get_occupied_byte() > self.main_fifo.cache_size)
+ if (cond or (self.small_fifo.get_occupied_byte() == 0)):
+ obj_id = self.cache_evict_main(req)
+ else:
+ obj_id = self.cache_evict_small(req)
+
+ if obj_id is not None:
+ del self.freq[obj_id]
+
+ return obj_id
+
+ def cache_remove(self, obj_id):
+ removed = False
+ removed |= self.small_fifo.remove(obj_id)
+ removed |= self.ghost_fifo.remove(obj_id)
+ removed |= self.main_fifo.remove(obj_id)
+ return removed
+
+def cache_init_hook(common_cache_params: CommonCacheParams):
+ return StandaloneS3FIFO(cache_size=common_cache_params.cache_size)
+
+def cache_hit_hook(cache, request: Request):
+ cache.cache_hit(request)
+
+def cache_miss_hook(cache, request: Request):
+ cache.cache_miss(request)
+
+def cache_eviction_hook(cache, request: Request):
+ evicted_id = None
+ while evicted_id is None:
+ evicted_id = cache.cache_evict(request)
+ return evicted_id
+
+def cache_remove_hook(cache, obj_id):
+ cache.cache_remove(obj_id)
+
+def cache_free_hook(cache):
+ cache.small_fifo.clear()
+ cache.small_freq.clear()
+ cache.ghost_fifo.clear()
+ cache.ghost_freq.clear()
+ cache.main_fifo.clear()
+ cache.main_freq.clear()
+
+cache = PluginCache(
+ cache_size=1024,
+ cache_init_hook=cache_init_hook,
+ cache_hit_hook=cache_hit_hook,
+ cache_miss_hook=cache_miss_hook,
+ cache_eviction_hook=cache_eviction_hook,
+ cache_remove_hook=cache_remove_hook,
+ cache_free_hook=cache_free_hook,
+ cache_name="S3FIFO")
+
+URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
+dl = lcs.DataLoader()
+dl.load(URI)
+
+# Step 2: Open trace and process efficiently
+reader = lcs.TraceReader(
+ trace = dl.get_cache_path(URI),
+ trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
+ reader_init_params = lcs.ReaderInitParam(ignore_obj_size=True)
+)
+
+ref_s3fifo = S3FIFO(cache_size=1024, small_size_ratio=0.1, ghost_size_ratio=0.9, move_to_main_threshold=2)
+
+# for req in reader:
+# hit = cache.get(req)
+# ref_hit = ref_s3fifo.get(req)
+# assert hit == ref_hit, f"Cache hit mismatch: {hit} != {ref_hit}"
+
+req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+ref_req_miss_ratio, ref_byte_miss_ratio = ref_s3fifo.process_trace(reader)
+print(f"Plugin req miss ratio: {req_miss_ratio}, ref req miss ratio: {ref_req_miss_ratio}")
+print(f"Plugin byte miss ratio: {byte_miss_ratio}, ref byte miss ratio: {ref_byte_miss_ratio}")
+
+assert req_miss_ratio == ref_req_miss_ratio
+assert byte_miss_ratio == ref_byte_miss_ratio
+print("All requests processed successfully. Plugin cache matches reference S3FIFO cache.")
\ No newline at end of file
diff --git a/examples/python_hook_cache_example.py b/examples/python_hook_cache_example.py
deleted file mode 100644
index fa309d4..0000000
--- a/examples/python_hook_cache_example.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example demonstrating how to create custom cache policies using Python hooks.
-
-This example shows how to implement LRU and FIFO cache policies using the
-PluginCache class, which allows users to define cache behavior using
-pure Python functions instead of C/C++ plugins.
-"""
-
-import libcachesim as lcs
-from collections import OrderedDict, deque
-from contextlib import suppress
-
-
-class LRUPolicy:
- """LRU (Least Recently Used) cache policy implementation."""
-
- def __init__(self, cache_size):
- self.cache_size = cache_size
- self.access_order = OrderedDict() # obj_id -> True (for ordering)
-
- def on_hit(self, obj_id, obj_size):
- """Move accessed object to end (most recent)."""
- if obj_id in self.access_order:
- # Move to end (most recent)
- self.access_order.move_to_end(obj_id)
-
- def on_miss(self, obj_id, obj_size):
- """Add new object to end (most recent)."""
- self.access_order[obj_id] = True
-
- def evict(self, obj_id, obj_size):
- """Return the least recently used object ID."""
- if self.access_order:
- # Return first item (least recent)
- victim_id = next(iter(self.access_order))
- return victim_id
- raise RuntimeError("No objects to evict")
-
- def on_remove(self, obj_id):
- """Remove object from tracking."""
- self.access_order.pop(obj_id, None)
-
-
-class FIFOPolicy:
- """FIFO (First In First Out) cache policy implementation."""
-
- def __init__(self, cache_size):
- self.cache_size = cache_size
- self.insertion_order = deque() # obj_id queue
-
- def on_hit(self, obj_id, obj_size):
- """FIFO doesn't change order on hits."""
- pass
-
- def on_miss(self, obj_id, obj_size):
- """Add new object to end of queue."""
- self.insertion_order.append(obj_id)
-
- def evict(self, obj_id, obj_size):
- """Return the first inserted object ID."""
- if self.insertion_order:
- victim_id = self.insertion_order.popleft()
- return victim_id
- raise RuntimeError("No objects to evict")
-
- def on_remove(self, obj_id):
- """Remove object from tracking."""
- with suppress(ValueError):
- self.insertion_order.remove(obj_id)
-
-
-def create_lru_cache(cache_size):
- """Create an LRU cache using Python hooks."""
- cache = lcs.PluginCache(cache_size, "PythonLRU")
-
- def init_hook(cache_size):
- return LRUPolicy(cache_size)
-
- def hit_hook(policy, obj_id, obj_size):
- policy.on_hit(obj_id, obj_size)
-
- def miss_hook(policy, obj_id, obj_size):
- policy.on_miss(obj_id, obj_size)
-
- def eviction_hook(policy, obj_id, obj_size):
- return policy.evict(obj_id, obj_size)
-
- def remove_hook(policy, obj_id):
- policy.on_remove(obj_id)
-
- def free_hook(policy):
- # Python garbage collection handles cleanup
- pass
-
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook)
- return cache
-
-
-def create_fifo_cache(cache_size):
- """Create a FIFO cache using Python hooks."""
- cache = lcs.PluginCache(cache_size, "PythonFIFO")
-
- def init_hook(cache_size):
- return FIFOPolicy(cache_size)
-
- def hit_hook(policy, obj_id, obj_size):
- policy.on_hit(obj_id, obj_size)
-
- def miss_hook(policy, obj_id, obj_size):
- policy.on_miss(obj_id, obj_size)
-
- def eviction_hook(policy, obj_id, obj_size):
- return policy.evict(obj_id, obj_size)
-
- def remove_hook(policy, obj_id):
- policy.on_remove(obj_id)
-
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- return cache
-
-
-def test_cache_policy(cache, name):
- """Test a cache policy with sample requests."""
- print(f"\n=== Testing {name} Cache ===")
-
- # Test requests: obj_id, obj_size
- test_requests = [
- (1, 100),
- (2, 100),
- (3, 100),
- (4, 100),
- (5, 100), # Fill cache
- (1, 100), # Hit
- (6, 100), # Miss, should evict something
- (2, 100), # Hit or miss depending on policy
- (7, 100), # Miss, should evict something
- ]
-
- hits = 0
- misses = 0
-
- for obj_id, obj_size in test_requests:
- req = lcs.Request()
- req.obj_id = obj_id
- req.obj_size = obj_size
-
- hit = cache.get(req)
- if hit:
- hits += 1
- print(f"Request {obj_id}: HIT")
- else:
- misses += 1
- print(f"Request {obj_id}: MISS")
-
- print(f"Total: {hits} hits, {misses} misses")
- print(f"Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes occupied")
-
-
-def main():
- """Main example function."""
- cache_size = 500 # Bytes (can hold 5 objects of size 100 each)
-
- # Test LRU cache
- lru_cache = create_lru_cache(cache_size)
- test_cache_policy(lru_cache, "LRU")
-
- # Test FIFO cache
- fifo_cache = create_fifo_cache(cache_size)
- test_cache_policy(fifo_cache, "FIFO")
-
- print("\n=== Comparison ===")
- print("LRU keeps recently accessed items, evicting least recently used")
- print("FIFO keeps items in insertion order, evicting oldest inserted")
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/stream_request_example.py b/examples/stream_request_example.py
deleted file mode 100644
index eed213b..0000000
--- a/examples/stream_request_example.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example: Using stream request generators for cache simulation.
-
-This example demonstrates how to use the stream request generators
-to create synthetic traces and run cache simulations without creating
-temporary files.
-"""
-
-import libcachesim as lcs
-
-
-def main():
- """Demonstrate stream request generators."""
- print("libCacheSim Stream Request Generation Example")
- print("=" * 50)
-
- # Example 1: Basic Zipf generation with appropriate cache size
- print("\n1. Basic Zipf Request Generation")
- print("-" * 30)
-
- # Use reasonable cache and object sizes
- cache_size = 50 * 1024 * 1024 # 50MB cache
- obj_size = 1024 # 1KB objects
- num_objects = 1000
- num_requests = 10000
-
- # Create a cache
- cache = lcs.LRU(cache_size=cache_size)
-
- # Create a Zipf-distributed request generator
- zipf_generator = lcs.create_zipf_requests(
- num_objects=num_objects,
- num_requests=num_requests,
- alpha=1.0, # Zipf skewness
- obj_size=obj_size, # Object size in bytes
- seed=42, # For reproducibility
- )
-
- print(f"Cache size: {cache_size // 1024 // 1024}MB")
- print(f"Object size: {obj_size}B")
- print(f"Generated {num_requests} Zipf requests for {num_objects} objects")
-
- # Process the requests directly
- hit_count = 0
- for i, req in enumerate(zipf_generator):
- if cache.get(req):
- hit_count += 1
-
- # Print progress every 2000 requests
- if (i + 1) % 2000 == 0:
- current_hit_ratio = hit_count / (i + 1)
- print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}")
-
- final_hit_ratio = hit_count / num_requests
- print(f"Final hit ratio: {final_hit_ratio:.3f}")
-
- # Example 2: Uniform distribution comparison
- print("\n2. Uniform Request Generation")
- print("-" * 30)
-
- # Create a uniform-distributed request generator
- uniform_generator = lcs.create_uniform_requests(
- num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42
- )
-
- print(f"Generated {num_requests} uniform requests for {num_objects} objects")
-
- # Reset cache and process uniform requests
- cache = lcs.LRU(cache_size=cache_size)
- hit_count = 0
-
- for i, req in enumerate(uniform_generator):
- if cache.get(req):
- hit_count += 1
-
- if (i + 1) % 2000 == 0:
- current_hit_ratio = hit_count / (i + 1)
- print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}")
-
- final_hit_ratio = hit_count / num_requests
- print(f"Final hit ratio: {final_hit_ratio:.3f}")
-
- # Example 3: Compare different Zipf alpha values
- print("\n3. Zipf Alpha Parameter Comparison")
- print("-" * 30)
-
- alphas = [0.5, 1.0, 1.5, 2.0]
- print(f"{'Alpha':<8} {'Hit Ratio':<12} {'Description'}")
- print("-" * 40)
-
- for alpha in alphas:
- generator = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size=cache_size)
- hit_count = sum(1 for req in generator if cache.get(req))
- hit_ratio = hit_count / num_requests
-
- # Describe the skewness
- if alpha < 0.8:
- description = "Low skew (nearly uniform)"
- elif alpha < 1.2:
- description = "Classic Zipf"
- elif alpha < 1.8:
- description = "High skew"
- else:
- description = "Very high skew"
-
- print(f"{alpha:<8.1f} {hit_ratio:<12.3f} {description}")
-
- # Example 4: Cache size sensitivity
- print("\n4. Cache Size Sensitivity")
- print("-" * 30)
-
- # Fixed workload
- generator = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
- )
-
- cache_sizes = [
- 1 * 1024 * 1024, # 1MB
- 5 * 1024 * 1024, # 5MB
- 10 * 1024 * 1024, # 10MB
- 50 * 1024 * 1024, # 50MB
- ]
-
- print(f"{'Cache Size':<12} {'Hit Ratio':<12} {'Objects Fit'}")
- print("-" * 36)
-
- for cache_size in cache_sizes:
- cache = lcs.LRU(cache_size=cache_size)
-
- # Create fresh generator for each test
- test_generator = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
- )
-
- hit_count = sum(1 for req in test_generator if cache.get(req))
- hit_ratio = hit_count / num_requests
- objects_fit = cache_size // obj_size
-
- print(f"{cache_size // 1024 // 1024}MB{'':<8} {hit_ratio:<12.3f} ~{objects_fit}")
-
- print("\nNotes:")
- print("- Higher α values create more skewed access patterns")
- print("- Skewed patterns generally have higher hit ratios")
- print("- Cache size affects performance, but beyond a point diminishing returns")
- print(f"- Working set: {num_objects} objects × {obj_size}B = {num_objects * obj_size // 1024}KB")
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/zipf_trace_example.py b/examples/zipf_trace_example.py
deleted file mode 100644
index 662ae0f..0000000
--- a/examples/zipf_trace_example.py
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example demonstrating trace generation and cache simulation in libCacheSim Python bindings.
-
-This example shows how to:
-1. Generate synthetic request traces using available APIs
-2. Use the generated traces with cache simulations
-3. Compare different algorithms and parameters
-"""
-
-import libcachesim as lcs
-
-
-def example_basic_trace_generation():
- """Basic example of generating synthetic traces."""
- print("=== Basic Synthetic Trace Generation ===")
-
- # Generate Zipf requests using available API
- num_objects = 1000
- num_requests = 10000
- alpha = 1.0
- obj_size = 1024 # 1KB objects
-
- # Create Zipf-distributed requests
- zipf_requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- print(f"Generated {num_requests} Zipf requests with α={alpha}")
- print(f"Object size: {obj_size}B, Number of unique objects: {num_objects}")
-
- # Use the requests with a cache
- cache = lcs.LRU(cache_size=50 * 1024 * 1024) # 50MB cache
- hit_count = sum(1 for req in zipf_requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- print(f"LRU cache hit ratio: {hit_ratio:.4f}")
-
- return hit_ratio
-
-
-def example_compare_zipf_parameters():
- """Compare different Zipf parameters."""
- print("\n=== Comparing Zipf Parameters ===")
-
- num_objects = 1000
- num_requests = 10000
- cache_size = 50 * 1024 * 1024 # 50MB
- obj_size = 1024 # 1KB objects
-
- alphas = [0.5, 1.0, 1.5, 2.0]
- results = {}
-
- print(f"{'Alpha':<8} {'LRU':<8} {'FIFO':<8} {'ARC':<8} {'Clock':<8}")
- print("-" * 40)
-
- for alpha in alphas:
- # Test with different cache policies
- policies = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "ARC": lcs.ARC(cache_size),
- "Clock": lcs.Clock(cache_size),
- }
-
- results[alpha] = {}
- hit_ratios = []
- for name, cache in policies.items():
- # Create fresh request iterator for each cache
- test_requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
- hit_count = sum(1 for req in test_requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- results[alpha][name] = hit_ratio
- hit_ratios.append(f"{hit_ratio:.3f}")
-
- print(f"{alpha:<8.1f} {hit_ratios[0]:<8} {hit_ratios[1]:<8} {hit_ratios[2]:<8} {hit_ratios[3]:<8}")
-
- return results
-
-
-def example_algorithm_comparison():
- """Compare different cache algorithms."""
- print("\n=== Cache Algorithm Comparison ===")
-
- # Fixed workload parameters
- num_objects = 1000
- num_requests = 10000
- alpha = 1.0
- obj_size = 1024
- cache_size = 10 * 1024 * 1024 # 10MB
-
- # Available algorithms
- algorithms = {
- "LRU": lcs.LRU,
- "FIFO": lcs.FIFO,
- "ARC": lcs.ARC,
- "Clock": lcs.Clock,
- "S3FIFO": lcs.S3FIFO,
- "Sieve": lcs.Sieve,
- }
-
- print(f"Testing with: {num_objects} objects, {num_requests} requests")
- print(f"Cache size: {cache_size // 1024 // 1024}MB, Object size: {obj_size}B")
- print(f"Zipf alpha: {alpha}")
- print()
-
- print(f"{'Algorithm':<10} {'Hit Ratio':<12} {'Description'}")
- print("-" * 45)
-
- results = {}
- for name, cache_class in algorithms.items():
- try:
- # Create fresh requests for each algorithm
- requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- cache = cache_class(cache_size)
- hit_count = sum(1 for req in requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- results[name] = hit_ratio
-
- # Add descriptions
- descriptions = {
- "LRU": "Least Recently Used",
- "FIFO": "First In First Out",
- "ARC": "Adaptive Replacement Cache",
- "Clock": "Clock/Second Chance",
- "S3FIFO": "Simple Scalable FIFO",
- "Sieve": "Lazy Promotion",
- }
-
- print(f"{name:<10} {hit_ratio:<12.4f} {descriptions.get(name, '')}")
-
- except Exception as e:
- print(f"{name:<10} {'ERROR':<12} {str(e)}")
-
- return results
-
-
-def example_uniform_vs_zipf():
- """Compare uniform vs Zipf distributions."""
- print("\n=== Uniform vs Zipf Distribution Comparison ===")
-
- num_objects = 1000
- num_requests = 10000
- obj_size = 1024
- cache_size = 10 * 1024 * 1024
-
- # Test uniform distribution
- uniform_requests = lcs.create_uniform_requests(
- num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size)
- uniform_hits = sum(1 for req in uniform_requests if cache.get(req))
- uniform_hit_ratio = uniform_hits / num_requests
-
- # Test Zipf distribution
- zipf_requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size)
- zipf_hits = sum(1 for req in zipf_requests if cache.get(req))
- zipf_hit_ratio = zipf_hits / num_requests
-
- print(f"{'Distribution':<12} {'Hit Ratio':<12} {'Description'}")
- print("-" * 45)
- print(f"{'Uniform':<12} {uniform_hit_ratio:<12.4f} {'All objects equally likely'}")
- print(f"{'Zipf (α=1.0)':<12} {zipf_hit_ratio:<12.4f} {'Some objects much more popular'}")
-
- print(
- f"\nObservation: Zipf typically shows{'higher' if zipf_hit_ratio > uniform_hit_ratio else 'lower'} hit ratios"
- )
- print("due to locality of reference (hot objects get cached)")
-
-
-def example_cache_size_analysis():
- """Analyze the effect of different cache sizes."""
- print("\n=== Cache Size Sensitivity Analysis ===")
-
- num_objects = 1000
- num_requests = 10000
- alpha = 1.0
- obj_size = 1024
-
- cache_sizes = [
- 1 * 1024 * 1024, # 1MB
- 5 * 1024 * 1024, # 5MB
- 10 * 1024 * 1024, # 10MB
- 25 * 1024 * 1024, # 25MB
- 50 * 1024 * 1024, # 50MB
- ]
-
- print(f"{'Cache Size':<12} {'Objects Fit':<12} {'Hit Ratio':<12} {'Efficiency'}")
- print("-" * 55)
-
- for cache_size in cache_sizes:
- requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size)
- hit_count = sum(1 for req in requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- objects_fit = cache_size // obj_size
- efficiency = hit_ratio / (cache_size / (1024 * 1024)) # hit ratio per MB
-
- print(f"{cache_size // 1024 // 1024}MB{'':<8} {objects_fit:<12} {hit_ratio:<12.4f} {efficiency:<12.4f}")
-
-
-def main():
- """Run all examples."""
- print("libCacheSim Python Bindings - Trace Generation Examples")
- print("=" * 60)
-
- try:
- # Run examples
- example_basic_trace_generation()
- example_compare_zipf_parameters()
- example_algorithm_comparison()
- example_uniform_vs_zipf()
- example_cache_size_analysis()
-
- print("\n" + "=" * 60)
- print("All examples completed successfully!")
- print("\nKey Takeaways:")
- print("• Higher Zipf α values create more skewed access patterns")
- print("• Skewed patterns generally result in higher cache hit ratios")
- print("• Different algorithms perform differently based on workload")
- print("• Cache size has diminishing returns beyond working set size")
-
- except Exception as e:
- print(f"Error running examples: {e}")
- import traceback
-
- traceback.print_exc()
-
-
-if __name__ == "__main__":
- main()
diff --git a/libcachesim/__init__.py b/libcachesim/__init__.py
index bd194bf..c9fc1e7 100644
--- a/libcachesim/__init__.py
+++ b/libcachesim/__init__.py
@@ -6,6 +6,7 @@
Cache,
Request,
ReqOp,
+ ReaderInitParam,
TraceType,
SamplerType,
AnalysisParam,
@@ -65,6 +66,7 @@
"Cache",
"Request",
"ReqOp",
+ "ReaderInitParam",
"TraceType",
"SamplerType",
"AnalysisParam",
diff --git a/libcachesim/cache.py b/libcachesim/cache.py
index 99a17aa..b61a512 100644
--- a/libcachesim/cache.py
+++ b/libcachesim/cache.py
@@ -54,7 +54,7 @@ def __init__(self, _cache: Cache):
def get(self, req: Request) -> bool:
return self._cache.get(req)
- def find(self, req: Request, update_cache: bool = True) -> CacheObject:
+ def find(self, req: Request, update_cache: bool = True) -> Optional[CacheObject]:
return self._cache.find(req, update_cache)
def can_insert(self, req: Request) -> bool:
diff --git a/libcachesim/protocols.py b/libcachesim/protocols.py
index 58eeddb..74a45f8 100644
--- a/libcachesim/protocols.py
+++ b/libcachesim/protocols.py
@@ -23,7 +23,7 @@ class ReaderProtocol(Protocol):
"""
def get_num_of_req(self) -> int: ...
- def read_one_req(self, req: Request) -> Request: ...
+ def read_one_req(self) -> Request: ...
def skip_n_req(self, n: int) -> int: ...
def reset(self) -> None: ...
def close(self) -> None: ...
diff --git a/libcachesim/synthetic_reader.py b/libcachesim/synthetic_reader.py
index 16f8a10..b429242 100644
--- a/libcachesim/synthetic_reader.py
+++ b/libcachesim/synthetic_reader.py
@@ -85,11 +85,12 @@ def obj_ids(self) -> np.ndarray:
def get_num_of_req(self) -> int:
return self.num_of_req
- def read_one_req(self, req: Request) -> Request:
+ def read_one_req(self) -> Request:
"""Read one request and fill Request object"""
+ req = Request()
if self.current_pos >= self.num_of_req:
req.valid = False
- return req
+ return req # return invalid request
obj_id = self.obj_ids[self.current_pos]
req.obj_id = obj_id
@@ -194,8 +195,7 @@ def __next__(self) -> Request:
if self.current_pos >= self.num_of_req:
raise StopIteration
- req = Request()
- return self.read_one_req(req)
+ return self.read_one_req()
def __getitem__(self, index: int) -> Request:
"""Support index access"""
diff --git a/libcachesim/trace_reader.py b/libcachesim/trace_reader.py
index 8d70741..20a2aba 100644
--- a/libcachesim/trace_reader.py
+++ b/libcachesim/trace_reader.py
@@ -167,8 +167,12 @@ def read_direction(self) -> ReadDirection:
def get_num_of_req(self) -> int:
return self._reader.get_num_of_req()
- def read_one_req(self, req: Request) -> Request:
- return self._reader.read_one_req(req)
+ def read_one_req(self) -> Request:
+ req = Request()
+        ret = self._reader.read_one_req(req) # returns 0 on success
+ if ret != 0:
+ raise RuntimeError("Failed to read one request")
+ return req
def reset(self) -> None:
self._reader.reset()
@@ -198,19 +202,23 @@ def set_read_pos(self, pos: float) -> None:
self._reader.set_read_pos(pos)
def __iter__(self) -> Iterator[Request]:
- return self._reader.__iter__()
+ self._reader.reset()
+ return self
def __len__(self) -> int:
return self._reader.get_num_of_req()
def __next__(self) -> Request:
- if self._reader.n_req_left == 0:
+ req = Request()
+ ret = self._reader.read_one_req(req)
+ if ret != 0:
raise StopIteration
- return self._reader.read_one_req()
+ return req
def __getitem__(self, index: int) -> Request:
if index < 0 or index >= self._reader.get_num_of_req():
raise IndexError("Index out of range")
self._reader.reset()
self._reader.skip_n_req(index)
- return self._reader.read_one_req()
+ req = Request()
+ return self._reader.read_one_req(req)
diff --git a/src/export_cache.cpp b/src/export_cache.cpp
index cff2031..8fb3f04 100644
--- a/src/export_cache.cpp
+++ b/src/export_cache.cpp
@@ -281,7 +281,7 @@ void export_cache(py::module& m) {
"find",
[](cache_t& self, const request_t& req, const bool update_cache) {
cache_obj_t* obj = self.find(&self, &req, update_cache);
- return py::cast(obj, py::return_value_policy::reference);
+ return obj ? py::cast(obj, py::return_value_policy::reference) : py::none();
},
"req"_a, "update_cache"_a = true)
.def(
diff --git a/src/export_reader.cpp b/src/export_reader.cpp
index 5eb6ce5..8f286f3 100644
--- a/src/export_reader.cpp
+++ b/src/export_reader.cpp
@@ -264,7 +264,7 @@ void export_reader(py::module& m) {
if (ret != 0) {
throw std::runtime_error("Failed to read request");
}
- return req;
+ return ret;
},
"req"_a)
.def("reset", [](reader_t& self) { reset_reader(&self); })
diff --git a/tests/test_reader.py b/tests/test_reader.py
index bb3cb7b..688217a 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -30,7 +30,7 @@ def test_zipf_distribution(self):
# Read some requests and verify they are valid
req = Request()
- first_req = reader.read_one_req(req)
+ first_req = reader.read_one_req()
assert first_req.obj_id >= 0
assert first_req.obj_size == 1024
assert hasattr(first_req, "op") # Just check it has op attribute
@@ -44,7 +44,7 @@ def test_uniform_distribution(self):
# Read some requests
req = Request()
for _ in range(10):
- read_req = reader.read_one_req(req)
+ read_req = reader.read_one_req()
assert read_req.obj_size == 512
assert hasattr(read_req, "op") # Just check it has op attribute
@@ -68,13 +68,13 @@ def test_reader_reset(self):
# Read some requests
req = Request()
- first_read = reader.read_one_req(req)
- reader.read_one_req(req)
- reader.read_one_req(req)
+ first_read = reader.read_one_req()
+ reader.read_one_req()
+ reader.read_one_req()
# Reset and read again
reader.reset()
- reset_read = reader.read_one_req(req)
+ reset_read = reader.read_one_req()
# Should get the same first request after reset
assert first_read.obj_id == reset_read.obj_id
@@ -89,7 +89,7 @@ def test_skip_requests(self):
# Verify we can still read remaining requests
req = Request()
- read_req = reader.read_one_req(req)
+ read_req = reader.read_one_req()
assert read_req.valid == True # Should still be able to read
def test_clone_reader(self):
@@ -98,8 +98,8 @@ def test_clone_reader(self):
# Read some requests
req = Request()
- reader.read_one_req(req)
- reader.read_one_req(req)
+ reader.read_one_req()
+ reader.read_one_req()
# Clone the reader
cloned_reader = reader.clone()
@@ -160,7 +160,7 @@ def test_csv_trace_creation(self):
# Read first request
req = Request()
- first_req = reader.read_one_req(req)
+ first_req = reader.read_one_req()
assert first_req.obj_id == 100
assert first_req.obj_size == 1024
@@ -192,11 +192,11 @@ def test_trace_reader_iteration(self):
# Read requests one by one instead of using list()
req = Request()
- first_req = reader.read_one_req(req)
+ first_req = reader.read_one_req()
assert first_req.obj_id == 100
assert first_req.obj_size == 1024
- second_req = reader.read_one_req(req)
+ second_req = reader.read_one_req()
assert second_req.obj_id == 101
assert second_req.obj_size == 2048
@@ -226,21 +226,21 @@ def test_trace_reader_reset_and_skip(self):
# Read some requests
req = Request()
- first_req = reader.read_one_req(req)
- reader.read_one_req(req)
+ first_req = reader.read_one_req()
+ reader.read_one_req()
# Reset and verify we get same first request
reader.reset()
- reset_req = reader.read_one_req(req)
+ reset_req = reader.read_one_req()
assert first_req.obj_id == reset_req.obj_id
# Test skip functionality
reader.reset()
# Instead of using skip_n_req which might fail, just read requests one by one
for _ in range(5):
- reader.read_one_req(req)
+ reader.read_one_req()
- next_req = reader.read_one_req(req)
+ next_req = reader.read_one_req()
assert next_req.obj_id == 105 # Should be 6th request (100+5)
finally:
@@ -276,7 +276,7 @@ def test_trace_reader_sampling(self):
# Read a few requests to verify it works
req = Request()
- first_req = reader.read_one_req(req)
+ first_req = reader.read_one_req()
assert first_req.valid == True
finally:
@@ -411,8 +411,8 @@ def test_request_format_consistency(self):
# Get requests from both readers
req = Request()
- synthetic_req = synthetic_reader.read_one_req(req)
- trace_req = trace_reader.read_one_req(req)
+ synthetic_req = synthetic_reader.read_one_req()
+ trace_req = trace_reader.read_one_req()
# Both should produce Request objects with same attributes
assert hasattr(synthetic_req, "obj_id")