Skip to content

Commit 741fc49

Browse files
committed
Clean up
1 parent 2d23a17 commit 741fc49

11 files changed

Lines changed: 995 additions & 114 deletions

File tree

.github/workflows/python.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jobs:
66
build:
77
runs-on: ubuntu-latest
88
steps:
9-
- uses: actions/checkout@v3
9+
- uses: actions/checkout@v4
1010

1111
- name: Set up Python
1212
uses: actions/setup-python@v4

libCacheSim-python/README.md

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,8 @@ Python bindings for libCacheSim, a high-performance cache simulator.
55
## Installation
66

77
```bash
8-
pip install .
9-
```
10-
11-
## Development
12-
13-
```bash
14-
pip install -e .
8+
cd ..
9+
bash scripts/install_python.sh
1510
```
1611

1712
Test
@@ -192,3 +187,48 @@ When implementing `PythonHookCachePolicy`, you need to provide these hook functi
192187
- **`free_hook(plugin_data: Any) -> None`**: [Optional] Clean up plugin resources
193188

194189
The `plugin_data` is whatever object you return from `init_hook()` - it can be any Python object like a list, dict, class instance, etc.
190+
191+
### Unified Interface
192+
193+
All cache policies (both built-in and Python hook-based) share the same unified interface:
194+
195+
```python
196+
import libcachesim as cachesim
197+
198+
# All cache policies work the same way
199+
cache = cachesim.LRU(cache_size=1024*1024)
200+
# or
201+
cache = cachesim.PythonHookCachePolicy(cache_size=1024*1024)
202+
# cache.set_hooks(...) for Python hook cache
203+
204+
# Unified interface for all caches:
205+
req = cachesim.Request()
206+
req.obj_id = 1
207+
req.obj_size = 100
208+
hit = cache.get(req) # Process single request
209+
210+
reader = cachesim.open_trace("trace.bin", cachesim.TraceType.ORACLE_GENERAL_TRACE.value)
211+
miss_ratio = cache.process_trace(reader) # Process entire trace efficiently
212+
213+
# Unified properties for all caches:
214+
print(f"Cache size: {cache.cache_size}")
215+
print(f"Objects: {cache.n_obj}")
216+
print(f"Occupied bytes: {cache.occupied_byte}")
217+
print(f"Requests processed: {cache.n_req}")
218+
```
219+
220+
### Efficient Trace Processing
221+
222+
The `process_trace` method runs the request loop on the C++ side to minimize overhead (for `PythonHookCachePolicy`, the hook functions themselves are still Python callables):
223+
224+
```python
225+
# Process entire trace with optional limits
226+
miss_ratio = cache.process_trace(
227+
reader,
228+
max_req=10000, # Process max 10K requests
229+
max_sec=3600, # Process max 1 hour of trace
230+
start_time=1000, # Start from timestamp 1000
231+
end_time=5000 # End at timestamp 5000
232+
)
233+
print(f"Miss ratio: {miss_ratio:.4f}")
234+
```
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Demo script showing the unified interface for all cache policies.
4+
This demonstrates how to use both native and Python hook-based caches
5+
with the same API for seamless algorithm comparison and switching.
6+
"""
7+
8+
import sys
9+
import os
10+
11+
# Add parent directory for development testing
12+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
13+
14+
try:
15+
import libcachesim as lcs
16+
except ImportError as e:
17+
print(f"Error importing libcachesim: {e}")
18+
print("Make sure the Python binding is built and installed")
19+
sys.exit(1)
20+
21+
from collections import OrderedDict
22+
23+
24+
def create_trace_reader():
    """Open the bundled oracleGeneral sample trace, if it is present.

    The trace is looked up at ``data/cloudPhysicsIO.oracleGeneral.bin``
    under the directory three levels above this script.

    Returns:
        The reader produced by ``lcs.open_trace``, or ``None`` (after
        printing a warning) when the trace file does not exist.
    """
    # Climb three directory levels from this file to reach the folder that
    # holds the "data" directory.
    base_dir = __file__
    for _ in range(3):
        base_dir = os.path.dirname(base_dir)

    data_file = os.path.join(base_dir, "data", "cloudPhysicsIO.oracleGeneral.bin")

    if os.path.exists(data_file):
        return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value)

    print(f"Warning: Trace file not found at {data_file}")
    return None
35+
36+
37+
def create_demo_lru_hooks():
    """Build the hook callables for a demo LRU policy backed by an OrderedDict.

    The plugin state is an ``OrderedDict`` mapping ``obj_id`` to ``obj_size``;
    the first key is the least recently used object.

    Returns:
        A 5-tuple ``(init_hook, hit_hook, miss_hook, eviction_hook,
        remove_hook)``.
    """

    def init_hook(cache_size):
        # The returned object becomes the state passed to every other hook.
        print(f" Initializing custom LRU with {cache_size} bytes")
        return OrderedDict()

    def hit_hook(lru_dict, obj_id, obj_size):
        # Promote the object to most-recently-used; unknown ids are ignored.
        try:
            lru_dict.move_to_end(obj_id)
        except KeyError:
            pass

    def miss_hook(lru_dict, obj_id, obj_size):
        # Record the object with its size (new keys land at the MRU end).
        lru_dict[obj_id] = obj_size

    def eviction_hook(lru_dict, obj_id, obj_size):
        # Victim is the oldest tracked key; fall back to the incoming id
        # when nothing is tracked.
        return next(iter(lru_dict), obj_id)

    def remove_hook(lru_dict, obj_id):
        # Forget the object if it is tracked; silently do nothing otherwise.
        if obj_id in lru_dict:
            del lru_dict[obj_id]

    return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook
60+
61+
62+
def demo_unified_interface():
    """Demonstrate the unified interface across different cache policies.

    Builds the native LRU, FIFO and ARC policies plus a Python hook-based
    LRU, then drives every one of them through the same three entry points:
    ``cache.get(request)``, the statistics properties, and
    ``cache.process_trace(reader, max_req=N)``. Results are printed to stdout.
    """
    print("libCacheSim Python Binding - Unified Interface Demo")
    print("=" * 60)

    cache_size = 1024 * 1024  # 1MB

    # Create different cache policies
    caches = {
        "LRU": lcs.LRU(cache_size),
        "FIFO": lcs.FIFO(cache_size),
        "ARC": lcs.ARC(cache_size),
    }

    # Create Python hook-based LRU
    python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU")
    init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks()
    python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
    caches["Custom Python LRU"] = python_cache

    print(f"Testing {len(caches)} different cache policies with unified interface:")

    # Demo 1: Single request interface
    print("1. Single Request Interface:")
    print(" All caches use: cache.get(request)")

    test_req = lcs.Request()
    test_req.obj_id = 1
    test_req.obj_size = 1024

    for name, cache in caches.items():
        result = cache.get(test_req)
        print(f" {name:20s}: {'HIT' if result else 'MISS'}")

    # Demo 2: Unified properties interface
    print("\n2. Unified Properties Interface:")
    print(" All caches provide: cache_size, n_obj, occupied_byte, n_req")

    for name, cache in caches.items():
        print(f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, "
              f"bytes={cache.occupied_byte}, reqs={cache.n_req}")

    # Demo 3: Efficient trace processing
    print("\n3. Efficient Trace Processing Interface:")
    print(" All caches use: cache.process_trace(reader, max_req=N)")

    max_requests = 1000

    for name, cache in caches.items():
        # Create fresh reader for each cache
        reader = create_trace_reader()
        # create_trace_reader() signals "no trace file" with None; compare by
        # identity so a valid reader object is never skipped just because it
        # happens to evaluate as falsy.
        if reader is None:
            print(f" {name:20s}: trace file not available")
            continue

        miss_ratio = cache.process_trace(reader, max_req=max_requests)
        print(f" {name:20s}: miss_ratio={miss_ratio:.4f}")

    print("\nKey Benefits of Unified Interface:")
    print(" • Same API for all cache policies (built-in + custom)")
    print(" • Easy to switch between different algorithms")
    print(" • Efficient trace processing in C++ (no Python overhead)")
    print(" • Consistent properties and statistics")
    print(" • Type-safe and well-documented")

    print("\nDemo completed! All cache policies work with the same interface.")
128+
129+
130+
if __name__ == "__main__":
131+
demo_unified_interface()

libCacheSim-python/libcachesim/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
__version__,
99
create_cache,
1010
open_trace,
11+
process_trace,
12+
process_trace_python_hook,
1113
)
1214
from .const import TraceType
1315
from .eviction import (
@@ -44,5 +46,7 @@
4446
"__version__",
4547
"create_cache",
4648
"open_trace",
49+
"process_trace",
50+
"process_trace_python_hook",
4751
# TODO(haocheng): add more eviction policies
4852
]

libCacheSim-python/libcachesim/eviction.py

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,25 @@ def get(self, req: Request) -> bool:
3131
def __repr__(self) -> str:
3232
pass
3333

34+
@abstractmethod
def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1):
    """Replay requests from ``reader`` through this cache; return the miss ratio.

    Abstract hook: concrete policies supply the implementation.

    Args:
        reader: The trace reader instance.
        max_req: Upper bound on the number of requests (-1 means no limit).
        max_sec: Upper bound on the seconds of trace (-1 means no limit).
        start_time: Start time filter (-1 means no filter).
        end_time: End time filter (-1 means no filter).

    Returns:
        float: Miss ratio (0.0 to 1.0).
    """
52+
3453

3554
class EvictionPolicy(EvictionPolicyBase):
3655
"""Base class for all eviction policies."""
@@ -44,9 +63,54 @@ def init_cache(self, cache_size: int, **kwargs) -> Cache:
4463
def get(self, req: Request) -> bool:
4564
return self.cache.get(req)
4665

66+
def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1):
    """Replay a trace through the wrapped cache and return its miss ratio.

    The work is delegated to ``process_trace`` from the ``_libcachesim``
    extension module, which receives ``self.cache`` and the reader.

    Args:
        reader: The trace reader instance.
        max_req: Maximum number of requests to process (-1 for no limit).
        max_sec: Maximum seconds to process (-1 for no limit).
        start_time: Start time filter (-1 for no filter).
        end_time: End time filter (-1 for no filter).

    Returns:
        float: Miss ratio (0.0 to 1.0).

    Example:
        >>> cache = LRU(1024*1024)
        >>> reader = open_trace("trace.bin", TraceType.ORACLE_GENERAL_TRACE.value)
        >>> miss_ratio = cache.process_trace(reader)
        >>> print(f"Miss ratio: {miss_ratio:.4f}")
    """
    # Resolved at call time, exactly as the original method did.
    from ._libcachesim import process_trace as _run_trace

    limits = (max_req, max_sec, start_time, end_time)
    return _run_trace(self.cache, reader, *limits)
90+
4791
def __repr__(self):
4892
return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})"
4993

94+
@property
def n_req(self):
    """Request count, read from the wrapped cache's ``n_req`` attribute."""
    backend = self.cache
    return backend.n_req
98+
99+
@property
def n_obj(self):
    """Object count, read from the wrapped cache's ``n_obj`` attribute."""
    backend = self.cache
    return backend.n_obj
103+
104+
@property
def occupied_byte(self):
    """Occupied bytes, read from the wrapped cache's ``occupied_byte`` attribute."""
    backend = self.cache
    return backend.occupied_byte
108+
109+
@property
def cache_size(self):
    """Total cache size, read from the wrapped cache's ``cache_size`` attribute."""
    backend = self.cache
    return backend.cache_size
113+
50114

51115
class FIFO(EvictionPolicy):
52116
"""First In First Out replacement policy.
@@ -356,7 +420,7 @@ class PythonHookCachePolicy(EvictionPolicyBase):
356420
>>> hit = cache.get(req)
357421
"""
358422
def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"):
359-
self.cache_size = cache_size
423+
self._cache_size = cache_size
360424
self.cache_name = cache_name
361425
self.cache = PythonHookCache(cache_size, cache_name)
362426
self._hooks_set = False
@@ -391,6 +455,38 @@ def get(self, req: Request) -> bool:
391455
raise RuntimeError("Hooks must be set before using the cache. Call set_hooks() first.")
392456
return self.cache.get(req)
393457

458+
def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1):
    """Replay a trace through the hook-based cache and return its miss ratio.

    The work is delegated to ``process_trace_python_hook`` from the
    ``_libcachesim`` extension module, which receives ``self.cache`` and the
    reader. Hooks must have been installed with ``set_hooks()`` first.

    Args:
        reader: The trace reader instance.
        max_req: Maximum number of requests to process (-1 for no limit).
        max_sec: Maximum seconds to process (-1 for no limit).
        start_time: Start time filter (-1 for no filter).
        end_time: End time filter (-1 for no filter).

    Returns:
        float: Miss ratio (0.0 to 1.0).

    Raises:
        RuntimeError: If hooks have not been set.

    Example:
        >>> cache = PythonHookCachePolicy(1024*1024)
        >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
        >>> reader = open_trace("trace.bin", TraceType.ORACLE_GENERAL_TRACE.value)
        >>> miss_ratio = cache.process_trace(reader)
        >>> print(f"Miss ratio: {miss_ratio:.4f}")
    """
    hooks_ready = self._hooks_set
    if not hooks_ready:
        # Refuse to run before set_hooks() has been called.
        raise RuntimeError("Hooks must be set before processing trace. Call set_hooks() first.")

    # Resolved at call time, exactly as the original method did.
    from ._libcachesim import process_trace_python_hook as _run_trace

    limits = (max_req, max_sec, start_time, end_time)
    return _run_trace(self.cache, reader, *limits)
489+
394490
@property
395491
def n_req(self):
396492
"""Number of requests processed."""
@@ -406,6 +502,11 @@ def occupied_byte(self):
406502
"""Number of bytes currently occupied in cache."""
407503
return self.cache.occupied_byte
408504

505+
@property
def cache_size(self):
    """Total cache size, read from the wrapped cache's ``cache_size`` attribute."""
    backend = self.cache
    return backend.cache_size
509+
409510
def __repr__(self):
410-
return f"{self.__class__.__name__}(cache_size={self.cache_size}, " \
511+
return f"{self.__class__.__name__}(cache_size={self._cache_size}, " \
411512
f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})"

libCacheSim-python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "scikit_build_core.build"
55

66
[project]
77
name = "libcachesim"
8-
version = "0.0.1"
8+
version = "0.3.1"
99
description="Python bindings for libCacheSim"
1010
readme = "README.md"
1111
requires-python = ">=3.9"

0 commit comments

Comments
 (0)