Skip to content

Commit e205c13

Browse files
authored
[Feat] Complete process_trace and reader_init_param (#271)
* [FEAT] Complete process_trace and reader_init_param * Apply copilot suggestion * Add check for zero division * Cleanup * Feat: unify process_trace output * Ruff check * Add ruff lint and format check into pre-commit * Fix copilot comments
1 parent 14f971a commit e205c13

22 files changed

Lines changed: 621 additions & 476 deletions

README.md

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,8 @@ import libcachesim as lcs
333333
reader = lcs.create_zipf_requests(num_objects=1000, num_requests=10000) # synthetic trace
334334
# reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin") # real trace
335335
cache = lcs.FIFO(cache_size=1024*1024)
336-
miss_ratio = cache.process_trace(reader)
337-
print(f"Miss ratio: {miss_ratio:.4f}")
336+
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
337+
print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}")
338338
```
339339

340340
### Extending new algorithm
@@ -344,6 +344,7 @@ With python package, you can extend new algorithm to test your own eviction desi
344344
```python
345345
import libcachesim as lcs
346346
from collections import deque
347+
from contextlib import suppress
347348

348349
cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO")
349350

@@ -360,15 +361,18 @@ def eviction_hook(fifo_queue, obj_id, obj_size):
360361
return fifo_queue[0] # Return first item (oldest)
361362

362363
def remove_hook(fifo_queue, obj_id):
363-
if fifo_queue and fifo_queue[0] == obj_id:
364-
fifo_queue.popleft()
364+
with suppress(ValueError):
365+
fifo_queue.remove(obj_id)
365366

366367
# Set the hooks and test
367368
cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
368369

369-
reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin")
370-
miss_ratio = cache.process_trace(reader)
371-
print(f"Miss ratio: {miss_ratio:.4f}")
370+
reader = lcs.open_trace(
371+
trace_path="./data/cloudPhysicsIO.oracleGeneral.bin",
372+
params=lcs.ReaderInitParam(ignore_obj_size=True)
373+
)
374+
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
375+
print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}")
372376
```
373377

374378

doc/quickstart_plugin.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,8 @@ cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
264264

265265
# Process traces efficiently
266266
reader = lcs.open_trace("./data/cloudPhysicsIO.vscsi", lcs.TraceType.VSCSI_TRACE)
267-
miss_ratio = cache.process_trace(reader)
268-
print(f"Miss ratio: {miss_ratio:.4f}")
267+
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
268+
print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}")
269269

270270
# Or process individual requests
271271
req = lcs.Request(obj_id=1, obj_size=100)

libCacheSim-python/README.md

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
[![Python Release](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml/badge.svg)](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml)
44
[![Python Versions](https://img.shields.io/pypi/pyversions/libcachesim.svg?logo=python&logoColor=white)](https://pypi.org/project/libcachesim)
55
[![PyPI Version](https://img.shields.io/pypi/v/libcachesim.svg?)](https://pypi.org/project/libcachesim)
6-
![PyPI - Downloads](https://img.shields.io/pypi/dd/libcachesim)
7-
6+
[![PyPI - Downloads](https://img.shields.io/pypi/dd/libcachesim)](https://pypistats.org/packages/libcachesim)
87

98
Python bindings for libCacheSim, a high-performance cache simulator and analysis library.
109

@@ -63,24 +62,35 @@ print(cache.get(req)) # True (second access)
6362

6463
### Trace Processing
6564

65+
To simulate with traces, we need to read the requests of traces correctly. `open_trace` is a unified interface for trace reading, which accepts three parameters:
66+
67+
- `trace_path`: trace path, can be a relative or absolute path.
68+
- `type` (optional): if not given, we will automatically infer the type of trace according to the suffix of the trace file.
69+
- `params` (optional): if not given, default params are applied.
70+
6671
```python
6772
import libcachesim as lcs
6873

6974
# Open trace and process efficiently
70-
reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin", lcs.TraceType.ORACLE_GENERAL_TRACE)
75+
reader = lcs.open_trace(
76+
trace_path = "./data/cloudPhysicsIO.oracleGeneral.bin",
77+
type = lcs.TraceType.ORACLE_GENERAL_TRACE,
78+
params = lcs.ReaderInitParam(ignore_obj_size=True)
79+
)
7180
cache = lcs.S3FIFO(cache_size=1024*1024)
7281

7382
# Process entire trace efficiently (C++ backend)
74-
miss_ratio = cache.process_trace(reader)
75-
print(f"Miss ratio: {miss_ratio:.4f}")
83+
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
84+
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
7685

86+
cache = lcs.S3FIFO(cache_size=1024*1024)
7787
# Process with limits and time ranges
78-
miss_ratio = cache.process_trace(
88+
obj_miss_ratio, byte_miss_ratio = cache.process_trace(
7989
reader,
80-
start_req=100,
90+
start_req=0,
8191
max_req=1000
8292
)
83-
print(f"Miss ratio: {miss_ratio:.4f}")
93+
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
8494
```
8595

8696
## Custom Cache Policies
@@ -147,8 +157,8 @@ print(f"Cache hit: {hit}") # Should be False (miss)
147157
```python
148158
import libcachesim as lcs
149159
from collections import deque
160+
from contextlib import suppress
150161

151-
# Create a custom FIFO cache
152162
cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO")
153163

154164
def init_hook(cache_size):
@@ -164,15 +174,13 @@ def eviction_hook(fifo_queue, obj_id, obj_size):
164174
return fifo_queue[0] # Return first item (oldest)
165175

166176
def remove_hook(fifo_queue, obj_id):
167-
if fifo_queue and fifo_queue[0] == obj_id:
168-
fifo_queue.popleft()
177+
with suppress(ValueError):
178+
fifo_queue.remove(obj_id)
169179

170180
# Set the hooks and test
171181
cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
172182

173-
req = lcs.Request()
174-
req.obj_id = 1
175-
req.obj_size = 100
183+
req = lcs.Request(obj_id=1, obj_size=100)
176184
hit = cache.get(req)
177185
print(f"Cache hit: {hit}") # Should be False (miss)
178186
```
@@ -225,6 +233,7 @@ req = lcs.Request()
225233
req.obj_id = 1
226234
req.obj_size = 100
227235
hit = lru_cache.get(req)
236+
print(hit)
228237
```
229238

230239
## Examples and Testing
@@ -238,8 +247,8 @@ def compare_algorithms(trace_path):
238247
algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC']
239248
for algo_name in algorithms:
240249
cache = getattr(lcs, algo_name)(cache_size=1024*1024)
241-
miss_ratio = cache.process_trace(reader)
242-
print(f"{algo_name}\t\t{miss_ratio:.4f}")
250+
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
251+
print(f"{algo_name}\t\tObj: {obj_miss_ratio:.4f}, Byte: {byte_miss_ratio:.4f}")
243252

244253
compare_algorithms("./data/cloudPhysicsIO.vscsi")
245254
```
@@ -299,8 +308,8 @@ caches = [
299308
]
300309

301310
for i, cache in enumerate(caches):
302-
miss_ratio_oracle = cache.process_trace(oracle_reader)
303-
miss_ratio_csv = cache.process_trace(csv_reader)
311+
miss_ratio_oracle = cache.process_trace(oracle_reader)[0]
312+
miss_ratio_csv = cache.process_trace(csv_reader)[0]
304313
print(f"Cache {i} miss ratio: {miss_ratio_oracle:.4f}, {miss_ratio_csv:.4f}")
305314
```
306315

libCacheSim-python/examples/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ python -m pytest ../tests/ -v
257257
3. **Process large traces with C++ backend**:
258258
```python
259259
# Fast: C++ processing
260-
miss_ratio = lcs.process_trace(cache, reader)
260+
obj_miss_ratio, byte_miss_ratio = lcs.process_trace(cache, reader)
261261

262262
# Slow: Python loop
263263
for req in reader:

libCacheSim-python/examples/demo_unified_interface.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import os
1010

1111
# Add parent directory for development testing
12-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
12+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
1313

1414
try:
1515
import libcachesim as lcs
@@ -24,9 +24,7 @@
2424
def create_trace_reader():
2525
"""Helper function to create a trace reader."""
2626
data_file = os.path.join(
27-
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
28-
"data",
29-
"cloudPhysicsIO.oracleGeneral.bin"
27+
os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin"
3028
)
3129
if not os.path.exists(data_file):
3230
print(f"Warning: Trace file not found at {data_file}")
@@ -98,8 +96,10 @@ def demo_unified_interface():
9896
print(" All caches provide: cache_size, n_obj, occupied_byte, n_req")
9997

10098
for name, cache in caches.items():
101-
print(f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, "
102-
f"bytes={cache.occupied_byte}, reqs={cache.n_req}")
99+
print(
100+
f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, "
101+
f"bytes={cache.occupied_byte}, reqs={cache.n_req}"
102+
)
103103

104104
# Demo 3: Efficient trace processing
105105
print("\n3. Efficient Trace Processing Interface:")
@@ -114,8 +114,8 @@ def demo_unified_interface():
114114
print(f" {name:20s}: trace file not available")
115115
continue
116116

117-
miss_ratio = cache.process_trace(reader, max_req=max_requests)
118-
print(f" {name:20s}: miss_ratio={miss_ratio:.4f}")
117+
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=max_requests)
118+
print(f" {name:20s}: obj_miss_ratio={obj_miss_ratio:.4f}, byte_miss_ratio={byte_miss_ratio:.4f}")
119119

120120
print("\nKey Benefits of Unified Interface:")
121121
print(" • Same API for all cache policies (built-in + custom)")

libCacheSim-python/examples/python_hook_cache_example.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,11 @@ def test_cache_policy(cache, name):
126126

127127
# Test requests: obj_id, obj_size
128128
test_requests = [
129-
(1, 100), (2, 100), (3, 100), (4, 100), (5, 100), # Fill cache
129+
(1, 100),
130+
(2, 100),
131+
(3, 100),
132+
(4, 100),
133+
(5, 100), # Fill cache
130134
(1, 100), # Hit
131135
(6, 100), # Miss, should evict something
132136
(2, 100), # Hit or miss depending on policy

libCacheSim-python/examples/stream_request_example.py

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ def main():
3232
zipf_generator = lcs.create_zipf_requests(
3333
num_objects=num_objects,
3434
num_requests=num_requests,
35-
alpha=1.0, # Zipf skewness
36-
obj_size=obj_size, # Object size in bytes
37-
seed=42 # For reproducibility
35+
alpha=1.0, # Zipf skewness
36+
obj_size=obj_size, # Object size in bytes
37+
seed=42, # For reproducibility
3838
)
3939

4040
print(f"Cache size: {cache_size // 1024 // 1024}MB")
@@ -61,10 +61,7 @@ def main():
6161

6262
# Create a uniform-distributed request generator
6363
uniform_generator = lcs.create_uniform_requests(
64-
num_objects=num_objects,
65-
num_requests=num_requests,
66-
obj_size=obj_size,
67-
seed=42
64+
num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42
6865
)
6966

7067
print(f"Generated {num_requests} uniform requests for {num_objects} objects")
@@ -94,11 +91,7 @@ def main():
9491

9592
for alpha in alphas:
9693
generator = lcs.create_zipf_requests(
97-
num_objects=num_objects,
98-
num_requests=num_requests,
99-
alpha=alpha,
100-
obj_size=obj_size,
101-
seed=42
94+
num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
10295
)
10396

10497
cache = lcs.LRU(cache_size=cache_size)
@@ -123,16 +116,12 @@ def main():
123116

124117
# Fixed workload
125118
generator = lcs.create_zipf_requests(
126-
num_objects=num_objects,
127-
num_requests=num_requests,
128-
alpha=1.0,
129-
obj_size=obj_size,
130-
seed=42
119+
num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
131120
)
132121

133122
cache_sizes = [
134-
1 * 1024 * 1024, # 1MB
135-
5 * 1024 * 1024, # 5MB
123+
1 * 1024 * 1024, # 1MB
124+
5 * 1024 * 1024, # 5MB
136125
10 * 1024 * 1024, # 10MB
137126
50 * 1024 * 1024, # 50MB
138127
]
@@ -145,18 +134,14 @@ def main():
145134

146135
# Create fresh generator for each test
147136
test_generator = lcs.create_zipf_requests(
148-
num_objects=num_objects,
149-
num_requests=num_requests,
150-
alpha=1.0,
151-
obj_size=obj_size,
152-
seed=42
137+
num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
153138
)
154139

155140
hit_count = sum(1 for req in test_generator if cache.get(req))
156141
hit_ratio = hit_count / num_requests
157142
objects_fit = cache_size // obj_size
158143

159-
print(f"{cache_size//1024//1024}MB{'':<8} {hit_ratio:<12.3f} ~{objects_fit}")
144+
print(f"{cache_size // 1024 // 1024}MB{'':<8} {hit_ratio:<12.3f} ~{objects_fit}")
160145

161146
print("\nNotes:")
162147
print("- Higher α values create more skewed access patterns")

0 commit comments

Comments
 (0)