1a1a11a
diff --git a/.github/workflows/python.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/python.yml
Lines changed: 1 addition & 1 deletion
diff --git a/libCacheSim-python/README.md
Lines changed: 47 additions & 7 deletions b/libCacheSim-python/README.md
Lines changed: 47 additions & 7 deletions
diff --git a/libCacheSim-python/examples/demo_unified_interface.py
Lines changed: 131 additions & 0 deletions b/libCacheSim-python/examples/demo_unified_interface.py
Lines changed: 131 additions & 0 deletions
diff --git a/libCacheSim-python/libcachesim/__init__.py
Lines changed: 4 additions & 0 deletions b/libCacheSim-python/libcachesim/__init__.py
Lines changed: 4 additions & 0 deletions
diff --git a/libCacheSim-python/libcachesim/eviction.py
Lines changed: 103 additions & 2 deletions b/libCacheSim-python/libcachesim/eviction.py
Lines changed: 103 additions & 2 deletions
diff --git a/libCacheSim-python/pyproject.toml
Lines changed: 1 addition & 1 deletion b/libCacheSim-python/pyproject.toml
Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Set up Python
         uses: actions/setup-python@v4
 
@@ -5,13 +5,8 @@ Python bindings for libCacheSim, a high-performance cache simulator.
 ## Installation
 
 ```bash
-pip install .
-```
-
-## Development
-
-```bash
-pip install -e .
+cd ..
+bash scripts/install_python.sh
 ```
 
 Test
@@ -192,3 +187,48 @@ When implementing `PythonHookCachePolicy`, you need to provide these hook functi
 - **`free_hook(plugin_data: Any) -> None`**: [Optional] Clean up plugin resources
 
 The `plugin_data` is whatever object you return from `init_hook()` - it can be any Python object like a list, dict, class instance, etc.
+
+### Unified Interface
+
+All cache policies (both built-in and Python hook-based) share the same unified interface:
+
+```python
+import libcachesim as cachesim
+
+# All cache policies work the same way
+cache = cachesim.LRU(cache_size=1024*1024)
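+# or a Python hook-based cache; set_hooks() must be called before get()/process_trace():
+# cache = cachesim.PythonHookCachePolicy(cache_size=1024*1024)
+# cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)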
+
+# Unified interface for all caches:
+req = cachesim.Request()
+req.obj_id = 1
+req.obj_size = 100
+hit = cache.get(req)                    # Process single request
+
+reader = cachesim.open_trace("trace.bin", cachesim.TraceType.ORACLE_GENERAL_TRACE.value)
+miss_ratio = cache.process_trace(reader)  # Process entire trace efficiently
+
+# Unified properties for all caches:
+print(f"Cache size: {cache.cache_size}")
+print(f"Objects: {cache.n_obj}")
+print(f"Occupied bytes: {cache.occupied_byte}")
+print(f"Requests processed: {cache.n_req}")
+```
+
+### Efficient Trace Processing
+
+The `process_trace` method runs the request loop on the C++ side to minimize overhead (for Python hook-based caches, the hooks themselves are still Python functions):
+
+```python
+# Process entire trace with optional limits
+miss_ratio = cache.process_trace(
+    reader,
+    max_req=10000,      # Process max 10K requests
+    max_sec=3600,       # Process max 1 hour of trace
+    start_time=1000,    # Start from timestamp 1000
+    end_time=5000       # End at timestamp 5000
+)
+print(f"Miss ratio: {miss_ratio:.4f}")
+```
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Demo script showing the unified interface for all cache policies.
+This demonstrates how to use both native and Python hook-based caches
+with the same API for seamless algorithm comparison and switching.
+"""
+
+import sys
+import os
+
+# Put the parent directory (the in-tree package) first on sys.path for development testing
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+try:
+    import libcachesim as lcs
+except ImportError as e:
+    print(f"Error importing libcachesim: {e}")
+    print("Make sure the Python binding is built and installed")
+    sys.exit(1)
+
+from collections import OrderedDict
+
+
+def create_trace_reader():
+    """Helper function to create a trace reader."""
+    data_file = os.path.join(
+        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
+        "data",
+        "cloudPhysicsIO.oracleGeneral.bin"
+    )
+    if not os.path.exists(data_file):
+        print(f"Warning: Trace file not found at {data_file}")
+        return None
+    return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value)
+
+
+def create_demo_lru_hooks():
+    """Create demo LRU hooks for Python-based cache policy."""
+
+    def init_hook(cache_size):
+        print(f"  Initializing custom LRU with {cache_size} bytes")
+        return OrderedDict()
+
+    def hit_hook(lru_dict, obj_id, obj_size):
+        if obj_id in lru_dict:
+            lru_dict.move_to_end(obj_id)
+
+    def miss_hook(lru_dict, obj_id, obj_size):
+        lru_dict[obj_id] = obj_size
+
+    def eviction_hook(lru_dict, obj_id, obj_size):
+        if lru_dict:
+            return next(iter(lru_dict))
+        return obj_id
+
+    def remove_hook(lru_dict, obj_id):
+        lru_dict.pop(obj_id, None)
+
+    return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook
+
+
+def demo_unified_interface():
+    """Demonstrate the unified interface across different cache policies."""
+    print("libCacheSim Python Binding - Unified Interface Demo")
+    print("=" * 60)
+
+    cache_size = 1024 * 1024  # 1MB
+
+    # Create different cache policies
+    caches = {
+        "LRU": lcs.LRU(cache_size),
+        "FIFO": lcs.FIFO(cache_size),
+        "ARC": lcs.ARC(cache_size),
+    }
+
+    # Create Python hook-based LRU
+    python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU")
+    init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks()
+    python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
+    caches["Custom Python LRU"] = python_cache
+
+    print(f"Testing {len(caches)} different cache policies with unified interface:")
+
+    # Demo 1: Single request interface
+    print("1. Single Request Interface:")
+    print("   All caches use: cache.get(request)")
+
+    test_req = lcs.Request()
+    test_req.obj_id = 1
+    test_req.obj_size = 1024
+
+    for name, cache in caches.items():
+        result = cache.get(test_req)
+        print(f"   {name:20s}: {'HIT' if result else 'MISS'}")
+
+    # Demo 2: Unified properties interface
+    print("\n2. Unified Properties Interface:")
+    print("   All caches provide: cache_size, n_obj, occupied_byte, n_req")
+
+    for name, cache in caches.items():
+        print(f"   {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, "
+              f"bytes={cache.occupied_byte}, reqs={cache.n_req}")
+
+    # Demo 3: Efficient trace processing
+    print("\n3. Efficient Trace Processing Interface:")
+    print("   All caches use: cache.process_trace(reader, max_req=N)")
+
+    max_requests = 1000
+
+    for name, cache in caches.items():
+        # Create fresh reader for each cache
+        reader = create_trace_reader()
+        if not reader:
+            print(f"   {name:20s}: trace file not available")
+            continue
+
+        miss_ratio = cache.process_trace(reader, max_req=max_requests)
+        print(f"   {name:20s}: miss_ratio={miss_ratio:.4f}")
+
+    print("\nKey Benefits of Unified Interface:")
+    print("   • Same API for all cache policies (built-in + custom)")
+    print("   • Easy to switch between different algorithms")
+    print("   • Efficient trace processing in C++ (no Python overhead)")
+    print("   • Consistent properties and statistics")
+    print("   • Type-safe and well-documented")
+
+    print("\nDemo completed! All cache policies work with the same interface.")
+
+
+if __name__ == "__main__":
+    demo_unified_interface()
@@ -8,6 +8,8 @@
     __version__,
     create_cache,
     open_trace,
+    process_trace,
+    process_trace_python_hook,
 )
 from .const import TraceType
 from .eviction import (
@@ -44,5 +46,7 @@
     "__version__",
     "create_cache",
     "open_trace",
+    "process_trace",
+    "process_trace_python_hook",
     # TODO(haocheng): add more eviction policies
 ]
@@ -31,6 +31,25 @@ def get(self, req: Request) -> bool:
     def __repr__(self) -> str:
         pass
 
+    @abstractmethod
+    def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1):
+        """Process a trace with this cache and return miss ratio.
+
+        This method processes trace data entirely on the C++ side to avoid
+        data movement overhead between Python and C++.
+
+        Args:
+            reader: The trace reader instance
+            max_req: Maximum number of requests to process (-1 for no limit)
+            max_sec: Maximum seconds to process (-1 for no limit)
+            start_time: Start time filter (-1 for no filter)
+            end_time: End time filter (-1 for no filter)
+
+        Returns:
+            float: Miss ratio (0.0 to 1.0)
+        """
+        pass
+
 
 class EvictionPolicy(EvictionPolicyBase):
     """Base class for all eviction policies."""
@@ -44,9 +63,54 @@ def init_cache(self, cache_size: int, **kwargs) -> Cache:
     def get(self, req: Request) -> bool:
         return self.cache.get(req)
 
+    def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1):
+        """Process a trace with this cache and return miss ratio.
+
+        This method processes trace data entirely on the C++ side to avoid
+        data movement overhead between Python and C++.
+
+        Args:
+            reader: The trace reader instance
+            max_req: Maximum number of requests to process (-1 for no limit)
+            max_sec: Maximum seconds to process (-1 for no limit)
+            start_time: Start time filter (-1 for no filter)
+            end_time: End time filter (-1 for no filter)
+
+        Returns:
+            float: Miss ratio (0.0 to 1.0)
+
+        Example:
+            >>> cache = LRU(1024*1024)
+            >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE.value)
+            >>> miss_ratio = cache.process_trace(reader)
+            >>> print(f"Miss ratio: {miss_ratio:.4f}")
+        """
+        from ._libcachesim import process_trace
+        return process_trace(self.cache, reader, max_req, max_sec, start_time, end_time)
+
     def __repr__(self):
         return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})"
 
+    @property
+    def n_req(self):
+        """Number of requests processed."""
+        return self.cache.n_req
+
+    @property
+    def n_obj(self):
+        """Number of objects currently in cache."""
+        return self.cache.n_obj
+
+    @property
+    def occupied_byte(self):
+        """Number of bytes currently occupied in cache."""
+        return self.cache.occupied_byte
+
+    @property
+    def cache_size(self):
+        """Total cache size in bytes."""
+        return self.cache.cache_size
+
 
 class FIFO(EvictionPolicy):
     """First In First Out replacement policy.
@@ -356,7 +420,7 @@ class PythonHookCachePolicy(EvictionPolicyBase):
         >>> hit = cache.get(req)
     """
     def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"):
-        self.cache_size = cache_size
+        self._cache_size = cache_size
         self.cache_name = cache_name
         self.cache = PythonHookCache(cache_size, cache_name)
         self._hooks_set = False
@@ -391,6 +455,38 @@ def get(self, req: Request) -> bool:
             raise RuntimeError("Hooks must be set before using the cache. Call set_hooks() first.")
         return self.cache.get(req)
 
+    def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1):
+        """Process a trace with this cache and return miss ratio.
+
+        The request loop runs on the C++ side to avoid per-request data movement
+        between Python and C++; the hooks passed to set_hooks() remain Python callables.
+
+        Args:
+            reader: The trace reader instance
+            max_req: Maximum number of requests to process (-1 for no limit)
+            max_sec: Maximum seconds to process (-1 for no limit)
+            start_time: Start time filter (-1 for no filter)
+            end_time: End time filter (-1 for no filter)
+
+        Returns:
+            float: Miss ratio (0.0 to 1.0)
+
+        Raises:
+            RuntimeError: If hooks have not been set
+
+        Example:
+            >>> cache = PythonHookCachePolicy(1024*1024)
+            >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
+            >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE.value)
+            >>> miss_ratio = cache.process_trace(reader)
+            >>> print(f"Miss ratio: {miss_ratio:.4f}")
+        """
+        if not self._hooks_set:
+            raise RuntimeError("Hooks must be set before processing trace. Call set_hooks() first.")
+
+        from ._libcachesim import process_trace_python_hook
+        return process_trace_python_hook(self.cache, reader, max_req, max_sec, start_time, end_time)
+
     @property
     def n_req(self):
         """Number of requests processed."""
@@ -406,6 +502,11 @@ def occupied_byte(self):
         """Number of bytes currently occupied in cache."""
         return self.cache.occupied_byte
 
+    @property
+    def cache_size(self):
+        """Total cache size in bytes."""
+        return self.cache.cache_size
+
     def __repr__(self):
-        return f"{self.__class__.__name__}(cache_size={self.cache_size}, " \
+        return f"{self.__class__.__name__}(cache_size={self._cache_size}, " \
                f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})"
@@ -5,7 +5,7 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "libcachesim"
-version = "0.0.1"
+version = "0.3.1"
 description="Python bindings for libCacheSim"
 readme = "README.md"
 requires-python = ">=3.9"