From 9d0b8a7206fb0d933495fbc615fe1b5a9ba7967d Mon Sep 17 00:00:00 2001 From: haochengxia Date: Sun, 20 Jul 2025 23:33:41 -0400 Subject: [PATCH 1/4] Pybind refactor --- .vscode/settings.json | 8 +- libCacheSim-python/CMakeLists.txt | 32 +- libCacheSim-python/libcachesim/__init__.py | 96 +- libCacheSim-python/libcachesim/__init__.pyi | 472 +++---- libCacheSim-python/libcachesim/cache.py | 396 ++++++ libCacheSim-python/libcachesim/const.py | 1 - libCacheSim-python/libcachesim/eviction.py | 713 ---------- libCacheSim-python/libcachesim/protocols.py | 71 + .../libcachesim/synthetic_reader.py | 408 ++++++ .../libcachesim/trace_analyzer.py | 29 + .../libcachesim/trace_generator.py | 215 --- .../libcachesim/trace_reader.py | 251 ++++ libCacheSim-python/libcachesim/util.py | 50 + libCacheSim-python/src/exception.cpp | 56 + libCacheSim-python/src/exception.h | 33 + libCacheSim-python/src/export.cpp | 38 + libCacheSim-python/src/export.h | 27 + libCacheSim-python/src/export_analyzer.cpp | 136 ++ libCacheSim-python/src/export_cache.cpp | 493 +++++++ libCacheSim-python/src/export_misc.cpp | 30 + libCacheSim-python/src/export_reader.cpp | 312 +++++ libCacheSim-python/src/pylibcachesim.cpp | 1223 ----------------- libCacheSim-python/tests/conftest.py | 26 - libCacheSim-python/tests/test_eviction.py | 62 - libCacheSim-python/tests/test_example.py | 16 + .../tests/test_process_trace.py | 220 --- .../tests/test_python_hook_cache.py | 205 --- .../tests/test_trace_generator.py | 135 -- .../tests/test_unified_interface.py | 181 --- libCacheSim-python/tests/utils.py | 16 - libCacheSim/traceReader/CMakeLists.txt | 6 +- scripts/install_python_dev.sh | 2 +- 32 files changed, 2641 insertions(+), 3318 deletions(-) create mode 100644 libCacheSim-python/libcachesim/cache.py delete mode 100644 libCacheSim-python/libcachesim/const.py delete mode 100644 libCacheSim-python/libcachesim/eviction.py create mode 100644 libCacheSim-python/libcachesim/protocols.py create mode 100644 
libCacheSim-python/libcachesim/synthetic_reader.py create mode 100644 libCacheSim-python/libcachesim/trace_analyzer.py delete mode 100644 libCacheSim-python/libcachesim/trace_generator.py create mode 100644 libCacheSim-python/libcachesim/trace_reader.py create mode 100644 libCacheSim-python/libcachesim/util.py create mode 100644 libCacheSim-python/src/exception.cpp create mode 100644 libCacheSim-python/src/exception.h create mode 100644 libCacheSim-python/src/export.cpp create mode 100644 libCacheSim-python/src/export.h create mode 100644 libCacheSim-python/src/export_analyzer.cpp create mode 100644 libCacheSim-python/src/export_cache.cpp create mode 100644 libCacheSim-python/src/export_misc.cpp create mode 100644 libCacheSim-python/src/export_reader.cpp delete mode 100644 libCacheSim-python/src/pylibcachesim.cpp delete mode 100644 libCacheSim-python/tests/test_eviction.py create mode 100644 libCacheSim-python/tests/test_example.py delete mode 100644 libCacheSim-python/tests/test_process_trace.py delete mode 100644 libCacheSim-python/tests/test_python_hook_cache.py delete mode 100644 libCacheSim-python/tests/test_trace_generator.py delete mode 100644 libCacheSim-python/tests/test_unified_interface.py delete mode 100644 libCacheSim-python/tests/utils.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 407bc808a..986387493 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -108,7 +108,10 @@ "editor.formatOnSave": true, "editor.insertSpaces": true, "editor.detectIndentation": true, - "editor.rulers": [80, 100], + "editor.rulers": [ + 80, + 100 + ], "editor.wordWrap": "wordWrapColumn", "editor.wordWrapColumn": 100, "files.trimTrailingWhitespace": true, @@ -133,5 +136,6 @@ "**/*.code-search": true }, "git.ignoreLimitWarning": true, - "terminal.integrated.cwd": "${workspaceFolder}" + "terminal.integrated.cwd": "${workspaceFolder}", + "python.formatting.provider": "yapf" } diff --git a/libCacheSim-python/CMakeLists.txt 
b/libCacheSim-python/CMakeLists.txt index aebee06c3..a8b76ec79 100644 --- a/libCacheSim-python/CMakeLists.txt +++ b/libCacheSim-python/CMakeLists.txt @@ -76,22 +76,32 @@ else() message(FATAL_ERROR "Pre-built libCacheSim library not found. Please build the main project first: cd .. && cmake -G Ninja -B build && ninja -C build") endif() -python_add_library(_libcachesim MODULE - src/pylibcachesim.cpp +include_directories(src) + +python_add_library(libcachesim_python MODULE + src/export.cpp + src/export_cache.cpp + src/export_reader.cpp + src/export_analyzer.cpp + src/export_misc.cpp + src/exception.cpp ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/cli_reader_utils.c + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/traceUtils/traceConvLCS.cpp + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/traceUtils/traceConvOracleGeneral.cpp + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/traceUtils/utils.cpp WITH_SOABI ) -set_target_properties(_libcachesim PROPERTIES +set_target_properties(libcachesim_python PROPERTIES POSITION_INDEPENDENT_CODE ON INSTALL_RPATH_USE_LINK_PATH TRUE BUILD_WITH_INSTALL_RPATH TRUE INSTALL_RPATH "$ORIGIN" ) -target_compile_definitions(_libcachesim PRIVATE VERSION_INFO=${PROJECT_VERSION}) +target_compile_definitions(libcachesim_python PRIVATE VERSION_INFO=${PROJECT_VERSION}) -target_link_libraries(_libcachesim PRIVATE +target_link_libraries(libcachesim_python PRIVATE ${LIBCACHESIM_TARGET} pybind11::headers pybind11::module @@ -102,8 +112,8 @@ target_link_libraries(_libcachesim PRIVATE # Add platform-specific link options and libraries if(CMAKE_SYSTEM_NAME STREQUAL "Linux") # GNU ld option, only available on Linux - target_link_options(_libcachesim PRIVATE -Wl,--no-as-needed) - target_link_libraries(_libcachesim PRIVATE dl) + target_link_options(libcachesim_python PRIVATE -Wl,--no-as-needed) + target_link_libraries(libcachesim_python PRIVATE dl) elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") # macOS doesn't need --no-as-needed # dl functions are part of the system library 
on macOS @@ -112,21 +122,21 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") # Find argp library on macOS find_library(ARGP_LIBRARY argp PATHS /opt/homebrew/lib /usr/local/lib) if(ARGP_LIBRARY) - target_link_libraries(_libcachesim PRIVATE ${ARGP_LIBRARY}) + target_link_libraries(libcachesim_python PRIVATE ${ARGP_LIBRARY}) endif() # Find and link other dependencies that might be needed find_library(INTL_LIBRARY intl PATHS /opt/homebrew/lib /usr/local/lib) if(INTL_LIBRARY) - target_link_libraries(_libcachesim PRIVATE ${INTL_LIBRARY}) + target_link_libraries(libcachesim_python PRIVATE ${INTL_LIBRARY}) endif() else() # Other platforms - try to link dl if available find_library(DL_LIBRARY dl) if(DL_LIBRARY) - target_link_libraries(_libcachesim PRIVATE ${DL_LIBRARY}) + target_link_libraries(libcachesim_python PRIVATE ${DL_LIBRARY}) endif() endif() # install to wheel directory -install(TARGETS _libcachesim LIBRARY DESTINATION libcachesim) +install(TARGETS libcachesim_python LIBRARY DESTINATION libcachesim) diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py index 47e693cde..b9424a37b 100644 --- a/libCacheSim-python/libcachesim/__init__.py +++ b/libCacheSim-python/libcachesim/__init__.py @@ -2,83 +2,89 @@ from __future__ import annotations -from ._libcachesim import ( +from .libcachesim_python import ( Cache, - Reader, - ReaderInitParam, Request, ReqOp, TraceType, + SamplerType, __doc__, __version__, - open_trace, - process_trace, - process_trace_python_hook, ) -from .eviction import ( - ARC, - Belady, - BeladySize, - Cacheus, - Clock, + +from .cache import ( + CacheBase, + # Core algorithms + LRU, FIFO, - LeCaR, LFU, - LFUDA, - LRB, - LRU, - PythonHookCachePolicy, - QDLP, + ARC, + Clock, + Random, + # Advanced algorithms S3FIFO, Sieve, - SLRU, - ThreeLCache, - TinyLFU, + LIRS, TwoQ, + SLRU, WTinyLFU, -) -from .trace_generator import ( - create_zipf_requests, - create_uniform_requests, + LeCaR, + LFUDA, + ClockPro, + Cacheus, 
+ # Optimal algorithms + Belady, + BeladySize, + # Plugin cache + PythonHookCachePolicy, ) +from .trace_reader import TraceReader +from .trace_analyzer import TraceAnalyzer +from .synthetic_reader import SyntheticReader, create_zipf_requests, create_uniform_requests +from .util import Util + __all__ = [ # Core classes "Cache", - "Reader", "Request", - "ReaderInitParam", - # Trace types and operations - "TraceType", "ReqOp", - # Cache policies + "TraceType", + "SamplerType", + # Cache base class + "CacheBase", + # Core cache algorithms "LRU", "FIFO", + "LFU", "ARC", "Clock", - "LFU", - "LFUDA", - "SLRU", + "Random", + # Advanced cache algorithms "S3FIFO", "Sieve", - "TinyLFU", - "WTinyLFU", + "LIRS", "TwoQ", - "ThreeLCache", - "Belady", - "BeladySize", - "LRB", - "QDLP", + "SLRU", + "WTinyLFU", "LeCaR", + "LFUDA", + "ClockPro", "Cacheus", - # Custom cache policy + # Optimal algorithms + "Belady", + "BeladySize", + # Plugin cache "PythonHookCachePolicy", - # Functions - "open_trace", - "process_trace", - "process_trace_python_hook", + # Readers and analyzers + "TraceReader", + "TraceAnalyzer", + "SyntheticReader", + # Trace generators "create_zipf_requests", "create_uniform_requests", + # Utilities + "Util", # Metadata "__doc__", "__version__", diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi index 6992a74ae..213eb1eb8 100644 --- a/libCacheSim-python/libcachesim/__init__.pyi +++ b/libCacheSim-python/libcachesim/__init__.pyi @@ -1,293 +1,247 @@ -""" -libCacheSim Python bindings --------------------------- - -.. currentmodule:: libcachesim - -.. 
autosummary:: - :toctree: _generate - - open_trace - ARC - Clock - FIFO - LRB - LRU - S3FIFO - Sieve - ThreeLCache - TinyLFU - TwoQ - Cache - Request - Reader - reader_init_param_t - TraceType - PythonHookCachePolicy - process_trace - process_trace_python_hook - create_zipf_requests - create_uniform_requests -""" - -from typing import Any, Callable, Optional, Union, overload +from __future__ import annotations + from collections.abc import Iterator -from _libcachesim import TraceType, ReqOp - -def open_trace( - trace_path: str, - type: Optional[TraceType] = None, - reader_init_param: Optional[Union[dict, reader_init_param_t]] = None, -) -> Reader: ... -def process_trace( - cache: Cache, - reader: Reader, - start_req: int = 0, - max_req: int = -1, -) -> tuple[float, float]: - """ - Process a trace with a cache and return miss ratio. - """ - -def process_trace_python_hook( - cache: PythonHookCache, - reader: Reader, - start_req: int = 0, - max_req: int = -1, -) -> tuple[float, float]: - """ - Process a trace with a Python hook cache and return miss ratio. - """ - -# Trace generation functions -def create_zipf_requests( - num_objects: int, - num_requests: int, - alpha: float = 1.0, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> Iterator[Request]: - """Create a Zipf-distributed request generator.
- - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - alpha (float): Zipf skewness parameter (alpha >= 0) - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Iterator[Request]: A generator that yields Request objects - """ - -def create_uniform_requests( - num_objects: int, - num_requests: int, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> Iterator[Request]: - """Create a uniform-distributed request generator. - - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Iterator[Request]: A generator that yields Request objects - """ - -class reader_init_param_t: - time_field: int - obj_id_field: int - obj_size_field: int - delimiter: str - has_header: bool - binary_fmt_str: str - -class Cache: - n_req: int - cache_size: int - @property - def n_obj(self) -> int: ... - @property - def occupied_byte(self) -> int: ... - def get(self, req: Request) -> bool: ... +from .libcachesim_python import ReqOp, TraceType, SamplerType +from .protocols import ReaderProtocol, CacheProtocol class Request: clock_time: int hv: int obj_id: int obj_size: int + ttl: int op: ReqOp + valid: bool + next_access_vtime: int - @overload - def __init__(self) -> None: ... - @overload - def __init__( - self, obj_id: int, obj_size: int = 1, clock_time: int = 0, hv: int = 0, op: ReqOp = ReqOp.GET - ) -> None: ... def __init__( - self, obj_id: Optional[int] = None, obj_size: int = 1, clock_time: int = 0, hv: int = 0, op: ReqOp = ReqOp.GET - ) -> None: - """Create a request instance. 
- - Args: - obj_id (int, optional): The object ID. - obj_size (int): The object size. (default: 1) - clock_time (int): The clock time. (default: 0) - hv (int): The hash value. (default: 0) - op (ReqOp): The operation. (default: ReqOp.GET) - - Returns: - Request: A new request instance. - """ - -class Reader: - n_read_req: int - n_total_req: int - trace_path: str - file_size: int - def get_wss(self, ignore_obj_size: bool = False) -> int: ... - def seek(self, offset: int, from_beginning: bool = False) -> None: ... - def __iter__(self) -> Reader: ... - def __next__(self) -> Request: ... - -class PythonHookCache: - n_req: int - n_obj: int - occupied_byte: int + self, + obj_size: int = 1, + op: ReqOp = ReqOp.READ, + valid: bool = True, + obj_id: int = 0, + clock_time: int = 0, + hv: int = 0, + next_access_vtime: int = -2, + ttl: int = 0, + ): ... + + +class CacheObject: + obj_id: int + obj_size: int + +class CommonCacheParams: cache_size: int + default_ttl: int + hashpower: int + consider_obj_metadata: bool - def __init__(self, cache_size: int, cache_name: str = "PythonHookCache") -> None: ... - def set_hooks( - self, - init_hook: Callable[[int], Any], - hit_hook: Callable[[Any, int, int], None], - miss_hook: Callable[[Any, int, int], None], - eviction_hook: Callable[[Any, int, int], int], - remove_hook: Callable[[Any, int], None], - free_hook: Optional[Callable[[Any], None]] = None, - ) -> None: ... - def get(self, req: Request) -> bool: ... +class Cache: + cache_size: int + default_ttl: int + obj_md_size: int + n_req: int + cache_name: str + init_params: CommonCacheParams -# Base class for all eviction policies -class EvictionPolicyBase: - """Abstract base class for all eviction policies.""" + def __init__(self, init_params: CommonCacheParams, cache_specific_params: str = ""): ... def get(self, req: Request) -> bool: ... - def process_trace(self, reader: Reader, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: ...
- @property - def n_req(self) -> int: ... - @property - def n_obj(self) -> int: ... - @property - def occupied_byte(self) -> int: ... + def find(self, req: Request, update_cache: bool = True) -> CacheObject: ... + def can_insert(self, req: Request) -> bool: ... + def insert(self, req: Request) -> CacheObject: ... + def need_eviction(self, req: Request) -> bool: ... + def evict(self, req: Request) -> CacheObject: ... + def remove(self, obj_id: int) -> bool: ... + def to_evict(self, req: Request) -> CacheObject: ... + def get_occupied_byte(self) -> int: ... + def get_n_obj(self) -> int: ... + def print_cache(self) -> str: ... + +class CacheBase(CacheProtocol): + """Base class implementing CacheProtocol""" + def __init__(self, _cache: Cache): ... + def get(self, req: Request) -> bool: ... + def find(self, req: Request, update_cache: bool = True) -> CacheObject: ... + def can_insert(self, req: Request) -> bool: ... + def insert(self, req: Request) -> CacheObject: ... + def need_eviction(self, req: Request) -> bool: ... + def evict(self, req: Request) -> CacheObject: ... + def remove(self, obj_id: int) -> bool: ... + def to_evict(self, req: Request) -> CacheObject: ... + def get_occupied_byte(self) -> int: ... + def get_n_obj(self) -> int: ... + def print_cache(self) -> str: ... + def process_trace(self, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: ... @property def cache_size(self) -> int: ... - def __repr__(self) -> str: ... + @property + def cache_name(self) -> str: ... -# Eviction policy classes -class ARC(EvictionPolicyBase): - """Adaptive Replacement Cache policy.""" - def __init__(self, cache_size: int) -> None: ... +# Core cache algorithms +class LRU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... 
-class Belady(EvictionPolicyBase): - """Belady replacement policy (optimal offline algorithm).""" - def __init__(self, cache_size: int) -> None: ... +class FIFO(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class BeladySize(EvictionPolicyBase): - """BeladySize replacement policy (optimal offline algorithm with size consideration).""" - def __init__(self, cache_size: int) -> None: ... +class LFU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class Cacheus(EvictionPolicyBase): - """Cacheus replacement policy.""" - def __init__(self, cache_size: int) -> None: ... +class ARC(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class Clock(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class Random(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +# Advanced algorithms +class S3FIFO(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class Clock(EvictionPolicyBase): - """Clock (Second Chance or FIFO-Reinsertion) replacement policy.""" - def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0) -> None: ... +class Sieve(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class FIFO(EvictionPolicyBase): - """First In First Out replacement policy.""" - def __init__(self, cache_size: int) -> None: ... 
+class LIRS(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class LeCaR(EvictionPolicyBase): - """LeCaR (Learning Cache Replacement) adaptive replacement policy.""" - def __init__(self, cache_size: int) -> None: ... +class TwoQ(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class LFU(EvictionPolicyBase): - """LFU (Least Frequently Used) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... +class SLRU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class LFUDA(EvictionPolicyBase): - """LFUDA (LFU with Dynamic Aging) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... +class WTinyLFU(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class LRB(EvictionPolicyBase): - """LRB (Learning Relaxed Belady) replacement policy.""" - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio") -> None: ... +class LeCaR(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class LRU(EvictionPolicyBase): - """Least Recently Used replacement policy.""" - def __init__(self, cache_size: int) -> None: ... +class LFUDA(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class QDLP(EvictionPolicyBase): - """QDLP (Queue Demotion with Lazy Promotion) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... 
+class ClockPro(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... -class S3FIFO(EvictionPolicyBase): - """S3FIFO replacement policy.""" +class Cacheus(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +# Optimal algorithms +class Belady(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +class BeladySize(CacheBase): + def __init__( + self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + ): ... + +# Plugin cache +class PythonHookCachePolicy(CacheBase): def __init__( self, cache_size: int, - fifo_size_ratio: float = 0.1, - ghost_size_ratio: float = 0.9, - move_to_main_threshold: int = 2, - ) -> None: ... - -class Sieve(EvictionPolicyBase): - """Sieve replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class SLRU(EvictionPolicyBase): - """SLRU (Segmented LRU) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class ThreeLCache(EvictionPolicyBase): - """ThreeL cache replacement policy.""" - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio") -> None: ... - -class TinyLFU(EvictionPolicyBase): - """TinyLFU replacement policy.""" - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01) -> None: ... - -class TwoQ(EvictionPolicyBase): - """2Q replacement policy.""" - def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5) -> None: ... - -class WTinyLFU(EvictionPolicyBase): - """WTinyLFU (Windowed TinyLFU) replacement policy.""" - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01) -> None: ... 
- -class PythonHookCachePolicy(EvictionPolicyBase): - """Python hook-based cache policy.""" - def __init__(self, cache_size: int, cache_name: str = "PythonHookCache") -> None: ... - def set_hooks( + cache_name: str = "PythonHookCache", + default_ttl: int = 25920000, + hashpower: int = 24, + consider_obj_metadata: bool = False, + cache_init_hook=None, + cache_hit_hook=None, + cache_miss_hook=None, + cache_eviction_hook=None, + cache_remove_hook=None, + cache_free_hook=None, + ): ... + def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): ... + +# Readers +class TraceReader(ReaderProtocol): + c_reader: bool + def __init__(self, trace: str, trace_type: TraceType = TraceType.UNKNOWN_TRACE, **kwargs): ... + +class SyntheticReader(ReaderProtocol): + c_reader: bool + def __init__( self, - init_hook: Callable[[int], Any], - hit_hook: Callable[[Any, int, int], None], - miss_hook: Callable[[Any, int, int], None], - eviction_hook: Callable[[Any, int, int], int], - remove_hook: Callable[[Any, int], None], - free_hook: Optional[Callable[[Any], None]] = None, - ) -> None: ... + num_of_req: int, + obj_size: int = 4000, + time_span: int = 604800, + start_obj_id: int = 0, + seed: int | None = None, + alpha: float = 1.0, + dist: str = "zipf", + num_objects: int | None = None, + ): ... + +# Trace generators +def create_zipf_requests( + num_objects: int, + num_requests: int, + alpha: float = 1.0, + obj_size: int = 4000, + time_span: int = 604800, + start_obj_id: int = 0, + seed: int | None = None, +) -> Iterator[Request]: ... +def create_uniform_requests( + num_objects: int, + num_requests: int, + obj_size: int = 4000, + time_span: int = 604800, + start_obj_id: int = 0, + seed: int | None = None, +) -> Iterator[Request]: ... + +# Analyzer +class TraceAnalyzer: + def __init__(self, analyzer): ... + def analyze(self, reader: ReaderProtocol, output_path: str, analysis_param, analysis_option) -> None: ... 
+ +# Utilities +class Util: + @staticmethod + def convert_to_oracleGeneral(reader, ofilepath, output_txt: bool = False, remove_size_change: bool = False): ... + @staticmethod + def convert_to_lcs( + reader, ofilepath, output_txt: bool = False, remove_size_change: bool = False, lcs_ver: int = 1 + ): ... + @staticmethod + def process_trace( + cache: CacheBase, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1 + ) -> tuple[float, float]: ... diff --git a/libCacheSim-python/libcachesim/cache.py b/libCacheSim-python/libcachesim/cache.py new file mode 100644 index 000000000..3f3a2bd38 --- /dev/null +++ b/libCacheSim-python/libcachesim/cache.py @@ -0,0 +1,396 @@ +from abc import ABC +from typing import Protocol +from .libcachesim_python import ( + CommonCacheParams, + Request, + CacheObject, + Cache, + # Core cache algorithms + LRU_init, + FIFO_init, + LFU_init, + ARC_init, + Clock_init, + Random_init, + LIRS_init, + TwoQ_init, + SLRU_init, + # Advanced algorithms + S3FIFO_init, + Sieve_init, + WTinyLFU_init, + LeCaR_init, + LFUDA_init, + ClockPro_init, + Cacheus_init, + # Optimal algorithms + Belady_init, + BeladySize_init, + # Probabilistic algorithms + LRU_Prob_init, + flashProb_init, + # Size-based algorithms + Size_init, + GDSF_init, + # Hyperbolic algorithms + Hyperbolic_init, + # Plugin cache + pypluginCache_init, + # Process trace function + c_process_trace, +) + +from .protocols import CacheProtocol, ReaderProtocol + + +class CacheBase(CacheProtocol): + """Base class for all cache implementations that implements CacheProtocol""" + + _cache: Cache # Internal C++ cache object + + def __init__(self, _cache: Cache): + self._cache = _cache + + def get(self, req: Request) -> bool: + return self._cache.get(req) + + def find(self, req: Request, update_cache: bool = True) -> CacheObject: + return self._cache.find(req, update_cache) + + def can_insert(self, req: Request) -> bool: + return self._cache.can_insert(req) + + def insert(self, req: Request) -> 
CacheObject: + return self._cache.insert(req) + + def need_eviction(self, req: Request) -> bool: + return self._cache.need_eviction(req) + + def evict(self, req: Request) -> CacheObject: + return self._cache.evict(req) + + def remove(self, obj_id: int) -> bool: + return self._cache.remove(obj_id) + + def to_evict(self, req: Request) -> CacheObject: + return self._cache.to_evict(req) + + def get_occupied_byte(self) -> int: + return self._cache.get_occupied_byte() + + def get_n_obj(self) -> int: + return self._cache.get_n_obj() + + def print_cache(self) -> str: + return self._cache.print_cache() + + def process_trace(self, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: + """Process trace with this cache and return miss ratios""" + if hasattr(reader, "c_reader") and reader.c_reader: + # C++ reader with _reader attribute + if hasattr(reader, "_reader"): + return c_process_trace(self._cache, reader._reader, start_req, max_req) + else: + raise ValueError("C++ reader missing _reader attribute") + else: + # Python reader - use Python implementation + return self._process_trace_python(reader, start_req, max_req) + + def _process_trace_python( + self, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1 + ) -> tuple[float, float]: + """Python fallback for processing traces""" + reader.reset() + if start_req > 0: + reader.skip_n_req(start_req) + + n_req = 0 + n_hit = 0 + bytes_req = 0 + bytes_hit = 0 + + for req in reader: + if not req.valid: + break + + n_req += 1 + bytes_req += req.obj_size + + if self.get(req): + n_hit += 1 + bytes_hit += req.obj_size + + if max_req > 0 and n_req >= max_req: + break + + obj_miss_ratio = 1.0 - (n_hit / n_req) if n_req > 0 else 0.0 + byte_miss_ratio = 1.0 - (bytes_hit / bytes_req) if bytes_req > 0 else 0.0 + return obj_miss_ratio, byte_miss_ratio + + # Properties + @property + def cache_size(self) -> int: + return self._cache.cache_size + + @property + def cache_name(self) -> str: + return 
self._cache.cache_name + + +def _create_common_params( + cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False +) -> CommonCacheParams: + """Helper to create common cache parameters""" + return CommonCacheParams( + cache_size=cache_size, + default_ttl=default_ttl, + hashpower=hashpower, + consider_obj_metadata=consider_obj_metadata, + ) + + +# Core cache algorithms +class LRU(CacheBase): + """Least Recently Used cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class FIFO(CacheBase): + """First In First Out cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LFU(CacheBase): + """Least Frequently Used cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class ARC(CacheBase): + """Adaptive Replacement Cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=ARC_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class Clock(CacheBase): + """Clock replacement algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Clock_init(_create_common_params(cache_size, default_ttl, 
hashpower, consider_obj_metadata)) + ) + + +class Random(CacheBase): + """Random replacement cache""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Random_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +# Advanced algorithms +class S3FIFO(CacheBase): + """S3-FIFO cache algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=S3FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class Sieve(CacheBase): + """Sieve cache algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Sieve_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LIRS(CacheBase): + """Low Inter-reference Recency Set""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class TwoQ(CacheBase): + """2Q replacement algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=TwoQ_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class SLRU(CacheBase): + """Segmented LRU""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=SLRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + 
) + + +class WTinyLFU(CacheBase): + """Window TinyLFU""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=WTinyLFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LeCaR(CacheBase): + """Learning Cache Replacement""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LeCaR_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class LFUDA(CacheBase): + """LFU with Dynamic Aging""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=LFUDA_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class ClockPro(CacheBase): + """Clock-Pro replacement algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=ClockPro_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class Cacheus(CacheBase): + """Cacheus algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Cacheus_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +# Optimal algorithms +class Belady(CacheBase): + """Belady's optimal algorithm""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=Belady_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +class 
BeladySize(CacheBase): + """Belady's optimal algorithm with size consideration""" + + def __init__( + self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + ): + super().__init__( + _cache=BeladySize_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + ) + + +# Plugin cache for custom Python implementations +def nop_method(*args, **kwargs): + """No-operation method for default hooks""" + pass + + +class PythonHookCachePolicy(CacheBase): + """Python plugin cache for custom implementations""" + + def __init__( + self, + cache_size: int, + cache_name: str = "PythonHookCache", + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + cache_init_hook=nop_method, + cache_hit_hook=nop_method, + cache_miss_hook=nop_method, + cache_eviction_hook=nop_method, + cache_remove_hook=nop_method, + cache_free_hook=nop_method, + ): + self.cache_name = cache_name + self.common_cache_params = _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata) + + super().__init__( + _cache=pypluginCache_init( + self.common_cache_params, + cache_name, + cache_init_hook, + cache_hit_hook, + cache_miss_hook, + cache_eviction_hook, + cache_remove_hook, + cache_free_hook, + ) + ) + + def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=nop_method): + """Set the cache hooks after initialization""" + # Note: This would require C++ side support to change hooks after creation + # For now, hooks should be set during initialization + pass diff --git a/libCacheSim-python/libcachesim/const.py b/libCacheSim-python/libcachesim/const.py deleted file mode 100644 index 9d48db4f9..000000000 --- a/libCacheSim-python/libcachesim/const.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py deleted file mode 100644 
index 63599ec0f..000000000 --- a/libCacheSim-python/libcachesim/eviction.py +++ /dev/null @@ -1,713 +0,0 @@ -"""Registry of eviction policies.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod - -from ._libcachesim import ( - ARC_init, - Belady_init, - BeladySize_init, - Cacheus_init, - Cache, - Clock_init, - FIFO_init, - LeCaR_init, - LFU_init, - LFUDA_init, - LRB_init, - LRU_init, - QDLP_init, - Reader, - Request, - S3FIFO_init, - Sieve_init, - SLRU_init, - ThreeLCache_init, - TinyLFU_init, - TwoQ_init, - WTinyLFU_init, - PythonHookCache, -) - -from .trace_generator import _ZipfRequestGenerator, _UniformRequestGenerator - -# Define generator types once to avoid repeated tuple creation -_GENERATOR_TYPES = (_ZipfRequestGenerator, _UniformRequestGenerator) - - -class EvictionPolicyBase(ABC): - """Abstract base class for all eviction policies.""" - - @abstractmethod - def get(self, req: Request) -> bool: - pass - - @abstractmethod - def __repr__(self) -> str: - pass - - @abstractmethod - def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]: - """Process a trace with this cache and return miss ratio. - - This method processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. 
- - Args: - reader: The trace reader instance - start_req: Start request index (-1 for no limit) - max_req: Number of requests to process (-1 for no limit) - - Returns: - tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0) - """ - pass - - -class EvictionPolicy(EvictionPolicyBase): - """Base class for all eviction policies.""" - - def __init__(self, cache_size: int, **kwargs) -> None: - self.cache: Cache = self.init_cache(cache_size, **kwargs) - - @abstractmethod - def init_cache(self, cache_size: int, **kwargs) -> Cache: - pass - - def get(self, req: Request) -> bool: - return self.cache.get(req) - - def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]: - """Process a trace with this cache and return miss ratio. - - This method processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. - - Args: - reader: The trace reader instance - start_req: Start request index (-1 for no limit) - max_req: Number of requests to process (-1 for no limit) - - Returns: - tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0) - Example: - >>> cache = LRU(1024*1024) - >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}") - """ - obj_miss_ratio = 0.0 - byte_miss_ratio = 0.0 - if not isinstance(reader, Reader): - # streaming generator - if isinstance(reader, _GENERATOR_TYPES): - miss_cnt = 0 - byte_miss_cnt = 0 - total_byte = 0 - for req in reader: - hit = self.get(req) - total_byte += req.obj_size - if not hit: - miss_cnt += 1 - byte_miss_cnt += req.obj_size - obj_miss_ratio = miss_cnt / len(reader) if len(reader) > 0 else 0.0 - byte_miss_ratio = byte_miss_cnt / total_byte if total_byte > 0 else 0.0 - return obj_miss_ratio, byte_miss_ratio - else: - from ._libcachesim import process_trace - 
- obj_miss_ratio, byte_miss_ratio = process_trace(self.cache, reader, start_req, max_req) - - return obj_miss_ratio, byte_miss_ratio - - def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})" - - @property - def n_req(self): - """Number of requests processed.""" - return self.cache.n_req - - @property - def n_obj(self): - """Number of objects currently in cache.""" - return self.cache.n_obj - - @property - def occupied_byte(self): - """Number of bytes currently occupied in cache.""" - return self.cache.occupied_byte - - @property - def cache_size(self): - """Total cache size in bytes.""" - return self.cache.cache_size - - -class FIFO(EvictionPolicy): - """First In First Out replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs) -> Cache: # noqa: ARG002 - return FIFO_init(cache_size) - - -class Clock(EvictionPolicy): - """Clock (Second Chance or FIFO-Reinsertion) replacement policy. - - Args: - cache_size: Size of the cache - n_bit_counter: Number of bits for counter (default: 1) - init_freq: Initial frequency value (default: 0) - """ - - def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0): - super().__init__(cache_size, n_bit_counter=n_bit_counter, init_freq=init_freq) - - def init_cache(self, cache_size: int, **kwargs): - init_freq = kwargs.get("init_freq", 0) - n_bit_counter = kwargs.get("n_bit_counter", 1) - - if n_bit_counter < 1 or n_bit_counter > 32: - msg = "n_bit_counter must be between 1 and 32" - raise ValueError(msg) - if init_freq < 0 or init_freq > 2**n_bit_counter - 1: - msg = "init_freq must be between 0 and 2^n_bit_counter - 1" - raise ValueError(msg) - - self.init_freq = init_freq - self.n_bit_counter = n_bit_counter - - return Clock_init(cache_size, n_bit_counter, init_freq) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"n_bit_counter={self.n_bit_counter}, " 
- f"init_freq={self.init_freq})" - ) - - -class TwoQ(EvictionPolicy): - """2Q replacement policy. - - 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be - inserted into Am otherwise it will be inserted into Ain. - - Args: - cache_size: Total size of the cache - ain_size_ratio: Size ratio for Ain queue (default: 0.25) - aout_size_ratio: Size ratio for Aout queue (default: 0.5) - """ - - def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5): - super().__init__(cache_size, ain_size_ratio=ain_size_ratio, aout_size_ratio=aout_size_ratio) - - def init_cache(self, cache_size: int, **kwargs): - ain_size_ratio = kwargs.get("ain_size_ratio", 0.25) - aout_size_ratio = kwargs.get("aout_size_ratio", 0.5) - - if ain_size_ratio <= 0 or aout_size_ratio <= 0: - msg = "ain_size_ratio and aout_size_ratio must be greater than 0" - raise ValueError(msg) - - self.ain_size_ratio = ain_size_ratio - self.aout_size_ratio = aout_size_ratio - - return TwoQ_init(cache_size, ain_size_ratio, aout_size_ratio) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"ain_size_ratio={self.ain_size_ratio}, " - f"aout_size_ratio={self.aout_size_ratio})" - ) - - -class LRB(EvictionPolicy): - """LRB (Learning Relaxed Belady) replacement policy. - - LRB is a learning-based replacement policy that uses a neural network to - predict the future access patterns of the cache, randomly select one obj - outside the Belady boundary to evict. 
- - Args: - cache_size: Size of the cache - objective: Objective function to optimize (default: "byte-miss-ratio") - """ - - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): - super().__init__(cache_size, objective=objective) - - def init_cache(self, cache_size: int, **kwargs) -> Cache: - objective = kwargs.get("objective", "byte-miss-ratio") - - if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: - msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" - raise ValueError(msg) - - self.objective = objective - - return LRB_init(cache_size, objective) - - def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, objective={self.objective})" - - -class LRU(EvictionPolicy): - """Least Recently Used replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LRU_init(cache_size) - - -class ARC(EvictionPolicy): - """Adaptive Replacement Cache policy. - - ARC is a two-tiered cache with two LRU caches (T1 and T2) and two ghost - lists (B1 and B2). T1 records the obj accessed only once, T2 records - the obj accessed more than once. ARC has an internal parameter `p` to - learn and dynamically control the size of T1 and T2. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return ARC_init(cache_size) - - -class S3FIFO(EvictionPolicy): - """S3FIFO replacement policy. - - S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small - queue gets the obj and records the freq. - When small queue is full, if the obj to evict satisfies the threshold, - it will be moved to main queue. Otherwise, it will be evicted from small - queue and inserted into ghost queue. - When main queue is full, the obj to evict will be evicted and reinserted - like Clock. - If obj hits in the ghost queue, it will be moved to main queue. 
- - Args: - cache_size: Size of the cache - fifo_size_ratio: Size ratio for FIFO queue (default: 0.1) - ghost_size_ratio: Size ratio for ghost queue (default: 0.9) - move_to_main_threshold: Threshold for moving obj from ghost to main (default: 2) - """ - - def __init__( - self, - cache_size: int, - fifo_size_ratio: float = 0.1, - ghost_size_ratio: float = 0.9, - move_to_main_threshold: int = 2, - ): - super().__init__( - cache_size, - fifo_size_ratio=fifo_size_ratio, - ghost_size_ratio=ghost_size_ratio, - move_to_main_threshold=move_to_main_threshold, - ) - - def init_cache(self, cache_size: int, **kwargs): - fifo_size_ratio = kwargs.get("fifo_size_ratio", 0.1) - ghost_size_ratio = kwargs.get("ghost_size_ratio", 0.9) - move_to_main_threshold = kwargs.get("move_to_main_threshold", 2) - - if fifo_size_ratio <= 0 or ghost_size_ratio <= 0: - msg = "fifo_size_ratio and ghost_size_ratio must be greater than 0" - raise ValueError(msg) - if move_to_main_threshold < 0: - msg = "move_to_main_threshold must be greater or equal to 0" - raise ValueError(msg) - - self.fifo_size_ratio = fifo_size_ratio - self.ghost_size_ratio = ghost_size_ratio - self.move_to_main_threshold = move_to_main_threshold - - return S3FIFO_init(cache_size, fifo_size_ratio, ghost_size_ratio, move_to_main_threshold) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"fifo_size_ratio={self.fifo_size_ratio}, " - f"ghost_size_ratio={self.ghost_size_ratio}, " - f"move_to_main_threshold={self.move_to_main_threshold})" - ) - - -class Sieve(EvictionPolicy): - """Sieve replacement policy. - - FIFO-Reinsertion with check pointer. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return Sieve_init(cache_size) - - -class ThreeLCache(EvictionPolicy): - """3L-Cache replacement policy. 
- - Args: - cache_size: Size of the cache - objective: Objective function to optimize (default: "byte-miss-ratio") - """ - - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): - super().__init__(cache_size, objective=objective) - - def init_cache(self, cache_size: int, **kwargs): - objective = kwargs.get("objective", "byte-miss-ratio") - - if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: - msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" - raise ValueError(msg) - - self.objective = objective - - return ThreeLCache_init(cache_size, objective) - - def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, objective={self.objective})" - - -class TinyLFU(EvictionPolicy): - """TinyLFU replacement policy. - - Args: - cache_size: Size of the cache - main_cache: Main cache to use (default: "SLRU") - window_size: Window size for TinyLFU (default: 0.01) - """ - - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01): - super().__init__(cache_size, main_cache=main_cache, window_size=window_size) - - def init_cache(self, cache_size: int, **kwargs): - main_cache = kwargs.get("main_cache", "SLRU") - window_size = kwargs.get("window_size", 0.01) - - if window_size <= 0: - msg = "window_size must be greater than 0" - raise ValueError(msg) - - self.main_cache = main_cache - self.window_size = window_size - - return TinyLFU_init(cache_size, main_cache, window_size) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"main_cache={self.main_cache}, " - f"window_size={self.window_size})" - ) - - -class LFU(EvictionPolicy): - """LFU (Least Frequently Used) replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LFU_init(cache_size) - - -class LFUDA(EvictionPolicy): - """LFUDA (LFU with Dynamic Aging) replacement policy. 
- - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LFUDA_init(cache_size) - - -class SLRU(EvictionPolicy): - """SLRU (Segmented LRU) replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return SLRU_init(cache_size) - - -class Belady(EvictionPolicy): - """Belady replacement policy (optimal offline algorithm). - - Note: Requires oracle trace with future access information. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return Belady_init(cache_size) - - -class BeladySize(EvictionPolicy): - """BeladySize replacement policy (optimal offline algorithm with size consideration). - - Note: Requires oracle trace with future access information. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return BeladySize_init(cache_size) - - -class QDLP(EvictionPolicy): - """QDLP (Queue Demotion with Lazy Promotion) replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return QDLP_init(cache_size) - - -class LeCaR(EvictionPolicy): - """LeCaR (Learning Cache Replacement) adaptive replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LeCaR_init(cache_size) - - -class Cacheus(EvictionPolicy): - """Cacheus replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return Cacheus_init(cache_size) - - -class WTinyLFU(EvictionPolicy): - """WTinyLFU (Windowed TinyLFU) replacement policy. 
- - Args: - cache_size: Size of the cache - main_cache: Main cache to use (default: "SLRU") - window_size: Window size for TinyLFU (default: 0.01) - """ - - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01): - super().__init__(cache_size, main_cache=main_cache, window_size=window_size) - - def init_cache(self, cache_size: int, **kwargs): - main_cache = kwargs.get("main_cache", "SLRU") - window_size = kwargs.get("window_size", 0.01) - - if window_size <= 0: - msg = "window_size must be greater than 0" - raise ValueError(msg) - - self.main_cache = main_cache - self.window_size = window_size - - return WTinyLFU_init(cache_size, main_cache, window_size) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"main_cache={self.main_cache}, " - f"window_size={self.window_size})" - ) - - -class PythonHookCachePolicy(EvictionPolicyBase): - """Python hook-based cache that allows defining custom policies using Python functions. - - This cache implementation allows users to define custom cache replacement algorithms - using pure Python functions instead of compiling C/C++ plugins. Users provide hook - functions for cache initialization, hit handling, miss handling, eviction decisions, - and cleanup. - - Args: - cache_size: Size of the cache in bytes - cache_name: Optional name for the cache (default: "PythonHookCache") - - Hook Functions Required: - init_hook(cache_size: int) -> Any: - Initialize plugin data structures. Return any object to be passed to other hooks. - - hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: - Handle cache hit events. Update internal state as needed. - - miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: - Handle cache miss events. Update internal state for new object. - - eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int: - Determine which object to evict. Return the object ID to be evicted. 
- - remove_hook(plugin_data: Any, obj_id: int) -> None: - Clean up when objects are removed from cache. - - free_hook(plugin_data: Any) -> None: [Optional] - Clean up plugin resources when cache is destroyed. - - Example: - >>> from collections import OrderedDict - >>> - >>> cache = PythonHookCachePolicy(1024) - >>> - >>> def init_hook(cache_size): - ... return OrderedDict() # LRU tracking - >>> - >>> def hit_hook(lru_dict, obj_id, obj_size): - ... lru_dict.move_to_end(obj_id) # Move to end (most recent) - >>> - >>> def miss_hook(lru_dict, obj_id, obj_size): - ... lru_dict[obj_id] = True # Add to end - >>> - >>> def eviction_hook(lru_dict, obj_id, obj_size): - ... return next(iter(lru_dict)) # Return least recent - >>> - >>> def remove_hook(lru_dict, obj_id): - ... lru_dict.pop(obj_id, None) - >>> - >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - >>> - >>> req = Request() - >>> req.obj_id = 1 - >>> req.obj_size = 100 - >>> hit = cache.get(req) - """ - - def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"): - self._cache_size = cache_size - self.cache_name = cache_name - self.cache = PythonHookCache(cache_size, cache_name) - self._hooks_set = False - - def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): - """Set the hook functions for the cache. - - Args: - init_hook: Function called during cache initialization - hit_hook: Function called on cache hit - miss_hook: Function called on cache miss - eviction_hook: Function called to select eviction candidate - remove_hook: Function called when object is removed - free_hook: Optional function called during cache cleanup - """ - self.cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) - self._hooks_set = True - - def get(self, req: Request) -> bool: - """Process a cache request. 
- - Args: - req: The cache request to process - - Returns: - True if cache hit, False if cache miss - - Raises: - RuntimeError: If hooks have not been set - """ - if not self._hooks_set: - raise RuntimeError("Hooks must be set before using the cache. Call set_hooks() first.") - return self.cache.get(req) - - def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]: - """Process a trace with this cache and return miss ratio. - - This method processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. - - Args: - reader: The trace reader instance - start_req: Start request index (-1 for no limit) - n_req: Number of requests to process (-1 for no limit) - - Returns: - tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0) - - Raises: - RuntimeError: If hooks have not been set - - Example: - >>> cache = PythonHookCachePolicy(1024*1024) - >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}") - """ - if not self._hooks_set: - raise RuntimeError("Hooks must be set before processing trace. 
Call set_hooks() first.") - obj_miss_ratio = 0.0 - byte_miss_ratio = 0.0 - if not isinstance(reader, Reader): - # streaming generator - if isinstance(reader, _GENERATOR_TYPES): - miss_cnt = 0 - byte_miss_cnt = 0 - total_byte = 0 - for req in reader: - hit = self.get(req) - total_byte += req.obj_size - if not hit: - miss_cnt += 1 - byte_miss_cnt += req.obj_size - obj_miss_ratio = miss_cnt / len(reader) if len(reader) > 0 else 0.0 - byte_miss_ratio = byte_miss_cnt / total_byte if total_byte > 0 else 0.0 - return obj_miss_ratio, byte_miss_ratio - else: - from ._libcachesim import process_trace_python_hook - - obj_miss_ratio, byte_miss_ratio = process_trace_python_hook(self.cache, reader, start_req, max_req) - return obj_miss_ratio, byte_miss_ratio - - @property - def n_req(self): - """Number of requests processed.""" - return self.cache.n_req - - @property - def n_obj(self): - """Number of objects currently in cache.""" - return self.cache.n_obj - - @property - def occupied_byte(self): - """Number of bytes currently occupied in cache.""" - return self.cache.occupied_byte - - @property - def cache_size(self): - """Total cache size in bytes.""" - return self.cache.cache_size - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self._cache_size}, " - f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})" - ) diff --git a/libCacheSim-python/libcachesim/protocols.py b/libCacheSim-python/libcachesim/protocols.py new file mode 100644 index 000000000..d362946a0 --- /dev/null +++ b/libCacheSim-python/libcachesim/protocols.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from typing import Protocol, TYPE_CHECKING + +if TYPE_CHECKING: + from .libcachesim_python import Request, CacheObject, Reader, Analyzer + + +class CacheProtocol(Protocol): + def get(self, req: Request) -> bool: ... + + def find(self, req: Request, update_cache: bool = True) -> CacheObject: ... + + def can_insert(self, req: Request) -> bool: ... 
+ + def insert(self, req: Request) -> CacheObject: ... + + def need_eviction(self, req: Request) -> bool: ... + + def evict(self, req: Request) -> CacheObject: ... + + def remove(self, obj_id: int) -> bool: ... + + def to_evict(self, req: Request) -> CacheObject: ... + + def get_occupied_byte(self) -> int: ... + + def get_n_obj(self) -> int: ... + + def print_cache(self) -> str: ... + + def process_trace(self, reader: "ReaderProtocol", start_req: int = 0, max_req: int = -1) -> tuple[float, float]: ... + + # Properties + @property + def cache_size(self) -> int: ... + + @property + def cache_name(self) -> str: ... + + +class ReaderProtocol(Protocol): + def get_num_of_req(self) -> int: ... + + def read_one_req(self, req: Request) -> Request: ... + + def reset(self) -> None: ... + + def close(self) -> None: ... + + def clone(self) -> ReaderProtocol: ... + + def read_first_req(self, req: Request) -> Request: ... + + def read_last_req(self, req: Request) -> Request: ... + + def skip_n_req(self, n: int) -> int: ... + + def read_one_req_above(self, req: Request) -> Request: ... + + def go_back_one_req(self) -> None: ... + + def set_read_pos(self, pos: float) -> None: ... + + def get_read_pos(self) -> float: ... + + +class AnalyzerProtocol(Protocol): + def run(self) -> None: ... + + def cleanup(self) -> None: ... diff --git a/libCacheSim-python/libcachesim/synthetic_reader.py b/libCacheSim-python/libcachesim/synthetic_reader.py new file mode 100644 index 000000000..c9d3575fc --- /dev/null +++ b/libCacheSim-python/libcachesim/synthetic_reader.py @@ -0,0 +1,408 @@ +""" +Trace generator module for libCacheSim Python bindings. + +This module provides functions to generate synthetic traces with different distributions. 
+""" + +import numpy as np +import random +from typing import Optional, Union, Any +from collections.abc import Iterator +from .libcachesim_python import Request, ReqOp + +from .protocols import ReaderProtocol + + +class SyntheticReader(ReaderProtocol): + """Efficient synthetic request generator supporting multiple distributions""" + + def __init__( + self, + num_of_req: int, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, + alpha: float = 1.0, + dist: str = "zipf", + num_objects: Optional[int] = None, + ): + """ + Initialize synthetic reader. + + Args: + num_of_req: Number of requests to generate + obj_size: Object size in bytes + time_span: Time span in seconds + start_obj_id: Starting object ID + seed: Random seed for reproducibility + alpha: Zipf skewness parameter (only for dist="zipf") + dist: Distribution type ("zipf" or "uniform") + num_objects: Number of unique objects (defaults to num_of_req) + """ + if num_of_req <= 0: + raise ValueError("num_of_req must be positive") + if obj_size <= 0: + raise ValueError("obj_size must be positive") + if time_span <= 0: + raise ValueError("time_span must be positive") + if alpha < 0: + raise ValueError("alpha must be non-negative") + if dist not in ["zipf", "uniform"]: + raise ValueError(f"Unsupported distribution: {dist}") + + self.num_of_req = num_of_req + self.obj_size = obj_size + self.time_span = time_span + self.start_obj_id = start_obj_id + self.seed = seed + self.alpha = alpha + self.dist = dist + self.num_objects = num_objects or num_of_req + self.current_pos = 0 + + # Set the reader type - this is a Python reader, not C++ + self.c_reader = False + + # Set random seed for reproducibility + if seed is not None: + np.random.seed(seed) + random.seed(seed) + + # Lazy generation: generate object IDs only when needed + self._obj_ids: Optional[np.ndarray] = None + + @property + def obj_ids(self) -> np.ndarray: + """Lazy generation of object ID array""" + if 
self._obj_ids is None: + if self.dist == "zipf": + self._obj_ids = _gen_zipf(self.num_objects, self.alpha, self.num_of_req, self.start_obj_id) + elif self.dist == "uniform": + self._obj_ids = _gen_uniform(self.num_objects, self.num_of_req, self.start_obj_id) + return self._obj_ids + + def get_num_of_req(self) -> int: + return self.num_of_req + + def read_one_req(self, req: Request) -> Request: + """Read one request and fill Request object""" + if self.current_pos >= self.num_of_req: + req.valid = False + return req + + obj_id = self.obj_ids[self.current_pos] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = self.current_pos * self.time_span // self.num_of_req + req.op = ReqOp.OP_NOP + req.valid = True + + self.current_pos += 1 + return req + + def reset(self) -> None: + """Reset read position to beginning""" + self.current_pos = 0 + + def close(self) -> None: + """Close reader and release resources""" + self._obj_ids = None + + def clone(self) -> "SyntheticReader": + """Create a copy of the reader""" + return SyntheticReader( + num_of_req=self.num_of_req, + obj_size=self.obj_size, + time_span=self.time_span, + start_obj_id=self.start_obj_id, + seed=self.seed, + alpha=self.alpha, + dist=self.dist, + num_objects=self.num_objects, + ) + + def read_first_req(self, req: Request) -> Request: + """Read the first request""" + if self.num_of_req == 0: + req.valid = False + return req + + obj_id = self.obj_ids[0] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = 0 + req.op = ReqOp.OP_NOP + req.valid = True + return req + + def read_last_req(self, req: Request) -> Request: + """Read the last request""" + if self.num_of_req == 0: + req.valid = False + return req + + obj_id = self.obj_ids[-1] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = (self.num_of_req - 1) * self.time_span // self.num_of_req + req.op = ReqOp.OP_NOP + req.valid = True + return req + + def skip_n_req(self, n: int) -> int: + """Skip n 
requests""" + self.current_pos = min(self.current_pos + n, self.num_of_req) + return self.current_pos + + def read_one_req_above(self, req: Request) -> Request: + """Read one request above current position""" + if self.current_pos + 1 >= self.num_of_req: + req.valid = False + return req + + obj_id = self.obj_ids[self.current_pos + 1] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = (self.current_pos + 1) * self.time_span // self.num_of_req + req.op = ReqOp.OP_NOP + req.valid = True + return req + + def go_back_one_req(self) -> None: + """Go back one request""" + self.current_pos = max(0, self.current_pos - 1) + + def set_read_pos(self, pos: float) -> None: + """Set read position""" + self.current_pos = max(0, min(int(pos), self.num_of_req)) + + def get_read_pos(self) -> float: + """Get current read position""" + return float(self.current_pos) + + def __iter__(self) -> Iterator[Request]: + """Iterator implementation""" + self.reset() + return self + + def __len__(self) -> int: + return self.num_of_req + + def __next__(self) -> Request: + """Next element for iterator""" + if self.current_pos >= self.num_of_req: + raise StopIteration + + req = Request() + return self.read_one_req(req) + + def __getitem__(self, index: int) -> Request: + """Support index access""" + if index < 0 or index >= self.num_of_req: + raise IndexError("Index out of range") + + req = Request() + obj_id = self.obj_ids[index] + req.obj_id = obj_id + req.obj_size = self.obj_size + req.clock_time = index * self.time_span // self.num_of_req + req.op = ReqOp.OP_NOP + req.valid = True + return req + + +def _gen_zipf(m: int, alpha: float, n: int, start: int = 0) -> np.ndarray: + """Generate Zipf-distributed workload. 
+ + Args: + m: Number of objects + alpha: Skewness parameter (alpha >= 0) + n: Number of requests + start: Starting object ID + + Returns: + Array of object IDs following Zipf distribution + """ + if m <= 0 or n <= 0: + raise ValueError("num_objects and num_requests must be positive") + if alpha < 0: + raise ValueError("alpha must be non-negative") + + # Optimization: for alpha=0 (uniform), use uniform distribution directly + if alpha == 0: + return _gen_uniform(m, n, start) + + # Calculate Zipf distribution PMF + np_tmp = np.power(np.arange(1, m + 1), -alpha) + np_zeta = np.cumsum(np_tmp) + dist_map = np_zeta / np_zeta[-1] + + # Generate random samples + r = np.random.uniform(0, 1, n) + return np.searchsorted(dist_map, r) + start + + +def _gen_uniform(m: int, n: int, start: int = 0) -> np.ndarray: + """Generate uniform-distributed workload. + + Args: + m: Number of objects + n: Number of requests + start: Starting object ID + + Returns: + Array of object IDs following uniform distribution + """ + if m <= 0 or n <= 0: + raise ValueError("num_objects and num_requests must be positive") + return np.random.randint(0, m, n) + start + + +class _BaseRequestGenerator: + """Base class for request generators to reduce code duplication""" + + def __init__( + self, + num_objects: int, + num_requests: int, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, + ): + """Initialize base request generator.""" + if num_objects <= 0 or num_requests <= 0: + raise ValueError("num_objects and num_requests must be positive") + if obj_size <= 0: + raise ValueError("obj_size must be positive") + if time_span <= 0: + raise ValueError("time_span must be positive") + + self.num_requests = num_requests + self.obj_size = obj_size + self.time_span = time_span + + # Set random seed + if seed is not None: + np.random.seed(seed) + random.seed(seed) + + # Subclasses must implement this method + self.obj_ids = 
self._generate_obj_ids(num_objects, num_requests, start_obj_id) + + def _generate_obj_ids(self, num_objects: int, num_requests: int, start_obj_id: int) -> np.ndarray: + """Subclasses must implement this method to generate object IDs""" + raise NotImplementedError("Subclasses must implement _generate_obj_ids") + + def __iter__(self) -> Iterator[Request]: + """Iterate over generated requests""" + for i, obj_id in enumerate(self.obj_ids): + req = Request() + req.clock_time = i * self.time_span // self.num_requests + req.obj_id = obj_id + req.obj_size = self.obj_size + req.op = ReqOp.OP_NOP + req.valid = True + yield req + + def __len__(self) -> int: + """Return number of requests""" + return self.num_requests + + +class _ZipfRequestGenerator(_BaseRequestGenerator): + """Zipf-distributed request generator""" + + def __init__( + self, + num_objects: int, + num_requests: int, + alpha: float = 1.0, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, + ): + """Initialize Zipf request generator.""" + if alpha < 0: + raise ValueError("alpha must be non-negative") + self.alpha = alpha + super().__init__(num_objects, num_requests, obj_size, time_span, start_obj_id, seed) + + def _generate_obj_ids(self, num_objects: int, num_requests: int, start_obj_id: int) -> np.ndarray: + """Generate Zipf-distributed object IDs""" + return _gen_zipf(num_objects, self.alpha, num_requests, start_obj_id) + + +class _UniformRequestGenerator(_BaseRequestGenerator): + """Uniform-distributed request generator""" + + def _generate_obj_ids(self, num_objects: int, num_requests: int, start_obj_id: int) -> np.ndarray: + """Generate uniformly-distributed object IDs""" + return _gen_uniform(num_objects, num_requests, start_obj_id) + + +def create_zipf_requests( + num_objects: int, + num_requests: int, + alpha: float = 1.0, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, +) -> 
_ZipfRequestGenerator: + """Create a Zipf-distributed request generator. + + Args: + num_objects: Number of unique objects + num_requests: Number of requests to generate + alpha: Zipf skewness parameter (alpha >= 0) + obj_size: Object size in bytes + time_span: Time span in seconds + start_obj_id: Starting object ID + seed: Random seed for reproducibility + + Returns: + Generator that yields Request objects + """ + return _ZipfRequestGenerator( + num_objects=num_objects, + num_requests=num_requests, + alpha=alpha, + obj_size=obj_size, + time_span=time_span, + start_obj_id=start_obj_id, + seed=seed, + ) + + +def create_uniform_requests( + num_objects: int, + num_requests: int, + obj_size: int = 4000, + time_span: int = 86400 * 7, + start_obj_id: int = 0, + seed: Optional[int] = None, +) -> _UniformRequestGenerator: + """Create a uniform-distributed request generator. + + Args: + num_objects: Number of unique objects + num_requests: Number of requests to generate + obj_size: Object size in bytes + time_span: Time span in seconds + start_obj_id: Starting object ID + seed: Random seed for reproducibility + + Returns: + Generator that yields Request objects + """ + return _UniformRequestGenerator( + num_objects=num_objects, + num_requests=num_requests, + obj_size=obj_size, + time_span=time_span, + start_obj_id=start_obj_id, + seed=seed, + ) diff --git a/libCacheSim-python/libcachesim/trace_analyzer.py b/libCacheSim-python/libcachesim/trace_analyzer.py new file mode 100644 index 000000000..bf598a71b --- /dev/null +++ b/libCacheSim-python/libcachesim/trace_analyzer.py @@ -0,0 +1,29 @@ +"""Wrapper of Analyzer""" + +from .protocols import ReaderProtocol, AnalyzerProtocol + +from .libcachesim_python import ( + Analyzer, + AnalysisOption, + AnalysisParam, +) + + +class TraceAnalyzer(AnalyzerProtocol): + _analyzer: Analyzer + + def __init__( + self, + analyzer: Analyzer, + reader: "ReaderProtocol", + output_path: str, + analysis_param: AnalysisParam, + analysis_option: 
AnalysisOption, + ): + self._analyzer = Analyzer(reader._reader, output_path, analysis_param, analysis_option) + + def run(self) -> None: + self._analyzer.run() + + def cleanup(self) -> None: + self._analyzer.cleanup() diff --git a/libCacheSim-python/libcachesim/trace_generator.py b/libCacheSim-python/libcachesim/trace_generator.py deleted file mode 100644 index 8c5802243..000000000 --- a/libCacheSim-python/libcachesim/trace_generator.py +++ /dev/null @@ -1,215 +0,0 @@ -""" -Trace generator module for libCacheSim Python bindings. - -This module provides functions to generate synthetic traces with different distributions. -""" - -import numpy as np -import random -from typing import Optional -from collections.abc import Iterator -from ._libcachesim import Request, ReqOp - - -def _gen_zipf(m: int, alpha: float, n: int, start: int = 0) -> np.ndarray: - """Generate zipf distributed workload (internal function). - - Args: - m (int): The number of objects - alpha (float): The skewness parameter (alpha >= 0) - n (int): The number of requests - start (int, optional): Start object ID. Defaults to 0. - - Returns: - np.ndarray: Array of object IDs following Zipf distribution - """ - if m <= 0 or n <= 0: - raise ValueError("num_objects and num_requests must be positive") - if alpha < 0: - raise ValueError("alpha must be non-negative") - np_tmp = np.power(np.arange(1, m + 1), -alpha) - np_zeta = np.cumsum(np_tmp) - dist_map = np_zeta / np_zeta[-1] - r = np.random.uniform(0, 1, n) - return np.searchsorted(dist_map, r) + start - - -def _gen_uniform(m: int, n: int, start: int = 0) -> np.ndarray: - """Generate uniform distributed workload (internal function). - - Args: - m (int): The number of objects - n (int): The number of requests - start (int, optional): Start object ID. Defaults to 0.
- - Returns: - np.ndarray: Array of object IDs following uniform distribution - """ - if m <= 0 or n <= 0: - raise ValueError("num_objects and num_requests must be positive") - return np.random.uniform(0, m, n).astype(int) + start - - -class _ZipfRequestGenerator: - """Zipf-distributed request generator (internal class).""" - - def __init__( - self, - num_objects: int, - num_requests: int, - alpha: float = 1.0, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, - ): - """Initialize Zipf request generator. - - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - alpha (float): Zipf skewness parameter (alpha >= 0) - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - """ - self.num_requests = num_requests - self.obj_size = obj_size - self.time_span = time_span - - # Set random seed if provided - if seed is not None: - np.random.seed(seed) - random.seed(seed) - - # Pre-generate object IDs - self.obj_ids = _gen_zipf(num_objects, alpha, num_requests, start_obj_id) - - def __iter__(self) -> Iterator[Request]: - """Iterate over generated requests.""" - for i, obj_id in enumerate(self.obj_ids): - req = Request() - req.clock_time = i * self.time_span // self.num_requests - req.obj_id = obj_id - req.obj_size = self.obj_size - req.op = ReqOp.READ # Default operation - yield req - - def __len__(self) -> int: - """Return the number of requests.""" - return self.num_requests - - -class _UniformRequestGenerator: - """Uniform-distributed request generator (internal class).""" - - def __init__( - self, - num_objects: int, - num_requests: int, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, - ): - """Initialize uniform request generator. 
- - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - """ - self.num_requests = num_requests - self.obj_size = obj_size - self.time_span = time_span - - # Set random seed if provided - if seed is not None: - np.random.seed(seed) - random.seed(seed) - - # Pre-generate object IDs - self.obj_ids = _gen_uniform(num_objects, num_requests, start_obj_id) - - def __iter__(self) -> Iterator[Request]: - """Iterate over generated requests.""" - for i, obj_id in enumerate(self.obj_ids): - req = Request() - req.clock_time = i * self.time_span // self.num_requests - req.obj_id = obj_id - req.obj_size = self.obj_size - req.op = ReqOp.READ # Default operation - yield req - - def __len__(self) -> int: - """Return the number of requests.""" - return self.num_requests - - -def create_zipf_requests( - num_objects: int, - num_requests: int, - alpha: float = 1.0, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> _ZipfRequestGenerator: - """Create a Zipf-distributed request generator. 
- - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - alpha (float): Zipf skewness parameter (alpha >= 0) - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Generator: A generator that yields Request objects - """ - return _ZipfRequestGenerator( - num_objects=num_objects, - num_requests=num_requests, - alpha=alpha, - obj_size=obj_size, - time_span=time_span, - start_obj_id=start_obj_id, - seed=seed, - ) - - -def create_uniform_requests( - num_objects: int, - num_requests: int, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> _UniformRequestGenerator: - """Create a uniform-distributed request generator. - - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Generator: A generator that yields Request objects - """ - return _UniformRequestGenerator( - num_objects=num_objects, - num_requests=num_requests, - obj_size=obj_size, - time_span=time_span, - start_obj_id=start_obj_id, - seed=seed, - ) diff --git a/libCacheSim-python/libcachesim/trace_reader.py b/libCacheSim-python/libcachesim/trace_reader.py new file mode 100644 index 000000000..d37dead20 --- /dev/null +++ b/libCacheSim-python/libcachesim/trace_reader.py @@ -0,0 +1,251 @@ +"""Wrapper of Reader""" + +import logging +from typing import overload, Union +from collections.abc import Iterator + +from .protocols import ReaderProtocol + +from .libcachesim_python import TraceType, SamplerType, Request, ReaderInitParam, Reader, Sampler, ReadDirection + + +class TraceReader(ReaderProtocol): + _reader: Reader + + # Mark 
this as a C++ reader for c_process_trace compatibility + c_reader: bool = True + + @overload + def __init__(self, trace: Reader) -> None: ... + + def __init__( + self, + trace: Union[Reader, str], + trace_type: TraceType = TraceType.UNKNOWN_TRACE, + ignore_obj_size: bool = False, + ignore_size_zero_req: bool = False, + obj_id_is_num: bool = False, + obj_id_is_num_set: bool = False, + cap_at_n_req: int = -1, + block_size: int = 0, + has_header: bool = False, + has_header_set: bool = False, + delimiter: str = ",", + trace_start_offset: int = 0, + binary_fmt_str: str = "", + sampling_ratio: float = 1.0, + sampling_type: SamplerType = SamplerType.INVALID_SAMPLER, + ): + if isinstance(trace, Reader): + self._reader = trace + return + + # Process sampling_type + if sampling_ratio < 0.0 or sampling_ratio > 1.0: + raise ValueError("Sampling ratio must be between 0.0 and 1.0") + + if sampling_ratio == 1.0: + sampler = None + else: + if sampling_type == SamplerType.INVALID_SAMPLER: + logging.warning("Sampling type is invalid, using SPATIAL_SAMPLER instead") + sampling_type = SamplerType.SPATIAL_SAMPLER + logging.info(f"Sampling ratio: {sampling_ratio}, Sampling type: {sampling_type}") + sampler = Sampler(sampling_ratio, sampling_type) + + # Construct ReaderInitParam + reader_init_params = ReaderInitParam( + binary_fmt_str=binary_fmt_str, + ignore_obj_size=ignore_obj_size, + ignore_size_zero_req=ignore_size_zero_req, + obj_id_is_num=obj_id_is_num, + obj_id_is_num_set=obj_id_is_num_set, + cap_at_n_req=cap_at_n_req, + block_size=block_size, + has_header=has_header, + has_header_set=has_header_set, + delimiter=delimiter, + trace_start_offset=trace_start_offset, + sampler=sampler, + ) + + self._reader = Reader(trace, trace_type, reader_init_params) + + @property + def n_read_req(self) -> int: + return self._reader.n_read_req + + @property + def n_total_req(self) -> int: + return self._reader.n_total_req + + @property + def trace_path(self) -> str: + return self._reader.trace_path 
+ + @property + def file_size(self) -> int: + return self._reader.file_size + + @property + def init_params(self) -> ReaderInitParam: + return self._reader.init_params + + @property + def trace_type(self) -> TraceType: + return self._reader.trace_type + + @property + def trace_format(self) -> str: + return self._reader.trace_format + + @property + def ver(self) -> int: + return self._reader.ver + + @property + def cloned(self) -> bool: + return self._reader.cloned + + @property + def cap_at_n_req(self) -> int: + return self._reader.cap_at_n_req + + @property + def trace_start_offset(self) -> int: + return self._reader.trace_start_offset + + @property + def mapped_file(self) -> bool: + return self._reader.mapped_file + + @property + def mmap_offset(self) -> int: + return self._reader.mmap_offset + + @property + def is_zstd_file(self) -> bool: + return self._reader.is_zstd_file + + @property + def item_size(self) -> int: + return self._reader.item_size + + @property + def line_buf(self) -> str: + return self._reader.line_buf + + @property + def line_buf_size(self) -> int: + return self._reader.line_buf_size + + @property + def csv_delimiter(self) -> str: + return self._reader.csv_delimiter + + @property + def csv_has_header(self) -> bool: + return self._reader.csv_has_header + + @property + def obj_id_is_num(self) -> bool: + return self._reader.obj_id_is_num + + @property + def obj_id_is_num_set(self) -> bool: + return self._reader.obj_id_is_num_set + + @property + def ignore_size_zero_req(self) -> bool: + return self._reader.ignore_size_zero_req + + @property + def ignore_obj_size(self) -> bool: + return self._reader.ignore_obj_size + + @property + def block_size(self) -> int: + return self._reader.block_size + + @ignore_size_zero_req.setter + def ignore_size_zero_req(self, value: bool) -> None: + self._reader.ignore_size_zero_req = value + + @ignore_obj_size.setter + def ignore_obj_size(self, value: bool) -> None: + self._reader.ignore_obj_size = value + + 
@block_size.setter + def block_size(self, value: int) -> None: + self._reader.block_size = value + + @property + def n_req_left(self) -> int: + return self._reader.n_req_left + + @property + def last_req_clock_time(self) -> int: + return self._reader.last_req_clock_time + + @property + def lcs_ver(self) -> int: + return self._reader.lcs_ver + + @property + def sampler(self) -> Sampler: + return self._reader.sampler + + @property + def read_direction(self) -> ReadDirection: + return self._reader.read_direction + + def get_num_of_req(self) -> int: + return self._reader.get_num_of_req() + + def read_one_req(self, req: Request) -> Request: + return self._reader.read_one_req(req) + + def reset(self) -> None: + self._reader.reset() + + def close(self) -> None: + self._reader.close() + + def clone(self) -> "TraceReader": + return TraceReader(self._reader.clone()) + + def read_first_req(self, req: Request) -> Request: + return self._reader.read_first_req(req) + + def read_last_req(self, req: Request) -> Request: + return self._reader.read_last_req(req) + + def skip_n_req(self, n: int) -> int: + return self._reader.skip_n_req(n) + + def read_one_req_above(self) -> Request: + return self._reader.read_one_req_above() + + def go_back_one_req(self) -> None: + self._reader.go_back_one_req() + + def set_read_pos(self, pos: float) -> None: + self._reader.set_read_pos(pos) + + def __iter__(self) -> Iterator[Request]: + return self._reader.__iter__() + + def __len__(self) -> int: + return self._reader.get_num_of_req() + + def __next__(self) -> Request: + if self._reader.n_req_left == 0: + raise StopIteration + return self._reader.read_one_req() + + def __getitem__(self, index: int) -> Request: + if index < 0 or index >= self._reader.get_num_of_req(): + raise IndexError("Index out of range") + self._reader.reset() + self._reader.skip_n_req(index) + return self._reader.read_one_req() diff --git a/libCacheSim-python/libcachesim/util.py b/libCacheSim-python/libcachesim/util.py new file 
mode 100644 index 000000000..0f80a7fb2 --- /dev/null +++ b/libCacheSim-python/libcachesim/util.py @@ -0,0 +1,50 @@ +"""Wrapper misc functions""" + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .protocols import CacheProtocol, ReaderProtocol + +from .libcachesim_python import convert_to_oracleGeneral, convert_to_lcs, c_process_trace + + +class Util: + @staticmethod + def convert_to_oracleGeneral(reader, ofilepath, output_txt=False, remove_size_change=False): + return convert_to_oracleGeneral(reader, ofilepath, output_txt, remove_size_change) + + @staticmethod + def convert_to_lcs(reader, ofilepath, output_txt=False, remove_size_change=False, lcs_ver=1): + """ + Convert a trace to LCS format. + + Args: + reader: The reader to convert. + ofilepath: The path to the output file. + output_txt: Whether to output the trace in text format. + remove_size_change: Whether to remove the size change field. + lcs_ver: The version of LCS format (1, 2, 3, 4, 5, 6, 7, 8). + """ + return convert_to_lcs(reader, ofilepath, output_txt, remove_size_change, lcs_ver) + + @staticmethod + def process_trace( + cache: "CacheProtocol", reader: "ReaderProtocol", start_req: int = 0, max_req: int = -1 + ) -> tuple[float, float]: + """ + Process a trace with a cache. + + Args: + cache: The cache to process the trace with. + reader: The reader to read the trace from. + start_req: The starting request to process. + max_req: The maximum number of requests to process. + + Returns: + tuple[float, float]: The object miss ratio and byte miss ratio. 
+ """ + # Check if reader is C++ reader + if not hasattr(reader, "c_reader") or not reader.c_reader: + raise ValueError("Reader must be a C++ reader") + + return c_process_trace(cache._cache, reader._reader, start_req, max_req) diff --git a/libCacheSim-python/src/exception.cpp b/libCacheSim-python/src/exception.cpp new file mode 100644 index 000000000..078d9c4c0 --- /dev/null +++ b/libCacheSim-python/src/exception.cpp @@ -0,0 +1,56 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include "exception.h" + +#include + +namespace libcachesim { + +namespace py = pybind11; + +void register_exception(py::module& m) { + static py::exception exc_cache(m, "CacheException"); + static py::exception exc_reader(m, "ReaderException"); + + py::register_exception_translator([](std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (const CacheException& e) { + py::set_error(exc_cache, e.what()); + } catch (const ReaderException& e) { + py::set_error(exc_reader, e.what()); + } + }); + + py::register_exception_translator([](std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (const std::bad_alloc& e) { + PyErr_SetString(PyExc_MemoryError, e.what()); + } catch (const std::invalid_argument& e) { + PyErr_SetString(PyExc_ValueError, e.what()); + } catch (const std::out_of_range& e) { + PyErr_SetString(PyExc_IndexError, e.what()); + } catch (const std::domain_error& e) { + PyErr_SetString(PyExc_ValueError, + ("Domain error: " + std::string(e.what())).c_str()); + } catch (const std::overflow_error& e) { + PyErr_SetString(PyExc_OverflowError, e.what()); + } catch (const std::range_error& e) { + PyErr_SetString(PyExc_ValueError, + ("Range error: " + std::string(e.what())).c_str()); + } catch 
(const std::runtime_error& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, + ("C++ exception: " + std::string(e.what())).c_str()); + } + }); +} + +} // namespace libcachesim diff --git a/libCacheSim-python/src/exception.h b/libCacheSim-python/src/exception.h new file mode 100644 index 000000000..2749ae078 --- /dev/null +++ b/libCacheSim-python/src/exception.h @@ -0,0 +1,33 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#pragma once + +#include + +#include +#include + +namespace libcachesim { + +namespace py = pybind11; + +class CacheException : public std::runtime_error { + public: + explicit CacheException(const std::string& message) + : std::runtime_error("CacheException: " + message) {} +}; + +class ReaderException : public std::runtime_error { + public: + explicit ReaderException(const std::string& message) + : std::runtime_error("ReaderException: " + message) {} +}; + +void register_exception(py::module& m); + +} // namespace libcachesim diff --git a/libCacheSim-python/src/export.cpp b/libCacheSim-python/src/export.cpp new file mode 100644 index 000000000..0ef8d8334 --- /dev/null +++ b/libCacheSim-python/src/export.cpp @@ -0,0 +1,38 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. 
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include "export.h" + +#include "exception.h" + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace libcachesim { + +PYBIND11_MODULE(libcachesim_python, m) { + m.doc() = "libcachesim_python"; + + // NOTE(haocheng): can use decentralized interface holder to export all the + // methods if the codebase is large enough + + export_cache(m); + export_reader(m); + export_analyzer(m); + export_misc(m); + + // NOTE(haocheng): register exception to make it available in Python + register_exception(m); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} + +} // namespace libcachesim diff --git a/libCacheSim-python/src/export.h b/libCacheSim-python/src/export.h new file mode 100644 index 000000000..121ff97b1 --- /dev/null +++ b/libCacheSim-python/src/export.h @@ -0,0 +1,27 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. 
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#pragma once + +#include "pybind11/operators.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace libcachesim { + +namespace py = pybind11; + +using py::literals::operator""_a; + +void export_cache(py::module &m); +void export_pyplugin_cache(py::module &m); + +void export_reader(py::module &m); +void export_analyzer(py::module &m); +void export_misc(py::module &m); + +} // namespace libcachesim diff --git a/libCacheSim-python/src/export_analyzer.cpp b/libCacheSim-python/src/export_analyzer.cpp new file mode 100644 index 000000000..0d8fd6680 --- /dev/null +++ b/libCacheSim-python/src/export_analyzer.cpp @@ -0,0 +1,136 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include +#include +#include + +#include +#include + +#include "../libCacheSim/traceAnalyzer/analyzer.h" +#include "export.h" +#include "libCacheSim/cache.h" +#include "libCacheSim/reader.h" +#include "libCacheSim/request.h" + +namespace libcachesim { + +namespace py = pybind11; + +// Custom deleters for smart pointers +struct AnalysisParamDeleter { + void operator()(traceAnalyzer::analysis_param_t* ptr) const { + if (ptr != nullptr) free(ptr); + } +}; + +struct AnalysisOptionDeleter { + void operator()(traceAnalyzer::analysis_option_t* ptr) const { + if (ptr != nullptr) free(ptr); + } +}; + +void export_analyzer(py::module& m) { + py::class_< + traceAnalyzer::analysis_param_t, + std::unique_ptr>( + m, "AnalysisParam") + .def(py::init([](int access_pattern_sample_ratio_inv, int track_n_popular, + int track_n_hit, int time_window, int warmup_time) { + 
traceAnalyzer::analysis_param_t params; + params.access_pattern_sample_ratio_inv = + access_pattern_sample_ratio_inv; + params.track_n_popular = track_n_popular; + params.track_n_hit = track_n_hit; + params.time_window = time_window; + params.warmup_time = warmup_time; + return std::unique_ptr( + new traceAnalyzer::analysis_param_t(params)); + }), + "access_pattern_sample_ratio_inv"_a = 10, "track_n_popular"_a = 10, + "track_n_hit"_a = 5, "time_window"_a = 60, "warmup_time"_a = 0) + .def_readwrite( + "access_pattern_sample_ratio_inv", + &traceAnalyzer::analysis_param_t::access_pattern_sample_ratio_inv) + .def_readwrite("track_n_popular", + &traceAnalyzer::analysis_param_t::track_n_popular) + .def_readwrite("track_n_hit", + &traceAnalyzer::analysis_param_t::track_n_hit) + .def_readwrite("time_window", + &traceAnalyzer::analysis_param_t::time_window) + .def_readwrite("warmup_time", + &traceAnalyzer::analysis_param_t::warmup_time); + + py::class_< + traceAnalyzer::analysis_option_t, + std::unique_ptr>( + m, "AnalysisOption") + .def( + py::init([](bool req_rate, bool access_pattern, bool size, bool reuse, + bool popularity, bool ttl, bool popularity_decay, + bool lifetime, bool create_future_reuse_ccdf, + bool prob_at_age, bool size_change) { + traceAnalyzer::analysis_option_t option; + option.req_rate = req_rate; + option.access_pattern = access_pattern; + option.size = size; + option.reuse = reuse; + option.popularity = popularity; + option.ttl = ttl; + option.popularity_decay = popularity_decay; + option.lifetime = lifetime; + option.create_future_reuse_ccdf = create_future_reuse_ccdf; + option.prob_at_age = prob_at_age; + option.size_change = size_change; + return std::unique_ptr( + new traceAnalyzer::analysis_option_t(option)); + }), + "req_rate"_a = false, "access_pattern"_a = false, "size"_a = false, + "reuse"_a = false, "popularity"_a = false, "ttl"_a = false, + "popularity_decay"_a = false, "lifetime"_a = false, + "create_future_reuse_ccdf"_a = false, 
"prob_at_age"_a = false, + "size_change"_a = false) + .def_readwrite("req_rate", &traceAnalyzer::analysis_option_t::req_rate) + .def_readwrite("access_pattern", + &traceAnalyzer::analysis_option_t::access_pattern) + .def_readwrite("size", &traceAnalyzer::analysis_option_t::size) + .def_readwrite("reuse", &traceAnalyzer::analysis_option_t::reuse) + .def_readwrite("popularity", + &traceAnalyzer::analysis_option_t::popularity) + .def_readwrite("ttl", &traceAnalyzer::analysis_option_t::ttl) + .def_readwrite("popularity_decay", + &traceAnalyzer::analysis_option_t::popularity_decay) + .def_readwrite("lifetime", &traceAnalyzer::analysis_option_t::lifetime) + .def_readwrite( + "create_future_reuse_ccdf", + &traceAnalyzer::analysis_option_t::create_future_reuse_ccdf) + .def_readwrite("prob_at_age", + &traceAnalyzer::analysis_option_t::prob_at_age) + .def_readwrite("size_change", + &traceAnalyzer::analysis_option_t::size_change); + + py::class_>(m, "Analyzer") + .def(py::init([](reader_t* reader, std::string output_path, + const traceAnalyzer::analysis_param_t& param, + const traceAnalyzer::analysis_option_t& option) { + traceAnalyzer::TraceAnalyzer* analyzer = + new traceAnalyzer::TraceAnalyzer(reader, output_path, option, + param); + return std::unique_ptr(analyzer); + }), + "reader"_a, "output_path"_a, + "param"_a = traceAnalyzer::default_param(), + "option"_a = traceAnalyzer::default_option()) + .def("run", &traceAnalyzer::TraceAnalyzer::run) + .def("cleanup", &traceAnalyzer::TraceAnalyzer::cleanup); +} + +} // namespace libcachesim diff --git a/libCacheSim-python/src/export_cache.cpp b/libCacheSim-python/src/export_cache.cpp new file mode 100644 index 000000000..3868866cc --- /dev/null +++ b/libCacheSim-python/src/export_cache.cpp @@ -0,0 +1,493 @@ +// libcachesim_python - libCacheSim Python bindings +// Export cache core functions and classes +// Copyright 2025 The libcachesim Authors. All rights reserved. 
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include +#include +#include + +#include +#include + +#include "config.h" +#include "dataStructure/hashtable/hashtable.h" +#include "export.h" +#include "libCacheSim/cache.h" +#include "libCacheSim/cacheObj.h" +#include "libCacheSim/enum.h" +#include "libCacheSim/evictionAlgo.h" +#include "libCacheSim/plugin.h" +#include "libCacheSim/request.h" + +namespace libcachesim { + +namespace py = pybind11; + +// Custom deleters for smart pointers +struct CacheDeleter { + void operator()(cache_t* ptr) const { + if (ptr != nullptr) ptr->cache_free(ptr); + } +}; + +struct CommonCacheParamsDeleter { + void operator()(common_cache_params_t* ptr) const { + if (ptr != nullptr) { + delete ptr; // Simple delete for POD struct + } + } +}; + +struct CacheObjectDeleter { + void operator()(cache_obj_t* ptr) const { + if (ptr != nullptr) free_cache_obj(ptr); + } +}; + +struct RequestDeleter { + void operator()(request_t* ptr) const { + if (ptr != nullptr) free_request(ptr); + } +}; + +// *********************************************************************** +// **** Python plugin cache implementation BEGIN **** +// *********************************************************************** + +typedef struct pypluginCache_params { + py::object data; ///< Plugin's internal data structure (python object) + py::function cache_init_hook; + py::function cache_hit_hook; + py::function cache_miss_hook; + py::function cache_eviction_hook; + py::function cache_remove_hook; + py::function cache_free_hook; + std::string cache_name; +} pypluginCache_params_t; + +static void pypluginCache_free(cache_t* cache); +static bool pypluginCache_get(cache_t* cache, const request_t* req); +static cache_obj_t* pypluginCache_find(cache_t* cache, const request_t* req, + const bool update_cache); +static cache_obj_t* 
pypluginCache_insert(cache_t* cache, const request_t* req); +static cache_obj_t* pypluginCache_to_evict(cache_t* cache, + const request_t* req); +static void pypluginCache_evict(cache_t* cache, const request_t* req); +static bool pypluginCache_remove(cache_t* cache, const obj_id_t obj_id); + +cache_t* pypluginCache_init( + const common_cache_params_t ccache_params, std::string cache_name, + py::function cache_init_hook, py::function cache_hit_hook, + py::function cache_miss_hook, py::function cache_eviction_hook, + py::function cache_remove_hook, py::function cache_free_hook) { + // Initialize base cache structure + cache_t* cache = cache_struct_init(cache_name.c_str(), ccache_params, NULL); + + // Set function pointers for cache operations + cache->cache_init = NULL; + cache->cache_free = pypluginCache_free; + cache->get = pypluginCache_get; + cache->find = pypluginCache_find; + cache->insert = pypluginCache_insert; + cache->evict = pypluginCache_evict; + cache->remove = pypluginCache_remove; + cache->to_evict = pypluginCache_to_evict; + cache->get_occupied_byte = cache_get_occupied_byte_default; + cache->get_n_obj = cache_get_n_obj_default; + cache->can_insert = cache_can_insert_default; + cache->obj_md_size = 0; + + // Allocate and initialize plugin parameters + pypluginCache_params_t* params = new pypluginCache_params_t(); + params->cache_name = cache_name; + params->cache_init_hook = cache_init_hook; + params->cache_hit_hook = cache_hit_hook; + params->cache_miss_hook = cache_miss_hook; + params->cache_eviction_hook = cache_eviction_hook; + params->cache_remove_hook = cache_remove_hook; + params->cache_free_hook = cache_free_hook; + params->data = cache_init_hook(ccache_params); + + cache->eviction_params = params; + + return cache; +} + +static void pypluginCache_free(cache_t* cache) { + pypluginCache_params_t* params = + (pypluginCache_params_t*)cache->eviction_params; + + if (!params->cache_free_hook.is_none()) { + params->cache_free_hook(params->data); + } 
+ delete params; + cache_struct_free(cache); +} + +static bool pypluginCache_get(cache_t* cache, const request_t* req) { + bool hit = cache_get_base(cache, req); + pypluginCache_params_t* params = + (pypluginCache_params_t*)cache->eviction_params; + + if (hit) { + params->cache_hit_hook(params->data, req); + } else { + params->cache_miss_hook(params->data, req); + } + + return hit; +} + +static cache_obj_t* pypluginCache_find(cache_t* cache, const request_t* req, + const bool update_cache) { + return cache_find_base(cache, req, update_cache); +} + +static cache_obj_t* pypluginCache_insert(cache_t* cache, const request_t* req) { + return cache_insert_base(cache, req); +} + +static cache_obj_t* pypluginCache_to_evict(cache_t* cache, + const request_t* req) { + throw std::runtime_error("pypluginCache does not support to_evict function"); +} + +static void pypluginCache_evict(cache_t* cache, const request_t* req) { + pypluginCache_params_t* params = + (pypluginCache_params_t*)cache->eviction_params; + + // Get eviction candidate from plugin + py::object result = params->cache_eviction_hook(params->data, req); + obj_id_t obj_id = result.cast(); + + // Find the object in the cache + cache_obj_t* obj_to_evict = hashtable_find_obj_id(cache->hashtable, obj_id); + if (obj_to_evict == NULL) { + throw std::runtime_error("pypluginCache: object " + std::to_string(obj_id) + + " to be evicted not found in cache"); + } + + // Perform the eviction + cache_evict_base(cache, obj_to_evict, true); +} + +static bool pypluginCache_remove(cache_t* cache, const obj_id_t obj_id) { + pypluginCache_params_t* params = + (pypluginCache_params_t*)cache->eviction_params; + + // Notify plugin of the removal + params->cache_remove_hook(params->data, obj_id); + + // Find the object in the cache + cache_obj_t* obj = hashtable_find_obj_id(cache->hashtable, obj_id); + if (obj == NULL) { + return false; + } + + // Remove the object from the cache + cache_remove_obj_base(cache, obj, true); + return true; 
+} + +// *********************************************************************** +// **** Python plugin cache implementation END **** +// *********************************************************************** + +// Templates +template +auto make_cache_wrapper(const std::string& fn_name) { + return [=](py::module_& m) { + m.def( + fn_name.c_str(), + [](const common_cache_params_t& cc_params, + const std::string& cache_specific_params) { + const char* params_cstr = cache_specific_params.empty() + ? nullptr + : cache_specific_params.c_str(); + cache_t* ptr = InitFn(cc_params, params_cstr); + return std::unique_ptr(ptr); + }, + "cc_params"_a, "cache_specific_params"_a = ""); + }; +} + +void export_cache(py::module& m) { + /** + * @brief Cache structure + */ + py::class_>(m, "Cache") + .def_readonly("cache_size", &cache_t::cache_size) + .def_readonly("default_ttl", &cache_t::default_ttl) + .def_readonly("obj_md_size", &cache_t::obj_md_size) + .def_readonly("n_req", &cache_t::n_req) + .def_readonly("cache_name", &cache_t::cache_name) + .def_readonly("init_params", &cache_t::init_params) + .def( + "get", + [](cache_t& self, const request_t& req) { + return self.get(&self, &req); + }, + "req"_a) + .def( + "find", + [](cache_t& self, const request_t& req, const bool update_cache) { + return self.find(&self, &req, update_cache); + }, + "req"_a, "update_cache"_a = true) + .def( + "can_insert", + [](cache_t& self, const request_t& req) { + return self.can_insert(&self, &req); + }, + "req"_a) + .def( + "insert", + [](cache_t& self, const request_t& req) { + return self.insert(&self, &req); + }, + "req"_a) + .def( + "need_eviction", + [](cache_t& self, const request_t& req) { + return self.need_eviction(&self, &req); + }, + "req"_a) + .def( + "evict", + [](cache_t& self, const request_t& req) { + return self.evict(&self, &req); + }, + "req"_a) + .def( + "remove", + [](cache_t& self, obj_id_t obj_id) { + return self.remove(&self, obj_id); + }, + "obj_id"_a) + .def( + "to_evict", 
+ [](cache_t& self, const request_t& req) { + return self.to_evict(&self, &req); + }, + "req"_a) + .def("get_occupied_byte", + [](cache_t& self) { return self.get_occupied_byte(&self); }) + .def("get_n_obj", [](cache_t& self) { return self.get_n_obj(&self); }) + .def("print_cache", [](cache_t& self) { + // Capture stdout to return as string + std::ostringstream captured_output; + std::streambuf* orig = std::cout.rdbuf(); + std::cout.rdbuf(captured_output.rdbuf()); + + self.print_cache(&self); + + // Restore original stdout + std::cout.rdbuf(orig); + return captured_output.str(); + }); + + /** + * @brief Common cache parameters + */ + py::class_>( + m, "CommonCacheParams") + .def(py::init([](uint64_t cache_size, uint64_t default_ttl, + int32_t hashpower, bool consider_obj_metadata) { + common_cache_params_t* params = new common_cache_params_t(); + params->cache_size = cache_size; + params->default_ttl = default_ttl; + params->hashpower = hashpower; + params->consider_obj_metadata = consider_obj_metadata; + return params; + }), + "cache_size"_a, "default_ttl"_a = 86400 * 300, "hashpower"_a = 24, + "consider_obj_metadata"_a = false) + .def_readwrite("cache_size", &common_cache_params_t::cache_size) + .def_readwrite("default_ttl", &common_cache_params_t::default_ttl) + .def_readwrite("hashpower", &common_cache_params_t::hashpower) + .def_readwrite("consider_obj_metadata", + &common_cache_params_t::consider_obj_metadata); + + /** + * @brief Cache object + * + * TODO: full support for cache object + */ + py::class_>( + m, "CacheObject") + .def_readonly("obj_id", &cache_obj_t::obj_id) + .def_readonly("obj_size", &cache_obj_t::obj_size); + + /** + * @brief Request operation enumeration + */ + py::enum_(m, "ReqOp") + .value("OP_NOP", OP_NOP) + .value("OP_GET", OP_GET) + .value("OP_GETS", OP_GETS) + .value("OP_SET", OP_SET) + .value("OP_ADD", OP_ADD) + .value("OP_CAS", OP_CAS) + .value("OP_REPLACE", OP_REPLACE) + .value("OP_APPEND", OP_APPEND) + .value("OP_PREPEND", 
OP_PREPEND) + .value("OP_DELETE", OP_DELETE) + .value("OP_INCR", OP_INCR) + .value("OP_DECR", OP_DECR) + .value("OP_READ", OP_READ) + .value("OP_WRITE", OP_WRITE) + .value("OP_UPDATE", OP_UPDATE) + .value("OP_INVALID", OP_INVALID) + .export_values(); + + /** + * @brief Request structure + */ + py::class_>(m, + "Request") + .def(py::init([](int64_t obj_size, req_op_e op, bool valid, + obj_id_t obj_id, int64_t clock_time, uint64_t hv, + int64_t next_access_vtime, int32_t ttl) { + request_t* req = new_request(); + req->obj_size = obj_size; + req->op = op; + req->valid = valid; + req->obj_id = obj_id; + req->clock_time = clock_time; + req->hv = hv; + req->next_access_vtime = next_access_vtime; + req->ttl = ttl; + return req; + }), + "obj_size"_a = 1, "op"_a = OP_NOP, "valid"_a = true, "obj_id"_a = 0, + "clock_time"_a = 0, "hv"_a = 0, "next_access_vtime"_a = -2, + "ttl"_a = 0) + .def_readwrite("clock_time", &request_t::clock_time) + .def_readwrite("hv", &request_t::hv) + .def_readwrite("obj_id", &request_t::obj_id) + .def_readwrite("obj_size", &request_t::obj_size) + .def_readwrite("ttl", &request_t::ttl) + .def_readwrite("op", &request_t::op) + .def_readwrite("valid", &request_t::valid) + .def_readwrite("next_access_vtime", &request_t::next_access_vtime); + + /** + * @brief Generic function to create a cache instance. + * + * TODO: add support for general cache creation and add support for cache + * specific parameters this is a backup for cache creation in python. 
+ */ + + // Cache algorithm initialization functions + + make_cache_wrapper("ARC_init")(m); + make_cache_wrapper("ARCv0_init")(m); + make_cache_wrapper("CAR_init")(m); + make_cache_wrapper("Cacheus_init")(m); + make_cache_wrapper("Clock_init")(m); + make_cache_wrapper("ClockPro_init")(m); + make_cache_wrapper("FIFO_init")(m); + make_cache_wrapper("FIFO_Merge_init")(m); + make_cache_wrapper("flashProb_init")(m); + make_cache_wrapper("GDSF_init")(m); + make_cache_wrapper("LHD_init")(m); + make_cache_wrapper("LeCaR_init")(m); + make_cache_wrapper("LeCaRv0_init")(m); + make_cache_wrapper("LFU_init")(m); + make_cache_wrapper("LFUCpp_init")(m); + make_cache_wrapper("LFUDA_init")(m); + make_cache_wrapper("LIRS_init")(m); + make_cache_wrapper("LRU_init")(m); + make_cache_wrapper("LRU_Prob_init")(m); + make_cache_wrapper("nop_init")(m); + + make_cache_wrapper("QDLP_init")(m); + make_cache_wrapper("Random_init")(m); + make_cache_wrapper("RandomLRU_init")(m); + make_cache_wrapper("RandomTwo_init")(m); + make_cache_wrapper("S3FIFO_init")(m); + make_cache_wrapper("S3FIFOv0_init")(m); + make_cache_wrapper("S3FIFOd_init")(m); + make_cache_wrapper("Sieve_init")(m); + make_cache_wrapper("Size_init")(m); + make_cache_wrapper("SLRU_init")(m); + make_cache_wrapper("SLRUv0_init")(m); + make_cache_wrapper("TwoQ_init")(m); + make_cache_wrapper("WTinyLFU_init")(m); + make_cache_wrapper("Hyperbolic_init")(m); + make_cache_wrapper("Belady_init")(m); + make_cache_wrapper("BeladySize_init")(m); + +#ifdef ENABLE_3L_CACHE + make_cache_wrapper("ThreeLCache_init")(m); +#endif + +#ifdef ENABLE_GLCACHE + make_cache_wrapper("GLCache_init")(m); +#endif + +#ifdef ENABLE_LRB + make_cache_wrapper("LRB_init")(m); +#endif + + // *********************************************************************** + // **** **** + // **** Python plugin cache bindings **** + // **** **** + // *********************************************************************** + + m.def("pypluginCache_init", &pypluginCache_init, 
"cc_params"_a, + "cache_name"_a, "cache_init_hook"_a, "cache_hit_hook"_a, + "cache_miss_hook"_a, "cache_eviction_hook"_a, "cache_remove_hook"_a, + "cache_free_hook"_a); + // *********************************************************************** + // **** **** + // **** end functions for python plugin **** + // **** **** + // *********************************************************************** + + m.def( + "c_process_trace", + [](cache_t& cache, reader_t& reader, int64_t start_req = 0, + int64_t max_req = -1) { + reset_reader(&reader); + if (start_req > 0) { + skip_n_req(&reader, start_req); + } + + request_t* req = new_request(); + int64_t n_req = 0, n_hit = 0; + int64_t bytes_req = 0, bytes_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + n_req += 1; + bytes_req += req->obj_size; + hit = cache.get(&cache, req); + if (hit) { + n_hit += 1; + bytes_hit += req->obj_size; + } + read_one_req(&reader, req); + if (max_req > 0 && n_req >= max_req) { + break; // Stop if we reached the max request limit + } + } + + free_request(req); + // return the miss ratio + double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + double byte_miss_ratio = + bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0; + return std::make_tuple(obj_miss_ratio, byte_miss_ratio); + }, + "cache"_a, "reader"_a, "start_req"_a = 0, "max_req"_a = -1); +} + +} // namespace libcachesim diff --git a/libCacheSim-python/src/export_misc.cpp b/libCacheSim-python/src/export_misc.cpp new file mode 100644 index 000000000..08000590f --- /dev/null +++ b/libCacheSim-python/src/export_misc.cpp @@ -0,0 +1,30 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. 
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include + +#include "../libCacheSim/bin/traceUtils/internal.hpp" +#include "export.h" + +namespace libcachesim { + +namespace py = pybind11; + +void export_misc(py::module& m) { + // NOTE(haocheng): Here we provide some convertion functions and utilities + // - convert_to_oracleGeneral + // - convert_to_lcs: v1 to v8 (default v1) + + m.def("convert_to_oracleGeneral", &traceConv::convert_to_oracleGeneral, + "reader"_a, "ofilepath"_a, "output_txt"_a = false, + "remove_size_change"_a = false); + m.def("convert_to_lcs", &traceConv::convert_to_lcs, "reader"_a, "ofilepath"_a, + "output_txt"_a = false, "remove_size_change"_a = false, + "lcs_ver"_a = 1); +} + +} // namespace libcachesim diff --git a/libCacheSim-python/src/export_reader.cpp b/libCacheSim-python/src/export_reader.cpp new file mode 100644 index 000000000..f9c3789b6 --- /dev/null +++ b/libCacheSim-python/src/export_reader.cpp @@ -0,0 +1,312 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. 
+// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "cli_reader_utils.h" +#include "config.h" +#include "export.h" +#include "libCacheSim/enum.h" +#include "libCacheSim/reader.h" +#include "libCacheSim/request.h" +#include "mystr.h" + +namespace libcachesim { + +namespace py = pybind11; + +// Custom deleters for smart pointers +struct ReaderDeleter { + void operator()(reader_t* ptr) const { + if (ptr != nullptr) close_trace(ptr); + } +}; + +struct RequestDeleter { + void operator()(request_t* ptr) const { + if (ptr != nullptr) free_request(ptr); + } +}; + +struct ReaderInitParamDeleter { + void operator()(reader_init_param_t* ptr) const { + if (ptr != nullptr) free(ptr); + } +}; + +struct SamplerDeleter { + void operator()(sampler_t* ptr) const { + if (ptr != nullptr && ptr->free != nullptr) { + ptr->free(ptr); + } + } +}; + +void export_reader(py::module& m) { + // Sampler type enumeration + py::enum_(m, "SamplerType") + .value("SPATIAL_SAMPLER", sampler_type::SPATIAL_SAMPLER) + .value("TEMPORAL_SAMPLER", sampler_type::TEMPORAL_SAMPLER) + .value("SHARDS_SAMPLER", sampler_type::SHARDS_SAMPLER) + .value("INVALID_SAMPLER", sampler_type::INVALID_SAMPLER) + .export_values(); + + // Trace type enumeration + py::enum_(m, "TraceType") + .value("CSV_TRACE", trace_type_e::CSV_TRACE) + .value("BIN_TRACE", trace_type_e::BIN_TRACE) + .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) + .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE) + .value("LCS_TRACE", trace_type_e::LCS_TRACE) + .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) + .value("TWR_TRACE", trace_type_e::TWR_TRACE) + .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE) + .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE) + .value("ORACLE_SYS_TWR_TRACE", 
trace_type_e::ORACLE_SYS_TWR_TRACE) + .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE) + .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE) + .value("VALPIN_TRACE", trace_type_e::VALPIN_TRACE) + .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE) + .export_values(); + + py::enum_(m, "ReadDirection") + .value("READ_FORWARD", read_direction::READ_FORWARD) + .value("READ_BACKWARD", read_direction::READ_BACKWARD) + .export_values(); + + /** + * @brief Sampler structure + */ + py::class_>(m, + "Sampler") + .def(py::init([](double sample_ratio, enum sampler_type type) + -> std::unique_ptr { + switch (type) { + case sampler_type::SPATIAL_SAMPLER: + return std::unique_ptr( + create_spatial_sampler(sample_ratio)); + case sampler_type::TEMPORAL_SAMPLER: + return std::unique_ptr( + create_temporal_sampler(sample_ratio)); + case sampler_type::SHARDS_SAMPLER: + throw std::invalid_argument("SHARDS_SAMPLER is not added"); + case sampler_type::INVALID_SAMPLER: + default: + throw std::invalid_argument("Unknown sampler type"); + } + }), + "sample_ratio"_a = 0.1, "type"_a = sampler_type::INVALID_SAMPLER) + .def_readwrite("sampling_ratio_inv", &sampler_t::sampling_ratio_inv) + .def_readwrite("sampling_ratio", &sampler_t::sampling_ratio) + .def_readwrite("sampling_salt", &sampler_t::sampling_salt) + .def_readwrite("sampling_type", &sampler_t::type); + + // Reader initialization parameters + py::class_(m, "ReaderInitParam") + .def(py::init([]() { return default_reader_init_params(); })) + .def(py::init([](const std::string& binary_fmt_str, bool ignore_obj_size, + bool ignore_size_zero_req, bool obj_id_is_num, + bool obj_id_is_num_set, int64_t cap_at_n_req, + int64_t block_size, bool has_header, bool has_header_set, + const std::string& delimiter, ssize_t trace_start_offset, + sampler_t* sampler) { + reader_init_param_t params = default_reader_init_params(); + if (!binary_fmt_str.empty()) { + params.binary_fmt_str = 
strdup(binary_fmt_str.c_str()); + } + params.ignore_obj_size = ignore_obj_size; + params.ignore_size_zero_req = ignore_size_zero_req; + params.obj_id_is_num = obj_id_is_num; + params.obj_id_is_num_set = obj_id_is_num_set; + params.cap_at_n_req = cap_at_n_req; + params.block_size = block_size; + params.has_header = has_header; + params.has_header_set = has_header_set; + params.delimiter = delimiter.empty() ? ',' : delimiter[0]; + params.trace_start_offset = trace_start_offset; + params.sampler = sampler; + return params; + }), + "binary_fmt_str"_a = "", "ignore_obj_size"_a = false, + "ignore_size_zero_req"_a = true, "obj_id_is_num"_a = true, + "obj_id_is_num_set"_a = false, "cap_at_n_req"_a = -1, + "block_size"_a = -1, "has_header"_a = false, + "has_header_set"_a = false, "delimiter"_a = ",", + "trace_start_offset"_a = 0, "sampler"_a = nullptr) + .def_readwrite("ignore_obj_size", &reader_init_param_t::ignore_obj_size) + .def_readwrite("ignore_size_zero_req", + &reader_init_param_t::ignore_size_zero_req) + .def_readwrite("obj_id_is_num", &reader_init_param_t::obj_id_is_num) + .def_readwrite("obj_id_is_num_set", + &reader_init_param_t::obj_id_is_num_set) + .def_readwrite("cap_at_n_req", &reader_init_param_t::cap_at_n_req) + .def_readwrite("time_field", &reader_init_param_t::time_field) + .def_readwrite("obj_id_field", &reader_init_param_t::obj_id_field) + .def_readwrite("obj_size_field", &reader_init_param_t::obj_size_field) + .def_readwrite("op_field", &reader_init_param_t::op_field) + .def_readwrite("ttl_field", &reader_init_param_t::ttl_field) + .def_readwrite("cnt_field", &reader_init_param_t::cnt_field) + .def_readwrite("tenant_field", &reader_init_param_t::tenant_field) + .def_readwrite("next_access_vtime_field", + &reader_init_param_t::next_access_vtime_field) + .def_readwrite("n_feature_fields", &reader_init_param_t::n_feature_fields) + // .def_readwrite("feature_fields", &reader_init_param_t::feature_fields) + .def_property( + "feature_fields", + [](const 
reader_init_param_t& self) { + return py::array_t({self.n_feature_fields}, + self.feature_fields); // copy to python + }, + [](reader_init_param_t& self, py::array_t arr) { + if (arr.size() != self.n_feature_fields) + throw std::runtime_error("Expected array of size " + + std::to_string(self.n_feature_fields)); + std::memcpy( + self.feature_fields, arr.data(), + self.n_feature_fields * sizeof(int)); // write to C++ array + }) + .def_readwrite("block_size", &reader_init_param_t::block_size) + .def_readwrite("has_header", &reader_init_param_t::has_header) + .def_readwrite("has_header_set", &reader_init_param_t::has_header_set) + .def_readwrite("delimiter", &reader_init_param_t::delimiter) + .def_readwrite("trace_start_offset", + &reader_init_param_t::trace_start_offset) + .def_readwrite("binary_fmt_str", &reader_init_param_t::binary_fmt_str) + .def_readwrite("sampler", &reader_init_param_t::sampler); + + /** + * @brief Reader structure + */ + py::class_>(m, "Reader") + .def(py::init([](const std::string& trace_path, trace_type_e trace_type, + const reader_init_param_t& init_params) { + trace_type_e final_trace_type = trace_type; + if (final_trace_type == trace_type_e::UNKNOWN_TRACE) { + final_trace_type = detect_trace_type(trace_path.c_str()); + } + reader_t* ptr = setup_reader(trace_path.c_str(), final_trace_type, + &init_params); + if (ptr == nullptr) { + throw std::runtime_error("Failed to create reader for " + + trace_path); + } + return std::unique_ptr(ptr); + }), + "trace_path"_a, "trace_type"_a = trace_type_e::UNKNOWN_TRACE, + "init_params"_a = default_reader_init_params()) + .def_readonly("n_read_req", &reader_t::n_read_req) + .def_readonly("n_total_req", &reader_t::n_total_req) + .def_readonly("trace_path", &reader_t::trace_path) + .def_readonly("file_size", &reader_t::file_size) + .def_readonly("init_params", &reader_t::init_params) + .def_readonly("trace_type", &reader_t::trace_type) + .def_readonly("trace_format", &reader_t::trace_format) + 
.def_readonly("ver", &reader_t::ver) + .def_readonly("cloned", &reader_t::cloned) + .def_readonly("cap_at_n_req", &reader_t::cap_at_n_req) + .def_readonly("trace_start_offset", &reader_t::trace_start_offset) + // For binary traces + .def_readonly("mapped_file", &reader_t::mapped_file) + .def_readonly("mmap_offset", &reader_t::mmap_offset) + // .def_readonly("zstd_reader_p", &reader_t::zstd_reader_p) + .def_readonly("is_zstd_file", &reader_t::is_zstd_file) + .def_readonly("item_size", &reader_t::item_size) + // For text traces + .def_readonly("file", &reader_t::file) + .def_readonly("line_buf", &reader_t::line_buf) + .def_readonly("line_buf_size", &reader_t::line_buf_size) + .def_readonly("csv_delimiter", &reader_t::csv_delimiter) + .def_readonly("csv_has_header", &reader_t::csv_has_header) + .def_readonly("obj_id_is_num", &reader_t::obj_id_is_num) + .def_readonly("obj_id_is_num_set", &reader_t::obj_id_is_num_set) + // Other properties + .def_readwrite("ignore_size_zero_req", &reader_t::ignore_size_zero_req) + .def_readwrite("ignore_obj_size", &reader_t::ignore_obj_size) + .def_readwrite("block_size", &reader_t::block_size) + .def_readonly("n_req_left", &reader_t::n_req_left) + .def_readonly("last_req_clock_time", &reader_t::last_req_clock_time) + .def_readonly("lcs_ver", &reader_t::lcs_ver) + // TODO(haocheng): Fully support sampler in Python bindings + .def_readonly("sampler", &reader_t::sampler) + .def_readonly("read_direction", &reader_t::read_direction) + .def("get_num_of_req", + [](reader_t& self) { return get_num_of_req(&self); }) + .def( + "read_one_req", + [](reader_t& self, request_t& req) { + int ret = read_one_req(&self, &req); + if (ret != 0) { + throw std::runtime_error("Failed to read request"); + } + return req; + }, + "req"_a) + .def("reset", [](reader_t& self) { reset_reader(&self); }) + .def("close", [](reader_t& self) { close_reader(&self); }) + .def("clone", + [](const reader_t& self) { + reader_t* cloned_reader = clone_reader(&self); + if 
(cloned_reader == nullptr) { + throw std::runtime_error("Failed to clone reader"); + } + return std::unique_ptr(cloned_reader); + }) + .def( + "read_first_req", + [](reader_t& self, request_t& req) { + read_first_req(&self, &req); + return req; + }, + "req"_a) + .def( + "read_last_req", + [](reader_t& self, request_t& req) { + read_last_req(&self, &req); + return req; + }, + "req"_a) + .def( + "skip_n_req", + [](reader_t& self, int n) { + int ret = skip_n_req(&self, n); + if (ret != 0) { + throw std::runtime_error("Failed to skip requests"); + } + return ret; + }, + "n"_a) + .def("read_one_req_above", + [](reader_t& self) { + request_t* req = new_request(); + int ret = read_one_req_above(&self, req); + if (ret != 0) { + free_request(req); + throw std::runtime_error("Failed to read one request above"); + } + return std::unique_ptr(req); + }) + .def("go_back_one_req", + [](reader_t& self) { + int ret = go_back_one_req(&self); + if (ret != 0) { + throw std::runtime_error("Failed to go back one request"); + } + }) + .def( + "set_read_pos", + [](reader_t& self, double pos) { reader_set_read_pos(&self, pos); }, + "pos"_a); +} +} // namespace libcachesim diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp deleted file mode 100644 index 5ca90ca21..000000000 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ /dev/null @@ -1,1223 +0,0 @@ -#include -#include -#include - -// Suppress visibility warnings for pybind11 types -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wattributes" - -#include -#include -#include - -#include "config.h" -#include "libCacheSim/cache.h" -#include "libCacheSim/cacheObj.h" -#include "libCacheSim/const.h" -#include "libCacheSim/enum.h" -#include "libCacheSim/logging.h" -#include "libCacheSim/macro.h" -#include "libCacheSim/reader.h" -#include "libCacheSim/request.h" -#include "libCacheSim/sampling.h" -#include "mystr.h" - -/* admission */ -#include "libCacheSim/admissionAlgo.h" - -/* 
eviction */ -#include "libCacheSim/evictionAlgo.h" - -/* cache simulator */ -#include "libCacheSim/profilerLRU.h" -#include "libCacheSim/simulator.h" - -/* bin */ -#include "cachesim/cache_init.h" -#include "cli_reader_utils.h" - -#define STRINGIFY(x) #x -#define MACRO_STRINGIFY(x) STRINGIFY(x) - -namespace py = pybind11; - -// Helper functions - -// https://stackoverflow.com/questions/874134/find-out-if-string-ends-with-another-string-in-c -static bool ends_with(std::string_view str, std::string_view suffix) { - return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; -} - -trace_type_e infer_trace_type(const std::string& trace_path) { - // Infer the trace type based on the file extension - if (trace_path.find("oracleGeneral") != std::string::npos) { - return trace_type_e::ORACLE_GENERAL_TRACE; - } else if (ends_with(trace_path, ".csv")) { - return trace_type_e::CSV_TRACE; - } else if (ends_with(trace_path, ".txt")) { - return trace_type_e::PLAIN_TXT_TRACE; - } else if (ends_with(trace_path, ".bin")) { - return trace_type_e::BIN_TRACE; - } else if (ends_with(trace_path, ".vscsi")) { - return trace_type_e::VSCSI_TRACE; - } else if (ends_with(trace_path, ".twr")) { - return trace_type_e::TWR_TRACE; - } else if (ends_with(trace_path, ".twrns")) { - return trace_type_e::TWRNS_TRACE; - } else if (ends_with(trace_path, ".lcs")) { - return trace_type_e::LCS_TRACE; - } else if (ends_with(trace_path, ".valpin")) { - return trace_type_e::VALPIN_TRACE; - } else { - return trace_type_e::UNKNOWN_TRACE; - } -} - -// Python Hook Cache Implementation -class PythonHookCache { - private: - uint64_t cache_size_; - std::string cache_name_; - std::unordered_map objects_; // obj_id -> obj_size - py::object plugin_data_; - - // Hook functions - py::function init_hook_; - py::function hit_hook_; - py::function miss_hook_; - py::function eviction_hook_; - py::function remove_hook_; - py::object free_hook_; // Changed to py::object to 
allow py::none() - - public: - uint64_t n_req = 0; - uint64_t n_obj = 0; - uint64_t occupied_byte = 0; - uint64_t cache_size; - - PythonHookCache(uint64_t cache_size, - const std::string& cache_name = "PythonHookCache") - : cache_size_(cache_size), - cache_name_(cache_name), - cache_size(cache_size), - free_hook_(py::none()) {} - - void set_hooks(py::function init_hook, py::function hit_hook, - py::function miss_hook, py::function eviction_hook, - py::function remove_hook, py::object free_hook = py::none()) { - init_hook_ = init_hook; - hit_hook_ = hit_hook; - miss_hook_ = miss_hook; - eviction_hook_ = eviction_hook; - remove_hook_ = remove_hook; - - // Handle free_hook properly - if (!free_hook.is_none()) { - free_hook_ = free_hook; - } else { - free_hook_ = py::none(); - } - - // Initialize plugin data - plugin_data_ = init_hook_(cache_size_); - } - - bool get(const request_t& req) { - n_req++; - - auto it = objects_.find(req.obj_id); - if (it != objects_.end()) { - // Cache hit - hit_hook_(plugin_data_, req.obj_id, req.obj_size); - return true; - } else { - // Cache miss - call miss hook first - miss_hook_(plugin_data_, req.obj_id, req.obj_size); - - // Check if eviction is needed - while (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) { - // Need to evict - uint64_t victim_id = - eviction_hook_(plugin_data_, req.obj_id, req.obj_size) - .cast(); - auto victim_it = objects_.find(victim_id); - if (victim_it != objects_.end()) { - occupied_byte -= victim_it->second; - objects_.erase(victim_it); - n_obj--; - remove_hook_(plugin_data_, victim_id); - } else { - // Safety check: if eviction hook returns invalid ID, break to avoid - // infinite loop - break; - } - } - - // Insert new object if there's space - if (occupied_byte + req.obj_size <= cache_size_) { - objects_[req.obj_id] = req.obj_size; - occupied_byte += req.obj_size; - n_obj++; - } - - return false; - } - } - - ~PythonHookCache() { - if (!free_hook_.is_none()) { - py::function free_func = 
free_hook_.cast(); - free_func(plugin_data_); - } - } -}; - -// Restore visibility warnings -#pragma GCC diagnostic pop - -struct CacheDeleter { - void operator()(cache_t* ptr) const { - if (ptr != nullptr) ptr->cache_free(ptr); - } -}; - -struct RequestDeleter { - void operator()(request_t* ptr) const { - if (ptr != nullptr) free_request(ptr); - } -}; - -struct ReaderDeleter { - void operator()(reader_t* ptr) const { - if (ptr != nullptr) close_trace(ptr); - } -}; - -PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) - m.doc() = R"pbdoc( - libCacheSim Python bindings - -------------------------- - - .. currentmodule:: libcachesim - - .. autosummary:: - :toctree: _generate - - TODO(haocheng): add meaningful methods - )pbdoc"; - - py::enum_(m, "TraceType") - .value("CSV_TRACE", trace_type_e::CSV_TRACE) - .value("BIN_TRACE", trace_type_e::BIN_TRACE) - .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) - .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE) - .value("LCS_TRACE", trace_type_e::LCS_TRACE) - .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) - .value("TWR_TRACE", trace_type_e::TWR_TRACE) - .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE) - .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE) - .value("ORACLE_SYS_TWR_TRACE", trace_type_e::ORACLE_SYS_TWR_TRACE) - .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE) - .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE) - .value("VALPIN_TRACE", trace_type_e::VALPIN_TRACE) - .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE) - .export_values(); - - py::enum_(m, "ReqOp") - .value("NOP", req_op_e::OP_NOP) - .value("GET", req_op_e::OP_GET) - .value("GETS", req_op_e::OP_GETS) - .value("SET", req_op_e::OP_SET) - .value("ADD", req_op_e::OP_ADD) - .value("CAS", req_op_e::OP_CAS) - .value("REPLACE", req_op_e::OP_REPLACE) - .value("APPEND", req_op_e::OP_APPEND) - .value("PREPEND", req_op_e::OP_PREPEND) - .value("DELETE", 
req_op_e::OP_DELETE) - .value("INCR", req_op_e::OP_INCR) - .value("DECR", req_op_e::OP_DECR) - .value("READ", req_op_e::OP_READ) - .value("WRITE", req_op_e::OP_WRITE) - .value("UPDATE", req_op_e::OP_UPDATE) - .value("INVALID", req_op_e::OP_INVALID) - .export_values(); - - // *************** structs *************** - /** - * @brief Cache structure - */ - py::class_>(m, "Cache") - .def_readwrite("n_req", &cache_t::n_req) - .def_readwrite("cache_size", &cache_t::cache_size) - // Use proper accessor functions for private fields - .def_property_readonly( - "n_obj", [](const cache_t& self) { return self.get_n_obj(&self); }) - .def_property_readonly( - "occupied_byte", - [](const cache_t& self) { return self.get_occupied_byte(&self); }) - // methods - .def("get", [](cache_t& self, const request_t& req) { - return self.get(&self, &req); - }); - - /** - * @brief Request structure - */ - py::class_>(m, - "Request") - .def(py::init([]() { return new_request(); })) - .def(py::init([](uint64_t obj_id, uint64_t obj_size, uint64_t clock_time, - uint64_t hv, req_op_e op) { - request_t* req = new_request(); - req->obj_id = obj_id; - req->obj_size = obj_size; - req->clock_time = clock_time; - req->hv = hv; - req->op = op; - return req; - }), - py::arg("obj_id"), py::arg("obj_size") = 1, - py::arg("clock_time") = 0, py::arg("hv") = 0, - py::arg("op") = req_op_e::OP_GET, - R"pbdoc( - Create a request instance. - - Args: - obj_id (int): The object ID. - obj_size (int): The object size. (default: 1) - clock_time (int): The clock time. (default: 0) - hv (int): The hash value. (default: 0) - op (req_op_e): The operation. (default: OP_GET) - - Returns: - Request: A new request instance. 
- )pbdoc") - .def_readwrite("clock_time", &request_t::clock_time) - .def_readwrite("hv", &request_t::hv) - .def_readwrite("obj_id", &request_t::obj_id) - .def_readwrite("obj_size", &request_t::obj_size) - .def_readwrite("op", &request_t::op); - - /** - * @brief Reader structure - */ - py::class_>(m, "Reader") - .def_readwrite("n_read_req", &reader_t::n_read_req) - .def_readwrite("n_total_req", &reader_t::n_total_req) - .def_readwrite("trace_path", &reader_t::trace_path) - .def_readwrite("file_size", &reader_t::file_size) - .def_readwrite("ignore_obj_size", &reader_t::ignore_obj_size) - // methods - .def( - "get_wss", - [](reader_t& self) { - int64_t wss_obj = 0, wss_byte = 0; - cal_working_set_size(&self, &wss_obj, &wss_byte); - return self.ignore_obj_size ? wss_obj : wss_byte; - }, - R"pbdoc( - Get the working set size of the trace. - - Args: - ignore_obj_size (bool): Whether to ignore the object size. - - Returns: - int: The working set size of the trace. - )pbdoc") - .def( - "seek", - [](reader_t& self, int64_t offset, bool from_beginning = false) { - int64_t offset_from_beginning = offset; - if (!from_beginning) { - offset_from_beginning += self.n_read_req; - } - reset_reader(&self); - skip_n_req(&self, offset_from_beginning); - }, - py::arg("offset"), py::arg("from_beginning") = false, - R"pbdoc( - Seek to a specific offset in the trace file. - We only support seeking from current position or from the beginning. - - Can only move forward, not backward. - - Args: - offset (int): The offset to seek to the beginning. - - Raises: - RuntimeError: If seeking fails. 
- )pbdoc") - .def("__iter__", [](reader_t& self) -> reader_t& { return self; }) - .def("__next__", [](reader_t& self) { - auto req = std::unique_ptr(new_request()); - int ret = read_one_req(&self, req.get()); - if (ret != 0) { - throw py::stop_iteration(); - } - return req; - }); - - // Helper function to apply parameters from dictionary to reader_init_param_t - auto apply_params_from_dict = [](reader_init_param_t& params, - py::dict dict_params) { - // Template field setter with type safety - auto set_if_present = [&](const char* key, auto& field) { - if (dict_params.contains(key)) { - field = - dict_params[key].cast>(); - } - }; - - // Apply all standard fields - set_if_present("time_field", params.time_field); - set_if_present("obj_id_field", params.obj_id_field); - set_if_present("obj_size_field", params.obj_size_field); - set_if_present("has_header", params.has_header); - set_if_present("ignore_obj_size", params.ignore_obj_size); - set_if_present("ignore_size_zero_req", params.ignore_size_zero_req); - set_if_present("obj_id_is_num", params.obj_id_is_num); - set_if_present("obj_id_is_num_set", params.obj_id_is_num_set); - set_if_present("has_header_set", params.has_header_set); - set_if_present("cap_at_n_req", params.cap_at_n_req); - set_if_present("op_field", params.op_field); - set_if_present("ttl_field", params.ttl_field); - set_if_present("cnt_field", params.cnt_field); - set_if_present("tenant_field", params.tenant_field); - set_if_present("next_access_vtime_field", params.next_access_vtime_field); - set_if_present("block_size", params.block_size); - set_if_present("trace_start_offset", params.trace_start_offset); - - // Special fields with custom handling - if (dict_params.contains("delimiter")) { - std::string delim = dict_params["delimiter"].cast(); - params.delimiter = delim.empty() ? 
',' : delim[0]; - } - - if (dict_params.contains("binary_fmt_str")) { - // Free existing memory first to prevent leaks - if (params.binary_fmt_str) { - free(params.binary_fmt_str); - params.binary_fmt_str = nullptr; - } - std::string fmt = dict_params["binary_fmt_str"].cast(); - if (!fmt.empty()) { - // Note: Using strdup for C-compatible memory allocation - // Memory is managed by reader_init_param_t destructor/cleanup - params.binary_fmt_str = strdup(fmt.c_str()); - if (!params.binary_fmt_str) { - throw std::runtime_error( - "Failed to allocate memory for binary_fmt_str"); - } - } - } - - if (dict_params.contains("feature_fields")) { - auto ff = dict_params["feature_fields"].cast>(); - if (ff.size() > N_MAX_FEATURES) { - throw py::value_error("Too many feature fields (max " + - std::to_string(N_MAX_FEATURES) + ")"); - } - params.n_feature_fields = static_cast(ff.size()); - // Use copy_n for explicit bounds checking - std::copy_n(ff.begin(), params.n_feature_fields, params.feature_fields); - } - }; - - py::class_(m, "ReaderInitParam") - .def(py::init([]() { - reader_init_param_t params; - set_default_reader_init_params(¶ms); - return params; - }), - "Create with default parameters") - - .def(py::init([apply_params_from_dict](py::kwargs kwargs) { - reader_init_param_t params; - set_default_reader_init_params(¶ms); - - // Convert kwargs to dict and apply using shared helper - py::dict dict_params = py::dict(kwargs); - apply_params_from_dict(params, dict_params); - - return params; - }), - "Create with keyword arguments") - - .def(py::init([apply_params_from_dict](py::dict dict_params) { - reader_init_param_t params; - set_default_reader_init_params(¶ms); - - // Apply using shared helper function - apply_params_from_dict(params, dict_params); - - return params; - }), - py::arg("params"), "Create from dictionary (backward compatibility)") - .def("__repr__", [](const reader_init_param_t& params) { - std::stringstream ss; - ss << "ReaderInitParam(\n"; - - // Group 1: 
Core fields - ss << " # Core fields\n"; - ss << " time_field=" << params.time_field << ", "; - ss << "obj_id_field=" << params.obj_id_field << ", "; - ss << "obj_size_field=" << params.obj_size_field << ",\n"; - - // Group 2: Flags and options - ss << " # Flags and options\n"; - ss << " has_header=" << params.has_header << ", "; - ss << "ignore_obj_size=" << params.ignore_obj_size << ", "; - ss << "ignore_size_zero_req=" << params.ignore_size_zero_req << ", "; - ss << "obj_id_is_num=" << params.obj_id_is_num << ",\n"; - - // Group 3: Internal state flags - ss << " # Internal state\n"; - ss << " obj_id_is_num_set=" << params.obj_id_is_num_set << ", "; - ss << "has_header_set=" << params.has_header_set << ",\n"; - - // Group 4: Optional fields - ss << " # Optional fields\n"; - ss << " cap_at_n_req=" << params.cap_at_n_req << ", "; - ss << "op_field=" << params.op_field << ", "; - ss << "ttl_field=" << params.ttl_field << ", "; - ss << "cnt_field=" << params.cnt_field << ",\n"; - ss << " tenant_field=" << params.tenant_field << ", "; - ss << "next_access_vtime_field=" << params.next_access_vtime_field - << ",\n"; - - // Group 5: Miscellaneous - ss << " # Miscellaneous\n"; - ss << " block_size=" << params.block_size << ", "; - ss << "trace_start_offset=" << params.trace_start_offset; - ss << "\n)"; - return ss.str(); - }); - - // *************** functions *************** - /** - * @brief Open a trace file for reading - */ - m.def( - "open_trace", - [apply_params_from_dict](const std::string& trace_path, py::object type, - py::object params) { - trace_type_e c_type = UNKNOWN_TRACE; - if (!type.is_none()) { - c_type = type.cast(); - } else { - // If type is None, we can try to infer the type from the file - // extension - c_type = infer_trace_type(trace_path); - if (c_type == UNKNOWN_TRACE) { - throw std::runtime_error("Could not infer trace type from path: " + - trace_path); - } - } - - // Handle different parameter types - reader_init_param_t init_param; - 
set_default_reader_init_params(&init_param); - - if (py::isinstance(params)) { - // Dictionary parameters - use shared helper function - py::dict dict_params = params.cast(); - apply_params_from_dict(init_param, dict_params); - } else if (!params.is_none()) { - // reader_init_param_t object - direct cast (pybind11 handles - // conversion) - init_param = params.cast(); - } - reader_t* ptr = open_trace(trace_path.c_str(), c_type, &init_param); - return std::unique_ptr(ptr); - }, - py::arg("trace_path"), py::arg("type") = py::none(), - py::arg("params") = py::none(), - R"pbdoc( - Open a trace file for reading. - - Args: - trace_path (str): Path to the trace file. - type (Union[trace_type_e, None]): Type of the trace (e.g., CSV_TRACE). If None, the type will be inferred. - params (Union[dict, reader_init_param_t, None]): Initialization parameters for the reader. - - Returns: - Reader: A new reader instance for the trace. - )pbdoc"); - - /** - * @brief Generic function to create a cache instance. - */ - m.def( - "create_cache", - [](const std::string& eviction_algo, const uint64_t cache_size, - const std::string& eviction_params, - bool consider_obj_metadata) { return nullptr; }, - py::arg("eviction_algo"), py::arg("cache_size"), - py::arg("eviction_params"), py::arg("consider_obj_metadata"), - R"pbdoc( - Create a cache instance. - - Args: - eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random"). - cache_size (int): Size of the cache in bytes. - eviction_params (str): Additional parameters for the eviction algorithm. - consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions. - - Returns: - Cache: A new cache instance. - )pbdoc"); - - /* TODO(haocheng): should we support all parameters in the - * common_cache_params_t? (hash_power, etc.) 
*/ - - // Currently supported eviction algorithms with direct initialization: - // - "ARC" - // - "Clock" - // - "FIFO" - // - "LRB" - // - "LRU" - // - "S3FIFO" - // - "Sieve" - // - "ThreeLCache" - // - "TinyLFU" - // - "TwoQ" - - /** - * @brief Create a ARC cache instance. - */ - m.def( - "ARC_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = ARC_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a ARC cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - )pbdoc"); - - /** - * @brief Create a Clock cache instance. - */ - m.def( - "Clock_init", - [](uint64_t cache_size, long int n_bit_counter, long int init_freq) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - // assemble the cache specific parameters - std::string cache_specific_params = - "n-bit-counter=" + std::to_string(n_bit_counter) + "," + - "init-freq=" + std::to_string(init_freq); - - cache_t* ptr = Clock_init(cc_params, cache_specific_params.c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("n_bit_counter") = 1, - py::arg("init_freq") = 0, - R"pbdoc( - Create a Clock cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - n_bit_counter (int): Number of bits for counter (default: 1). - init_freq (int): Initial frequency value (default: 0). - - Returns: - Cache: A new Clock cache instance. - )pbdoc"); - - /** - * @brief Create a FIFO cache instance. - */ - m.def( - "FIFO_init", - [](uint64_t cache_size) { - // Construct common cache parameters - common_cache_params_t cc_params = {.cache_size = cache_size}; - // FIFO no specific parameters, so we pass nullptr - cache_t* ptr = FIFO_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a FIFO cache instance. - - Args: - cache_size (int): Size of the cache in bytes. 
- - Returns: - Cache: A new FIFO cache instance. - )pbdoc"); - -#ifdef ENABLE_LRB - /** - * @brief Create a LRB cache instance. - */ - m.def( - "LRB_init", - [](uint64_t cache_size, std::string objective) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LRB_init(cc_params, ("objective=" + objective).c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", - R"pbdoc( - Create a LRB cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - objective (str): Objective function to optimize (default: "byte-miss-ratio"). - - Returns: - Cache: A new LRB cache instance. - )pbdoc"); -#else - // TODO(haocheng): add a dummy function to avoid the error when LRB is not - // enabled - m.def( - "LRB_init", - [](uint64_t cache_size, std::string objective) { - throw std::runtime_error("LRB is not enabled"); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); -#endif - - /** - * @brief Create a LRU cache instance. - */ - m.def( - "LRU_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LRU_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LRU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LRU cache instance. - )pbdoc"); - - /** - * @brief Create a S3FIFO cache instance. 
- */ - m.def( - "S3FIFO_init", - [](uint64_t cache_size, double fifo_size_ratio, double ghost_size_ratio, - int move_to_main_threshold) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = S3FIFO_init( - cc_params, - ("fifo-size-ratio=" + std::to_string(fifo_size_ratio) + "," + - "ghost-size-ratio=" + std::to_string(ghost_size_ratio) + "," + - "move-to-main-threshold=" + std::to_string(move_to_main_threshold)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("fifo_size_ratio") = 0.10, - py::arg("ghost_size_ratio") = 0.90, py::arg("move_to_main_threshold") = 2, - R"pbdoc( - Create a S3FIFO cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - fifo_size_ratio (float): Ratio of FIFO size to cache size (default: 0.10). - ghost_size_ratio (float): Ratio of ghost size to cache size (default: 0.90). - move_to_main_threshold (int): Threshold for moving to main queue (default: 2). - - Returns: - Cache: A new S3FIFO cache instance. - )pbdoc"); - - /** - * @brief Create a Sieve cache instance. - */ - m.def( - "Sieve_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = Sieve_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a Sieve cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new Sieve cache instance. - )pbdoc"); - -#ifdef ENABLE_3L_CACHE - /** - * @brief Create a ThreeL cache instance. - */ - m.def( - "ThreeLCache_init", - [](uint64_t cache_size, std::string objective) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = - ThreeLCache_init(cc_params, ("objective=" + objective).c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", - R"pbdoc( - Create a ThreeL cache instance. - - Args: - cache_size (int): Size of the cache in bytes. 
- objective (str): Objective function to optimize (default: "byte-miss-ratio"). - - Returns: - Cache: A new ThreeL cache instance. - )pbdoc"); -#else - // TODO(haocheng): add a dummy function to avoid the error when ThreeLCache is - // not enabled - m.def( - "ThreeLCache_init", - [](uint64_t cache_size, std::string objective) { - throw std::runtime_error("ThreeLCache is not enabled"); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); -#endif - - /** - * @brief Create a TinyLFU cache instance. - */ - // mark evivtion parsing need change - m.def( - "TinyLFU_init", - [](uint64_t cache_size, std::string main_cache, double window_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = WTinyLFU_init( - cc_params, ("main-cache=" + main_cache + "," + - "window-size=" + std::to_string(window_size)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("main_cache") = "SLRU", - py::arg("window_size") = 0.01, - R"pbdoc( - Create a TinyLFU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - main_cache (str): Main cache to use (default: "SLRU"). - window_size (float): Window size for TinyLFU (default: 0.01). - - Returns: - Cache: A new TinyLFU cache instance. - )pbdoc"); - - /** - * @brief Create a TwoQ cache instance. - */ - m.def( - "TwoQ_init", - [](uint64_t cache_size, double Ain_size_ratio, double Aout_size_ratio) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = TwoQ_init( - cc_params, - ("Ain-size-ratio=" + std::to_string(Ain_size_ratio) + "," + - "Aout-size-ratio=" + std::to_string(Aout_size_ratio)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("Ain_size_ratio") = 0.25, - py::arg("Aout_size_ratio") = 0.5, - R"pbdoc( - Create a TwoQ cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - Ain_size_ratio (float): Ratio of A-in size to cache size (default: 0.25). 
- Aout_size_ratio (float): Ratio of A-out size to cache size (default: 0.5). - - Returns: - Cache: A new TwoQ cache instance. - )pbdoc"); - - /** - * @brief Create a LFU cache instance. - */ - m.def( - "LFU_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LFU_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LFU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LFU cache instance. - )pbdoc"); - - /** - * @brief Create a LFUDA cache instance. - */ - m.def( - "LFUDA_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LFUDA_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LFUDA cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LFUDA cache instance. - )pbdoc"); - - /** - * @brief Create a SLRU cache instance. - */ - m.def( - "SLRU_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = SLRU_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a SLRU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new SLRU cache instance. - )pbdoc"); - - /** - * @brief Create a Belady cache instance. - */ - m.def( - "Belady_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = Belady_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a Belady cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new Belady cache instance. - )pbdoc"); - - /** - * @brief Create a BeladySize cache instance. 
- */ - m.def( - "BeladySize_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = BeladySize_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a BeladySize cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new BeladySize cache instance. - )pbdoc"); - - /** - * @brief Create a QDLP cache instance. - */ - m.def( - "QDLP_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = QDLP_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a QDLP cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new QDLP cache instance. - )pbdoc"); - - /** - * @brief Create a LeCaR cache instance. - */ - m.def( - "LeCaR_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LeCaR_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LeCaR cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LeCaR cache instance. - )pbdoc"); - - /** - * @brief Create a Cacheus cache instance. - */ - m.def( - "Cacheus_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = Cacheus_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a Cacheus cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new Cacheus cache instance. - )pbdoc"); - - /** - * @brief Create a WTinyLFU cache instance. 
- */ - m.def( - "WTinyLFU_init", - [](uint64_t cache_size, std::string main_cache, double window_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = WTinyLFU_init( - cc_params, ("main-cache=" + main_cache + "," + - "window-size=" + std::to_string(window_size)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("main_cache") = "SLRU", - py::arg("window_size") = 0.01, - R"pbdoc( - Create a WTinyLFU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - main_cache (str): Main cache to use (default: "SLRU"). - window_size (float): Window size for TinyLFU (default: 0.01). - - Returns: - Cache: A new WTinyLFU cache instance. - )pbdoc"); - - /** - * @brief Create a Python hook-based cache instance. - */ - py::class_(m, "PythonHookCache") - .def(py::init(), py::arg("cache_size"), - py::arg("cache_name") = "PythonHookCache") - .def("set_hooks", &PythonHookCache::set_hooks, py::arg("init_hook"), - py::arg("hit_hook"), py::arg("miss_hook"), py::arg("eviction_hook"), - py::arg("remove_hook"), py::arg("free_hook") = py::none(), - R"pbdoc( - Set the hook functions for the cache. - - Args: - init_hook (callable): Function called during cache initialization. - Signature: init_hook(cache_size: int) -> Any - hit_hook (callable): Function called on cache hit. - Signature: hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None - miss_hook (callable): Function called on cache miss. - Signature: miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None - eviction_hook (callable): Function called to select eviction candidate. - Signature: eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int - remove_hook (callable): Function called when object is removed. - Signature: remove_hook(plugin_data: Any, obj_id: int) -> None - free_hook (callable, optional): Function called during cache cleanup. 
- Signature: free_hook(plugin_data: Any) -> None - )pbdoc") - .def("get", &PythonHookCache::get, py::arg("req"), - R"pbdoc( - Process a cache request. - - Args: - req (Request): The cache request to process. - - Returns: - bool: True if cache hit, False if cache miss. - )pbdoc") - .def_readwrite("n_req", &PythonHookCache::n_req) - .def_readwrite("n_obj", &PythonHookCache::n_obj) - .def_readwrite("occupied_byte", &PythonHookCache::occupied_byte) - .def_readwrite("cache_size", &PythonHookCache::cache_size); - - /** - * @brief Process a trace with a cache and return miss ratio. - */ - m.def( - "process_trace", - [](cache_t& cache, reader_t& reader, int64_t start_req = 0, - int64_t max_req = -1) { - reset_reader(&reader); - if (start_req > 0) { - skip_n_req(&reader, start_req); - } - - request_t* req = new_request(); - int64_t n_req = 0, n_hit = 0; - int64_t bytes_req = 0, bytes_hit = 0; - bool hit; - - read_one_req(&reader, req); - while (req->valid) { - n_req += 1; - bytes_req += req->obj_size; - hit = cache.get(&cache, req); - if (hit) { - n_hit += 1; - bytes_hit += req->obj_size; - } - read_one_req(&reader, req); - if (max_req > 0 && n_req >= max_req) { - break; // Stop if we reached the max request limit - } - } - - free_request(req); - // return the miss ratio - double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; - double byte_miss_ratio = - bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0; - return std::make_tuple(obj_miss_ratio, byte_miss_ratio); - }, - py::arg("cache"), py::arg("reader"), py::arg("start_req") = 0, - py::arg("max_req") = -1, - R"pbdoc( - Process a trace with a cache and return miss ratio. - - This function processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. - - Args: - cache (Cache): The cache instance to use for processing. - reader (Reader): The trace reader instance. - start_req (int): The starting request number to process from (default: 0, from the beginning). 
- max_req (int): Maximum number of requests to process (-1 for no limit). - - Returns: - float: Object miss ratio (0.0 to 1.0). - float: Byte miss ratio (0.0 to 1.0). - - Example: - >>> cache = libcachesim.LRU(1024*1024) - >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = libcachesim.process_trace(cache, reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") - )pbdoc"); - - /** - * @brief Process a trace with a Python hook cache and return miss ratio. - */ - m.def( - "process_trace_python_hook", - [](PythonHookCache& cache, reader_t& reader, int64_t start_req = 0, - int64_t max_req = -1) { - reset_reader(&reader); - if (start_req > 0) { - skip_n_req(&reader, start_req); - } - - request_t* req = new_request(); - int64_t n_req = 0, n_hit = 0; - int64_t bytes_req = 0, bytes_hit = 0; - bool hit; - - read_one_req(&reader, req); - while (req->valid) { - n_req += 1; - bytes_req += req->obj_size; - hit = cache.get(*req); - if (hit) { - n_hit += 1; - bytes_hit += req->obj_size; - } - read_one_req(&reader, req); - if (max_req > 0 && n_req >= max_req) { - break; // Stop if we reached the max request limit - } - } - - free_request(req); - // return the miss ratio - double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; - double byte_miss_ratio = - bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0; - return std::make_tuple(obj_miss_ratio, byte_miss_ratio); - }, - py::arg("cache"), py::arg("reader"), py::arg("start_req") = 0, - py::arg("max_req") = -1, - R"pbdoc( - Process a trace with a Python hook cache and return miss ratio. - - This function processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. Specifically designed - for PythonHookCache instances. - - Args: - cache (PythonHookCache): The Python hook cache instance to use. - reader (Reader): The trace reader instance. 
- start_req (int): The starting request number to process from (0 for beginning). - max_req (int): Maximum number of requests to process (-1 for no limit). - - Returns: - float: Object miss ratio (0.0 to 1.0). - float: Byte miss ratio (0.0 to 1.0). - - Example: - >>> cache = libcachesim.PythonHookCachePolicy(1024*1024) - >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = libcachesim.process_trace_python_hook(cache.cache, reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") - )pbdoc"); - -#ifdef VERSION_INFO - m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); -#else - m.attr("__version__") = "dev"; -#endif -} diff --git a/libCacheSim-python/tests/conftest.py b/libCacheSim-python/tests/conftest.py index a3e2705e1..42edf9190 100644 --- a/libCacheSim-python/tests/conftest.py +++ b/libCacheSim-python/tests/conftest.py @@ -4,29 +4,3 @@ import gc import pytest - -from libcachesim import Reader, TraceType, open_trace - - -@pytest.fixture -def mock_reader(): - data_file = os.path.join( # noqa: PTH118 - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 - "data", - "cloudPhysicsIO.oracleGeneral.bin", - ) - reader: Reader = open_trace( - data_file, - type=TraceType.ORACLE_GENERAL_TRACE, - ) - try: - yield reader - finally: - # More careful cleanup - try: - if hasattr(reader, "close"): - reader.close() - except Exception: # Be specific about exception type - pass - # Don't explicitly del reader here, let Python handle it - gc.collect() diff --git a/libCacheSim-python/tests/test_eviction.py b/libCacheSim-python/tests/test_eviction.py deleted file mode 100644 index a51aae860..000000000 --- a/libCacheSim-python/tests/test_eviction.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest - -from libcachesim import ( - ARC, - FIFO, - LRU, - S3FIFO, - Clock, - 
Sieve, - TinyLFU, - TwoQ, -) -from tests.utils import get_reference_data - - -@pytest.mark.parametrize( - "eviction_algo", - [ - FIFO, - ARC, - Clock, - LRU, - S3FIFO, - Sieve, - TinyLFU, - TwoQ, - ], -) -@pytest.mark.parametrize("cache_size_ratio", [0.01]) -def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): - cache = None - try: - # create a cache with the eviction policy - cache = eviction_algo(cache_size=int(mock_reader.get_wss() * cache_size_ratio)) - req_count = 0 - miss_count = 0 - - # Limit the number of requests to avoid long test times - # max_requests = 1000 - for i, req in enumerate(mock_reader): - # if i >= max_requests: - # break - hit = cache.get(req) - if not hit: - miss_count += 1 - req_count += 1 - - if req_count == 0: - pytest.skip("No requests processed") - - miss_ratio = miss_count / req_count - reference_miss_ratio = get_reference_data(eviction_algo.__name__, cache_size_ratio) - if reference_miss_ratio is None: - pytest.skip(f"No reference data for {eviction_algo.__name__} with cache size ratio {cache_size_ratio}") - assert abs(miss_ratio - reference_miss_ratio) < 0.01, ( - f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" - ) - - except Exception as e: - pytest.fail(f"Error in test_eviction_algo: {e}") - finally: - pass diff --git a/libCacheSim-python/tests/test_example.py b/libCacheSim-python/tests/test_example.py new file mode 100644 index 000000000..9cfcb7f3f --- /dev/null +++ b/libCacheSim-python/tests/test_example.py @@ -0,0 +1,16 @@ +from libcachesim import ( + Request, + LRU, + SyntheticReader, + Util, +) + +def test_example(): + reader = SyntheticReader(num_of_req=1000) + cache = LRU(cache_size=1000) + miss_cnt = 0 + for req in reader: + hit = cache.get(req) + if not hit: + miss_cnt += 1 + print(f"Miss ratio: {miss_cnt / reader.num_of_req}") diff --git a/libCacheSim-python/tests/test_process_trace.py b/libCacheSim-python/tests/test_process_trace.py deleted file mode 100644 index 
1dbfb486f..000000000 --- a/libCacheSim-python/tests/test_process_trace.py +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env python3 -""" -Test file for process_trace functionality. -""" - -import sys -import os -import pytest - -# Add the parent directory to the Python path for development testing -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -try: - import libcachesim as lcs -except ImportError as e: - pytest.skip(f"libcachesim not available: {e}", allow_module_level=True) - -from collections import OrderedDict - - -def create_trace_reader(): - """Helper function to create a trace reader with binary trace file.""" - data_file = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin" - ) - if not os.path.exists(data_file): - return None - return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) - - -def test_process_trace_native(): - """Test process_trace with native LRU cache.""" - - # Open trace - reader = create_trace_reader() - if reader is None: - pytest.skip("Test trace file not found, skipping test") - - # Create LRU cache - cache = lcs.LRU(1024 * 1024) # 1MB cache - - # Process trace and get miss ratio - obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=1000) - - # Verify miss ratio is reasonable (should be between 0 and 1) - assert 0.0 <= obj_miss_ratio <= 1.0, f"Invalid miss ratio: {obj_miss_ratio}" - - -def test_process_trace_python_hook(): - """Test process_trace with Python hook cache.""" - - # Open trace - reader = create_trace_reader() - if reader is None: - pytest.skip("Test trace file not found, skipping test") - - # Create Python hook LRU cache - cache = lcs.PythonHookCachePolicy(1024 * 1024, "TestLRU") - - # Define LRU hooks - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = True - - def 
eviction_hook(lru_dict, obj_id, obj_size): - return next(iter(lru_dict)) - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - # Set hooks - cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Test both methods - # Method 1: Direct function call - miss_ratio1 = lcs.process_trace_python_hook(cache.cache, reader, max_req=1000)[0] - - # Need to reopen the trace for second test - reader2 = create_trace_reader() - if reader2 is None: - pytest.skip("Warning: Cannot reopen trace file, skipping second test") - # Continue with just the first test result - assert miss_ratio1 is not None and 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio: {miss_ratio1}" - return - - # Reset cache for fair comparison - cache2 = lcs.PythonHookCachePolicy(1024 * 1024, "TestLRU2") - cache2.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Method 2: Convenience method - miss_ratio2 = cache2.process_trace(reader2, max_req=1000)[0] - - # Verify both methods give the same result and miss ratios are reasonable - assert 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio 1: {miss_ratio1}" - assert 0.0 <= miss_ratio2 <= 1.0, f"Invalid miss ratio 2: {miss_ratio2}" - assert abs(miss_ratio1 - miss_ratio2) < 0.001, ( - f"Different results from the two methods: {miss_ratio1} vs {miss_ratio2}" - ) - - -def test_compare_native_vs_python_hook(): - """Compare native LRU vs Python hook LRU using process_trace.""" - - cache_size = 512 * 1024 # 512KB cache - max_requests = 500 - - # Test native LRU - native_cache = lcs.LRU(cache_size) - reader1 = create_trace_reader() - if reader1 is None: - pytest.skip("Test trace file not found, skipping test") - - native_obj_miss_ratio, native_byte_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) - - # Test Python hook LRU - hook_cache = lcs.PythonHookCachePolicy(cache_size, "HookLRU") - - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - 
lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = True - - def eviction_hook(lru_dict, obj_id, obj_size): - return next(iter(lru_dict)) - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - reader2 = create_trace_reader() - if reader2 is None: - pytest.skip("Warning: Cannot reopen trace file, skipping comparison") - return # Skip test - - hook_obj_miss_ratio, hook_byte_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) - - # They should be very similar (allowing for some small differences due to implementation details) - assert abs(native_obj_miss_ratio - hook_obj_miss_ratio) < 0.05, ( - f"Too much difference: {abs(native_obj_miss_ratio - hook_obj_miss_ratio):.4f}" - ) - - -def test_error_handling(): - """Test error handling for process_trace.""" - - cache = lcs.PythonHookCachePolicy(1024) - - reader = create_trace_reader() - if reader is None: - pytest.skip("Test trace file not found, skipping error test") - - # Try to process trace without setting hooks - should raise RuntimeError - with pytest.raises(RuntimeError, match="Hooks must be set before processing trace"): - cache.process_trace(reader) - - -def test_lru_implementation_accuracy(): - """Test that Python hook LRU implementation matches native LRU closely.""" - - cache_size = 1024 * 1024 # 1MB - max_requests = 100 - - # Create readers - reader1 = create_trace_reader() - reader2 = create_trace_reader() - - if not reader1 or not reader2: - pytest.skip("Cannot open trace files for LRU accuracy test") - - # Test native LRU - native_cache = lcs.LRU(cache_size) - native_obj_miss_ratio, native_byte_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) - - # Test Python hook LRU - hook_cache = lcs.PythonHookCachePolicy(cache_size, "AccuracyTestLRU") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = 
create_optimized_lru_hooks() - hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - hook_obj_miss_ratio, hook_byte_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) - - # Calculate difference - difference = abs(native_obj_miss_ratio - hook_obj_miss_ratio) - percentage_diff = (difference / native_obj_miss_ratio) * 100 if native_obj_miss_ratio > 0 else 0 - - # Assert that the difference is small (< 5%) - assert percentage_diff < 5.0, f"LRU implementation difference too large: {percentage_diff:.4f}%" - - -def create_optimized_lru_hooks(): - """Create optimized LRU hooks that closely match native LRU behavior.""" - - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - if obj_id in lru_dict: - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = obj_size - - def eviction_hook(lru_dict, obj_id, obj_size): - if lru_dict: - return next(iter(lru_dict)) - return obj_id - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py deleted file mode 100644 index 7af8873dc..000000000 --- a/libCacheSim-python/tests/test_python_hook_cache.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env python3 -""" -Test file for PythonHookCachePolicy functionality. -""" - -import pytest -import libcachesim as lcs -from dataclasses import dataclass -from collections import OrderedDict - - -@dataclass -class CacheTestCase: - """Represents a single test case for cache operations.""" - - request: tuple[int, int] # (obj_id, obj_size) - expected_hit: bool - expected_obj_count: int - description: str = "" - - -def create_lru_hooks(): - """Create standard LRU hooks for testing. 
- - Returns: - tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - """ - - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = True - - def eviction_hook(lru_dict, obj_id, obj_size): - return next(iter(lru_dict)) - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook - - -def create_test_request(obj_id: int, obj_size: int) -> lcs.Request: - """Create a test request with given parameters. - - Args: - obj_id: Object ID - obj_size: Object size in bytes - - Returns: - Request: A configured request object - """ - req = lcs.Request() - req.obj_id = obj_id - req.obj_size = obj_size - return req - - -def test_python_hook_cache(): - """Test the Python hook cache implementation.""" - cache_size = 300 # 3 objects of size 100 each - cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") - - # Set up hooks - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks() - cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Define test sequence - test_cases = [ - CacheTestCase((1, 100), False, 1, "Miss - insert 1"), - CacheTestCase((2, 100), False, 2, "Miss - insert 2"), - CacheTestCase((3, 100), False, 3, "Miss - insert 3 (cache full)"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((4, 100), False, 3, "Miss - should evict 2 (LRU), insert 4"), - CacheTestCase((2, 100), False, 3, "Miss - should evict 3, insert 2"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - ] - - # Execute test sequence - for i, test_case in enumerate(test_cases): - obj_id, obj_size = test_case.request - req = create_test_request(obj_id, obj_size) - - result = cache.get(req) - assert result == test_case.expected_hit, f"Request {i + 1} (obj_id={obj_id}):" - f"Expected 
{'hit' if test_case.expected_hit else 'miss'} - {test_case.description}" - assert cache.n_obj == test_case.expected_obj_count, ( - f"Request {i + 1}: Expected {test_case.expected_obj_count} objects - {test_case.description}" - ) - assert cache.occupied_byte <= cache_size, f"Request {i + 1}: Cache size exceeded" - - -def test_error_handling(): - """Test error handling for uninitialized cache.""" - cache = lcs.PythonHookCachePolicy(1000) - - # Try to use cache without setting hooks - req = create_test_request(1, 100) - - with pytest.raises(RuntimeError): - cache.get(req) - - -def test_lru_comparison(): - """Test Python hook LRU against native LRU to verify identical behavior.""" - cache_size = 300 # 3 objects of size 100 each - - # Create native LRU cache - native_lru = lcs.LRU(cache_size) - - # Create Python hook LRU cache - hook_lru = lcs.PythonHookCachePolicy(cache_size, "TestLRU") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks() - hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Define test sequence with various access patterns - test_cases = [ - CacheTestCase((1, 100), False, 1, "Miss - insert 1"), - CacheTestCase((2, 100), False, 2, "Miss - insert 2"), - CacheTestCase((3, 100), False, 3, "Miss - insert 3 (cache full)"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((4, 100), False, 3, "Miss - should evict 2 (LRU), insert 4"), - CacheTestCase((2, 100), False, 3, "Miss - should evict 3, insert 2"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((3, 100), False, 3, "Miss - should evict 4, insert 3"), - CacheTestCase((5, 100), False, 3, "Miss - should evict 2, insert 5"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((3, 100), True, 3, "Hit - move 3 to end"), - CacheTestCase((6, 100), False, 3, "Miss - should evict 5, insert 6"), - ] - - # Test both caches with identical requests - for i, test_case in 
enumerate(test_cases): - obj_id, obj_size = test_case.request - - # Test native LRU - req_native = create_test_request(obj_id, obj_size) - native_result = native_lru.get(req_native) - - # Test hook LRU - req_hook = create_test_request(obj_id, obj_size) - hook_result = hook_lru.get(req_hook) - - # Compare results - assert native_result == hook_result, ( - f"Request {i + 1} (obj_id={obj_id}): Native and hook LRU differ - {test_case.description}" - ) - - # Compare cache statistics - assert native_lru.n_obj == hook_lru.n_obj, f"Request {i + 1}: Object count differs - {test_case.description}" - assert native_lru.occupied_byte == hook_lru.occupied_byte, ( - f"Request {i + 1}: Occupied bytes differ - {test_case.description}" - ) - - -def test_lru_comparison_variable_sizes(): - """Test Python hook LRU vs Native LRU with variable object sizes.""" - cache_size = 1000 # Total cache capacity - - # Create caches - native_lru = lcs.LRU(cache_size) - hook_lru = lcs.PythonHookCachePolicy(cache_size, "VariableSizeLRU") - - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks() - hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Define test sequence with variable object sizes - test_cases = [ - CacheTestCase((1, 200), False, 1, "Miss - insert 1 (200 bytes)"), - CacheTestCase((2, 300), False, 2, "Miss - insert 2 (300 bytes)"), - CacheTestCase((3, 400), False, 3, "Miss - insert 3 (400 bytes) - total 900 bytes"), - CacheTestCase((4, 200), False, 3, "Miss - should evict 1, insert 4 (total would be 1100, over limit)"), - CacheTestCase((1, 200), False, 3, "Miss - should evict 2, insert 1"), - CacheTestCase((5, 100), False, 3, "Miss - should evict 3, insert 5"), - CacheTestCase((4, 200), True, 3, "Hit - access 4"), - CacheTestCase((6, 500), False, 2, "Miss - should evict multiple objects to fit"), - CacheTestCase((4, 200), False, 3, "Miss - 4 was evicted"), - ] - - # Test both caches with identical requests - for i, test_case in 
enumerate(test_cases): - obj_id, obj_size = test_case.request - - # Test native LRU - req_native = create_test_request(obj_id, obj_size) - native_result = native_lru.get(req_native) - - # Test hook LRU - req_hook = create_test_request(obj_id, obj_size) - hook_result = hook_lru.get(req_hook) - - # Compare results - assert native_result == hook_result, ( - f"Request {i + 1} (obj_id={obj_id}, size={obj_size}): Results differ - {test_case.description}" - ) - - # Compare cache statistics - assert native_lru.n_obj == hook_lru.n_obj, f"Request {i + 1}: Object count differs - {test_case.description}" - assert native_lru.occupied_byte == hook_lru.occupied_byte, ( - f"Request {i + 1}: Occupied bytes differ - {test_case.description}" - ) diff --git a/libCacheSim-python/tests/test_trace_generator.py b/libCacheSim-python/tests/test_trace_generator.py deleted file mode 100644 index 37040026e..000000000 --- a/libCacheSim-python/tests/test_trace_generator.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for trace generator module. 
-""" - -import libcachesim as lcs - - -class TestTraceGeneration: - """Test trace generation functions.""" - - # Constants for test readability - NUM_SAMPLE_REQUESTS = 10 # Number of requests to check in detail - - def test_create_zipf_requests_basic(self): - """Test basic Zipf request creation.""" - generator = lcs.create_zipf_requests(num_objects=100, num_requests=1000, alpha=1.0, obj_size=4000, seed=42) - - # Test iteration - requests = list(generator) - assert len(requests) == 1000 - - for req in requests[: self.NUM_SAMPLE_REQUESTS]: # Check first NUM_SAMPLE_REQUESTS - assert isinstance(req, lcs.Request) - assert 0 <= req.obj_id < 100 - assert req.obj_size == 4000 - assert req.clock_time >= 0 - - def test_create_uniform_requests_basic(self): - """Test basic uniform request creation.""" - generator = lcs.create_uniform_requests(num_objects=100, num_requests=1000, obj_size=4000, seed=42) - - # Test iteration - requests = list(generator) - assert len(requests) == 1000 - - for req in requests[: self.NUM_SAMPLE_REQUESTS]: # Check first NUM_SAMPLE_REQUESTS - assert isinstance(req, lcs.Request) - assert 0 <= req.obj_id < 100 - assert req.obj_size == 4000 - assert req.clock_time >= 0 - - def test_zipf_reproducibility(self): - """Test reproducibility with seed.""" - gen1 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42) - gen2 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42) - - requests1 = list(gen1) - requests2 = list(gen2) - - assert len(requests1) == len(requests2) - for req1, req2 in zip(requests1, requests2): - assert req1.obj_id == req2.obj_id - - def test_uniform_reproducibility(self): - """Test reproducibility with seed.""" - gen1 = lcs.create_uniform_requests(10, 100, seed=42) - gen2 = lcs.create_uniform_requests(10, 100, seed=42) - - requests1 = list(gen1) - requests2 = list(gen2) - - assert len(requests1) == len(requests2) - for req1, req2 in zip(requests1, requests2): - assert req1.obj_id == req2.obj_id - - def test_different_seeds(self): - 
"""Test that different seeds produce different results.""" - gen1 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42) - gen2 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=43) - - requests1 = [req.obj_id for req in gen1] - requests2 = [req.obj_id for req in gen2] - - assert requests1 != requests2 - - def test_zipf_with_cache(self): - """Test Zipf generator with cache simulation.""" - cache = lcs.LRU(cache_size=50 * 1024) # 50KB cache - generator = lcs.create_zipf_requests( - num_objects=100, - num_requests=1000, - alpha=1.0, - obj_size=1000, # 1KB objects - seed=42, - ) - - hit_count = 0 - for req in generator: - if cache.get(req): - hit_count += 1 - - # Should have some hits and some misses - assert 0 <= hit_count <= 1000 - assert hit_count > 0 # Should have some hits - - def test_uniform_with_cache(self): - """Test uniform generator with cache simulation.""" - cache = lcs.LRU(cache_size=50 * 1024) # 50KB cache - generator = lcs.create_uniform_requests( - num_objects=100, - num_requests=1000, - obj_size=1000, # 1KB objects - seed=42, - ) - - hit_count = 0 - for req in generator: - if cache.get(req): - hit_count += 1 - - # Should have some hits and some misses - assert 0 <= hit_count <= 1000 - assert hit_count > 0 # Should have some hits - - def test_custom_parameters(self): - """Test generators with custom parameters.""" - generator = lcs.create_zipf_requests( - num_objects=50, - num_requests=200, - alpha=1.5, - obj_size=2048, - time_span=3600, # 1 hour - start_obj_id=1000, - seed=123, - ) - - requests = list(generator) - assert len(requests) == 200 - - # Check custom parameters - for req in requests[: self.NUM_SAMPLE_REQUESTS // 2]: # Check fewer for shorter test - assert 1000 <= req.obj_id < 1050 # start_obj_id + num_objects - assert req.obj_size == 2048 - assert req.clock_time <= 3600 diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py deleted file mode 100644 index a2c7c8c26..000000000 
--- a/libCacheSim-python/tests/test_unified_interface.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -""" -Test the unified interface for all cache policies. -""" - -import sys -import os -import pytest - -# Add the parent directory to the Python path for development testing -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -try: - import libcachesim as lcs -except ImportError as e: - pytest.skip(f"libcachesim not available: {e}", allow_module_level=True) - -from collections import OrderedDict - - -def create_trace_reader(): - """Helper function to create a trace reader. - - Returns: - Reader or None: A trace reader instance, or None if trace file not found. - """ - data_file = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin" - ) - if not os.path.exists(data_file): - return None - return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) - - -def create_test_lru_hooks(): - """Create LRU hooks for testing. 
- - Returns: - tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - """ - - def init_hook(cache_size): - """Initialize LRU data structure.""" - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - """Handle cache hit by moving to end (most recently used).""" - if obj_id in lru_dict: - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - """Handle cache miss by adding new object.""" - lru_dict[obj_id] = obj_size - - def eviction_hook(lru_dict, obj_id, obj_size): - """Return the least recently used object ID for eviction.""" - if lru_dict: - return next(iter(lru_dict)) - return obj_id - - def remove_hook(lru_dict, obj_id): - """Remove object from LRU structure.""" - lru_dict.pop(obj_id, None) - - return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook - - -def test_unified_process_trace_interface(): - """Test that all cache policies have the same process_trace interface.""" - - cache_size = 1024 * 1024 # 1MB - max_requests = 100 - - # Create trace reader - reader = create_trace_reader() - if not reader: - pytest.skip("Skipping test: Trace file not available") - - # Test different cache policies - caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - "ARC": lcs.ARC(cache_size), - } - - # Add Python hook cache - python_cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() - python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - caches["Python Hook LRU"] = python_cache - - results = {} - for name, cache in caches.items(): - # Create fresh reader for each test - test_reader = create_trace_reader() - if not test_reader: - pytest.skip(f"Cannot create reader for {name} test") - - # Test process_trace method exists - assert hasattr(cache, "process_trace"), f"{name} missing process_trace method" - - # Test process_trace functionality - obj_miss_ratio, 
byte_miss_ratio = cache.process_trace(test_reader, max_req=max_requests) - results[name] = obj_miss_ratio - - # Verify miss_ratio is valid - assert 0.0 <= obj_miss_ratio <= 1.0, f"{name} returned invalid miss_ratio: {obj_miss_ratio}" - - # Verify we got results for all caches - assert len(results) == len(caches), "Not all caches were tested" - - -def test_unified_properties_interface(): - """Test that all cache policies have the same properties interface.""" - - cache_size = 1024 * 1024 - - # Create different cache types - caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - "Python Hook": lcs.PythonHookCachePolicy(cache_size, "TestCache"), - } - - required_properties = ["cache_size", "n_req", "n_obj", "occupied_byte"] - - for name, cache in caches.items(): - # Test all required properties exist - for prop in required_properties: - assert hasattr(cache, prop), f"{name} missing {prop} property" - - # Test cache_size is correct - assert cache.cache_size == cache_size, f"{name} cache_size mismatch" - - -def test_get_interface_consistency(): - """Test that get() method works consistently across all cache policies.""" - - cache_size = 1024 * 1024 - - # Create caches - caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - } - - # Add Python hook cache - python_cache = lcs.PythonHookCachePolicy(cache_size, "ConsistencyTest") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() - python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - caches["Python Hook"] = python_cache - - # Create a test request using the proper request class - test_req = lcs.Request() - test_req.obj_id = 1 - test_req.obj_size = 1024 - - for name, cache in caches.items(): - # Reset cache state for consistent testing - initial_n_req = cache.n_req - initial_n_obj = cache.n_obj - initial_occupied = cache.occupied_byte - - # Test get method exists - assert hasattr(cache, "get"), f"{name} missing get 
method" - - # Test first access (should be miss for new object) - result = cache.get(test_req) - - # Test properties updated correctly - assert cache.n_req > initial_n_req, f"{name} n_req not updated" - if not result: # If it was a miss, object should be added - assert cache.n_obj > initial_n_obj, f"{name} n_obj not updated after miss" - assert cache.occupied_byte > initial_occupied, f"{name} occupied_byte not updated after miss" - - # Test second access to same object (should be hit) - second_result = cache.get(test_req) - - # Second access should be a hit (unless cache is too small) - if cache.cache_size >= test_req.obj_size: - assert second_result, f"{name} second access should be a hit" diff --git a/libCacheSim-python/tests/utils.py b/libCacheSim-python/tests/utils.py deleted file mode 100644 index 0977cc815..000000000 --- a/libCacheSim-python/tests/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -import os - - -def get_reference_data(eviction_algo, cache_size_ratio): - data_file = os.path.join( # noqa: PTH118 - (os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 - "tests", - "reference.csv", - ) - with open(data_file) as f: # noqa: PTH123 - lines = f.readlines() - key = "3LCache" if eviction_algo == "ThreeLCache" else eviction_algo - for line in lines: - if line.startswith(f"{key},{cache_size_ratio}"): - return float(line.split(",")[-1]) - return None diff --git a/libCacheSim/traceReader/CMakeLists.txt b/libCacheSim/traceReader/CMakeLists.txt index 6e1b68ced..db28cb9f3 100644 --- a/libCacheSim/traceReader/CMakeLists.txt +++ b/libCacheSim/traceReader/CMakeLists.txt @@ -3,9 +3,9 @@ # ============================================================================== set(traceReader_sources_c - generalReader/binary.c - generalReader/csv.c - generalReader/txt.c + generalReader/binary.c + generalReader/csv.c + generalReader/txt.c generalReader/libcsv.c customizedReader/lcs.c reader.c diff --git a/scripts/install_python_dev.sh b/scripts/install_python_dev.sh index 
d878d89b9..a97159972 100644 --- a/scripts/install_python_dev.sh +++ b/scripts/install_python_dev.sh @@ -39,7 +39,7 @@ echo "Building Python binding..." echo "Sync python version..." python scripts/sync_python_version.py pushd libCacheSim-python -pip install -e . -vvv +python -m pip install -e . -vvv popd # Test that the import works From 0fff17647c286f868bbad1bd29b794372a05ca6b Mon Sep 17 00:00:00 2001 From: haochengxia Date: Mon, 21 Jul 2025 00:03:24 -0400 Subject: [PATCH 2/4] Fix comments by copilot --- .../libcachesim/synthetic_reader.py | 15 ++- libCacheSim-python/src/export_cache.cpp | 121 ++++++++++++------ libCacheSim-python/src/export_reader.cpp | 18 ++- 3 files changed, 107 insertions(+), 47 deletions(-) diff --git a/libCacheSim-python/libcachesim/synthetic_reader.py b/libCacheSim-python/libcachesim/synthetic_reader.py index c9d3575fc..16f8a1046 100644 --- a/libCacheSim-python/libcachesim/synthetic_reader.py +++ b/libCacheSim-python/libcachesim/synthetic_reader.py @@ -95,7 +95,7 @@ def read_one_req(self, req: Request) -> Request: req.obj_id = obj_id req.obj_size = self.obj_size req.clock_time = self.current_pos * self.time_span // self.num_of_req - req.op = ReqOp.OP_NOP + req.op = ReqOp.OP_READ req.valid = True self.current_pos += 1 @@ -132,7 +132,7 @@ def read_first_req(self, req: Request) -> Request: req.obj_id = obj_id req.obj_size = self.obj_size req.clock_time = 0 - req.op = ReqOp.OP_NOP + req.op = ReqOp.OP_READ req.valid = True return req @@ -146,7 +146,7 @@ def read_last_req(self, req: Request) -> Request: req.obj_id = obj_id req.obj_size = self.obj_size req.clock_time = (self.num_of_req - 1) * self.time_span // self.num_of_req - req.op = ReqOp.OP_NOP + req.op = ReqOp.OP_READ req.valid = True return req @@ -165,7 +165,7 @@ def read_one_req_above(self, req: Request) -> Request: req.obj_id = obj_id req.obj_size = self.obj_size req.clock_time = (self.current_pos + 1) * self.time_span // self.num_of_req - req.op = ReqOp.OP_NOP + req.op = ReqOp.OP_READ 
req.valid = True return req @@ -207,7 +207,7 @@ def __getitem__(self, index: int) -> Request: req.obj_id = obj_id req.obj_size = self.obj_size req.clock_time = index * self.time_span // self.num_of_req - req.op = ReqOp.OP_NOP + req.op = ReqOp.OP_READ req.valid = True return req @@ -256,7 +256,8 @@ def _gen_uniform(m: int, n: int, start: int = 0) -> np.ndarray: """ if m <= 0 or n <= 0: raise ValueError("num_objects and num_requests must be positive") - return np.random.randint(0, m, n) + start + # Optimized: directly generate in the target range for better performance + return np.random.randint(start, start + m, n) class _BaseRequestGenerator: @@ -302,7 +303,7 @@ def __iter__(self) -> Iterator[Request]: req.clock_time = i * self.time_span // self.num_requests req.obj_id = obj_id req.obj_size = self.obj_size - req.op = ReqOp.OP_NOP + req.op = ReqOp.OP_READ req.valid = True yield req diff --git a/libCacheSim-python/src/export_cache.cpp b/libCacheSim-python/src/export_cache.cpp index 3868866cc..fb383a236 100644 --- a/libCacheSim-python/src/export_cache.cpp +++ b/libCacheSim-python/src/export_cache.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include "config.h" @@ -58,7 +59,10 @@ struct RequestDeleter { // **** Python plugin cache implementation BEGIN **** // *********************************************************************** -typedef struct pypluginCache_params { +// Forward declaration with appropriate visibility +struct pypluginCache_params; + +typedef struct __attribute__((visibility("hidden"))) pypluginCache_params { py::object data; ///< Plugin's internal data structure (python object) py::function cache_init_hook; py::function cache_hit_hook; @@ -69,6 +73,23 @@ typedef struct pypluginCache_params { std::string cache_name; } pypluginCache_params_t; +// Custom deleter for pypluginCache_params_t +struct PypluginCacheParamsDeleter { + void operator()(pypluginCache_params_t* ptr) const { + if (ptr != nullptr) { + // Call the free hook if available 
before deletion + if (!ptr->cache_free_hook.is_none()) { + try { + ptr->cache_free_hook(ptr->data); + } catch (...) { + // Ignore exceptions during cleanup to prevent double-fault + } + } + delete ptr; + } + } +}; + static void pypluginCache_free(cache_t* cache); static bool pypluginCache_get(cache_t* cache, const request_t* req); static cache_obj_t* pypluginCache_find(cache_t* cache, const request_t* req, @@ -84,47 +105,71 @@ cache_t* pypluginCache_init( py::function cache_init_hook, py::function cache_hit_hook, py::function cache_miss_hook, py::function cache_eviction_hook, py::function cache_remove_hook, py::function cache_free_hook) { - // Initialize base cache structure - cache_t* cache = cache_struct_init(cache_name.c_str(), ccache_params, NULL); - - // Set function pointers for cache operations - cache->cache_init = NULL; - cache->cache_free = pypluginCache_free; - cache->get = pypluginCache_get; - cache->find = pypluginCache_find; - cache->insert = pypluginCache_insert; - cache->evict = pypluginCache_evict; - cache->remove = pypluginCache_remove; - cache->to_evict = pypluginCache_to_evict; - cache->get_occupied_byte = cache_get_occupied_byte_default; - cache->get_n_obj = cache_get_n_obj_default; - cache->can_insert = cache_can_insert_default; - cache->obj_md_size = 0; - - // Allocate and initialize plugin parameters - pypluginCache_params_t* params = new pypluginCache_params_t(); - params->cache_name = cache_name; - params->cache_init_hook = cache_init_hook; - params->cache_hit_hook = cache_hit_hook; - params->cache_miss_hook = cache_miss_hook; - params->cache_eviction_hook = cache_eviction_hook; - params->cache_remove_hook = cache_remove_hook; - params->cache_free_hook = cache_free_hook; - params->data = cache_init_hook(ccache_params); - - cache->eviction_params = params; - - return cache; + // Initialize base cache structure with exception safety + cache_t* cache = nullptr; + std::unique_ptr params; + + try { + cache = 
cache_struct_init(cache_name.c_str(), ccache_params, NULL); + if (!cache) { + throw std::runtime_error("Failed to initialize cache structure"); + } + + // Set function pointers for cache operations + cache->cache_init = NULL; + cache->cache_free = pypluginCache_free; + cache->get = pypluginCache_get; + cache->find = pypluginCache_find; + cache->insert = pypluginCache_insert; + cache->evict = pypluginCache_evict; + cache->remove = pypluginCache_remove; + cache->to_evict = pypluginCache_to_evict; + cache->get_occupied_byte = cache_get_occupied_byte_default; + cache->get_n_obj = cache_get_n_obj_default; + cache->can_insert = cache_can_insert_default; + cache->obj_md_size = 0; + + // Allocate and initialize plugin parameters using smart pointer with custom + // deleter + params = + std::unique_ptr( + new pypluginCache_params_t(), PypluginCacheParamsDeleter()); + params->cache_name = cache_name; + params->cache_init_hook = cache_init_hook; + params->cache_hit_hook = cache_hit_hook; + params->cache_miss_hook = cache_miss_hook; + params->cache_eviction_hook = cache_eviction_hook; + params->cache_remove_hook = cache_remove_hook; + params->cache_free_hook = cache_free_hook; + + // Initialize the cache data - this might throw + params->data = cache_init_hook(ccache_params); + + // Transfer ownership to the cache structure + cache->eviction_params = params.release(); + + return cache; + + } catch (...) 
{ + // Clean up on exception + if (cache) { + cache_struct_free(cache); + } + // params will be automatically cleaned up by smart pointer destructor + throw; // Re-throw the exception + } } static void pypluginCache_free(cache_t* cache) { - pypluginCache_params_t* params = - (pypluginCache_params_t*)cache->eviction_params; - - if (!params->cache_free_hook.is_none()) { - params->cache_free_hook(params->data); + if (!cache || !cache->eviction_params) { + return; } - delete params; + + // Use smart pointer for automatic cleanup + std::unique_ptr params( + static_cast(cache->eviction_params)); + + // The smart pointer destructor will handle cleanup automatically cache_struct_free(cache); } diff --git a/libCacheSim-python/src/export_reader.cpp b/libCacheSim-python/src/export_reader.cpp index f9c3789b6..468f54289 100644 --- a/libCacheSim-python/src/export_reader.cpp +++ b/libCacheSim-python/src/export_reader.cpp @@ -42,7 +42,14 @@ struct RequestDeleter { struct ReaderInitParamDeleter { void operator()(reader_init_param_t* ptr) const { - if (ptr != nullptr) free(ptr); + if (ptr != nullptr) { + // Free the strdup'ed string if it exists + if (ptr->binary_fmt_str != nullptr) { + free(ptr->binary_fmt_str); + ptr->binary_fmt_str = nullptr; + } + free(ptr); + } } }; @@ -123,9 +130,16 @@ void export_reader(py::module& m) { const std::string& delimiter, ssize_t trace_start_offset, sampler_t* sampler) { reader_init_param_t params = default_reader_init_params(); + + // Safe string handling with proper error checking if (!binary_fmt_str.empty()) { - params.binary_fmt_str = strdup(binary_fmt_str.c_str()); + char* fmt_str = strdup(binary_fmt_str.c_str()); + if (!fmt_str) { + throw std::bad_alloc(); + } + params.binary_fmt_str = fmt_str; } + params.ignore_obj_size = ignore_obj_size; params.ignore_size_zero_req = ignore_size_zero_req; params.obj_id_is_num = obj_id_is_num; From 7f11a0aec4f52fb6a5ed7e46f5522119610599ee Mon Sep 17 00:00:00 2001 From: haochengxia Date: Mon, 21 Jul 2025 
00:47:12 -0400 Subject: [PATCH 3/4] Preserve reader_protocol only --- libCacheSim-python/libcachesim/__init__.pyi | 12 ++-- libCacheSim-python/libcachesim/cache.py | 6 +- libCacheSim-python/libcachesim/protocols.py | 69 +++++-------------- .../libcachesim/trace_analyzer.py | 10 ++- libCacheSim-python/libcachesim/util.py | 10 +-- 5 files changed, 38 insertions(+), 69 deletions(-) diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi index 213eb1eb8..2e2a565e5 100644 --- a/libCacheSim-python/libcachesim/__init__.pyi +++ b/libCacheSim-python/libcachesim/__init__.pyi @@ -3,7 +3,7 @@ from typing import bool, int, str, tuple from collections.abc import Iterator from .libcachesim_python import ReqOp, TraceType, SamplerType -from .protocols import ReaderProtocol, CacheProtocol +from .protocols import ReaderProtocol class Request: clock_time: int @@ -59,8 +59,8 @@ class Cache: def get_n_obj(self) -> int: ... def print_cache(self) -> str: ... -class CacheBase(CacheProtocol): - """Base class implementing CacheProtocol""" +class CacheBase: + """Base class for all cache implementations""" def __init__(self, _cache: Cache): ... def get(self, req: Request) -> bool: ... def find(self, req: Request, update_cache: bool = True) -> CacheObject: ... @@ -219,6 +219,7 @@ def create_zipf_requests( start_obj_id: int = 0, seed: int | None = None, ) -> Iterator[Request]: ... + def create_uniform_requests( num_objects: int, num_requests: int, @@ -230,8 +231,9 @@ def create_uniform_requests( # Analyzer class TraceAnalyzer: - def __init__(self, analyzer): ... - def analyze(self, reader: ReaderProtocol, output_path: str, analysis_param, analysis_option) -> None: ... + def __init__(self, analyzer, reader: ReaderProtocol, output_path: str, analysis_param, analysis_option): ... + def run(self) -> None: ... + def cleanup(self) -> None: ... 
# Utilities class Util: diff --git a/libCacheSim-python/libcachesim/cache.py b/libCacheSim-python/libcachesim/cache.py index 3f3a2bd38..3e40249e1 100644 --- a/libCacheSim-python/libcachesim/cache.py +++ b/libCacheSim-python/libcachesim/cache.py @@ -40,11 +40,11 @@ c_process_trace, ) -from .protocols import CacheProtocol, ReaderProtocol +from .protocols import ReaderProtocol -class CacheBase(CacheProtocol): - """Base class for all cache implementations that implements CacheProtocol""" +class CacheBase(ABC): + """Base class for all cache implementations""" _cache: Cache # Internal C++ cache object diff --git a/libCacheSim-python/libcachesim/protocols.py b/libCacheSim-python/libcachesim/protocols.py index d362946a0..d2e7b8170 100644 --- a/libCacheSim-python/libcachesim/protocols.py +++ b/libCacheSim-python/libcachesim/protocols.py @@ -1,71 +1,34 @@ -from __future__ import annotations - -from typing import Protocol, TYPE_CHECKING - -if TYPE_CHECKING: - from .libcachesim_python import Request, CacheObject, Reader, Analyzer - - -class CacheProtocol(Protocol): - def get(self, req: Request) -> bool: ... - - def find(self, req: Request, update_cache: bool = True) -> CacheObject: ... - - def can_insert(self, req: Request) -> bool: ... +""" +Reader protocol for libCacheSim Python bindings. - def insert(self, req: Request) -> CacheObject: ... +ReaderProtocol defines the interface contract for trace readers, +enabling different implementations (Python/C++) to work interchangeably. +""" - def need_eviction(self, req: Request) -> bool: ... - - def evict(self, req: Request) -> CacheObject: ... - - def remove(self, obj_id: int) -> bool: ... - - def to_evict(self, req: Request) -> CacheObject: ... - - def get_occupied_byte(self) -> int: ... - - def get_n_obj(self) -> int: ... - - def print_cache(self) -> str: ... 
+from __future__ import annotations +from typing import Protocol, runtime_checkable, TYPE_CHECKING - def process_trace(self, reader: "ReaderProtocol", start_req: int = 0, max_req: int = -1) -> tuple[float, float]: ... +if TYPE_CHECKING: + from .libcachesim_python import Request - # Properties - @property - def cache_size(self) -> int: ... - @property - def cache_name(self) -> str: ... +@runtime_checkable +class ReaderProtocol(Protocol): + """Protocol for trace readers + This protocol ensures that different reader implementations + (SyntheticReader, TraceReader) can be used interchangeably. + """ -class ReaderProtocol(Protocol): def get_num_of_req(self) -> int: ... - def read_one_req(self, req: Request) -> Request: ... - def reset(self) -> None: ... - def close(self) -> None: ... - - def clone(self) -> ReaderProtocol: ... - + def clone(self) -> "ReaderProtocol": ... def read_first_req(self, req: Request) -> Request: ... - def read_last_req(self, req: Request) -> Request: ... - def skip_n_req(self, n: int) -> int: ... - def read_one_req_above(self, req: Request) -> Request: ... - def go_back_one_req(self) -> None: ... - def set_read_pos(self, pos: float) -> None: ... - def get_read_pos(self) -> float: ... - - -class AnalyzerProtocol(Protocol): - def run(self) -> None: ... - - def cleanup(self) -> None: ... 
diff --git a/libCacheSim-python/libcachesim/trace_analyzer.py b/libCacheSim-python/libcachesim/trace_analyzer.py index bf598a71b..46c0f63a6 100644 --- a/libCacheSim-python/libcachesim/trace_analyzer.py +++ b/libCacheSim-python/libcachesim/trace_analyzer.py @@ -1,6 +1,10 @@ """Wrapper of Analyzer""" +from __future__ import annotations -from .protocols import ReaderProtocol, AnalyzerProtocol +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .protocols import ReaderProtocol from .libcachesim_python import ( Analyzer, @@ -9,13 +13,13 @@ ) -class TraceAnalyzer(AnalyzerProtocol): +class TraceAnalyzer: _analyzer: Analyzer def __init__( self, analyzer: Analyzer, - reader: "ReaderProtocol", + reader: ReaderProtocol, output_path: str, analysis_param: AnalysisParam, analysis_option: AnalysisOption, diff --git a/libCacheSim-python/libcachesim/util.py b/libCacheSim-python/libcachesim/util.py index 0f80a7fb2..c9c351b35 100644 --- a/libCacheSim-python/libcachesim/util.py +++ b/libCacheSim-python/libcachesim/util.py @@ -1,9 +1,11 @@ """Wrapper misc functions""" +from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: - from .protocols import CacheProtocol, ReaderProtocol + from .protocols import ReaderProtocol + from .cache import CacheBase from .libcachesim_python import convert_to_oracleGeneral, convert_to_lcs, c_process_trace @@ -28,9 +30,7 @@ def convert_to_lcs(reader, ofilepath, output_txt=False, remove_size_change=False return convert_to_lcs(reader, ofilepath, output_txt, remove_size_change, lcs_ver) @staticmethod - def process_trace( - cache: "CacheProtocol", reader: "ReaderProtocol", start_req: int = 0, max_req: int = -1 - ) -> tuple[float, float]: + def process_trace(cache: CacheBase, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: """ Process a trace with a cache. @@ -44,7 +44,7 @@ def process_trace( tuple[float, float]: The object miss ratio and byte miss ratio. 
""" # Check if reader is C++ reader - if not hasattr(reader, "c_reader") or not reader.c_reader: + if not hasattr(reader, 'c_reader') or not reader.c_reader: raise ValueError("Reader must be a C++ reader") return c_process_trace(cache._cache, reader._reader, start_req, max_req) From 7642dac89e51f3c92ecfee710be13bb4693abbf2 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Wed, 23 Jul 2025 14:14:56 -0400 Subject: [PATCH 4/4] Connect S3 --- libCacheSim-python/libcachesim/__init__.py | 7 + libCacheSim-python/libcachesim/data_loader.py | 131 ++++++++++++++++++ libCacheSim-python/libcachesim/protocols.py | 15 +- .../libcachesim/trace_analyzer.py | 26 +++- libCacheSim-python/src/export_analyzer.cpp | 15 +- libCacheSim-python/tests/test_analyzer.py | 15 ++ libCacheSim-python/tests/test_data_loader.py | 8 ++ 7 files changed, 198 insertions(+), 19 deletions(-) create mode 100644 libCacheSim-python/libcachesim/data_loader.py create mode 100644 libCacheSim-python/tests/test_analyzer.py create mode 100644 libCacheSim-python/tests/test_data_loader.py diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py index b9424a37b..f71c6ee47 100644 --- a/libCacheSim-python/libcachesim/__init__.py +++ b/libCacheSim-python/libcachesim/__init__.py @@ -8,6 +8,8 @@ ReqOp, TraceType, SamplerType, + AnalysisParam, + AnalysisOption, __doc__, __version__, ) @@ -43,6 +45,7 @@ from .trace_analyzer import TraceAnalyzer from .synthetic_reader import SyntheticReader, create_zipf_requests, create_uniform_requests from .util import Util +from .data_loader import DataLoader __all__ = [ # Core classes @@ -51,6 +54,8 @@ "ReqOp", "TraceType", "SamplerType", + "AnalysisParam", + "AnalysisOption", # Cache base class "CacheBase", # Core cache algorithms @@ -85,6 +90,8 @@ "create_uniform_requests", # Utilities "Util", + # Data loader + "DataLoader", # Metadata "__doc__", "__version__", diff --git a/libCacheSim-python/libcachesim/data_loader.py 
b/libCacheSim-python/libcachesim/data_loader.py new file mode 100644 index 000000000..fee5f9bc3 --- /dev/null +++ b/libCacheSim-python/libcachesim/data_loader.py @@ -0,0 +1,131 @@ +"""S3 Bucket data loader with local caching (HuggingFace-style).""" + +from __future__ import annotations + +import hashlib +import logging +import shutil +from pathlib import Path +from typing import Optional, Union +from urllib.parse import quote + +logger = logging.getLogger(__name__) + + +class DataLoader: + DEFAULT_BUCKET = "cache-datasets" + DEFAULT_CACHE_DIR = Path.home() / ".cache/libcachesim_hub" + + def __init__( + self, + bucket_name: str = DEFAULT_BUCKET, + cache_dir: Optional[Union[str, Path]] = None, + use_auth: bool = False + ): + self.bucket_name = bucket_name + self.cache_dir = Path(cache_dir) if cache_dir else self.DEFAULT_CACHE_DIR + self.use_auth = use_auth + self._s3_client = None + self._ensure_cache_dir() + + def _ensure_cache_dir(self) -> None: + (self.cache_dir / self.bucket_name).mkdir(parents=True, exist_ok=True) + + @property + def s3_client(self): + if self._s3_client is None: + try: + import boto3 + from botocore.config import Config + from botocore import UNSIGNED + + self._s3_client = boto3.client( + 's3', + config=None if self.use_auth else Config(signature_version=UNSIGNED) + ) + except ImportError: + raise ImportError("Install boto3: pip install boto3") + return self._s3_client + + def _cache_path(self, key: str) -> Path: + safe_name = hashlib.sha256(key.encode()).hexdigest()[:16] + "_" + quote(key, safe='') + return self.cache_dir / self.bucket_name / safe_name + + def _download(self, key: str, dest: Path) -> None: + temp = dest.with_suffix(dest.suffix + '.tmp') + temp.parent.mkdir(parents=True, exist_ok=True) + + try: + logger.info(f"Downloading s3://{self.bucket_name}/{key}") + obj = self.s3_client.get_object(Bucket=self.bucket_name, Key=key) + with open(temp, 'wb') as f: + f.write(obj['Body'].read()) + shutil.move(str(temp), str(dest)) + 
logger.info(f"Saved to: {dest}") + except Exception as e: + if temp.exists(): + temp.unlink() + raise RuntimeError(f"Download failed for s3://{self.bucket_name}/{key}: {e}") + + def load(self, key: str, force: bool = False, mode: str = 'rb') -> Union[bytes, str]: + path = self._cache_path(key) + if not path.exists() or force: + self._download(key, path) + with open(path, mode) as f: + return f.read() + + def is_cached(self, key: str) -> bool: + return self._cache_path(key).exists() + + def get_cache_path(self, key: str) -> Path: + return self._cache_path(key).as_posix() + + def clear_cache(self, key: Optional[str] = None) -> None: + if key: + path = self._cache_path(key) + if path.exists(): + path.unlink() + logger.info(f"Cleared: {path}") + else: + shutil.rmtree(self.cache_dir, ignore_errors=True) + logger.info(f"Cleared entire cache: {self.cache_dir}") + + def list_cached_files(self) -> list[str]: + if not self.cache_dir.exists(): + return [] + return [ + str(p) for p in self.cache_dir.rglob('*') + if p.is_file() and not p.name.endswith('.tmp') + ] + + def get_cache_size(self) -> int: + return sum( + p.stat().st_size for p in self.cache_dir.rglob('*') if p.is_file() + ) + + def list_s3_objects(self, prefix: str = "", delimiter: str = "/") -> dict: + """ + List S3 objects and pseudo-folders under a prefix. 
+ + Args: + prefix: The S3 prefix to list under (like folder path) + delimiter: Use "/" to simulate folder structure + + Returns: + A dict with two keys: + - "folders": list of sub-prefixes (folders) + - "files": list of object keys (files) + """ + paginator = self.s3_client.get_paginator('list_objects_v2') + result = {"folders": [], "files": []} + + for page in paginator.paginate( + Bucket=self.bucket_name, + Prefix=prefix, + Delimiter=delimiter + ): + # CommonPrefixes are like subdirectories + result["folders"].extend(cp["Prefix"] for cp in page.get("CommonPrefixes", [])) + result["files"].extend(obj["Key"] for obj in page.get("Contents", [])) + + return result diff --git a/libCacheSim-python/libcachesim/protocols.py b/libCacheSim-python/libcachesim/protocols.py index d2e7b8170..58eeddbff 100644 --- a/libCacheSim-python/libcachesim/protocols.py +++ b/libCacheSim-python/libcachesim/protocols.py @@ -6,7 +6,7 @@ """ from __future__ import annotations -from typing import Protocol, runtime_checkable, TYPE_CHECKING +from typing import Iterator, Protocol, runtime_checkable, TYPE_CHECKING if TYPE_CHECKING: from .libcachesim_python import Request @@ -18,17 +18,16 @@ class ReaderProtocol(Protocol): This protocol ensures that different reader implementations (SyntheticReader, TraceReader) can be used interchangeably. + + Only core methods are defined here. """ def get_num_of_req(self) -> int: ... def read_one_req(self, req: Request) -> Request: ... + def skip_n_req(self, n: int) -> int: ... def reset(self) -> None: ... def close(self) -> None: ... def clone(self) -> "ReaderProtocol": ... - def read_first_req(self, req: Request) -> Request: ... - def read_last_req(self, req: Request) -> Request: ... - def skip_n_req(self, n: int) -> int: ... - def read_one_req_above(self, req: Request) -> Request: ... - def go_back_one_req(self) -> None: ... - def set_read_pos(self, pos: float) -> None: ... - def get_read_pos(self) -> float: ... + def __iter__(self) -> Iterator[Request]: ... 
+ def __next__(self) -> Request: ... + def __len__(self) -> int: ... diff --git a/libCacheSim-python/libcachesim/trace_analyzer.py b/libCacheSim-python/libcachesim/trace_analyzer.py index 46c0f63a6..4e51da41c 100644 --- a/libCacheSim-python/libcachesim/trace_analyzer.py +++ b/libCacheSim-python/libcachesim/trace_analyzer.py @@ -12,18 +12,38 @@ AnalysisParam, ) +# Import ReaderException +class ReaderException(Exception): + """Exception raised when reader is not compatible""" + pass class TraceAnalyzer: _analyzer: Analyzer def __init__( self, - analyzer: Analyzer, reader: ReaderProtocol, output_path: str, - analysis_param: AnalysisParam, - analysis_option: AnalysisOption, + analysis_param: AnalysisParam = None, + analysis_option: AnalysisOption = None, ): + """ + Initialize trace analyzer. + + Args: + reader: Reader protocol + output_path: Path to output file + analysis_param: Analysis parameters + analysis_option: Analysis options + """ + if not hasattr(reader, 'c_reader') or not reader.c_reader: + raise ReaderException("Only C/C++ reader is supported") + + if analysis_param is None: + analysis_param = AnalysisParam() + if analysis_option is None: + analysis_option = AnalysisOption() + self._analyzer = Analyzer(reader._reader, output_path, analysis_option, analysis_param) def run(self) -> None: diff --git a/libCacheSim-python/src/export_analyzer.cpp b/libCacheSim-python/src/export_analyzer.cpp index 0d8fd6680..f05c853ab 100644 --- a/libCacheSim-python/src/export_analyzer.cpp +++ b/libCacheSim-python/src/export_analyzer.cpp @@ -92,8 +92,8 @@ void export_analyzer(py::module& m) { AnalysisOptionDeleter>( new traceAnalyzer::analysis_option_t(option)); }), - "req_rate"_a = false, "access_pattern"_a = false, "size"_a = false, - "reuse"_a = false, "popularity"_a = false, "ttl"_a = false, + "req_rate"_a = true, "access_pattern"_a = true, "size"_a = true, + "reuse"_a = true, "popularity"_a = true, "ttl"_a = false, "popularity_decay"_a = false, "lifetime"_a = false, 
"create_future_reuse_ccdf"_a = false, "prob_at_age"_a = false, "size_change"_a = false) @@ -119,18 +119,17 @@ void export_analyzer(py::module& m) { py::class_>(m, "Analyzer") .def(py::init([](reader_t* reader, std::string output_path, - const traceAnalyzer::analysis_param_t& param, - const traceAnalyzer::analysis_option_t& option) { + const traceAnalyzer::analysis_option_t& option, + const traceAnalyzer::analysis_param_t& param) { traceAnalyzer::TraceAnalyzer* analyzer = new traceAnalyzer::TraceAnalyzer(reader, output_path, option, param); return std::unique_ptr(analyzer); }), "reader"_a, "output_path"_a, - "param"_a = traceAnalyzer::default_param(), - "option"_a = traceAnalyzer::default_option()) - .def("run", &traceAnalyzer::TraceAnalyzer::run) - .def("cleanup", &traceAnalyzer::TraceAnalyzer::cleanup); + "option"_a = traceAnalyzer::default_option(), + "param"_a = traceAnalyzer::default_param()) + .def("run", &traceAnalyzer::TraceAnalyzer::run); } } // namespace libcachesim diff --git a/libCacheSim-python/tests/test_analyzer.py b/libCacheSim-python/tests/test_analyzer.py new file mode 100644 index 000000000..f5d854345 --- /dev/null +++ b/libCacheSim-python/tests/test_analyzer.py @@ -0,0 +1,15 @@ +from libcachesim import TraceAnalyzer, TraceReader, DataLoader +import os + + +def test_analyzer_common(): + # Add debugging and error handling + loader = DataLoader() + loader.load("cache_dataset_oracleGeneral/2020_tencentBlock/1K/tencentBlock_1621.oracleGeneral.zst") + file_path = loader.get_cache_path("cache_dataset_oracleGeneral/2020_tencentBlock/1K/tencentBlock_1621.oracleGeneral.zst") + + reader = TraceReader(file_path) + + analyzer = TraceAnalyzer(reader, output_path="./") + + analyzer.run() diff --git a/libCacheSim-python/tests/test_data_loader.py b/libCacheSim-python/tests/test_data_loader.py new file mode 100644 index 000000000..5aba6f5f2 --- /dev/null +++ b/libCacheSim-python/tests/test_data_loader.py @@ -0,0 +1,8 @@ +from libcachesim import DataLoader + + +def 
test_data_loader_common():
+    loader = DataLoader()
+    loader.load("cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst")
+    path = loader.get_cache_path("cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst")
+    files = loader.list_s3_objects("cache_dataset_oracleGeneral/2007_msr/")