diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
deleted file mode 100644
index 4733a91a..00000000
--- a/.github/workflows/pypi-release.yml
+++ /dev/null
@@ -1,149 +0,0 @@
-name: PyPI Release
-
-on:
- release:
- types: [published]
- workflow_dispatch: # Allow manual triggering
-
-permissions:
- contents: read
- actions: read
- id-token: write
-
-jobs:
- build-wheels:
- name: Build wheels on ${{ matrix.os }}
- runs-on: ${{ matrix.os }}
- strategy:
- matrix:
- os: [ubuntu-latest, macos-latest]
-
- steps:
- - uses: actions/checkout@v4
- with:
- submodules: recursive
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.11'
-
- - name: Set up Docker Buildx (Linux only)
- if: runner.os == 'Linux'
- uses: docker/setup-buildx-action@v3
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install cibuildwheel
-
- - name: Sync Python version
- run: python scripts/sync_python_version.py
-
-
- - name: Verify Docker (Linux only)
- if: runner.os == 'Linux'
- run: |
- docker --version
- docker info
- echo "Docker is ready for cibuildwheel"
-
- - name: Build wheels
- run: python -m cibuildwheel libCacheSim-python --output-dir wheelhouse
-
- - name: Upload wheels as artifacts
- uses: actions/upload-artifact@v4
- with:
- name: wheels-${{ matrix.os }}
- path: wheelhouse/*.whl
-
- build-sdist:
- name: Build source distribution
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- with:
- submodules: recursive
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.11'
-
- - name: Install build dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install build
-
- - name: Sync Python version
- run: python scripts/sync_python_version.py
-
- - name: Build source distribution
- run: python -m build --sdist libCacheSim-python --outdir dist/
-
- - name: Upload sdist as artifact
- uses: actions/upload-artifact@v4
- with:
- name: sdist
- path: dist/*.tar.gz
-
- publish-to-pypi:
- name: Publish to PyPI
- needs: [build-wheels, build-sdist]
- runs-on: ubuntu-latest
- if: github.event_name == 'release' && github.event.action == 'published'
- environment:
- name: pypi
- url: https://pypi.org/p/libcachesim
- permissions:
- id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
-
- steps:
- - name: Download all artifacts
- uses: actions/download-artifact@v4
- with:
- path: dist/
-
- - name: Flatten artifacts directory
- run: |
- mkdir -p final-dist
- find dist/ -name "*.whl" -exec cp {} final-dist/ \;
- find dist/ -name "*.tar.gz" -exec cp {} final-dist/ \;
- ls -la final-dist/
-
- - name: Publish to PyPI
- uses: pypa/gh-action-pypi-publish@release/v1
- with:
- packages-dir: final-dist/
- skip-existing: true
-
- publish-to-test-pypi:
- name: Publish to TestPyPI
- needs: [build-wheels, build-sdist]
- runs-on: ubuntu-latest
- if: github.event_name == 'workflow_dispatch'
- environment:
- name: testpypi
- url: https://test.pypi.org/p/libcachesim
- permissions:
- id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
-
- steps:
- - name: Download all artifacts
- uses: actions/download-artifact@v4
- with:
- path: dist/
-
- - name: Flatten artifacts directory
- run: |
- mkdir -p final-dist
- find dist/ -name "*.whl" -exec cp {} final-dist/ \;
- find dist/ -name "*.tar.gz" -exec cp {} final-dist/ \;
- ls -la final-dist/
-
- - name: Publish to TestPyPI
- uses: pypa/gh-action-pypi-publish@release/v1
- with:
- repository-url: https://test.pypi.org/legacy/
- packages-dir: final-dist/
- skip-existing: true
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
deleted file mode 100644
index 502cfaa7..00000000
--- a/.github/workflows/python.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-name: Python
-
-on: [push, pull_request]
-
-permissions:
- contents: read
-
-jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: "3.10"
-
- - name: Prepare
- run: bash scripts/install_dependency.sh
-
- - name: Build main libCacheSim project
- run: |
- cmake -G Ninja -B build
- ninja -C build
-
- - name: Install Python dependencies
- run: |
- pip install --upgrade pip
- pip install -r requirements.txt
- pip install pytest
-
- - name: Build libCacheSim-python
- run: |
- cd libCacheSim-python
- pip install -e .
-
- - name: Run tests
- run: |
- cd libCacheSim-python
- pytest tests/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 395a31d9..ebe0ba9a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -255,9 +255,6 @@ else()
message(STATUS "Building without test")
endif()
-# Export variables for scikit-build -> build/export_vars.cmake
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export)
-
# libCacheSim unified library compilation and installation
# Create a single library that combines all modular libraries
add_library(${PROJECT_NAME} STATIC
diff --git a/README.md b/README.md
index c176bc73..fb03d36a 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ A high-performance library for building and running cache simulations
---
[](https://github.com/1a1a11a/libCacheSim/actions/workflows/build.yml)
-[](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml)
+[](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml)
[](https://github.com/1a1a11a/libCacheSim/actions/workflows/npm-release.yml)
[](https://scorecard.dev/viewer/?uri=github.com/1a1a11a/libCacheSim)
@@ -291,14 +291,16 @@ If you are not extremely sensitive to the performance, our python binding can of
pip install libcachesim
```
+
+
### Simulation with python
```python
-import libcachesim as lcs
+from libcachesim import SyntheticReader, TraceReader, FIFO
-reader = lcs.create_zipf_requests(num_objects=1000, num_requests=10000) # synthetic trace
-# reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin") # real trace
-cache = lcs.FIFO(cache_size=1024*1024)
+reader = SyntheticReader(num_objects=1000, num_of_req=10000, alpha=1.0, dist="zipf") # synthetic trace
+# reader = TraceReader("./data/cloudPhysicsIO.oracleGeneral.bin") # real trace
+cache = FIFO(cache_size=1024*1024)
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}")
```
@@ -310,42 +312,51 @@ With python package, you can extend new algorithm to test your own eviction desi
See an example below
```python
-import libcachesim as lcs
-from collections import deque
-from contextlib import suppress
+from collections import OrderedDict
+from typing import Any
+
+from libcachesim import PluginCache, LRU, CommonCacheParams, Request
-cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO")
+def init_hook(_: CommonCacheParams) -> Any:
+ return OrderedDict()
-def init_hook(cache_size):
- return deque() # Use deque for FIFO order
+def hit_hook(data: Any, req: Request) -> None:
+ data.move_to_end(req.obj_id, last=True)
-def hit_hook(fifo_queue, obj_id, obj_size):
- pass # FIFO doesn't reorder on hit
+def miss_hook(data: Any, req: Request) -> None:
+    data[req.obj_id] = req.obj_size
-def miss_hook(fifo_queue, obj_id, obj_size):
- fifo_queue.append(obj_id) # Add to end of queue
+def eviction_hook(data: Any, _: Request) -> int:
+ return data.popitem(last=False)[0]
-def eviction_hook(fifo_queue, obj_id, obj_size):
- return fifo_queue[0] # Return first item (oldest)
+def remove_hook(data: Any, obj_id: int) -> None:
+ data.pop(obj_id, None)
-def remove_hook(fifo_queue, obj_id):
- with suppress(ValueError):
- fifo_queue.remove(obj_id)
+def free_hook(data: Any) -> None:
+ data.clear()
-# Set the hooks and test
-cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-reader = lcs.open_trace(
- trace_path="./data/cloudPhysicsIO.oracleGeneral.bin",
- params=lcs.ReaderInitParam(ignore_obj_size=True)
+plugin_lru_cache = PluginCache(
+ cache_size=128,
+ cache_init_hook=init_hook,
+ cache_hit_hook=hit_hook,
+ cache_miss_hook=miss_hook,
+ cache_eviction_hook=eviction_hook,
+ cache_remove_hook=remove_hook,
+ cache_free_hook=free_hook,
+ cache_name="Plugin_LRU",
)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
-print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}")
+
+reader = SyntheticReader(num_objects=1000, num_of_req=10000, obj_size=1, alpha=1.0, dist="zipf")
+req_miss_ratio, byte_miss_ratio = plugin_lru_cache.process_trace(reader)
+ref_req_miss_ratio, ref_byte_miss_ratio = LRU(128).process_trace(reader)
+print(f"plugin req miss ratio {req_miss_ratio}, ref req miss ratio {ref_req_miss_ratio}")
+print(f"plugin byte miss ratio {byte_miss_ratio}, ref byte miss ratio {ref_byte_miss_ratio}")
```
-See more information in [README.md](./libCacheSim-python/README.md) of the Python binding.
+See more information in [README.md](https://github.com/cacheMon/libCacheSim-python) of the Python binding.
---
## Open source cache traces
diff --git a/libCacheSim-python/.gitignore b/libCacheSim-python/.gitignore
deleted file mode 100644
index 34712f29..00000000
--- a/libCacheSim-python/.gitignore
+++ /dev/null
@@ -1,59 +0,0 @@
-# Automatically generated by `hgimportsvn`
-.svn
-.hgsvn
-
-# Ignore local virtualenvs
-lib/
-bin/
-include/
-.Python/
-
-# These lines are suggested according to the svn:ignore property
-# Feel free to enable them by uncommenting them
-*.pyc
-*.pyo
-*.swp
-*.class
-*.orig
-*~
-.hypothesis/
-
-# autogenerated
-src/_pytest/_version.py
-# setuptools
-.eggs/
-
-doc/*/_build
-doc/*/.doctrees
-build/
-dist/
-*.egg-info
-htmlcov/
-issue/
-env/
-.env/
-.venv/
-/pythonenv*/
-3rdparty/
-.tox
-.cache
-.pytest_cache
-.mypy_cache
-.coverage
-.coverage.*
-coverage.xml
-.ropeproject
-.idea
-.hypothesis
-.pydevproject
-.project
-.settings
-.vscode
-__pycache__/
-.python-version
-
-# generated by pip
-pip-wheel-metadata/
-
-# pytest debug logs generated via --debug
-pytestdebug.log
\ No newline at end of file
diff --git a/libCacheSim-python/CMakeLists.txt b/libCacheSim-python/CMakeLists.txt
deleted file mode 100644
index aebee06c..00000000
--- a/libCacheSim-python/CMakeLists.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-cmake_minimum_required(VERSION 3.15...3.27)
-
-# Include exported variables from cache
-if(DEFINED LIBCB_BUILD_DIR)
- set(PARENT_BUILD_DIR "${LIBCB_BUILD_DIR}")
- message(STATUS "Using provided LIBCB_BUILD_DIR: ${LIBCB_BUILD_DIR}")
-else()
- set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build")
-endif()
-set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake")
-
-if(EXISTS "${EXPORT_FILE}")
- include("${EXPORT_FILE}")
- message(STATUS "Loaded variables from export_vars.cmake")
-else()
- message(FATAL_ERROR "export_vars.cmake not found at ${EXPORT_FILE}. Please build the main project first (e.g. cd .. && cmake -G Ninja -B build)")
-endif()
-
-# Force enable -fPIC
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
-
-project(libCacheSim-python VERSION "${LIBCACHESIM_VERSION}")
-
-if(LOG_LEVEL_LOWER STREQUAL "default")
- if(CMAKE_BUILD_TYPE_LOWER MATCHES "debug")
- add_compile_definitions(LOGLEVEL=6)
- else()
- add_compile_definitions(LOGLEVEL=7)
- endif()
-elseif(LOG_LEVEL_LOWER STREQUAL "verbose")
- add_compile_definitions(LOGLEVEL=5)
-elseif(LOG_LEVEL_LOWER STREQUAL "debug")
- add_compile_definitions(LOGLEVEL=6)
-elseif(LOG_LEVEL_LOWER STREQUAL "info")
- add_compile_definitions(LOGLEVEL=7)
-elseif(LOG_LEVEL_LOWER STREQUAL "warn")
- add_compile_definitions(LOGLEVEL=8)
-elseif(LOG_LEVEL_LOWER STREQUAL "error")
- add_compile_definitions(LOGLEVEL=9)
-else()
- add_compile_definitions(LOGLEVEL=7)
-endif()
-
-# Find python and pybind11
-find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
-find_package(pybind11 CONFIG REQUIRED)
-
-# Include directories for dependencies
-include_directories(${GLib_INCLUDE_DIRS})
-include_directories(${GLib_CONFIG_INCLUDE_DIR})
-include_directories(${XGBOOST_INCLUDE_DIR})
-include_directories(${LIGHTGBM_PATH})
-include_directories(${ZSTD_INCLUDE_DIR})
-include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin)
-
-# Find the main libCacheSim library
-set(MAIN_PROJECT_BUILD_DIR "${PARENT_BUILD_DIR}")
-set(MAIN_PROJECT_LIB_PATH "${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a")
-
-if(EXISTS "${MAIN_PROJECT_LIB_PATH}")
- message(STATUS "Found pre-built libCacheSim library at ${MAIN_PROJECT_LIB_PATH}")
-
- # Import the main library as an imported target
- add_library(libCacheSim_main STATIC IMPORTED)
- set_target_properties(libCacheSim_main PROPERTIES
- IMPORTED_LOCATION "${MAIN_PROJECT_LIB_PATH}"
- INTERFACE_INCLUDE_DIRECTORIES "${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/utils/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim"
- )
- link_directories(${GLib_LIBRARY_DIRS})
- link_directories(${ZSTD_LIBRARY_DIRS})
- set(LIBCACHESIM_TARGET libCacheSim_main)
-
-else()
- message(FATAL_ERROR "Pre-built libCacheSim library not found. Please build the main project first: cd .. && cmake -G Ninja -B build && ninja -C build")
-endif()
-
-python_add_library(_libcachesim MODULE
- src/pylibcachesim.cpp
- ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/cli_reader_utils.c
- WITH_SOABI
-)
-
-set_target_properties(_libcachesim PROPERTIES
- POSITION_INDEPENDENT_CODE ON
- INSTALL_RPATH_USE_LINK_PATH TRUE
- BUILD_WITH_INSTALL_RPATH TRUE
- INSTALL_RPATH "$ORIGIN"
-)
-
-target_compile_definitions(_libcachesim PRIVATE VERSION_INFO=${PROJECT_VERSION})
-
-target_link_libraries(_libcachesim PRIVATE
- ${LIBCACHESIM_TARGET}
- pybind11::headers
- pybind11::module
- ${GLib_LIBRARIES}
- ${ZSTD_LIBRARIES}
-)
-
-# Add platform-specific link options and libraries
-if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
- # GNU ld option, only available on Linux
- target_link_options(_libcachesim PRIVATE -Wl,--no-as-needed)
- target_link_libraries(_libcachesim PRIVATE dl)
-elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
- # macOS doesn't need --no-as-needed
- # dl functions are part of the system library on macOS
- # No need to explicitly link dl
-
- # Find argp library on macOS
- find_library(ARGP_LIBRARY argp PATHS /opt/homebrew/lib /usr/local/lib)
- if(ARGP_LIBRARY)
- target_link_libraries(_libcachesim PRIVATE ${ARGP_LIBRARY})
- endif()
-
- # Find and link other dependencies that might be needed
- find_library(INTL_LIBRARY intl PATHS /opt/homebrew/lib /usr/local/lib)
- if(INTL_LIBRARY)
- target_link_libraries(_libcachesim PRIVATE ${INTL_LIBRARY})
- endif()
-else()
- # Other platforms - try to link dl if available
- find_library(DL_LIBRARY dl)
- if(DL_LIBRARY)
- target_link_libraries(_libcachesim PRIVATE ${DL_LIBRARY})
- endif()
-endif()
-
-# install to wheel directory
-install(TARGETS _libcachesim LIBRARY DESTINATION libcachesim)
diff --git a/libCacheSim-python/MAINFEST.in b/libCacheSim-python/MAINFEST.in
deleted file mode 100644
index e69de29b..00000000
diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md
deleted file mode 100644
index 23424c3d..00000000
--- a/libCacheSim-python/README.md
+++ /dev/null
@@ -1,337 +0,0 @@
-# libCacheSim Python Binding
-
-[](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml)
-[](https://pypi.org/project/libcachesim)
-[](https://pypi.org/project/libcachesim)
-[](https://pypistats.org/packages/libcachesim)
-
-Python bindings for libCacheSim, a high-performance cache simulator and analysis library.
-
-## Installation
-
-Binary installers for the latest released version are available at the [Python Package Index (PyPI)](https://pypi.org/project/libcachesim).
-
-```bash
-pip install libcachesim
-```
-
-### Installation from sources
-
-If there are no wheels suitable for your environment, consider building from source.
-
-```bash
-git clone https://github.com/1a1a11a/libCacheSim.git
-cd libCacheSim
-
-# Build the main libCacheSim library first
-cmake -G Ninja -B build
-ninja -C build
-
-# Install Python binding
-cd libCacheSim-python
-pip install -e .
-```
-
-### Testing
-```bash
-# Run all tests
-python -m pytest .
-
-# Test import
-python -c "import libcachesim; print('Success!')"
-```
-
-## Quick Start
-
-### Basic Usage
-
-```python
-import libcachesim as lcs
-
-# Create a cache
-cache = lcs.LRU(cache_size=1024*1024) # 1MB cache
-
-# Process requests
-req = lcs.Request()
-req.obj_id = 1
-req.obj_size = 100
-
-print(cache.get(req)) # False (first access)
-print(cache.get(req)) # True (second access)
-```
-
-### Trace Processing
-
-To simulate with traces, we need to read the request of traces correctly. `open_trace` is an unified interface for trace reading, which accepet three parameters:
-
-- `trace_path`: trace path, can be relative or absolutive path.
-- `type` (optional): if not given, we will automatically infer the type of trace according to the suffix of the trace file.
-- `params` (optional): if not given, default params are applied.
-
-```python
-import libcachesim as lcs
-
-# Open trace and process efficiently
-reader = lcs.open_trace(
- trace_path = "./data/cloudPhysicsIO.oracleGeneral.bin",
- type = lcs.TraceType.ORACLE_GENERAL_TRACE,
- params = lcs.ReaderInitParam(ignore_obj_size=True)
-)
-cache = lcs.S3FIFO(cache_size=1024*1024)
-
-# Process entire trace efficiently (C++ backend)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
-
-cache = lcs.S3FIFO(cache_size=1024*1024)
-# Process with limits and time ranges
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(
- reader,
- start_req=0,
- max_req=1000
-)
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
-```
-
-## Custom Cache Policies
-
-Implement custom cache replacement algorithms using pure Python functions - **no C/C++ compilation required**.
-
-### Python Hook Cache Overview
-
-The `PythonHookCachePolicy` allows you to define custom caching behavior through Python callback functions. This is perfect for:
-- Prototyping new cache algorithms
-- Educational purposes and learning
-- Research and experimentation
-- Custom business logic implementation
-
-### Hook Functions
-
-You need to implement these callback functions:
-
-- **`init_hook(cache_size: int) -> Any`**: Initialize your data structure
-- **`hit_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits
-- **`miss_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses
-- **`eviction_hook(data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict
-- **`remove_hook(data: Any, obj_id: int) -> None`**: Clean up when object removed
-- **`free_hook(data: Any) -> None`**: [Optional] Final cleanup
-
-### Example: Custom LRU Implementation
-
-```python
-import libcachesim as lcs
-from collections import OrderedDict
-
-# Create a Python hook-based cache
-cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU")
-
-# Define LRU policy hooks
-def init_hook(cache_size):
- return OrderedDict() # Track access order
-
-def hit_hook(lru_dict, obj_id, obj_size):
- lru_dict.move_to_end(obj_id) # Move to most recent
-
-def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = True # Add to end
-
-def eviction_hook(lru_dict, obj_id, obj_size):
- return next(iter(lru_dict)) # Return least recent
-
-def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
-
-# Set the hooks
-cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
-# Use it like any other cache
-req = lcs.Request()
-req.obj_id = 1
-req.obj_size = 100
-hit = cache.get(req)
-print(f"Cache hit: {hit}") # Should be False (miss)
-```
-
-### Example: Custom FIFO Implementation
-
-```python
-import libcachesim as lcs
-from collections import deque
-from contextlib import suppress
-
-cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO")
-
-def init_hook(cache_size):
- return deque() # Use deque for FIFO order
-
-def hit_hook(fifo_queue, obj_id, obj_size):
- pass # FIFO doesn't reorder on hit
-
-def miss_hook(fifo_queue, obj_id, obj_size):
- fifo_queue.append(obj_id) # Add to end of queue
-
-def eviction_hook(fifo_queue, obj_id, obj_size):
- return fifo_queue[0] # Return first item (oldest)
-
-def remove_hook(fifo_queue, obj_id):
- with suppress(ValueError):
- fifo_queue.remove(obj_id)
-
-# Set the hooks and test
-cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
-req = lcs.Request(obj_id=1, obj_size=100)
-hit = cache.get(req)
-print(f"Cache hit: {hit}") # Should be False (miss)
-```
-
-## Available Algorithms
-
-### Built-in Cache Algorithms
-
-#### Basic Algorithms
-- **FIFO**: First-In-First-Out
-- **LRU**: Least Recently Used
-- **LFU**: Least Frequently Used
-- **LFUDA**: LFU with Dynamic Aging
-- **Clock**: Clock/Second-chance algorithm
-
-#### Advanced Algorithms
-- **QDLP**: Queue Demotion with Lazy Promotion
-- **S3FIFO**: Simple, Fast, Fair FIFO (recommended for most workloads)
-- **Sieve**: High-performance eviction algorithm
-- **ARC**: Adaptive Replacement Cache
-- **TwoQ**: Two-Queue algorithm
-- **SLRU**: Segmented LRU
-- **TinyLFU**: TinyLFU with window
-- **WTinyLFU**: Windowed TinyLFU
-
-#### Research/ML Algorithms
-- **LeCaR**: Learning Cache Replacement (adaptive)
-- **Cacheus**: Cache replacement policy
-- **LRB**: Learning-based cache (if enabled)
-- **GLCache**: Machine learning-based cache
-- **ThreeLCache**: Three-level cache hierarchy (if enabled)
-
-#### Optimal Algorithms (for analysis)
-- **Belady**: Optimal offline algorithm
-- **BeladySize**: Size-aware optimal algorithm
-
-```python
-import libcachesim as lcs
-
-# All algorithms use the same unified interface
-cache_size = 1024 * 1024 # 1MB
-
-lru_cache = lcs.LRU(cache_size)
-s3fifo_cache = lcs.S3FIFO(cache_size)
-sieve_cache = lcs.Sieve(cache_size)
-arc_cache = lcs.ARC(cache_size)
-
-# All caches work identically
-req = lcs.Request()
-req.obj_id = 1
-req.obj_size = 100
-hit = lru_cache.get(req)
-print(hit)
-```
-
-## Examples and Testing
-
-### Algorithm Comparison
-```python
-import libcachesim as lcs
-
-def compare_algorithms(trace_path):
- reader = lcs.open_trace(trace_path, lcs.TraceType.VSCSI_TRACE)
- algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC']
- for algo_name in algorithms:
- cache = getattr(lcs, algo_name)(cache_size=1024*1024)
- obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
- print(f"{algo_name}\t\tObj: {obj_miss_ratio:.4f}, Byte: {byte_miss_ratio:.4f}")
-
-compare_algorithms("./data/cloudPhysicsIO.vscsi")
-```
-
-### Performance Benchmarking
-```python
-import time
-
-def benchmark_cache(cache, num_requests=100000):
- """Benchmark cache performance"""
- start_time = time.time()
- for i in range(num_requests):
- req = lcs.Request()
- req.obj_id = i % 1000 # Working set of 1000 objects
- req.obj_size = 100
- cache.get(req)
- end_time = time.time()
- throughput = num_requests / (end_time - start_time)
- print(f"Processed {num_requests} requests in {end_time - start_time:.2f}s")
- print(f"Throughput: {throughput:.0f} requests/sec")
-
-# Compare performance
-lru_cache = lcs.LRU(cache_size=1024*1024)
-s3fifo_cache = lcs.S3FIFO(cache_size=1024*1024)
-
-print("LRU Performance:")
-benchmark_cache(lru_cache)
-
-print("\nS3FIFO Performance:")
-benchmark_cache(s3fifo_cache)
-```
-
-## Advanced Usage
-
-### Multi-Format Trace Processing
-
-```python
-import libcachesim as lcs
-
-# Supported trace types
-trace_types = {
- "oracle": lcs.TraceType.ORACLE_GENERAL_TRACE,
- "csv": lcs.TraceType.CSV_TRACE,
- "vscsi": lcs.TraceType.VSCSI_TRACE,
- "txt": lcs.TraceType.PLAIN_TXT_TRACE
-}
-
-# Open different trace formats
-oracle_reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin", trace_types["oracle"])
-csv_reader = lcs.open_trace("./data/cloudPhysicsIO.txt", trace_types["txt"])
-
-# Process traces with different caches
-caches = [
- lcs.LRU(cache_size=1024*1024),
- lcs.S3FIFO(cache_size=1024*1024),
- lcs.Sieve(cache_size=1024*1024)
-]
-
-for i, cache in enumerate(caches):
- miss_ratio_oracle = cache.process_trace(oracle_reader)[0]
- miss_ratio_csv = cache.process_trace(csv_reader)[0]
- print(f"Cache {i} miss ratio: {miss_ratio_oracle:.4f}, {miss_ratio_csv:.4f}")
-```
-
-## Troubleshooting
-
-### Common Issues
-
-**Import Error**: Make sure libCacheSim C++ library is built first:
-```bash
-cmake -G Ninja -B build && ninja -C build
-```
-
-**Performance Issues**: Use `process_trace()` for large workloads instead of individual `get()` calls for better performance.
-
-**Memory Usage**: Monitor cache statistics (`cache.occupied_byte`) and ensure proper cache size limits for your system.
-
-**Custom Cache Issues**: Validate your custom implementation against built-in algorithms using the test functions above.
-
-**Install with uv**: Since automatically building with `uv` will fail due to incomplete source code, please force install the binary file via `uv pip install libcachesim --only-binary=:all:`.
-
-### Getting Help
-
-- Check the [main documentation](../doc/) for detailed guides
-- Open issues on [GitHub](https://github.com/1a1a11a/libCacheSim/issues)
-- Review [examples](/example) in the main repository
diff --git a/libCacheSim-python/examples/README.md b/libCacheSim-python/examples/README.md
deleted file mode 100644
index 3b63b7ff..00000000
--- a/libCacheSim-python/examples/README.md
+++ /dev/null
@@ -1,280 +0,0 @@
-# libCacheSim Python Examples
-
-This directory contains examples demonstrating how to use libCacheSim Python bindings for cache simulation and trace generation.
-
-## Overview
-
-libCacheSim Python bindings provide a powerful interface for:
-
-- Cache simulation with various eviction policies (LRU, FIFO, ARC, etc.)
-- Synthetic trace generation (Zipf and Uniform distributions)
-- Real trace analysis and processing
-- Custom cache policy implementation with Python hooks
-- Unified interface supporting all cache algorithms
-
-## Example Files
-
-### 1. Stream Request Generation (`stream_request_example.py`)
-
-Demonstrates how to generate synthetic request traces and use them for cache simulation:
-
-```python
-import libcachesim as lcs
-
-# Create Zipf-distributed requests
-zipf_generator = lcs.create_zipf_requests(
- num_objects=1000, # 1000 unique objects
- num_requests=10000, # 10000 requests
- alpha=1.0, # Zipf skewness
- obj_size=4000, # Object size in bytes
- seed=42 # For reproducibility
-)
-
-# Test with LRU cache
-cache = lcs.LRU(cache_size=50*1024*1024) # 50MB cache for better hit ratio
-miss_count = sum(1 for req in zipf_generator if not cache.get(req))
-print(f"Final miss ratio: {miss_count / 10000:.3f}")
-```
-
-**Features**:
-- Memory efficient: No temporary files created
-- Fast: Direct Request object generation
-- Reproducible: Support for random seeds
-- Flexible: Easy parameter adjustment
-
-### 2. Unified Interface Demo (`demo_unified_interface.py`)
-
-Shows the unified interface for all cache policies, including built-in and custom Python hook caches:
-
-```python
-import libcachesim as lcs
-
-cache_size = 1024 * 1024 # 1MB
-
-# Create different cache policies
-caches = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "ARC": lcs.ARC(cache_size),
-}
-
-# Create Python hook cache
-python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU")
-# Set hook functions...
-caches["Custom Python LRU"] = python_cache
-
-# Unified interface testing
-test_req = lcs.Request()
-test_req.obj_id = 1
-test_req.obj_size = 1024
-
-for name, cache in caches.items():
- result = cache.get(test_req)
- print(f"{name}: {'HIT' if result else 'MISS'}")
-```
-
-**Benefits of Unified Interface**:
-- Same API for all cache policies
-- Easy to switch between different algorithms
-- Efficient C++ backend trace processing
-- Consistent properties and statistics
-
-### 3. Python Hook Cache (`python_hook_cache_example.py`)
-
-Demonstrates how to create custom cache policies using Python hooks:
-
-```python
-import libcachesim as lcs
-from collections import OrderedDict
-
-class LRUPolicy:
- def __init__(self, cache_size):
- self.access_order = OrderedDict()
-
- def on_hit(self, obj_id, obj_size):
- self.access_order.move_to_end(obj_id)
-
- def on_miss(self, obj_id, obj_size):
- self.access_order[obj_id] = True
-
- def evict(self, obj_id, obj_size):
- return next(iter(self.access_order))
-
-def create_lru_cache(cache_size):
- cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU")
-
- def init_hook(cache_size):
- return LRUPolicy(cache_size)
-
- # Set other hooks...
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- return cache
-```
-
-**Custom Policy Features**:
-- Pure Python cache logic implementation
-- Support for LRU, FIFO and other policies
-- Flexible hook system
-- Same interface as built-in policies
-
-### 4. Zipf Trace Examples (`zipf_trace_example.py`)
-
-Shows synthetic trace generation methods and algorithm comparison:
-
-```python
-import libcachesim as lcs
-
-# Method 1: Create Zipf-distributed request generator
-zipf_generator = lcs.create_zipf_requests(
- num_objects=1000,
- num_requests=10000,
- alpha=1.0,
- obj_size=1024,
- seed=42
-)
-
-# Method 2: Create uniform-distributed request generator
-uniform_generator = lcs.create_uniform_requests(
- num_objects=1000,
- num_requests=10000,
- obj_size=1024,
- seed=42
-)
-
-# Compare different Zipf parameters
-alphas = [0.5, 1.0, 1.5, 2.0]
-for alpha in alphas:
- generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42)
- cache = lcs.LRU(1024*1024)
- hit_count = sum(1 for req in generator if cache.get(req))
- hit_ratio = hit_count / 10000
- print(f"α={alpha}: Hit ratio={hit_ratio:.4f}")
-```
-
-**Synthetic Trace Features**:
-- Higher α values create more skewed access patterns
-- Memory efficient: No temporary files created
-- Request generators for flexible processing
-- Suitable for simulating real workloads
-
-## Key Features
-
-### Trace Generation
-- `create_zipf_requests()`: Create Zipf-distributed request generator
-- `create_uniform_requests()`: Create uniform-distributed request generator
-
-### Cache Algorithms
-- **Classic algorithms**: `LRU()`, `FIFO()`, `ARC()`, `Clock()`
-- **Modern algorithms**: `S3FIFO()`, `Sieve()`, `TinyLFU()`
-- **Custom policies**: `PythonHookCachePolicy()`
-
-### Trace Processing
-- `open_trace()`: Open real trace files
-- `process_trace()`: High-performance trace processing
-
-## Basic Usage Examples
-
-### 1. Compare Cache Algorithms
-
-```python
-import libcachesim as lcs
-
-# Test different algorithms
-algorithms = ['LRU', 'FIFO', 'ARC', 'S3FIFO']
-cache_size = 1024*1024
-
-for algo_name in algorithms:
- # Create fresh workload for each algorithm
- generator = lcs.create_zipf_requests(1000, 10000, alpha=1.0, seed=42)
- cache = getattr(lcs, algo_name)(cache_size)
- hit_count = sum(1 for req in generator if cache.get(req))
- print(f"{algo_name}: {hit_count/10000:.3f}")
-```
-
-### 2. Parameter Sensitivity Analysis
-
-```python
-import libcachesim as lcs
-
-# Test different Zipf parameters
-for alpha in [0.5, 1.0, 1.5, 2.0]:
- generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42)
- cache = lcs.LRU(cache_size=512*1024)
-
- hit_count = sum(1 for req in generator if cache.get(req))
- print(f"α={alpha}: Hit ratio={hit_count/10000:.3f}")
-```
-
-## Parameters
-
-### Trace Generation Parameters
-- `num_objects`: Number of unique objects
-- `num_requests`: Number of requests to generate
-- `alpha`: Zipf skewness (α=1.0 for classic Zipf)
-- `obj_size`: Object size in bytes (default: 4000)
-- `seed`: Random seed for reproducibility
-
-### Cache Parameters
-- `cache_size`: Cache capacity in bytes
-- Algorithm-specific parameters (e.g.,`fifo_size_ratio` for S3FIFO)
-
-## Running Examples
-
-```bash
-# Navigate to examples directory
-cd libCacheSim-python/examples
-
-# Run stream-based trace generation
-python stream_request_example.py
-
-# Run unified interface demo
-python demo_unified_interface.py
-
-# Run Python hook cache example
-python python_hook_cache_example.py
-
-# Run Zipf trace examples
-python zipf_trace_example.py
-
-# Run all tests
-python -m pytest ../tests/ -v
-```
-
-## Performance Tips
-
-1. **Use appropriate cache and object sizes**:
- ```python
- # Good: cache can hold multiple objects
- cache = lcs.LRU(cache_size=1024*1024) # 1MB
- generator = lcs.create_zipf_requests(1000, 10000, obj_size=1024) # 1KB objects
- ```
-
-2. **Use seeds for reproducible experiments**:
- ```python
- generator = lcs.create_zipf_requests(1000, 10000, seed=42)
- ```
-
-3. **Process large traces with C++ backend**:
- ```python
- # Fast: C++ processing
- obj_miss_ratio, byte_miss_ratio = lcs.process_trace(cache, reader)
-
- # Slow: Python loop
- for req in reader:
- cache.get(req)
- ```
-
-4. **Understand Zipf parameter effects**:
- - α=0.5: Slightly skewed, close to uniform distribution
- - α=1.0: Classic Zipf distribution
- - α=2.0: Highly skewed, few objects get most accesses
-
-## Testing
-
-Run comprehensive tests:
-
-```bash
-python -m pytest ../tests/test_trace_generator.py -v
-python -m pytest ../tests/test_eviction.py -v
-python -m pytest ../tests/test_process_trace.py -v
-```
diff --git a/libCacheSim-python/examples/demo_unified_interface.py b/libCacheSim-python/examples/demo_unified_interface.py
deleted file mode 100644
index e435e582..00000000
--- a/libCacheSim-python/examples/demo_unified_interface.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo script showing the unified interface for all cache policies.
-This demonstrates how to use both native and Python hook-based caches
-with the same API for seamless algorithm comparison and switching.
-"""
-
-import sys
-import os
-
-# Add parent directory for development testing
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-try:
- import libcachesim as lcs
-except ImportError as e:
- print(f"Error importing libcachesim: {e}")
- print("Make sure the Python binding is built and installed")
- sys.exit(1)
-
-from collections import OrderedDict
-
-
-def create_trace_reader():
- """Helper function to create a trace reader."""
- data_file = os.path.join(
- os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin"
- )
- if not os.path.exists(data_file):
- print(f"Warning: Trace file not found at {data_file}")
- return None
- return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE)
-
-
-def create_demo_lru_hooks():
- """Create demo LRU hooks for Python-based cache policy."""
-
- def init_hook(cache_size):
- print(f" Initializing custom LRU with {cache_size} bytes")
- return OrderedDict()
-
- def hit_hook(lru_dict, obj_id, obj_size):
- if obj_id in lru_dict:
- lru_dict.move_to_end(obj_id)
-
- def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = obj_size
-
- def eviction_hook(lru_dict, obj_id, obj_size):
- if lru_dict:
- return next(iter(lru_dict))
- return obj_id
-
- def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
-
- return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook
-
-
-def demo_unified_interface():
- """Demonstrate the unified interface across different cache policies."""
- print("libCacheSim Python Binding - Unified Interface Demo")
- print("=" * 60)
-
- cache_size = 1024 * 1024 # 1MB
-
- # Create different cache policies
- caches = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "ARC": lcs.ARC(cache_size),
- }
-
- # Create Python hook-based LRU
- python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU")
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks()
- python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- caches["Custom Python LRU"] = python_cache
-
- print(f"Testing {len(caches)} different cache policies with unified interface:")
-
- # Demo 1: Single request interface
- print("1. Single Request Interface:")
- print(" All caches use: cache.get(request)")
-
- test_req = lcs.Request()
- test_req.obj_id = 1
- test_req.obj_size = 1024
-
- for name, cache in caches.items():
- result = cache.get(test_req)
- print(f" {name:20s}: {'HIT' if result else 'MISS'}")
-
- # Demo 2: Unified properties interface
- print("\n2. Unified Properties Interface:")
- print(" All caches provide: cache_size, n_obj, occupied_byte, n_req")
-
- for name, cache in caches.items():
- print(
- f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, "
- f"bytes={cache.occupied_byte}, reqs={cache.n_req}"
- )
-
- # Demo 3: Efficient trace processing
- print("\n3. Efficient Trace Processing Interface:")
- print(" All caches use: cache.process_trace(reader, max_req=N)")
-
- max_requests = 1000
-
- for name, cache in caches.items():
- # Create fresh reader for each cache
- reader = create_trace_reader()
- if not reader:
- print(f" {name:20s}: trace file not available")
- continue
-
- obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=max_requests)
- print(f" {name:20s}: obj_miss_ratio={obj_miss_ratio:.4f}, byte_miss_ratio={byte_miss_ratio:.4f}")
-
- print("\nKey Benefits of Unified Interface:")
- print(" • Same API for all cache policies (built-in + custom)")
- print(" • Easy to switch between different algorithms")
- print(" • Efficient trace processing in C++ (no Python overhead)")
- print(" • Consistent properties and statistics")
- print(" • Type-safe and well-documented")
-
- print("\nDemo completed! All cache policies work with the same interface.")
-
-
-if __name__ == "__main__":
- demo_unified_interface()
diff --git a/libCacheSim-python/examples/python_hook_cache_example.py b/libCacheSim-python/examples/python_hook_cache_example.py
deleted file mode 100644
index 06d06c45..00000000
--- a/libCacheSim-python/examples/python_hook_cache_example.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example demonstrating how to create custom cache policies using Python hooks.
-
-This example shows how to implement LRU and FIFO cache policies using the
-PythonHookCachePolicy class, which allows users to define cache behavior using
-pure Python functions instead of C/C++ plugins.
-"""
-
-import libcachesim as lcs
-from collections import OrderedDict, deque
-from contextlib import suppress
-
-
-class LRUPolicy:
- """LRU (Least Recently Used) cache policy implementation."""
-
- def __init__(self, cache_size):
- self.cache_size = cache_size
- self.access_order = OrderedDict() # obj_id -> True (for ordering)
-
- def on_hit(self, obj_id, obj_size):
- """Move accessed object to end (most recent)."""
- if obj_id in self.access_order:
- # Move to end (most recent)
- self.access_order.move_to_end(obj_id)
-
- def on_miss(self, obj_id, obj_size):
- """Add new object to end (most recent)."""
- self.access_order[obj_id] = True
-
- def evict(self, obj_id, obj_size):
- """Return the least recently used object ID."""
- if self.access_order:
- # Return first item (least recent)
- victim_id = next(iter(self.access_order))
- return victim_id
- raise RuntimeError("No objects to evict")
-
- def on_remove(self, obj_id):
- """Remove object from tracking."""
- self.access_order.pop(obj_id, None)
-
-
-class FIFOPolicy:
- """FIFO (First In First Out) cache policy implementation."""
-
- def __init__(self, cache_size):
- self.cache_size = cache_size
- self.insertion_order = deque() # obj_id queue
-
- def on_hit(self, obj_id, obj_size):
- """FIFO doesn't change order on hits."""
- pass
-
- def on_miss(self, obj_id, obj_size):
- """Add new object to end of queue."""
- self.insertion_order.append(obj_id)
-
- def evict(self, obj_id, obj_size):
- """Return the first inserted object ID."""
- if self.insertion_order:
- victim_id = self.insertion_order.popleft()
- return victim_id
- raise RuntimeError("No objects to evict")
-
- def on_remove(self, obj_id):
- """Remove object from tracking."""
- with suppress(ValueError):
- self.insertion_order.remove(obj_id)
-
-
-def create_lru_cache(cache_size):
- """Create an LRU cache using Python hooks."""
- cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU")
-
- def init_hook(cache_size):
- return LRUPolicy(cache_size)
-
- def hit_hook(policy, obj_id, obj_size):
- policy.on_hit(obj_id, obj_size)
-
- def miss_hook(policy, obj_id, obj_size):
- policy.on_miss(obj_id, obj_size)
-
- def eviction_hook(policy, obj_id, obj_size):
- return policy.evict(obj_id, obj_size)
-
- def remove_hook(policy, obj_id):
- policy.on_remove(obj_id)
-
- def free_hook(policy):
- # Python garbage collection handles cleanup
- pass
-
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook)
- return cache
-
-
-def create_fifo_cache(cache_size):
- """Create a FIFO cache using Python hooks."""
- cache = lcs.PythonHookCachePolicy(cache_size, "PythonFIFO")
-
- def init_hook(cache_size):
- return FIFOPolicy(cache_size)
-
- def hit_hook(policy, obj_id, obj_size):
- policy.on_hit(obj_id, obj_size)
-
- def miss_hook(policy, obj_id, obj_size):
- policy.on_miss(obj_id, obj_size)
-
- def eviction_hook(policy, obj_id, obj_size):
- return policy.evict(obj_id, obj_size)
-
- def remove_hook(policy, obj_id):
- policy.on_remove(obj_id)
-
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- return cache
-
-
-def test_cache_policy(cache, name):
- """Test a cache policy with sample requests."""
- print(f"\n=== Testing {name} Cache ===")
-
- # Test requests: obj_id, obj_size
- test_requests = [
- (1, 100),
- (2, 100),
- (3, 100),
- (4, 100),
- (5, 100), # Fill cache
- (1, 100), # Hit
- (6, 100), # Miss, should evict something
- (2, 100), # Hit or miss depending on policy
- (7, 100), # Miss, should evict something
- ]
-
- hits = 0
- misses = 0
-
- for obj_id, obj_size in test_requests:
- req = lcs.Request()
- req.obj_id = obj_id
- req.obj_size = obj_size
-
- hit = cache.get(req)
- if hit:
- hits += 1
- print(f"Request {obj_id}: HIT")
- else:
- misses += 1
- print(f"Request {obj_id}: MISS")
-
- print(f"Total: {hits} hits, {misses} misses")
- print(f"Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes occupied")
-
-
-def main():
- """Main example function."""
- cache_size = 500 # Bytes (can hold 5 objects of size 100 each)
-
- # Test LRU cache
- lru_cache = create_lru_cache(cache_size)
- test_cache_policy(lru_cache, "LRU")
-
- # Test FIFO cache
- fifo_cache = create_fifo_cache(cache_size)
- test_cache_policy(fifo_cache, "FIFO")
-
- print("\n=== Comparison ===")
- print("LRU keeps recently accessed items, evicting least recently used")
- print("FIFO keeps items in insertion order, evicting oldest inserted")
-
-
-if __name__ == "__main__":
- main()
diff --git a/libCacheSim-python/examples/stream_request_example.py b/libCacheSim-python/examples/stream_request_example.py
deleted file mode 100644
index eed213b7..00000000
--- a/libCacheSim-python/examples/stream_request_example.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example: Using stream request generators for cache simulation.
-
-This example demonstrates how to use the stream request generators
-to create synthetic traces and run cache simulations without creating
-temporary files.
-"""
-
-import libcachesim as lcs
-
-
-def main():
- """Demonstrate stream request generators."""
- print("libCacheSim Stream Request Generation Example")
- print("=" * 50)
-
- # Example 1: Basic Zipf generation with appropriate cache size
- print("\n1. Basic Zipf Request Generation")
- print("-" * 30)
-
- # Use reasonable cache and object sizes
- cache_size = 50 * 1024 * 1024 # 50MB cache
- obj_size = 1024 # 1KB objects
- num_objects = 1000
- num_requests = 10000
-
- # Create a cache
- cache = lcs.LRU(cache_size=cache_size)
-
- # Create a Zipf-distributed request generator
- zipf_generator = lcs.create_zipf_requests(
- num_objects=num_objects,
- num_requests=num_requests,
- alpha=1.0, # Zipf skewness
- obj_size=obj_size, # Object size in bytes
- seed=42, # For reproducibility
- )
-
- print(f"Cache size: {cache_size // 1024 // 1024}MB")
- print(f"Object size: {obj_size}B")
- print(f"Generated {num_requests} Zipf requests for {num_objects} objects")
-
- # Process the requests directly
- hit_count = 0
- for i, req in enumerate(zipf_generator):
- if cache.get(req):
- hit_count += 1
-
- # Print progress every 2000 requests
- if (i + 1) % 2000 == 0:
- current_hit_ratio = hit_count / (i + 1)
- print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}")
-
- final_hit_ratio = hit_count / num_requests
- print(f"Final hit ratio: {final_hit_ratio:.3f}")
-
- # Example 2: Uniform distribution comparison
- print("\n2. Uniform Request Generation")
- print("-" * 30)
-
- # Create a uniform-distributed request generator
- uniform_generator = lcs.create_uniform_requests(
- num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42
- )
-
- print(f"Generated {num_requests} uniform requests for {num_objects} objects")
-
- # Reset cache and process uniform requests
- cache = lcs.LRU(cache_size=cache_size)
- hit_count = 0
-
- for i, req in enumerate(uniform_generator):
- if cache.get(req):
- hit_count += 1
-
- if (i + 1) % 2000 == 0:
- current_hit_ratio = hit_count / (i + 1)
- print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}")
-
- final_hit_ratio = hit_count / num_requests
- print(f"Final hit ratio: {final_hit_ratio:.3f}")
-
- # Example 3: Compare different Zipf alpha values
- print("\n3. Zipf Alpha Parameter Comparison")
- print("-" * 30)
-
- alphas = [0.5, 1.0, 1.5, 2.0]
- print(f"{'Alpha':<8} {'Hit Ratio':<12} {'Description'}")
- print("-" * 40)
-
- for alpha in alphas:
- generator = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size=cache_size)
- hit_count = sum(1 for req in generator if cache.get(req))
- hit_ratio = hit_count / num_requests
-
- # Describe the skewness
- if alpha < 0.8:
- description = "Low skew (nearly uniform)"
- elif alpha < 1.2:
- description = "Classic Zipf"
- elif alpha < 1.8:
- description = "High skew"
- else:
- description = "Very high skew"
-
- print(f"{alpha:<8.1f} {hit_ratio:<12.3f} {description}")
-
- # Example 4: Cache size sensitivity
- print("\n4. Cache Size Sensitivity")
- print("-" * 30)
-
- # Fixed workload
- generator = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
- )
-
- cache_sizes = [
- 1 * 1024 * 1024, # 1MB
- 5 * 1024 * 1024, # 5MB
- 10 * 1024 * 1024, # 10MB
- 50 * 1024 * 1024, # 50MB
- ]
-
- print(f"{'Cache Size':<12} {'Hit Ratio':<12} {'Objects Fit'}")
- print("-" * 36)
-
- for cache_size in cache_sizes:
- cache = lcs.LRU(cache_size=cache_size)
-
- # Create fresh generator for each test
- test_generator = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
- )
-
- hit_count = sum(1 for req in test_generator if cache.get(req))
- hit_ratio = hit_count / num_requests
- objects_fit = cache_size // obj_size
-
- print(f"{cache_size // 1024 // 1024}MB{'':<8} {hit_ratio:<12.3f} ~{objects_fit}")
-
- print("\nNotes:")
- print("- Higher α values create more skewed access patterns")
- print("- Skewed patterns generally have higher hit ratios")
- print("- Cache size affects performance, but beyond a point diminishing returns")
- print(f"- Working set: {num_objects} objects × {obj_size}B = {num_objects * obj_size // 1024}KB")
-
-
-if __name__ == "__main__":
- main()
diff --git a/libCacheSim-python/examples/zipf_trace_example.py b/libCacheSim-python/examples/zipf_trace_example.py
deleted file mode 100644
index 662ae0fa..00000000
--- a/libCacheSim-python/examples/zipf_trace_example.py
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example demonstrating trace generation and cache simulation in libCacheSim Python bindings.
-
-This example shows how to:
-1. Generate synthetic request traces using available APIs
-2. Use the generated traces with cache simulations
-3. Compare different algorithms and parameters
-"""
-
-import libcachesim as lcs
-
-
-def example_basic_trace_generation():
- """Basic example of generating synthetic traces."""
- print("=== Basic Synthetic Trace Generation ===")
-
- # Generate Zipf requests using available API
- num_objects = 1000
- num_requests = 10000
- alpha = 1.0
- obj_size = 1024 # 1KB objects
-
- # Create Zipf-distributed requests
- zipf_requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- print(f"Generated {num_requests} Zipf requests with α={alpha}")
- print(f"Object size: {obj_size}B, Number of unique objects: {num_objects}")
-
- # Use the requests with a cache
- cache = lcs.LRU(cache_size=50 * 1024 * 1024) # 50MB cache
- hit_count = sum(1 for req in zipf_requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- print(f"LRU cache hit ratio: {hit_ratio:.4f}")
-
- return hit_ratio
-
-
-def example_compare_zipf_parameters():
- """Compare different Zipf parameters."""
- print("\n=== Comparing Zipf Parameters ===")
-
- num_objects = 1000
- num_requests = 10000
- cache_size = 50 * 1024 * 1024 # 50MB
- obj_size = 1024 # 1KB objects
-
- alphas = [0.5, 1.0, 1.5, 2.0]
- results = {}
-
- print(f"{'Alpha':<8} {'LRU':<8} {'FIFO':<8} {'ARC':<8} {'Clock':<8}")
- print("-" * 40)
-
- for alpha in alphas:
- # Test with different cache policies
- policies = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "ARC": lcs.ARC(cache_size),
- "Clock": lcs.Clock(cache_size),
- }
-
- results[alpha] = {}
- hit_ratios = []
- for name, cache in policies.items():
- # Create fresh request iterator for each cache
- test_requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
- hit_count = sum(1 for req in test_requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- results[alpha][name] = hit_ratio
- hit_ratios.append(f"{hit_ratio:.3f}")
-
- print(f"{alpha:<8.1f} {hit_ratios[0]:<8} {hit_ratios[1]:<8} {hit_ratios[2]:<8} {hit_ratios[3]:<8}")
-
- return results
-
-
-def example_algorithm_comparison():
- """Compare different cache algorithms."""
- print("\n=== Cache Algorithm Comparison ===")
-
- # Fixed workload parameters
- num_objects = 1000
- num_requests = 10000
- alpha = 1.0
- obj_size = 1024
- cache_size = 10 * 1024 * 1024 # 10MB
-
- # Available algorithms
- algorithms = {
- "LRU": lcs.LRU,
- "FIFO": lcs.FIFO,
- "ARC": lcs.ARC,
- "Clock": lcs.Clock,
- "S3FIFO": lcs.S3FIFO,
- "Sieve": lcs.Sieve,
- }
-
- print(f"Testing with: {num_objects} objects, {num_requests} requests")
- print(f"Cache size: {cache_size // 1024 // 1024}MB, Object size: {obj_size}B")
- print(f"Zipf alpha: {alpha}")
- print()
-
- print(f"{'Algorithm':<10} {'Hit Ratio':<12} {'Description'}")
- print("-" * 45)
-
- results = {}
- for name, cache_class in algorithms.items():
- try:
- # Create fresh requests for each algorithm
- requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- cache = cache_class(cache_size)
- hit_count = sum(1 for req in requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- results[name] = hit_ratio
-
- # Add descriptions
- descriptions = {
- "LRU": "Least Recently Used",
- "FIFO": "First In First Out",
- "ARC": "Adaptive Replacement Cache",
- "Clock": "Clock/Second Chance",
- "S3FIFO": "Simple Scalable FIFO",
- "Sieve": "Lazy Promotion",
- }
-
- print(f"{name:<10} {hit_ratio:<12.4f} {descriptions.get(name, '')}")
-
- except Exception as e:
- print(f"{name:<10} {'ERROR':<12} {str(e)}")
-
- return results
-
-
-def example_uniform_vs_zipf():
- """Compare uniform vs Zipf distributions."""
- print("\n=== Uniform vs Zipf Distribution Comparison ===")
-
- num_objects = 1000
- num_requests = 10000
- obj_size = 1024
- cache_size = 10 * 1024 * 1024
-
- # Test uniform distribution
- uniform_requests = lcs.create_uniform_requests(
- num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size)
- uniform_hits = sum(1 for req in uniform_requests if cache.get(req))
- uniform_hit_ratio = uniform_hits / num_requests
-
- # Test Zipf distribution
- zipf_requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size)
- zipf_hits = sum(1 for req in zipf_requests if cache.get(req))
- zipf_hit_ratio = zipf_hits / num_requests
-
- print(f"{'Distribution':<12} {'Hit Ratio':<12} {'Description'}")
- print("-" * 45)
- print(f"{'Uniform':<12} {uniform_hit_ratio:<12.4f} {'All objects equally likely'}")
- print(f"{'Zipf (α=1.0)':<12} {zipf_hit_ratio:<12.4f} {'Some objects much more popular'}")
-
- print(
- f"\nObservation: Zipf typically shows{'higher' if zipf_hit_ratio > uniform_hit_ratio else 'lower'} hit ratios"
- )
- print("due to locality of reference (hot objects get cached)")
-
-
-def example_cache_size_analysis():
- """Analyze the effect of different cache sizes."""
- print("\n=== Cache Size Sensitivity Analysis ===")
-
- num_objects = 1000
- num_requests = 10000
- alpha = 1.0
- obj_size = 1024
-
- cache_sizes = [
- 1 * 1024 * 1024, # 1MB
- 5 * 1024 * 1024, # 5MB
- 10 * 1024 * 1024, # 10MB
- 25 * 1024 * 1024, # 25MB
- 50 * 1024 * 1024, # 50MB
- ]
-
- print(f"{'Cache Size':<12} {'Objects Fit':<12} {'Hit Ratio':<12} {'Efficiency'}")
- print("-" * 55)
-
- for cache_size in cache_sizes:
- requests = lcs.create_zipf_requests(
- num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42
- )
-
- cache = lcs.LRU(cache_size)
- hit_count = sum(1 for req in requests if cache.get(req))
- hit_ratio = hit_count / num_requests
- objects_fit = cache_size // obj_size
- efficiency = hit_ratio / (cache_size / (1024 * 1024)) # hit ratio per MB
-
- print(f"{cache_size // 1024 // 1024}MB{'':<8} {objects_fit:<12} {hit_ratio:<12.4f} {efficiency:<12.4f}")
-
-
-def main():
- """Run all examples."""
- print("libCacheSim Python Bindings - Trace Generation Examples")
- print("=" * 60)
-
- try:
- # Run examples
- example_basic_trace_generation()
- example_compare_zipf_parameters()
- example_algorithm_comparison()
- example_uniform_vs_zipf()
- example_cache_size_analysis()
-
- print("\n" + "=" * 60)
- print("All examples completed successfully!")
- print("\nKey Takeaways:")
- print("• Higher Zipf α values create more skewed access patterns")
- print("• Skewed patterns generally result in higher cache hit ratios")
- print("• Different algorithms perform differently based on workload")
- print("• Cache size has diminishing returns beyond working set size")
-
- except Exception as e:
- print(f"Error running examples: {e}")
- import traceback
-
- traceback.print_exc()
-
-
-if __name__ == "__main__":
- main()
diff --git a/libCacheSim-python/export/CMakeLists.txt b/libCacheSim-python/export/CMakeLists.txt
deleted file mode 100644
index 917e8319..00000000
--- a/libCacheSim-python/export/CMakeLists.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-# Helper functions are removed since we don't export source files anymore
-
-set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake")
-file(WRITE "${EXPORT_FILE}" "")
-
-get_filename_component(MAIN_PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR} ABSOLUTE)
-file(WRITE ${CMAKE_BINARY_DIR}/export_vars.cmake "set(MAIN_PROJECT_SOURCE_DIR \"${MAIN_PROJECT_SOURCE_DIR}\")\n")
-file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(dependency_libs \"${dependency_libs}\")\n")
-file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(LIBCACHESIM_VERSION \"${LIBCACHESIM_VERSION}\")\n")
-
-# ==============================================================================
-# Export project metadata
-# ==============================================================================
-file(APPEND "${EXPORT_FILE}" "set(LIBCACHESIM_VERSION \"${${PROJECT_NAME}_VERSION}\")\n")
-
-# ==============================================================================
-# Export essential include directory variables
-# ==============================================================================
-foreach(var IN ITEMS GLib_INCLUDE_DIRS GLib_CONFIG_INCLUDE_DIR XGBOOST_INCLUDE_DIR LIGHTGBM_PATH ZSTD_INCLUDE_DIR)
- file(APPEND "${EXPORT_FILE}" "set(${var} \"${${var}}\")\n")
-endforeach()
-
-# ==============================================================================
-# Export dependency library variables
-# ==============================================================================
-file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARY_DIRS \"${GLib_LIBRARY_DIRS}\")\n")
-file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARIES \"${GLib_LIBRARIES}\")\n")
-get_filename_component(ZSTD_LIBRARY_DIR "${ZSTD_LIBRARIES}" DIRECTORY)
-file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARY_DIRS \"${ZSTD_LIBRARY_DIRS}\")\n")
-file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARIES \"${ZSTD_LIBRARIES}\")\n")
-file(APPEND "${EXPORT_FILE}" "set(dependency_libs \"${dependency_libs}\")\n")
-
-# ==============================================================================
-# Export essential build option variables
-# ==============================================================================
-file(APPEND "${EXPORT_FILE}" "set(LOG_LEVEL_LOWER \"${LOG_LEVEL_LOWER}\")\n")
-
-message(STATUS "Exported essential variables to ${EXPORT_FILE}")
diff --git a/libCacheSim-python/export/README.md b/libCacheSim-python/export/README.md
deleted file mode 100644
index 976b1daa..00000000
--- a/libCacheSim-python/export/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# Python Binding Export System
-
-Build system bridge for sharing CMake variables between the main libCacheSim project and Python binding.
-
-## Purpose
-
-The `export/CMakeLists.txt` exports all necessary build variables (source files, include directories, compiler flags, etc.) from the main project to the Python binding, enabling consistent builds without duplicating configuration.
-
-## How It Works
-
-1. **Export**: Main project writes variables to `export_vars.cmake`
-2. **Import**: Python binding includes this file during CMake configuration
-3. **Build**: Python binding uses shared variables for consistent compilation
-
-## Key Exported Variables
-
-### Source Files
-- Cache algorithms, data structures, trace readers
-- Profilers, utilities, analyzers
-
-### Build Configuration
-- Include directories (main, GLib, ZSTD, XGBoost, LightGBM)
-- Compiler flags (C/C++)
-- Dependency libraries
-- Build options (hugepage, tests, optional features)
-
-## Usage
-
-**Main Project** (`CMakeLists.txt`):
-```cmake
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export)
-```
-
-**Python Binding** (`libCacheSim-python/CMakeLists.txt`):
-```cmake
-set(EXPORT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../build/export_vars.cmake")
-include("${EXPORT_FILE}")
-```
-
-## For Developers
-
-This system ensures the Python binding automatically picks up changes to:
-- New source files added to the main project
-- Updated compiler flags or dependencies
-- Modified build options
-
-No manual synchronization needed between main project and Python binding builds.
diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py
deleted file mode 100644
index 47e693cd..00000000
--- a/libCacheSim-python/libcachesim/__init__.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""libCacheSim Python bindings"""
-
-from __future__ import annotations
-
-from ._libcachesim import (
- Cache,
- Reader,
- ReaderInitParam,
- Request,
- ReqOp,
- TraceType,
- __doc__,
- __version__,
- open_trace,
- process_trace,
- process_trace_python_hook,
-)
-from .eviction import (
- ARC,
- Belady,
- BeladySize,
- Cacheus,
- Clock,
- FIFO,
- LeCaR,
- LFU,
- LFUDA,
- LRB,
- LRU,
- PythonHookCachePolicy,
- QDLP,
- S3FIFO,
- Sieve,
- SLRU,
- ThreeLCache,
- TinyLFU,
- TwoQ,
- WTinyLFU,
-)
-from .trace_generator import (
- create_zipf_requests,
- create_uniform_requests,
-)
-
-__all__ = [
- # Core classes
- "Cache",
- "Reader",
- "Request",
- "ReaderInitParam",
- # Trace types and operations
- "TraceType",
- "ReqOp",
- # Cache policies
- "LRU",
- "FIFO",
- "ARC",
- "Clock",
- "LFU",
- "LFUDA",
- "SLRU",
- "S3FIFO",
- "Sieve",
- "TinyLFU",
- "WTinyLFU",
- "TwoQ",
- "ThreeLCache",
- "Belady",
- "BeladySize",
- "LRB",
- "QDLP",
- "LeCaR",
- "Cacheus",
- # Custom cache policy
- "PythonHookCachePolicy",
- # Functions
- "open_trace",
- "process_trace",
- "process_trace_python_hook",
- "create_zipf_requests",
- "create_uniform_requests",
- # Metadata
- "__doc__",
- "__version__",
-]
diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi
deleted file mode 100644
index 6992a74a..00000000
--- a/libCacheSim-python/libcachesim/__init__.pyi
+++ /dev/null
@@ -1,293 +0,0 @@
-"""
-libCacheSim Python bindings
---------------------------
-
-.. currentmodule:: libcachesim
-
-.. autosummary::
- :toctree: _generate
-
- open_trace
- ARC
- Clock
- FIFO
- LRB
- LRU
- S3FIFO
- Sieve
- ThreeLCache
- TinyLFU
- TwoQ
- Cache
- Request
- Reader
- reader_init_param_t
- TraceType
- PythonHookCachePolicy
- process_trace
- process_trace_python_hook
- create_zipf_requests
- create_uniform_requests
-"""
-
-from typing import Any, Callable, Optional, Union, overload
-from collections.abc import Iterator
-
-from _libcachesim import TraceType, ReqOp
-
-def open_trace(
- trace_path: str,
- type: Optional[TraceType] = None,
- reader_init_param: Optional[Union[dict, reader_init_param_t]] = None,
-) -> Reader: ...
-def process_trace(
- cache: Cache,
- reader: Reader,
- start_req: int = 0,
- max_req: int = -1,
-) -> tuple[float, float]:
- """
- Process a trace with a cache and return miss ratio.
- """
-
-def process_trace_python_hook(
- cache: PythonHookCache,
- reader: Reader,
- start_req: int = 0,
- max_req: int = -1,
-) -> tuple[float, float]:
- """
- Process a trace with a Python hook cache and return miss ratio.
- """
-
-# Trace generation functions
-def create_zipf_requests(
- num_objects: int,
- num_requests: int,
- alpha: float = 1.0,
- obj_size: int = 4000,
- time_span: int = 86400 * 7,
- start_obj_id: int = 0,
- seed: Optional[int] = None,
-) -> Iterator[Request]:
- """Create a Zipf-distributed request generator.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Number of requests to generate
- alpha (float): Zipf skewness parameter (alpha >= 0)
- obj_size (int): Object size in bytes
- time_span (int): Time span in seconds
- start_obj_id (int): Starting object ID
- seed (int, optional): Random seed for reproducibility
-
- Returns:
- Iterator[Request]: A generator that yields Request objects
- """
-
-def create_uniform_requests(
- num_objects: int,
- num_requests: int,
- obj_size: int = 4000,
- time_span: int = 86400 * 7,
- start_obj_id: int = 0,
- seed: Optional[int] = None,
-) -> Iterator[Request]:
- """Create a uniform-distributed request generator.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Number of requests to generate
- obj_size (int): Object size in bytes
- time_span (int): Time span in seconds
- start_obj_id (int): Starting object ID
- seed (int, optional): Random seed for reproducibility
-
- Returns:
- Iterator[Request]: A generator that yields Request objects
- """
-
-class reader_init_param_t:
- time_field: int
- obj_id_field: int
- obj_size_field: int
- delimiter: str
- has_header: bool
- binary_fmt_str: str
-
-class Cache:
- n_req: int
- cache_size: int
- @property
- def n_obj(self) -> int: ...
- @property
- def occupied_byte(self) -> int: ...
- def get(self, req: Request) -> bool: ...
-
-class Request:
- clock_time: int
- hv: int
- obj_id: int
- obj_size: int
- op: ReqOp
-
- @overload
- def __init__(self) -> None: ...
- @overload
- def __init__(
- self, obj_id: int, obj_size: int = 1, clock_time: int = 0, hv: int = 0, op: ReqOp = ReqOp.GET
- ) -> None: ...
- def __init__(
- self, obj_id: Optional[int] = None, obj_size: int = 1, clock_time: int = 0, hv: int = 0, op: ReqOp = ReqOp.GET
- ) -> None:
- """Create a request instance.
-
- Args:
- obj_id (int, optional): The object ID.
- obj_size (int): The object size. (default: 1)
- clock_time (int): The clock time. (default: 0)
- hv (int): The hash value. (default: 0)
- op (ReqOp): The operation. (default: ReqOp.GET)
-
- Returns:
- Request: A new request instance.
- """
-
-class Reader:
- n_read_req: int
- n_total_req: int
- trace_path: str
- file_size: int
- def get_wss(self, ignore_obj_size: bool = False) -> int: ...
- def seek(self, offset: int, from_beginning: bool = False) -> None: ...
- def __iter__(self) -> Reader: ...
- def __next__(self) -> Request: ...
-
-class PythonHookCache:
- n_req: int
- n_obj: int
- occupied_byte: int
- cache_size: int
-
- def __init__(self, cache_size: int, cache_name: str = "PythonHookCache") -> None: ...
- def set_hooks(
- self,
- init_hook: Callable[[int], Any],
- hit_hook: Callable[[Any, int, int], None],
- miss_hook: Callable[[Any, int, int], None],
- eviction_hook: Callable[[Any, int, int], int],
- remove_hook: Callable[[Any, int], None],
- free_hook: Optional[Callable[[Any], None]] = None,
- ) -> None: ...
- def get(self, req: Request) -> bool: ...
-
-# Base class for all eviction policies
-class EvictionPolicyBase:
- """Abstract base class for all eviction policies."""
- def get(self, req: Request) -> bool: ...
- def process_trace(self, reader: Reader, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: ...
- @property
- def n_req(self) -> int: ...
- @property
- def n_obj(self) -> int: ...
- @property
- def occupied_byte(self) -> int: ...
- @property
- def cache_size(self) -> int: ...
- def __repr__(self) -> str: ...
-
-# Eviction policy classes
-class ARC(EvictionPolicyBase):
- """Adaptive Replacement Cache policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class Belady(EvictionPolicyBase):
- """Belady replacement policy (optimal offline algorithm)."""
- def __init__(self, cache_size: int) -> None: ...
-
-class BeladySize(EvictionPolicyBase):
- """BeladySize replacement policy (optimal offline algorithm with size consideration)."""
- def __init__(self, cache_size: int) -> None: ...
-
-class Cacheus(EvictionPolicyBase):
- """Cacheus replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class Clock(EvictionPolicyBase):
- """Clock (Second Chance or FIFO-Reinsertion) replacement policy."""
- def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0) -> None: ...
-
-class FIFO(EvictionPolicyBase):
- """First In First Out replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class LeCaR(EvictionPolicyBase):
- """LeCaR (Learning Cache Replacement) adaptive replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class LFU(EvictionPolicyBase):
- """LFU (Least Frequently Used) replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class LFUDA(EvictionPolicyBase):
- """LFUDA (LFU with Dynamic Aging) replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class LRB(EvictionPolicyBase):
- """LRB (Learning Relaxed Belady) replacement policy."""
- def __init__(self, cache_size: int, objective: str = "byte-miss-ratio") -> None: ...
-
-class LRU(EvictionPolicyBase):
- """Least Recently Used replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class QDLP(EvictionPolicyBase):
- """QDLP (Queue Demotion with Lazy Promotion) replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class S3FIFO(EvictionPolicyBase):
- """S3FIFO replacement policy."""
- def __init__(
- self,
- cache_size: int,
- fifo_size_ratio: float = 0.1,
- ghost_size_ratio: float = 0.9,
- move_to_main_threshold: int = 2,
- ) -> None: ...
-
-class Sieve(EvictionPolicyBase):
- """Sieve replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class SLRU(EvictionPolicyBase):
- """SLRU (Segmented LRU) replacement policy."""
- def __init__(self, cache_size: int) -> None: ...
-
-class ThreeLCache(EvictionPolicyBase):
- """ThreeL cache replacement policy."""
- def __init__(self, cache_size: int, objective: str = "byte-miss-ratio") -> None: ...
-
-class TinyLFU(EvictionPolicyBase):
- """TinyLFU replacement policy."""
- def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01) -> None: ...
-
-class TwoQ(EvictionPolicyBase):
- """2Q replacement policy."""
- def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5) -> None: ...
-
-class WTinyLFU(EvictionPolicyBase):
- """WTinyLFU (Windowed TinyLFU) replacement policy."""
- def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01) -> None: ...
-
-class PythonHookCachePolicy(EvictionPolicyBase):
- """Python hook-based cache policy."""
- def __init__(self, cache_size: int, cache_name: str = "PythonHookCache") -> None: ...
- def set_hooks(
- self,
- init_hook: Callable[[int], Any],
- hit_hook: Callable[[Any, int, int], None],
- miss_hook: Callable[[Any, int, int], None],
- eviction_hook: Callable[[Any, int, int], int],
- remove_hook: Callable[[Any, int], None],
- free_hook: Optional[Callable[[Any], None]] = None,
- ) -> None: ...
diff --git a/libCacheSim-python/libcachesim/const.py b/libCacheSim-python/libcachesim/const.py
deleted file mode 100644
index 9d48db4f..00000000
--- a/libCacheSim-python/libcachesim/const.py
+++ /dev/null
@@ -1 +0,0 @@
-from __future__ import annotations
diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py
deleted file mode 100644
index 63599ec0..00000000
--- a/libCacheSim-python/libcachesim/eviction.py
+++ /dev/null
@@ -1,713 +0,0 @@
-"""Registry of eviction policies."""
-
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-
-from ._libcachesim import (
- ARC_init,
- Belady_init,
- BeladySize_init,
- Cacheus_init,
- Cache,
- Clock_init,
- FIFO_init,
- LeCaR_init,
- LFU_init,
- LFUDA_init,
- LRB_init,
- LRU_init,
- QDLP_init,
- Reader,
- Request,
- S3FIFO_init,
- Sieve_init,
- SLRU_init,
- ThreeLCache_init,
- TinyLFU_init,
- TwoQ_init,
- WTinyLFU_init,
- PythonHookCache,
-)
-
-from .trace_generator import _ZipfRequestGenerator, _UniformRequestGenerator
-
-# Define generator types once to avoid repeated tuple creation
-_GENERATOR_TYPES = (_ZipfRequestGenerator, _UniformRequestGenerator)
-
-
-class EvictionPolicyBase(ABC):
- """Abstract base class for all eviction policies."""
-
- @abstractmethod
- def get(self, req: Request) -> bool:
- pass
-
- @abstractmethod
- def __repr__(self) -> str:
- pass
-
- @abstractmethod
- def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]:
- """Process a trace with this cache and return miss ratio.
-
- This method processes trace data entirely on the C++ side to avoid
- data movement overhead between Python and C++.
-
- Args:
- reader: The trace reader instance
- start_req: Start request index (-1 for no limit)
- max_req: Number of requests to process (-1 for no limit)
-
- Returns:
- tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0)
- """
- pass
-
-
-class EvictionPolicy(EvictionPolicyBase):
- """Base class for all eviction policies."""
-
- def __init__(self, cache_size: int, **kwargs) -> None:
- self.cache: Cache = self.init_cache(cache_size, **kwargs)
-
- @abstractmethod
- def init_cache(self, cache_size: int, **kwargs) -> Cache:
- pass
-
- def get(self, req: Request) -> bool:
- return self.cache.get(req)
-
- def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]:
- """Process a trace with this cache and return miss ratio.
-
- This method processes trace data entirely on the C++ side to avoid
- data movement overhead between Python and C++.
-
- Args:
- reader: The trace reader instance
- start_req: Start request index (-1 for no limit)
- max_req: Number of requests to process (-1 for no limit)
-
- Returns:
- tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0)
- Example:
- >>> cache = LRU(1024*1024)
- >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE)
- >>> obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
- >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}")
- """
- obj_miss_ratio = 0.0
- byte_miss_ratio = 0.0
- if not isinstance(reader, Reader):
- # streaming generator
- if isinstance(reader, _GENERATOR_TYPES):
- miss_cnt = 0
- byte_miss_cnt = 0
- total_byte = 0
- for req in reader:
- hit = self.get(req)
- total_byte += req.obj_size
- if not hit:
- miss_cnt += 1
- byte_miss_cnt += req.obj_size
- obj_miss_ratio = miss_cnt / len(reader) if len(reader) > 0 else 0.0
- byte_miss_ratio = byte_miss_cnt / total_byte if total_byte > 0 else 0.0
- return obj_miss_ratio, byte_miss_ratio
- else:
- from ._libcachesim import process_trace
-
- obj_miss_ratio, byte_miss_ratio = process_trace(self.cache, reader, start_req, max_req)
-
- return obj_miss_ratio, byte_miss_ratio
-
- def __repr__(self):
- return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})"
-
- @property
- def n_req(self):
- """Number of requests processed."""
- return self.cache.n_req
-
- @property
- def n_obj(self):
- """Number of objects currently in cache."""
- return self.cache.n_obj
-
- @property
- def occupied_byte(self):
- """Number of bytes currently occupied in cache."""
- return self.cache.occupied_byte
-
- @property
- def cache_size(self):
- """Total cache size in bytes."""
- return self.cache.cache_size
-
-
-class FIFO(EvictionPolicy):
- """First In First Out replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs) -> Cache: # noqa: ARG002
- return FIFO_init(cache_size)
-
-
-class Clock(EvictionPolicy):
- """Clock (Second Chance or FIFO-Reinsertion) replacement policy.
-
- Args:
- cache_size: Size of the cache
- n_bit_counter: Number of bits for counter (default: 1)
- init_freq: Initial frequency value (default: 0)
- """
-
- def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0):
- super().__init__(cache_size, n_bit_counter=n_bit_counter, init_freq=init_freq)
-
- def init_cache(self, cache_size: int, **kwargs):
- init_freq = kwargs.get("init_freq", 0)
- n_bit_counter = kwargs.get("n_bit_counter", 1)
-
- if n_bit_counter < 1 or n_bit_counter > 32:
- msg = "n_bit_counter must be between 1 and 32"
- raise ValueError(msg)
- if init_freq < 0 or init_freq > 2**n_bit_counter - 1:
- msg = "init_freq must be between 0 and 2^n_bit_counter - 1"
- raise ValueError(msg)
-
- self.init_freq = init_freq
- self.n_bit_counter = n_bit_counter
-
- return Clock_init(cache_size, n_bit_counter, init_freq)
-
- def __repr__(self):
- return (
- f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, "
- f"n_bit_counter={self.n_bit_counter}, "
- f"init_freq={self.init_freq})"
- )
-
-
-class TwoQ(EvictionPolicy):
- """2Q replacement policy.
-
- 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be
- inserted into Am otherwise it will be inserted into Ain.
-
- Args:
- cache_size: Total size of the cache
- ain_size_ratio: Size ratio for Ain queue (default: 0.25)
- aout_size_ratio: Size ratio for Aout queue (default: 0.5)
- """
-
- def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5):
- super().__init__(cache_size, ain_size_ratio=ain_size_ratio, aout_size_ratio=aout_size_ratio)
-
- def init_cache(self, cache_size: int, **kwargs):
- ain_size_ratio = kwargs.get("ain_size_ratio", 0.25)
- aout_size_ratio = kwargs.get("aout_size_ratio", 0.5)
-
- if ain_size_ratio <= 0 or aout_size_ratio <= 0:
- msg = "ain_size_ratio and aout_size_ratio must be greater than 0"
- raise ValueError(msg)
-
- self.ain_size_ratio = ain_size_ratio
- self.aout_size_ratio = aout_size_ratio
-
- return TwoQ_init(cache_size, ain_size_ratio, aout_size_ratio)
-
- def __repr__(self):
- return (
- f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, "
- f"ain_size_ratio={self.ain_size_ratio}, "
- f"aout_size_ratio={self.aout_size_ratio})"
- )
-
-
-class LRB(EvictionPolicy):
- """LRB (Learning Relaxed Belady) replacement policy.
-
- LRB is a learning-based replacement policy that uses a neural network to
- predict the future access patterns of the cache, randomly select one obj
- outside the Belady boundary to evict.
-
- Args:
- cache_size: Size of the cache
- objective: Objective function to optimize (default: "byte-miss-ratio")
- """
-
- def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"):
- super().__init__(cache_size, objective=objective)
-
- def init_cache(self, cache_size: int, **kwargs) -> Cache:
- objective = kwargs.get("objective", "byte-miss-ratio")
-
- if objective not in ["byte-miss-ratio", "byte-hit-ratio"]:
- msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'"
- raise ValueError(msg)
-
- self.objective = objective
-
- return LRB_init(cache_size, objective)
-
- def __repr__(self):
- return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, objective={self.objective})"
-
-
-class LRU(EvictionPolicy):
- """Least Recently Used replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return LRU_init(cache_size)
-
-
-class ARC(EvictionPolicy):
- """Adaptive Replacement Cache policy.
-
- ARC is a two-tiered cache with two LRU caches (T1 and T2) and two ghost
- lists (B1 and B2). T1 records the obj accessed only once, T2 records
- the obj accessed more than once. ARC has an internal parameter `p` to
- learn and dynamically control the size of T1 and T2.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return ARC_init(cache_size)
-
-
-class S3FIFO(EvictionPolicy):
- """S3FIFO replacement policy.
-
- S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small
- queue gets the obj and records the freq.
- When small queue is full, if the obj to evict satisfies the threshold,
- it will be moved to main queue. Otherwise, it will be evicted from small
- queue and inserted into ghost queue.
- When main queue is full, the obj to evict will be evicted and reinserted
- like Clock.
- If obj hits in the ghost queue, it will be moved to main queue.
-
- Args:
- cache_size: Size of the cache
- fifo_size_ratio: Size ratio for FIFO queue (default: 0.1)
- ghost_size_ratio: Size ratio for ghost queue (default: 0.9)
- move_to_main_threshold: Threshold for moving obj from ghost to main (default: 2)
- """
-
- def __init__(
- self,
- cache_size: int,
- fifo_size_ratio: float = 0.1,
- ghost_size_ratio: float = 0.9,
- move_to_main_threshold: int = 2,
- ):
- super().__init__(
- cache_size,
- fifo_size_ratio=fifo_size_ratio,
- ghost_size_ratio=ghost_size_ratio,
- move_to_main_threshold=move_to_main_threshold,
- )
-
- def init_cache(self, cache_size: int, **kwargs):
- fifo_size_ratio = kwargs.get("fifo_size_ratio", 0.1)
- ghost_size_ratio = kwargs.get("ghost_size_ratio", 0.9)
- move_to_main_threshold = kwargs.get("move_to_main_threshold", 2)
-
- if fifo_size_ratio <= 0 or ghost_size_ratio <= 0:
- msg = "fifo_size_ratio and ghost_size_ratio must be greater than 0"
- raise ValueError(msg)
- if move_to_main_threshold < 0:
- msg = "move_to_main_threshold must be greater or equal to 0"
- raise ValueError(msg)
-
- self.fifo_size_ratio = fifo_size_ratio
- self.ghost_size_ratio = ghost_size_ratio
- self.move_to_main_threshold = move_to_main_threshold
-
- return S3FIFO_init(cache_size, fifo_size_ratio, ghost_size_ratio, move_to_main_threshold)
-
- def __repr__(self):
- return (
- f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, "
- f"fifo_size_ratio={self.fifo_size_ratio}, "
- f"ghost_size_ratio={self.ghost_size_ratio}, "
- f"move_to_main_threshold={self.move_to_main_threshold})"
- )
-
-
-class Sieve(EvictionPolicy):
- """Sieve replacement policy.
-
- FIFO-Reinsertion with check pointer.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return Sieve_init(cache_size)
-
-
-class ThreeLCache(EvictionPolicy):
- """3L-Cache replacement policy.
-
- Args:
- cache_size: Size of the cache
- objective: Objective function to optimize (default: "byte-miss-ratio")
- """
-
- def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"):
- super().__init__(cache_size, objective=objective)
-
- def init_cache(self, cache_size: int, **kwargs):
- objective = kwargs.get("objective", "byte-miss-ratio")
-
- if objective not in ["byte-miss-ratio", "byte-hit-ratio"]:
- msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'"
- raise ValueError(msg)
-
- self.objective = objective
-
- return ThreeLCache_init(cache_size, objective)
-
- def __repr__(self):
- return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, objective={self.objective})"
-
-
-class TinyLFU(EvictionPolicy):
- """TinyLFU replacement policy.
-
- Args:
- cache_size: Size of the cache
- main_cache: Main cache to use (default: "SLRU")
- window_size: Window size for TinyLFU (default: 0.01)
- """
-
- def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01):
- super().__init__(cache_size, main_cache=main_cache, window_size=window_size)
-
- def init_cache(self, cache_size: int, **kwargs):
- main_cache = kwargs.get("main_cache", "SLRU")
- window_size = kwargs.get("window_size", 0.01)
-
- if window_size <= 0:
- msg = "window_size must be greater than 0"
- raise ValueError(msg)
-
- self.main_cache = main_cache
- self.window_size = window_size
-
- return TinyLFU_init(cache_size, main_cache, window_size)
-
- def __repr__(self):
- return (
- f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, "
- f"main_cache={self.main_cache}, "
- f"window_size={self.window_size})"
- )
-
-
-class LFU(EvictionPolicy):
- """LFU (Least Frequently Used) replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return LFU_init(cache_size)
-
-
-class LFUDA(EvictionPolicy):
- """LFUDA (LFU with Dynamic Aging) replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return LFUDA_init(cache_size)
-
-
-class SLRU(EvictionPolicy):
- """SLRU (Segmented LRU) replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return SLRU_init(cache_size)
-
-
-class Belady(EvictionPolicy):
- """Belady replacement policy (optimal offline algorithm).
-
- Note: Requires oracle trace with future access information.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return Belady_init(cache_size)
-
-
-class BeladySize(EvictionPolicy):
- """BeladySize replacement policy (optimal offline algorithm with size consideration).
-
- Note: Requires oracle trace with future access information.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return BeladySize_init(cache_size)
-
-
-class QDLP(EvictionPolicy):
- """QDLP (Queue Demotion with Lazy Promotion) replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return QDLP_init(cache_size)
-
-
-class LeCaR(EvictionPolicy):
- """LeCaR (Learning Cache Replacement) adaptive replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return LeCaR_init(cache_size)
-
-
-class Cacheus(EvictionPolicy):
- """Cacheus replacement policy.
-
- Args:
- cache_size: Size of the cache
- """
-
- def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002
- return Cacheus_init(cache_size)
-
-
-class WTinyLFU(EvictionPolicy):
- """WTinyLFU (Windowed TinyLFU) replacement policy.
-
- Args:
- cache_size: Size of the cache
- main_cache: Main cache to use (default: "SLRU")
- window_size: Window size for TinyLFU (default: 0.01)
- """
-
- def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01):
- super().__init__(cache_size, main_cache=main_cache, window_size=window_size)
-
- def init_cache(self, cache_size: int, **kwargs):
- main_cache = kwargs.get("main_cache", "SLRU")
- window_size = kwargs.get("window_size", 0.01)
-
- if window_size <= 0:
- msg = "window_size must be greater than 0"
- raise ValueError(msg)
-
- self.main_cache = main_cache
- self.window_size = window_size
-
- return WTinyLFU_init(cache_size, main_cache, window_size)
-
- def __repr__(self):
- return (
- f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, "
- f"main_cache={self.main_cache}, "
- f"window_size={self.window_size})"
- )
-
-
-class PythonHookCachePolicy(EvictionPolicyBase):
- """Python hook-based cache that allows defining custom policies using Python functions.
-
- This cache implementation allows users to define custom cache replacement algorithms
- using pure Python functions instead of compiling C/C++ plugins. Users provide hook
- functions for cache initialization, hit handling, miss handling, eviction decisions,
- and cleanup.
-
- Args:
- cache_size: Size of the cache in bytes
- cache_name: Optional name for the cache (default: "PythonHookCache")
-
- Hook Functions Required:
- init_hook(cache_size: int) -> Any:
- Initialize plugin data structures. Return any object to be passed to other hooks.
-
- hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None:
- Handle cache hit events. Update internal state as needed.
-
- miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None:
- Handle cache miss events. Update internal state for new object.
-
- eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int:
- Determine which object to evict. Return the object ID to be evicted.
-
- remove_hook(plugin_data: Any, obj_id: int) -> None:
- Clean up when objects are removed from cache.
-
- free_hook(plugin_data: Any) -> None: [Optional]
- Clean up plugin resources when cache is destroyed.
-
- Example:
- >>> from collections import OrderedDict
- >>>
- >>> cache = PythonHookCachePolicy(1024)
- >>>
- >>> def init_hook(cache_size):
- ... return OrderedDict() # LRU tracking
- >>>
- >>> def hit_hook(lru_dict, obj_id, obj_size):
- ... lru_dict.move_to_end(obj_id) # Move to end (most recent)
- >>>
- >>> def miss_hook(lru_dict, obj_id, obj_size):
- ... lru_dict[obj_id] = True # Add to end
- >>>
- >>> def eviction_hook(lru_dict, obj_id, obj_size):
- ... return next(iter(lru_dict)) # Return least recent
- >>>
- >>> def remove_hook(lru_dict, obj_id):
- ... lru_dict.pop(obj_id, None)
- >>>
- >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- >>>
- >>> req = Request()
- >>> req.obj_id = 1
- >>> req.obj_size = 100
- >>> hit = cache.get(req)
- """
-
- def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"):
- self._cache_size = cache_size
- self.cache_name = cache_name
- self.cache = PythonHookCache(cache_size, cache_name)
- self._hooks_set = False
-
- def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None):
- """Set the hook functions for the cache.
-
- Args:
- init_hook: Function called during cache initialization
- hit_hook: Function called on cache hit
- miss_hook: Function called on cache miss
- eviction_hook: Function called to select eviction candidate
- remove_hook: Function called when object is removed
- free_hook: Optional function called during cache cleanup
- """
- self.cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook)
- self._hooks_set = True
-
- def get(self, req: Request) -> bool:
- """Process a cache request.
-
- Args:
- req: The cache request to process
-
- Returns:
- True if cache hit, False if cache miss
-
- Raises:
- RuntimeError: If hooks have not been set
- """
- if not self._hooks_set:
- raise RuntimeError("Hooks must be set before using the cache. Call set_hooks() first.")
- return self.cache.get(req)
-
- def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]:
- """Process a trace with this cache and return miss ratio.
-
- This method processes trace data entirely on the C++ side to avoid
- data movement overhead between Python and C++.
-
- Args:
- reader: The trace reader instance
- start_req: Start request index (-1 for no limit)
- n_req: Number of requests to process (-1 for no limit)
-
- Returns:
- tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0)
-
- Raises:
- RuntimeError: If hooks have not been set
-
- Example:
- >>> cache = PythonHookCachePolicy(1024*1024)
- >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE)
- >>> obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
- >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}")
- """
- if not self._hooks_set:
- raise RuntimeError("Hooks must be set before processing trace. Call set_hooks() first.")
- obj_miss_ratio = 0.0
- byte_miss_ratio = 0.0
- if not isinstance(reader, Reader):
- # streaming generator
- if isinstance(reader, _GENERATOR_TYPES):
- miss_cnt = 0
- byte_miss_cnt = 0
- total_byte = 0
- for req in reader:
- hit = self.get(req)
- total_byte += req.obj_size
- if not hit:
- miss_cnt += 1
- byte_miss_cnt += req.obj_size
- obj_miss_ratio = miss_cnt / len(reader) if len(reader) > 0 else 0.0
- byte_miss_ratio = byte_miss_cnt / total_byte if total_byte > 0 else 0.0
- return obj_miss_ratio, byte_miss_ratio
- else:
- from ._libcachesim import process_trace_python_hook
-
- obj_miss_ratio, byte_miss_ratio = process_trace_python_hook(self.cache, reader, start_req, max_req)
- return obj_miss_ratio, byte_miss_ratio
-
- @property
- def n_req(self):
- """Number of requests processed."""
- return self.cache.n_req
-
- @property
- def n_obj(self):
- """Number of objects currently in cache."""
- return self.cache.n_obj
-
- @property
- def occupied_byte(self):
- """Number of bytes currently occupied in cache."""
- return self.cache.occupied_byte
-
- @property
- def cache_size(self):
- """Total cache size in bytes."""
- return self.cache.cache_size
-
- def __repr__(self):
- return (
- f"{self.__class__.__name__}(cache_size={self._cache_size}, "
- f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})"
- )
diff --git a/libCacheSim-python/libcachesim/trace_generator.py b/libCacheSim-python/libcachesim/trace_generator.py
deleted file mode 100644
index 8c580224..00000000
--- a/libCacheSim-python/libcachesim/trace_generator.py
+++ /dev/null
@@ -1,215 +0,0 @@
-"""
-Trace generator module for libCacheSim Python bindings.
-
-This module provides functions to generate synthetic traces with different distributions.
-"""
-
-import numpy as np
-import random
-from typing import Optional
-from collections.abc import Iterator
-from ._libcachesim import Request, ReqOp
-
-
-def _gen_zipf(m: int, alpha: float, n: int, start: int = 0) -> np.ndarray:
- """Generate zipf distributed workload (internal function).
-
- Args:
- m (int): The number of objects
- alpha (float): The skewness parameter (alpha >= 0)
- n (int): The number of requests
- start (int, optional): Start object ID. Defaults to 0.
-
- Returns:
- np.ndarray: Array of object IDs following Zipf distribution
- """
- if m <= 0 or n <= 0:
- raise ValueError("num_objects and num_requests must be positive")
- if alpha < 0:
- raise ValueError("alpha must be non-negative")
- np_tmp = np.power(np.arange(1, m + 1), -alpha)
- np_zeta = np.cumsum(np_tmp)
- dist_map = np_zeta / np_zeta[-1]
- r = np.random.uniform(0, 1, n)
- return np.searchsorted(dist_map, r) + start
-
-
-def _gen_uniform(m: int, n: int, start: int = 0) -> np.ndarray:
- """Generate uniform distributed workload (internal function).
-
- Args:
- m (int): The number of objects
- n (int): The number of requests
- start (int, optional): Start object ID. Defaults to 0.
-
- Returns:
- np.ndarray: Array of object IDs following uniform distribution
- """
- if m <= 0 or n <= 0:
- raise ValueError("num_objects and num_requests must be positive")
- return np.random.uniform(0, m, n).astype(int) + start
-
-
-class _ZipfRequestGenerator:
- """Zipf-distributed request generator (internal class)."""
-
- def __init__(
- self,
- num_objects: int,
- num_requests: int,
- alpha: float = 1.0,
- obj_size: int = 4000,
- time_span: int = 86400 * 7,
- start_obj_id: int = 0,
- seed: Optional[int] = None,
- ):
- """Initialize Zipf request generator.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Number of requests to generate
- alpha (float): Zipf skewness parameter (alpha >= 0)
- obj_size (int): Object size in bytes
- time_span (int): Time span in seconds
- start_obj_id (int): Starting object ID
- seed (int, optional): Random seed for reproducibility
- """
- self.num_requests = num_requests
- self.obj_size = obj_size
- self.time_span = time_span
-
- # Set random seed if provided
- if seed is not None:
- np.random.seed(seed)
- random.seed(seed)
-
- # Pre-generate object IDs
- self.obj_ids = _gen_zipf(num_objects, alpha, num_requests, start_obj_id)
-
- def __iter__(self) -> Iterator[Request]:
- """Iterate over generated requests."""
- for i, obj_id in enumerate(self.obj_ids):
- req = Request()
- req.clock_time = i * self.time_span // self.num_requests
- req.obj_id = obj_id
- req.obj_size = self.obj_size
- req.op = ReqOp.READ # Default operation
- yield req
-
- def __len__(self) -> int:
- """Return the number of requests."""
- return self.num_requests
-
-
-class _UniformRequestGenerator:
- """Uniform-distributed request generator (internal class)."""
-
- def __init__(
- self,
- num_objects: int,
- num_requests: int,
- obj_size: int = 4000,
- time_span: int = 86400 * 7,
- start_obj_id: int = 0,
- seed: Optional[int] = None,
- ):
- """Initialize uniform request generator.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Number of requests to generate
- obj_size (int): Object size in bytes
- time_span (int): Time span in seconds
- start_obj_id (int): Starting object ID
- seed (int, optional): Random seed for reproducibility
- """
- self.num_requests = num_requests
- self.obj_size = obj_size
- self.time_span = time_span
-
- # Set random seed if provided
- if seed is not None:
- np.random.seed(seed)
- random.seed(seed)
-
- # Pre-generate object IDs
- self.obj_ids = _gen_uniform(num_objects, num_requests, start_obj_id)
-
- def __iter__(self) -> Iterator[Request]:
- """Iterate over generated requests."""
- for i, obj_id in enumerate(self.obj_ids):
- req = Request()
- req.clock_time = i * self.time_span // self.num_requests
- req.obj_id = obj_id
- req.obj_size = self.obj_size
- req.op = ReqOp.READ # Default operation
- yield req
-
- def __len__(self) -> int:
- """Return the number of requests."""
- return self.num_requests
-
-
-def create_zipf_requests(
- num_objects: int,
- num_requests: int,
- alpha: float = 1.0,
- obj_size: int = 4000,
- time_span: int = 86400 * 7,
- start_obj_id: int = 0,
- seed: Optional[int] = None,
-) -> _ZipfRequestGenerator:
- """Create a Zipf-distributed request generator.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Number of requests to generate
- alpha (float): Zipf skewness parameter (alpha >= 0)
- obj_size (int): Object size in bytes
- time_span (int): Time span in seconds
- start_obj_id (int): Starting object ID
- seed (int, optional): Random seed for reproducibility
-
- Returns:
- Generator: A generator that yields Request objects
- """
- return _ZipfRequestGenerator(
- num_objects=num_objects,
- num_requests=num_requests,
- alpha=alpha,
- obj_size=obj_size,
- time_span=time_span,
- start_obj_id=start_obj_id,
- seed=seed,
- )
-
-
-def create_uniform_requests(
- num_objects: int,
- num_requests: int,
- obj_size: int = 4000,
- time_span: int = 86400 * 7,
- start_obj_id: int = 0,
- seed: Optional[int] = None,
-) -> _UniformRequestGenerator:
- """Create a uniform-distributed request generator.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Number of requests to generate
- obj_size (int): Object size in bytes
- time_span (int): Time span in seconds
- start_obj_id (int): Starting object ID
- seed (int, optional): Random seed for reproducibility
-
- Returns:
- Generator: A generator that yields Request objects
- """
- return _UniformRequestGenerator(
- num_objects=num_objects,
- num_requests=num_requests,
- obj_size=obj_size,
- time_span=time_span,
- start_obj_id=start_obj_id,
- seed=seed,
- )
diff --git a/libCacheSim-python/pyproject.toml b/libCacheSim-python/pyproject.toml
deleted file mode 100644
index 45eb26f1..00000000
--- a/libCacheSim-python/pyproject.toml
+++ /dev/null
@@ -1,111 +0,0 @@
-[build-system]
-requires = ["scikit-build-core>=0.10", "pybind11"]
-build-backend = "scikit_build_core.build"
-
-
-[project]
-name = "libcachesim"
-version = "0.3.2"
-description="Python bindings for libCacheSim"
-readme = "README.md"
-requires-python = ">=3.9"
-keywords = ["performance", "cache", "simulator"]
-classifiers = [
- "Intended Audience :: Developers",
- "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Programming Language :: Python :: 3.12",
- "Programming Language :: Python :: 3.13",
-]
-dependencies = [
- "numpy>=1.20.0",
-]
-
-[project.optional-dependencies]
-test = ["pytest"]
-dev = [
- "pytest",
- "pre-commit",
- "ruff>=0.7.0",
- "mypy>=1.0.0",
-]
-
-
-[tool.scikit-build]
-wheel.expand-macos-universal-tags = true
-
-[tool.pytest.ini_options]
-minversion = "8.0"
-addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config", "-m", "not optional"]
-xfail_strict = true
-log_cli_level = "INFO"
-filterwarnings = [
- "error",
- "ignore::pytest.PytestCacheWarning",
-]
-testpaths = ["tests"]
-markers = [
- "optional: mark test as optional",
-]
-python_files = ["test.py", "test_*.py", "*_test.py"]
-python_classes = ["Test*"]
-python_functions = ["test_*"]
-
-
-[tool.cibuildwheel]
-manylinux-x86_64-image = "quay.io/pypa/manylinux_2_34_x86_64"
-manylinux-aarch64-image = "quay.io/pypa/manylinux_2_34_aarch64"
-
-build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"]
-skip = ["*-win32", "*-manylinux_i686", "*-musllinux*", "pp*"]
-
-# Set the environment variable for the wheel build step.
-environment = { LCS_BUILD_DIR = "{project}/build", MACOSX_DEPLOYMENT_TARGET = "14.0" }
-
-# Test that the wheel can be imported
-test-command = "python -c 'import libcachesim; print(\"Import successful\")'"
-
-[tool.cibuildwheel.linux]
-before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel"
-before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja && cmake --build {project}/build"
-
-[tool.cibuildwheel.macos]
-before-all = "brew install glib google-perftools argp-standalone xxhash llvm wget cmake ninja zstd xgboost lightgbm"
-before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 && cmake --build {project}/build"
-
-[tool.ruff]
-# Allow lines to be as long as 120.
-line-length = 120
-
-[tool.ruff.lint]
-select = [
- # pycodestyle
- "E",
- # Pyflakes
- "F",
- # pyupgrade
- "UP",
- # flake8-bugbear
- "B",
- # flake8-simplify
- "SIM",
- # isort
- # "I",
- # flake8-logging-format
- "G",
-]
-ignore = [
- # star imports
- "F405", "F403",
- # lambda expression assignment
- "E731",
- # Loop control variable not used within loop body
- "B007",
- # f-string format
- "UP032",
- # Can remove once 3.10+ is the minimum Python version
- "UP007",
- "UP045"
-]
diff --git a/libCacheSim-python/requirements.txt b/libCacheSim-python/requirements.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp
deleted file mode 100644
index 5ca90ca2..00000000
--- a/libCacheSim-python/src/pylibcachesim.cpp
+++ /dev/null
@@ -1,1223 +0,0 @@
-#include
-#include
-#include
-
-// Suppress visibility warnings for pybind11 types
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wattributes"
-
-#include
-#include
-#include
-
-#include "config.h"
-#include "libCacheSim/cache.h"
-#include "libCacheSim/cacheObj.h"
-#include "libCacheSim/const.h"
-#include "libCacheSim/enum.h"
-#include "libCacheSim/logging.h"
-#include "libCacheSim/macro.h"
-#include "libCacheSim/reader.h"
-#include "libCacheSim/request.h"
-#include "libCacheSim/sampling.h"
-#include "mystr.h"
-
-/* admission */
-#include "libCacheSim/admissionAlgo.h"
-
-/* eviction */
-#include "libCacheSim/evictionAlgo.h"
-
-/* cache simulator */
-#include "libCacheSim/profilerLRU.h"
-#include "libCacheSim/simulator.h"
-
-/* bin */
-#include "cachesim/cache_init.h"
-#include "cli_reader_utils.h"
-
-#define STRINGIFY(x) #x
-#define MACRO_STRINGIFY(x) STRINGIFY(x)
-
-namespace py = pybind11;
-
-// Helper functions
-
-// https://stackoverflow.com/questions/874134/find-out-if-string-ends-with-another-string-in-c
-static bool ends_with(std::string_view str, std::string_view suffix) {
- return str.size() >= suffix.size() &&
- str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
-}
-
-trace_type_e infer_trace_type(const std::string& trace_path) {
- // Infer the trace type based on the file extension
- if (trace_path.find("oracleGeneral") != std::string::npos) {
- return trace_type_e::ORACLE_GENERAL_TRACE;
- } else if (ends_with(trace_path, ".csv")) {
- return trace_type_e::CSV_TRACE;
- } else if (ends_with(trace_path, ".txt")) {
- return trace_type_e::PLAIN_TXT_TRACE;
- } else if (ends_with(trace_path, ".bin")) {
- return trace_type_e::BIN_TRACE;
- } else if (ends_with(trace_path, ".vscsi")) {
- return trace_type_e::VSCSI_TRACE;
- } else if (ends_with(trace_path, ".twr")) {
- return trace_type_e::TWR_TRACE;
- } else if (ends_with(trace_path, ".twrns")) {
- return trace_type_e::TWRNS_TRACE;
- } else if (ends_with(trace_path, ".lcs")) {
- return trace_type_e::LCS_TRACE;
- } else if (ends_with(trace_path, ".valpin")) {
- return trace_type_e::VALPIN_TRACE;
- } else {
- return trace_type_e::UNKNOWN_TRACE;
- }
-}
-
-// Python Hook Cache Implementation
-class PythonHookCache {
- private:
- uint64_t cache_size_;
- std::string cache_name_;
- std::unordered_map objects_; // obj_id -> obj_size
- py::object plugin_data_;
-
- // Hook functions
- py::function init_hook_;
- py::function hit_hook_;
- py::function miss_hook_;
- py::function eviction_hook_;
- py::function remove_hook_;
- py::object free_hook_; // Changed to py::object to allow py::none()
-
- public:
- uint64_t n_req = 0;
- uint64_t n_obj = 0;
- uint64_t occupied_byte = 0;
- uint64_t cache_size;
-
- PythonHookCache(uint64_t cache_size,
- const std::string& cache_name = "PythonHookCache")
- : cache_size_(cache_size),
- cache_name_(cache_name),
- cache_size(cache_size),
- free_hook_(py::none()) {}
-
- void set_hooks(py::function init_hook, py::function hit_hook,
- py::function miss_hook, py::function eviction_hook,
- py::function remove_hook, py::object free_hook = py::none()) {
- init_hook_ = init_hook;
- hit_hook_ = hit_hook;
- miss_hook_ = miss_hook;
- eviction_hook_ = eviction_hook;
- remove_hook_ = remove_hook;
-
- // Handle free_hook properly
- if (!free_hook.is_none()) {
- free_hook_ = free_hook;
- } else {
- free_hook_ = py::none();
- }
-
- // Initialize plugin data
- plugin_data_ = init_hook_(cache_size_);
- }
-
- bool get(const request_t& req) {
- n_req++;
-
- auto it = objects_.find(req.obj_id);
- if (it != objects_.end()) {
- // Cache hit
- hit_hook_(plugin_data_, req.obj_id, req.obj_size);
- return true;
- } else {
- // Cache miss - call miss hook first
- miss_hook_(plugin_data_, req.obj_id, req.obj_size);
-
- // Check if eviction is needed
- while (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) {
- // Need to evict
- uint64_t victim_id =
- eviction_hook_(plugin_data_, req.obj_id, req.obj_size)
- .cast();
- auto victim_it = objects_.find(victim_id);
- if (victim_it != objects_.end()) {
- occupied_byte -= victim_it->second;
- objects_.erase(victim_it);
- n_obj--;
- remove_hook_(plugin_data_, victim_id);
- } else {
- // Safety check: if eviction hook returns invalid ID, break to avoid
- // infinite loop
- break;
- }
- }
-
- // Insert new object if there's space
- if (occupied_byte + req.obj_size <= cache_size_) {
- objects_[req.obj_id] = req.obj_size;
- occupied_byte += req.obj_size;
- n_obj++;
- }
-
- return false;
- }
- }
-
- ~PythonHookCache() {
- if (!free_hook_.is_none()) {
- py::function free_func = free_hook_.cast();
- free_func(plugin_data_);
- }
- }
-};
-
-// Restore visibility warnings
-#pragma GCC diagnostic pop
-
-struct CacheDeleter {
- void operator()(cache_t* ptr) const {
- if (ptr != nullptr) ptr->cache_free(ptr);
- }
-};
-
-struct RequestDeleter {
- void operator()(request_t* ptr) const {
- if (ptr != nullptr) free_request(ptr);
- }
-};
-
-struct ReaderDeleter {
- void operator()(reader_t* ptr) const {
- if (ptr != nullptr) close_trace(ptr);
- }
-};
-
-PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter)
- m.doc() = R"pbdoc(
- libCacheSim Python bindings
- --------------------------
-
- .. currentmodule:: libcachesim
-
- .. autosummary::
- :toctree: _generate
-
- TODO(haocheng): add meaningful methods
- )pbdoc";
-
- py::enum_(m, "TraceType")
- .value("CSV_TRACE", trace_type_e::CSV_TRACE)
- .value("BIN_TRACE", trace_type_e::BIN_TRACE)
- .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE)
- .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE)
- .value("LCS_TRACE", trace_type_e::LCS_TRACE)
- .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE)
- .value("TWR_TRACE", trace_type_e::TWR_TRACE)
- .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE)
- .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE)
- .value("ORACLE_SYS_TWR_TRACE", trace_type_e::ORACLE_SYS_TWR_TRACE)
- .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE)
- .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE)
- .value("VALPIN_TRACE", trace_type_e::VALPIN_TRACE)
- .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE)
- .export_values();
-
- py::enum_(m, "ReqOp")
- .value("NOP", req_op_e::OP_NOP)
- .value("GET", req_op_e::OP_GET)
- .value("GETS", req_op_e::OP_GETS)
- .value("SET", req_op_e::OP_SET)
- .value("ADD", req_op_e::OP_ADD)
- .value("CAS", req_op_e::OP_CAS)
- .value("REPLACE", req_op_e::OP_REPLACE)
- .value("APPEND", req_op_e::OP_APPEND)
- .value("PREPEND", req_op_e::OP_PREPEND)
- .value("DELETE", req_op_e::OP_DELETE)
- .value("INCR", req_op_e::OP_INCR)
- .value("DECR", req_op_e::OP_DECR)
- .value("READ", req_op_e::OP_READ)
- .value("WRITE", req_op_e::OP_WRITE)
- .value("UPDATE", req_op_e::OP_UPDATE)
- .value("INVALID", req_op_e::OP_INVALID)
- .export_values();
-
- // *************** structs ***************
- /**
- * @brief Cache structure
- */
- py::class_>(m, "Cache")
- .def_readwrite("n_req", &cache_t::n_req)
- .def_readwrite("cache_size", &cache_t::cache_size)
- // Use proper accessor functions for private fields
- .def_property_readonly(
- "n_obj", [](const cache_t& self) { return self.get_n_obj(&self); })
- .def_property_readonly(
- "occupied_byte",
- [](const cache_t& self) { return self.get_occupied_byte(&self); })
- // methods
- .def("get", [](cache_t& self, const request_t& req) {
- return self.get(&self, &req);
- });
-
- /**
- * @brief Request structure
- */
- py::class_>(m,
- "Request")
- .def(py::init([]() { return new_request(); }))
- .def(py::init([](uint64_t obj_id, uint64_t obj_size, uint64_t clock_time,
- uint64_t hv, req_op_e op) {
- request_t* req = new_request();
- req->obj_id = obj_id;
- req->obj_size = obj_size;
- req->clock_time = clock_time;
- req->hv = hv;
- req->op = op;
- return req;
- }),
- py::arg("obj_id"), py::arg("obj_size") = 1,
- py::arg("clock_time") = 0, py::arg("hv") = 0,
- py::arg("op") = req_op_e::OP_GET,
- R"pbdoc(
- Create a request instance.
-
- Args:
- obj_id (int): The object ID.
- obj_size (int): The object size. (default: 1)
- clock_time (int): The clock time. (default: 0)
- hv (int): The hash value. (default: 0)
- op (req_op_e): The operation. (default: OP_GET)
-
- Returns:
- Request: A new request instance.
- )pbdoc")
- .def_readwrite("clock_time", &request_t::clock_time)
- .def_readwrite("hv", &request_t::hv)
- .def_readwrite("obj_id", &request_t::obj_id)
- .def_readwrite("obj_size", &request_t::obj_size)
- .def_readwrite("op", &request_t::op);
-
- /**
- * @brief Reader structure
- */
- py::class_>(m, "Reader")
- .def_readwrite("n_read_req", &reader_t::n_read_req)
- .def_readwrite("n_total_req", &reader_t::n_total_req)
- .def_readwrite("trace_path", &reader_t::trace_path)
- .def_readwrite("file_size", &reader_t::file_size)
- .def_readwrite("ignore_obj_size", &reader_t::ignore_obj_size)
- // methods
- .def(
- "get_wss",
- [](reader_t& self) {
- int64_t wss_obj = 0, wss_byte = 0;
- cal_working_set_size(&self, &wss_obj, &wss_byte);
- return self.ignore_obj_size ? wss_obj : wss_byte;
- },
- R"pbdoc(
- Get the working set size of the trace.
-
- Args:
- ignore_obj_size (bool): Whether to ignore the object size.
-
- Returns:
- int: The working set size of the trace.
- )pbdoc")
- .def(
- "seek",
- [](reader_t& self, int64_t offset, bool from_beginning = false) {
- int64_t offset_from_beginning = offset;
- if (!from_beginning) {
- offset_from_beginning += self.n_read_req;
- }
- reset_reader(&self);
- skip_n_req(&self, offset_from_beginning);
- },
- py::arg("offset"), py::arg("from_beginning") = false,
- R"pbdoc(
- Seek to a specific offset in the trace file.
- We only support seeking from current position or from the beginning.
-
- Can only move forward, not backward.
-
- Args:
- offset (int): The offset to seek to the beginning.
-
- Raises:
- RuntimeError: If seeking fails.
- )pbdoc")
- .def("__iter__", [](reader_t& self) -> reader_t& { return self; })
- .def("__next__", [](reader_t& self) {
- auto req = std::unique_ptr(new_request());
- int ret = read_one_req(&self, req.get());
- if (ret != 0) {
- throw py::stop_iteration();
- }
- return req;
- });
-
- // Helper function to apply parameters from dictionary to reader_init_param_t
- auto apply_params_from_dict = [](reader_init_param_t& params,
- py::dict dict_params) {
- // Template field setter with type safety
- auto set_if_present = [&](const char* key, auto& field) {
- if (dict_params.contains(key)) {
- field =
- dict_params[key].cast>();
- }
- };
-
- // Apply all standard fields
- set_if_present("time_field", params.time_field);
- set_if_present("obj_id_field", params.obj_id_field);
- set_if_present("obj_size_field", params.obj_size_field);
- set_if_present("has_header", params.has_header);
- set_if_present("ignore_obj_size", params.ignore_obj_size);
- set_if_present("ignore_size_zero_req", params.ignore_size_zero_req);
- set_if_present("obj_id_is_num", params.obj_id_is_num);
- set_if_present("obj_id_is_num_set", params.obj_id_is_num_set);
- set_if_present("has_header_set", params.has_header_set);
- set_if_present("cap_at_n_req", params.cap_at_n_req);
- set_if_present("op_field", params.op_field);
- set_if_present("ttl_field", params.ttl_field);
- set_if_present("cnt_field", params.cnt_field);
- set_if_present("tenant_field", params.tenant_field);
- set_if_present("next_access_vtime_field", params.next_access_vtime_field);
- set_if_present("block_size", params.block_size);
- set_if_present("trace_start_offset", params.trace_start_offset);
-
- // Special fields with custom handling
- if (dict_params.contains("delimiter")) {
- std::string delim = dict_params["delimiter"].cast();
- params.delimiter = delim.empty() ? ',' : delim[0];
- }
-
- if (dict_params.contains("binary_fmt_str")) {
- // Free existing memory first to prevent leaks
- if (params.binary_fmt_str) {
- free(params.binary_fmt_str);
- params.binary_fmt_str = nullptr;
- }
- std::string fmt = dict_params["binary_fmt_str"].cast();
- if (!fmt.empty()) {
- // Note: Using strdup for C-compatible memory allocation
- // Memory is managed by reader_init_param_t destructor/cleanup
- params.binary_fmt_str = strdup(fmt.c_str());
- if (!params.binary_fmt_str) {
- throw std::runtime_error(
- "Failed to allocate memory for binary_fmt_str");
- }
- }
- }
-
- if (dict_params.contains("feature_fields")) {
- auto ff = dict_params["feature_fields"].cast>();
- if (ff.size() > N_MAX_FEATURES) {
- throw py::value_error("Too many feature fields (max " +
- std::to_string(N_MAX_FEATURES) + ")");
- }
- params.n_feature_fields = static_cast(ff.size());
- // Use copy_n for explicit bounds checking
- std::copy_n(ff.begin(), params.n_feature_fields, params.feature_fields);
- }
- };
-
- py::class_(m, "ReaderInitParam")
- .def(py::init([]() {
- reader_init_param_t params;
- set_default_reader_init_params(¶ms);
- return params;
- }),
- "Create with default parameters")
-
- .def(py::init([apply_params_from_dict](py::kwargs kwargs) {
- reader_init_param_t params;
- set_default_reader_init_params(¶ms);
-
- // Convert kwargs to dict and apply using shared helper
- py::dict dict_params = py::dict(kwargs);
- apply_params_from_dict(params, dict_params);
-
- return params;
- }),
- "Create with keyword arguments")
-
- .def(py::init([apply_params_from_dict](py::dict dict_params) {
- reader_init_param_t params;
- set_default_reader_init_params(¶ms);
-
- // Apply using shared helper function
- apply_params_from_dict(params, dict_params);
-
- return params;
- }),
- py::arg("params"), "Create from dictionary (backward compatibility)")
- .def("__repr__", [](const reader_init_param_t& params) {
- std::stringstream ss;
- ss << "ReaderInitParam(\n";
-
- // Group 1: Core fields
- ss << " # Core fields\n";
- ss << " time_field=" << params.time_field << ", ";
- ss << "obj_id_field=" << params.obj_id_field << ", ";
- ss << "obj_size_field=" << params.obj_size_field << ",\n";
-
- // Group 2: Flags and options
- ss << " # Flags and options\n";
- ss << " has_header=" << params.has_header << ", ";
- ss << "ignore_obj_size=" << params.ignore_obj_size << ", ";
- ss << "ignore_size_zero_req=" << params.ignore_size_zero_req << ", ";
- ss << "obj_id_is_num=" << params.obj_id_is_num << ",\n";
-
- // Group 3: Internal state flags
- ss << " # Internal state\n";
- ss << " obj_id_is_num_set=" << params.obj_id_is_num_set << ", ";
- ss << "has_header_set=" << params.has_header_set << ",\n";
-
- // Group 4: Optional fields
- ss << " # Optional fields\n";
- ss << " cap_at_n_req=" << params.cap_at_n_req << ", ";
- ss << "op_field=" << params.op_field << ", ";
- ss << "ttl_field=" << params.ttl_field << ", ";
- ss << "cnt_field=" << params.cnt_field << ",\n";
- ss << " tenant_field=" << params.tenant_field << ", ";
- ss << "next_access_vtime_field=" << params.next_access_vtime_field
- << ",\n";
-
- // Group 5: Miscellaneous
- ss << " # Miscellaneous\n";
- ss << " block_size=" << params.block_size << ", ";
- ss << "trace_start_offset=" << params.trace_start_offset;
- ss << "\n)";
- return ss.str();
- });
-
- // *************** functions ***************
- /**
- * @brief Open a trace file for reading
- */
- m.def(
- "open_trace",
- [apply_params_from_dict](const std::string& trace_path, py::object type,
- py::object params) {
- trace_type_e c_type = UNKNOWN_TRACE;
- if (!type.is_none()) {
- c_type = type.cast();
- } else {
- // If type is None, we can try to infer the type from the file
- // extension
- c_type = infer_trace_type(trace_path);
- if (c_type == UNKNOWN_TRACE) {
- throw std::runtime_error("Could not infer trace type from path: " +
- trace_path);
- }
- }
-
- // Handle different parameter types
- reader_init_param_t init_param;
- set_default_reader_init_params(&init_param);
-
- if (py::isinstance(params)) {
- // Dictionary parameters - use shared helper function
- py::dict dict_params = params.cast();
- apply_params_from_dict(init_param, dict_params);
- } else if (!params.is_none()) {
- // reader_init_param_t object - direct cast (pybind11 handles
- // conversion)
- init_param = params.cast();
- }
- reader_t* ptr = open_trace(trace_path.c_str(), c_type, &init_param);
- return std::unique_ptr(ptr);
- },
- py::arg("trace_path"), py::arg("type") = py::none(),
- py::arg("params") = py::none(),
- R"pbdoc(
- Open a trace file for reading.
-
- Args:
- trace_path (str): Path to the trace file.
- type (Union[trace_type_e, None]): Type of the trace (e.g., CSV_TRACE). If None, the type will be inferred.
- params (Union[dict, reader_init_param_t, None]): Initialization parameters for the reader.
-
- Returns:
- Reader: A new reader instance for the trace.
- )pbdoc");
-
- /**
- * @brief Generic function to create a cache instance.
- */
- m.def(
- "create_cache",
- [](const std::string& eviction_algo, const uint64_t cache_size,
- const std::string& eviction_params,
- bool consider_obj_metadata) { return nullptr; },
- py::arg("eviction_algo"), py::arg("cache_size"),
- py::arg("eviction_params"), py::arg("consider_obj_metadata"),
- R"pbdoc(
- Create a cache instance.
-
- Args:
- eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random").
- cache_size (int): Size of the cache in bytes.
- eviction_params (str): Additional parameters for the eviction algorithm.
- consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions.
-
- Returns:
- Cache: A new cache instance.
- )pbdoc");
-
- /* TODO(haocheng): should we support all parameters in the
- * common_cache_params_t? (hash_power, etc.) */
-
- // Currently supported eviction algorithms with direct initialization:
- // - "ARC"
- // - "Clock"
- // - "FIFO"
- // - "LRB"
- // - "LRU"
- // - "S3FIFO"
- // - "Sieve"
- // - "ThreeLCache"
- // - "TinyLFU"
- // - "TwoQ"
-
- /**
- * @brief Create a ARC cache instance.
- */
- m.def(
- "ARC_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = ARC_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a ARC cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- )pbdoc");
-
- /**
- * @brief Create a Clock cache instance.
- */
- m.def(
- "Clock_init",
- [](uint64_t cache_size, long int n_bit_counter, long int init_freq) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- // assemble the cache specific parameters
- std::string cache_specific_params =
- "n-bit-counter=" + std::to_string(n_bit_counter) + "," +
- "init-freq=" + std::to_string(init_freq);
-
- cache_t* ptr = Clock_init(cc_params, cache_specific_params.c_str());
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"), py::arg("n_bit_counter") = 1,
- py::arg("init_freq") = 0,
- R"pbdoc(
- Create a Clock cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- n_bit_counter (int): Number of bits for counter (default: 1).
- init_freq (int): Initial frequency value (default: 0).
-
- Returns:
- Cache: A new Clock cache instance.
- )pbdoc");
-
- /**
- * @brief Create a FIFO cache instance.
- */
- m.def(
- "FIFO_init",
- [](uint64_t cache_size) {
- // Construct common cache parameters
- common_cache_params_t cc_params = {.cache_size = cache_size};
- // FIFO no specific parameters, so we pass nullptr
- cache_t* ptr = FIFO_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a FIFO cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new FIFO cache instance.
- )pbdoc");
-
-#ifdef ENABLE_LRB
- /**
- * @brief Create a LRB cache instance.
- */
- m.def(
- "LRB_init",
- [](uint64_t cache_size, std::string objective) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = LRB_init(cc_params, ("objective=" + objective).c_str());
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio",
- R"pbdoc(
- Create a LRB cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- objective (str): Objective function to optimize (default: "byte-miss-ratio").
-
- Returns:
- Cache: A new LRB cache instance.
- )pbdoc");
-#else
- // TODO(haocheng): add a dummy function to avoid the error when LRB is not
- // enabled
- m.def(
- "LRB_init",
- [](uint64_t cache_size, std::string objective) {
- throw std::runtime_error("LRB is not enabled");
- },
- py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio");
-#endif
-
- /**
- * @brief Create a LRU cache instance.
- */
- m.def(
- "LRU_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = LRU_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a LRU cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new LRU cache instance.
- )pbdoc");
-
- /**
- * @brief Create a S3FIFO cache instance.
- */
- m.def(
- "S3FIFO_init",
- [](uint64_t cache_size, double fifo_size_ratio, double ghost_size_ratio,
- int move_to_main_threshold) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = S3FIFO_init(
- cc_params,
- ("fifo-size-ratio=" + std::to_string(fifo_size_ratio) + "," +
- "ghost-size-ratio=" + std::to_string(ghost_size_ratio) + "," +
- "move-to-main-threshold=" + std::to_string(move_to_main_threshold))
- .c_str());
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"), py::arg("fifo_size_ratio") = 0.10,
- py::arg("ghost_size_ratio") = 0.90, py::arg("move_to_main_threshold") = 2,
- R"pbdoc(
- Create a S3FIFO cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- fifo_size_ratio (float): Ratio of FIFO size to cache size (default: 0.10).
- ghost_size_ratio (float): Ratio of ghost size to cache size (default: 0.90).
- move_to_main_threshold (int): Threshold for moving to main queue (default: 2).
-
- Returns:
- Cache: A new S3FIFO cache instance.
- )pbdoc");
-
- /**
- * @brief Create a Sieve cache instance.
- */
- m.def(
- "Sieve_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = Sieve_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a Sieve cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new Sieve cache instance.
- )pbdoc");
-
-#ifdef ENABLE_3L_CACHE
- /**
- * @brief Create a ThreeL cache instance.
- */
- m.def(
- "ThreeLCache_init",
- [](uint64_t cache_size, std::string objective) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr =
- ThreeLCache_init(cc_params, ("objective=" + objective).c_str());
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio",
- R"pbdoc(
- Create a ThreeL cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- objective (str): Objective function to optimize (default: "byte-miss-ratio").
-
- Returns:
- Cache: A new ThreeL cache instance.
- )pbdoc");
-#else
- // TODO(haocheng): add a dummy function to avoid the error when ThreeLCache is
- // not enabled
- m.def(
- "ThreeLCache_init",
- [](uint64_t cache_size, std::string objective) {
- throw std::runtime_error("ThreeLCache is not enabled");
- },
- py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio");
-#endif
-
- /**
- * @brief Create a TinyLFU cache instance.
- */
- // mark evivtion parsing need change
- m.def(
- "TinyLFU_init",
- [](uint64_t cache_size, std::string main_cache, double window_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = WTinyLFU_init(
- cc_params, ("main-cache=" + main_cache + "," +
- "window-size=" + std::to_string(window_size))
- .c_str());
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"), py::arg("main_cache") = "SLRU",
- py::arg("window_size") = 0.01,
- R"pbdoc(
- Create a TinyLFU cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- main_cache (str): Main cache to use (default: "SLRU").
- window_size (float): Window size for TinyLFU (default: 0.01).
-
- Returns:
- Cache: A new TinyLFU cache instance.
- )pbdoc");
-
- /**
- * @brief Create a TwoQ cache instance.
- */
- m.def(
- "TwoQ_init",
- [](uint64_t cache_size, double Ain_size_ratio, double Aout_size_ratio) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = TwoQ_init(
- cc_params,
- ("Ain-size-ratio=" + std::to_string(Ain_size_ratio) + "," +
- "Aout-size-ratio=" + std::to_string(Aout_size_ratio))
- .c_str());
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"), py::arg("Ain_size_ratio") = 0.25,
- py::arg("Aout_size_ratio") = 0.5,
- R"pbdoc(
- Create a TwoQ cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- Ain_size_ratio (float): Ratio of A-in size to cache size (default: 0.25).
- Aout_size_ratio (float): Ratio of A-out size to cache size (default: 0.5).
-
- Returns:
- Cache: A new TwoQ cache instance.
- )pbdoc");
-
- /**
- * @brief Create a LFU cache instance.
- */
- m.def(
- "LFU_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = LFU_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a LFU cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new LFU cache instance.
- )pbdoc");
-
- /**
- * @brief Create a LFUDA cache instance.
- */
- m.def(
- "LFUDA_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = LFUDA_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a LFUDA cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new LFUDA cache instance.
- )pbdoc");
-
- /**
- * @brief Create a SLRU cache instance.
- */
- m.def(
- "SLRU_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = SLRU_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a SLRU cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new SLRU cache instance.
- )pbdoc");
-
- /**
- * @brief Create a Belady cache instance.
- */
- m.def(
- "Belady_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = Belady_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a Belady cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new Belady cache instance.
- )pbdoc");
-
- /**
- * @brief Create a BeladySize cache instance.
- */
- m.def(
- "BeladySize_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = BeladySize_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a BeladySize cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new BeladySize cache instance.
- )pbdoc");
-
- /**
- * @brief Create a QDLP cache instance.
- */
- m.def(
- "QDLP_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = QDLP_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a QDLP cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new QDLP cache instance.
- )pbdoc");
-
- /**
- * @brief Create a LeCaR cache instance.
- */
- m.def(
- "LeCaR_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = LeCaR_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a LeCaR cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new LeCaR cache instance.
- )pbdoc");
-
- /**
- * @brief Create a Cacheus cache instance.
- */
- m.def(
- "Cacheus_init",
- [](uint64_t cache_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = Cacheus_init(cc_params, nullptr);
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"),
- R"pbdoc(
- Create a Cacheus cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
-
- Returns:
- Cache: A new Cacheus cache instance.
- )pbdoc");
-
- /**
- * @brief Create a WTinyLFU cache instance.
- */
- m.def(
- "WTinyLFU_init",
- [](uint64_t cache_size, std::string main_cache, double window_size) {
- common_cache_params_t cc_params = {.cache_size = cache_size};
- cache_t* ptr = WTinyLFU_init(
- cc_params, ("main-cache=" + main_cache + "," +
- "window-size=" + std::to_string(window_size))
- .c_str());
- return std::unique_ptr(ptr);
- },
- py::arg("cache_size"), py::arg("main_cache") = "SLRU",
- py::arg("window_size") = 0.01,
- R"pbdoc(
- Create a WTinyLFU cache instance.
-
- Args:
- cache_size (int): Size of the cache in bytes.
- main_cache (str): Main cache to use (default: "SLRU").
- window_size (float): Window size for TinyLFU (default: 0.01).
-
- Returns:
- Cache: A new WTinyLFU cache instance.
- )pbdoc");
-
- /**
- * @brief Create a Python hook-based cache instance.
- */
- py::class_(m, "PythonHookCache")
- .def(py::init(), py::arg("cache_size"),
- py::arg("cache_name") = "PythonHookCache")
- .def("set_hooks", &PythonHookCache::set_hooks, py::arg("init_hook"),
- py::arg("hit_hook"), py::arg("miss_hook"), py::arg("eviction_hook"),
- py::arg("remove_hook"), py::arg("free_hook") = py::none(),
- R"pbdoc(
- Set the hook functions for the cache.
-
- Args:
- init_hook (callable): Function called during cache initialization.
- Signature: init_hook(cache_size: int) -> Any
- hit_hook (callable): Function called on cache hit.
- Signature: hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None
- miss_hook (callable): Function called on cache miss.
- Signature: miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None
- eviction_hook (callable): Function called to select eviction candidate.
- Signature: eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int
- remove_hook (callable): Function called when object is removed.
- Signature: remove_hook(plugin_data: Any, obj_id: int) -> None
- free_hook (callable, optional): Function called during cache cleanup.
- Signature: free_hook(plugin_data: Any) -> None
- )pbdoc")
- .def("get", &PythonHookCache::get, py::arg("req"),
- R"pbdoc(
- Process a cache request.
-
- Args:
- req (Request): The cache request to process.
-
- Returns:
- bool: True if cache hit, False if cache miss.
- )pbdoc")
- .def_readwrite("n_req", &PythonHookCache::n_req)
- .def_readwrite("n_obj", &PythonHookCache::n_obj)
- .def_readwrite("occupied_byte", &PythonHookCache::occupied_byte)
- .def_readwrite("cache_size", &PythonHookCache::cache_size);
-
- /**
- * @brief Process a trace with a cache and return miss ratio.
- */
- m.def(
- "process_trace",
- [](cache_t& cache, reader_t& reader, int64_t start_req = 0,
- int64_t max_req = -1) {
- reset_reader(&reader);
- if (start_req > 0) {
- skip_n_req(&reader, start_req);
- }
-
- request_t* req = new_request();
- int64_t n_req = 0, n_hit = 0;
- int64_t bytes_req = 0, bytes_hit = 0;
- bool hit;
-
- read_one_req(&reader, req);
- while (req->valid) {
- n_req += 1;
- bytes_req += req->obj_size;
- hit = cache.get(&cache, req);
- if (hit) {
- n_hit += 1;
- bytes_hit += req->obj_size;
- }
- read_one_req(&reader, req);
- if (max_req > 0 && n_req >= max_req) {
- break; // Stop if we reached the max request limit
- }
- }
-
- free_request(req);
- // return the miss ratio
- double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0;
- double byte_miss_ratio =
- bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0;
- return std::make_tuple(obj_miss_ratio, byte_miss_ratio);
- },
- py::arg("cache"), py::arg("reader"), py::arg("start_req") = 0,
- py::arg("max_req") = -1,
- R"pbdoc(
- Process a trace with a cache and return miss ratio.
-
- This function processes trace data entirely on the C++ side to avoid
- data movement overhead between Python and C++.
-
- Args:
- cache (Cache): The cache instance to use for processing.
- reader (Reader): The trace reader instance.
- start_req (int): The starting request number to process from (default: 0, from the beginning).
- max_req (int): Maximum number of requests to process (-1 for no limit).
-
- Returns:
- float: Object miss ratio (0.0 to 1.0).
- float: Byte miss ratio (0.0 to 1.0).
-
- Example:
- >>> cache = libcachesim.LRU(1024*1024)
- >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE)
- >>> obj_miss_ratio, byte_miss_ratio = libcachesim.process_trace(cache, reader)
- >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
- )pbdoc");
-
- /**
- * @brief Process a trace with a Python hook cache and return miss ratio.
- */
- m.def(
- "process_trace_python_hook",
- [](PythonHookCache& cache, reader_t& reader, int64_t start_req = 0,
- int64_t max_req = -1) {
- reset_reader(&reader);
- if (start_req > 0) {
- skip_n_req(&reader, start_req);
- }
-
- request_t* req = new_request();
- int64_t n_req = 0, n_hit = 0;
- int64_t bytes_req = 0, bytes_hit = 0;
- bool hit;
-
- read_one_req(&reader, req);
- while (req->valid) {
- n_req += 1;
- bytes_req += req->obj_size;
- hit = cache.get(*req);
- if (hit) {
- n_hit += 1;
- bytes_hit += req->obj_size;
- }
- read_one_req(&reader, req);
- if (max_req > 0 && n_req >= max_req) {
- break; // Stop if we reached the max request limit
- }
- }
-
- free_request(req);
- // return the miss ratio
- double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0;
- double byte_miss_ratio =
- bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0;
- return std::make_tuple(obj_miss_ratio, byte_miss_ratio);
- },
- py::arg("cache"), py::arg("reader"), py::arg("start_req") = 0,
- py::arg("max_req") = -1,
- R"pbdoc(
- Process a trace with a Python hook cache and return miss ratio.
-
- This function processes trace data entirely on the C++ side to avoid
- data movement overhead between Python and C++. Specifically designed
- for PythonHookCache instances.
-
- Args:
- cache (PythonHookCache): The Python hook cache instance to use.
- reader (Reader): The trace reader instance.
- start_req (int): The starting request number to process from (0 for beginning).
- max_req (int): Maximum number of requests to process (-1 for no limit).
-
- Returns:
- float: Object miss ratio (0.0 to 1.0).
- float: Byte miss ratio (0.0 to 1.0).
-
- Example:
- >>> cache = libcachesim.PythonHookCachePolicy(1024*1024)
- >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE)
- >>> obj_miss_ratio, byte_miss_ratio = libcachesim.process_trace_python_hook(cache.cache, reader)
- >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
- )pbdoc");
-
-#ifdef VERSION_INFO
- m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
-#else
- m.attr("__version__") = "dev";
-#endif
-}
diff --git a/libCacheSim-python/tests/conftest.py b/libCacheSim-python/tests/conftest.py
deleted file mode 100644
index a3e2705e..00000000
--- a/libCacheSim-python/tests/conftest.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from __future__ import annotations
-
-import os
-import gc
-
-import pytest
-
-from libcachesim import Reader, TraceType, open_trace
-
-
-@pytest.fixture
-def mock_reader():
- data_file = os.path.join( # noqa: PTH118
- os.path.dirname(os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120
- "data",
- "cloudPhysicsIO.oracleGeneral.bin",
- )
- reader: Reader = open_trace(
- data_file,
- type=TraceType.ORACLE_GENERAL_TRACE,
- )
- try:
- yield reader
- finally:
- # More careful cleanup
- try:
- if hasattr(reader, "close"):
- reader.close()
- except Exception: # Be specific about exception type
- pass
- # Don't explicitly del reader here, let Python handle it
- gc.collect()
diff --git a/libCacheSim-python/tests/reference.csv b/libCacheSim-python/tests/reference.csv
deleted file mode 100644
index cb569d0c..00000000
--- a/libCacheSim-python/tests/reference.csv
+++ /dev/null
@@ -1,20 +0,0 @@
-FIFO,0.01,0.8368
-ARC,0.01,0.8222
-Clock,0.01,0.8328
-LRB,0.01,0.8339
-LRU,0.01,0.8339
-S3FIFO,0.01,0.8235
-Sieve,0.01,0.8231
-3LCache,0.01,0.8339
-TinyLFU,0.01,0.8262
-TwoQ,0.01,0.8276
-FIFO,0.1,0.8075
-ARC,0.1,0.7688
-Clock,0.1,0.8086
-LRB,0.1,0.8097
-LRU,0.1,0.8097
-S3FIFO,0.1,0.7542
-Sieve,0.1,0.7903
-3LCache,0.1,0.8097
-TinyLFU,0.1,0.7666
-TwoQ,0.1,0.7695
diff --git a/libCacheSim-python/tests/test_eviction.py b/libCacheSim-python/tests/test_eviction.py
deleted file mode 100644
index a51aae86..00000000
--- a/libCacheSim-python/tests/test_eviction.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import pytest
-
-from libcachesim import (
- ARC,
- FIFO,
- LRU,
- S3FIFO,
- Clock,
- Sieve,
- TinyLFU,
- TwoQ,
-)
-from tests.utils import get_reference_data
-
-
-@pytest.mark.parametrize(
- "eviction_algo",
- [
- FIFO,
- ARC,
- Clock,
- LRU,
- S3FIFO,
- Sieve,
- TinyLFU,
- TwoQ,
- ],
-)
-@pytest.mark.parametrize("cache_size_ratio", [0.01])
-def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader):
- cache = None
- try:
- # create a cache with the eviction policy
- cache = eviction_algo(cache_size=int(mock_reader.get_wss() * cache_size_ratio))
- req_count = 0
- miss_count = 0
-
- # Limit the number of requests to avoid long test times
- # max_requests = 1000
- for i, req in enumerate(mock_reader):
- # if i >= max_requests:
- # break
- hit = cache.get(req)
- if not hit:
- miss_count += 1
- req_count += 1
-
- if req_count == 0:
- pytest.skip("No requests processed")
-
- miss_ratio = miss_count / req_count
- reference_miss_ratio = get_reference_data(eviction_algo.__name__, cache_size_ratio)
- if reference_miss_ratio is None:
- pytest.skip(f"No reference data for {eviction_algo.__name__} with cache size ratio {cache_size_ratio}")
- assert abs(miss_ratio - reference_miss_ratio) < 0.01, (
- f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}"
- )
-
- except Exception as e:
- pytest.fail(f"Error in test_eviction_algo: {e}")
- finally:
- pass
diff --git a/libCacheSim-python/tests/test_process_trace.py b/libCacheSim-python/tests/test_process_trace.py
deleted file mode 100644
index 1dbfb486..00000000
--- a/libCacheSim-python/tests/test_process_trace.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test file for process_trace functionality.
-"""
-
-import sys
-import os
-import pytest
-
-# Add the parent directory to the Python path for development testing
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-try:
- import libcachesim as lcs
-except ImportError as e:
- pytest.skip(f"libcachesim not available: {e}", allow_module_level=True)
-
-from collections import OrderedDict
-
-
-def create_trace_reader():
- """Helper function to create a trace reader with binary trace file."""
- data_file = os.path.join(
- os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin"
- )
- if not os.path.exists(data_file):
- return None
- return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE)
-
-
-def test_process_trace_native():
- """Test process_trace with native LRU cache."""
-
- # Open trace
- reader = create_trace_reader()
- if reader is None:
- pytest.skip("Test trace file not found, skipping test")
-
- # Create LRU cache
- cache = lcs.LRU(1024 * 1024) # 1MB cache
-
- # Process trace and get miss ratio
- obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=1000)
-
- # Verify miss ratio is reasonable (should be between 0 and 1)
- assert 0.0 <= obj_miss_ratio <= 1.0, f"Invalid miss ratio: {obj_miss_ratio}"
-
-
-def test_process_trace_python_hook():
- """Test process_trace with Python hook cache."""
-
- # Open trace
- reader = create_trace_reader()
- if reader is None:
- pytest.skip("Test trace file not found, skipping test")
-
- # Create Python hook LRU cache
- cache = lcs.PythonHookCachePolicy(1024 * 1024, "TestLRU")
-
- # Define LRU hooks
- def init_hook(cache_size):
- return OrderedDict()
-
- def hit_hook(lru_dict, obj_id, obj_size):
- lru_dict.move_to_end(obj_id)
-
- def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = True
-
- def eviction_hook(lru_dict, obj_id, obj_size):
- return next(iter(lru_dict))
-
- def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
-
- # Set hooks
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
- # Test both methods
- # Method 1: Direct function call
- miss_ratio1 = lcs.process_trace_python_hook(cache.cache, reader, max_req=1000)[0]
-
- # Need to reopen the trace for second test
- reader2 = create_trace_reader()
- if reader2 is None:
- pytest.skip("Warning: Cannot reopen trace file, skipping second test")
- # Continue with just the first test result
- assert miss_ratio1 is not None and 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio: {miss_ratio1}"
- return
-
- # Reset cache for fair comparison
- cache2 = lcs.PythonHookCachePolicy(1024 * 1024, "TestLRU2")
- cache2.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
- # Method 2: Convenience method
- miss_ratio2 = cache2.process_trace(reader2, max_req=1000)[0]
-
- # Verify both methods give the same result and miss ratios are reasonable
- assert 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio 1: {miss_ratio1}"
- assert 0.0 <= miss_ratio2 <= 1.0, f"Invalid miss ratio 2: {miss_ratio2}"
- assert abs(miss_ratio1 - miss_ratio2) < 0.001, (
- f"Different results from the two methods: {miss_ratio1} vs {miss_ratio2}"
- )
-
-
-def test_compare_native_vs_python_hook():
- """Compare native LRU vs Python hook LRU using process_trace."""
-
- cache_size = 512 * 1024 # 512KB cache
- max_requests = 500
-
- # Test native LRU
- native_cache = lcs.LRU(cache_size)
- reader1 = create_trace_reader()
- if reader1 is None:
- pytest.skip("Test trace file not found, skipping test")
-
- native_obj_miss_ratio, native_byte_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests)
-
- # Test Python hook LRU
- hook_cache = lcs.PythonHookCachePolicy(cache_size, "HookLRU")
-
- def init_hook(cache_size):
- return OrderedDict()
-
- def hit_hook(lru_dict, obj_id, obj_size):
- lru_dict.move_to_end(obj_id)
-
- def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = True
-
- def eviction_hook(lru_dict, obj_id, obj_size):
- return next(iter(lru_dict))
-
- def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
-
- hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
- reader2 = create_trace_reader()
- if reader2 is None:
- pytest.skip("Warning: Cannot reopen trace file, skipping comparison")
- return # Skip test
-
- hook_obj_miss_ratio, hook_byte_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests)
-
- # They should be very similar (allowing for some small differences due to implementation details)
- assert abs(native_obj_miss_ratio - hook_obj_miss_ratio) < 0.05, (
- f"Too much difference: {abs(native_obj_miss_ratio - hook_obj_miss_ratio):.4f}"
- )
-
-
-def test_error_handling():
- """Test error handling for process_trace."""
-
- cache = lcs.PythonHookCachePolicy(1024)
-
- reader = create_trace_reader()
- if reader is None:
- pytest.skip("Test trace file not found, skipping error test")
-
- # Try to process trace without setting hooks - should raise RuntimeError
- with pytest.raises(RuntimeError, match="Hooks must be set before processing trace"):
- cache.process_trace(reader)
-
-
-def test_lru_implementation_accuracy():
- """Test that Python hook LRU implementation matches native LRU closely."""
-
- cache_size = 1024 * 1024 # 1MB
- max_requests = 100
-
- # Create readers
- reader1 = create_trace_reader()
- reader2 = create_trace_reader()
-
- if not reader1 or not reader2:
- pytest.skip("Cannot open trace files for LRU accuracy test")
-
- # Test native LRU
- native_cache = lcs.LRU(cache_size)
- native_obj_miss_ratio, native_byte_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests)
-
- # Test Python hook LRU
- hook_cache = lcs.PythonHookCachePolicy(cache_size, "AccuracyTestLRU")
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_optimized_lru_hooks()
- hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
- hook_obj_miss_ratio, hook_byte_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests)
-
- # Calculate difference
- difference = abs(native_obj_miss_ratio - hook_obj_miss_ratio)
- percentage_diff = (difference / native_obj_miss_ratio) * 100 if native_obj_miss_ratio > 0 else 0
-
- # Assert that the difference is small (< 5%)
- assert percentage_diff < 5.0, f"LRU implementation difference too large: {percentage_diff:.4f}%"
-
-
-def create_optimized_lru_hooks():
- """Create optimized LRU hooks that closely match native LRU behavior."""
-
- def init_hook(cache_size):
- return OrderedDict()
-
- def hit_hook(lru_dict, obj_id, obj_size):
- if obj_id in lru_dict:
- lru_dict.move_to_end(obj_id)
-
- def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = obj_size
-
- def eviction_hook(lru_dict, obj_id, obj_size):
- if lru_dict:
- return next(iter(lru_dict))
- return obj_id
-
- def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
-
- return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook
diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py
deleted file mode 100644
index 7af8873d..00000000
--- a/libCacheSim-python/tests/test_python_hook_cache.py
+++ /dev/null
@@ -1,205 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test file for PythonHookCachePolicy functionality.
-"""
-
-import pytest
-import libcachesim as lcs
-from dataclasses import dataclass
-from collections import OrderedDict
-
-
-@dataclass
-class CacheTestCase:
- """Represents a single test case for cache operations."""
-
- request: tuple[int, int] # (obj_id, obj_size)
- expected_hit: bool
- expected_obj_count: int
- description: str = ""
-
-
-def create_lru_hooks():
- """Create standard LRU hooks for testing.
-
- Returns:
- tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- """
-
- def init_hook(cache_size):
- return OrderedDict()
-
- def hit_hook(lru_dict, obj_id, obj_size):
- lru_dict.move_to_end(obj_id)
-
- def miss_hook(lru_dict, obj_id, obj_size):
- lru_dict[obj_id] = True
-
- def eviction_hook(lru_dict, obj_id, obj_size):
- return next(iter(lru_dict))
-
- def remove_hook(lru_dict, obj_id):
- lru_dict.pop(obj_id, None)
-
- return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook
-
-
-def create_test_request(obj_id: int, obj_size: int) -> lcs.Request:
- """Create a test request with given parameters.
-
- Args:
- obj_id: Object ID
- obj_size: Object size in bytes
-
- Returns:
- Request: A configured request object
- """
- req = lcs.Request()
- req.obj_id = obj_id
- req.obj_size = obj_size
- return req
-
-
-def test_python_hook_cache():
- """Test the Python hook cache implementation."""
- cache_size = 300 # 3 objects of size 100 each
- cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU")
-
- # Set up hooks
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks()
- cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
- # Define test sequence
- test_cases = [
- CacheTestCase((1, 100), False, 1, "Miss - insert 1"),
- CacheTestCase((2, 100), False, 2, "Miss - insert 2"),
- CacheTestCase((3, 100), False, 3, "Miss - insert 3 (cache full)"),
- CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"),
- CacheTestCase((4, 100), False, 3, "Miss - should evict 2 (LRU), insert 4"),
- CacheTestCase((2, 100), False, 3, "Miss - should evict 3, insert 2"),
- CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"),
- ]
-
- # Execute test sequence
- for i, test_case in enumerate(test_cases):
- obj_id, obj_size = test_case.request
- req = create_test_request(obj_id, obj_size)
-
- result = cache.get(req)
- assert result == test_case.expected_hit, f"Request {i + 1} (obj_id={obj_id}):"
- f"Expected {'hit' if test_case.expected_hit else 'miss'} - {test_case.description}"
- assert cache.n_obj == test_case.expected_obj_count, (
- f"Request {i + 1}: Expected {test_case.expected_obj_count} objects - {test_case.description}"
- )
- assert cache.occupied_byte <= cache_size, f"Request {i + 1}: Cache size exceeded"
-
-
-def test_error_handling():
- """Test error handling for uninitialized cache."""
- cache = lcs.PythonHookCachePolicy(1000)
-
- # Try to use cache without setting hooks
- req = create_test_request(1, 100)
-
- with pytest.raises(RuntimeError):
- cache.get(req)
-
-
-def test_lru_comparison():
- """Test Python hook LRU against native LRU to verify identical behavior."""
- cache_size = 300 # 3 objects of size 100 each
-
- # Create native LRU cache
- native_lru = lcs.LRU(cache_size)
-
- # Create Python hook LRU cache
- hook_lru = lcs.PythonHookCachePolicy(cache_size, "TestLRU")
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks()
- hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
- # Define test sequence with various access patterns
- test_cases = [
- CacheTestCase((1, 100), False, 1, "Miss - insert 1"),
- CacheTestCase((2, 100), False, 2, "Miss - insert 2"),
- CacheTestCase((3, 100), False, 3, "Miss - insert 3 (cache full)"),
- CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"),
- CacheTestCase((4, 100), False, 3, "Miss - should evict 2 (LRU), insert 4"),
- CacheTestCase((2, 100), False, 3, "Miss - should evict 3, insert 2"),
- CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"),
- CacheTestCase((3, 100), False, 3, "Miss - should evict 4, insert 3"),
- CacheTestCase((5, 100), False, 3, "Miss - should evict 2, insert 5"),
- CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"),
- CacheTestCase((3, 100), True, 3, "Hit - move 3 to end"),
- CacheTestCase((6, 100), False, 3, "Miss - should evict 5, insert 6"),
- ]
-
- # Test both caches with identical requests
- for i, test_case in enumerate(test_cases):
- obj_id, obj_size = test_case.request
-
- # Test native LRU
- req_native = create_test_request(obj_id, obj_size)
- native_result = native_lru.get(req_native)
-
- # Test hook LRU
- req_hook = create_test_request(obj_id, obj_size)
- hook_result = hook_lru.get(req_hook)
-
- # Compare results
- assert native_result == hook_result, (
- f"Request {i + 1} (obj_id={obj_id}): Native and hook LRU differ - {test_case.description}"
- )
-
- # Compare cache statistics
- assert native_lru.n_obj == hook_lru.n_obj, f"Request {i + 1}: Object count differs - {test_case.description}"
- assert native_lru.occupied_byte == hook_lru.occupied_byte, (
- f"Request {i + 1}: Occupied bytes differ - {test_case.description}"
- )
-
-
-def test_lru_comparison_variable_sizes():
- """Test Python hook LRU vs Native LRU with variable object sizes."""
- cache_size = 1000 # Total cache capacity
-
- # Create caches
- native_lru = lcs.LRU(cache_size)
- hook_lru = lcs.PythonHookCachePolicy(cache_size, "VariableSizeLRU")
-
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks()
- hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
-
- # Define test sequence with variable object sizes
- test_cases = [
- CacheTestCase((1, 200), False, 1, "Miss - insert 1 (200 bytes)"),
- CacheTestCase((2, 300), False, 2, "Miss - insert 2 (300 bytes)"),
- CacheTestCase((3, 400), False, 3, "Miss - insert 3 (400 bytes) - total 900 bytes"),
- CacheTestCase((4, 200), False, 3, "Miss - should evict 1, insert 4 (total would be 1100, over limit)"),
- CacheTestCase((1, 200), False, 3, "Miss - should evict 2, insert 1"),
- CacheTestCase((5, 100), False, 3, "Miss - should evict 3, insert 5"),
- CacheTestCase((4, 200), True, 3, "Hit - access 4"),
- CacheTestCase((6, 500), False, 2, "Miss - should evict multiple objects to fit"),
- CacheTestCase((4, 200), False, 3, "Miss - 4 was evicted"),
- ]
-
- # Test both caches with identical requests
- for i, test_case in enumerate(test_cases):
- obj_id, obj_size = test_case.request
-
- # Test native LRU
- req_native = create_test_request(obj_id, obj_size)
- native_result = native_lru.get(req_native)
-
- # Test hook LRU
- req_hook = create_test_request(obj_id, obj_size)
- hook_result = hook_lru.get(req_hook)
-
- # Compare results
- assert native_result == hook_result, (
- f"Request {i + 1} (obj_id={obj_id}, size={obj_size}): Results differ - {test_case.description}"
- )
-
- # Compare cache statistics
- assert native_lru.n_obj == hook_lru.n_obj, f"Request {i + 1}: Object count differs - {test_case.description}"
- assert native_lru.occupied_byte == hook_lru.occupied_byte, (
- f"Request {i + 1}: Occupied bytes differ - {test_case.description}"
- )
diff --git a/libCacheSim-python/tests/test_trace_generator.py b/libCacheSim-python/tests/test_trace_generator.py
deleted file mode 100644
index 37040026..00000000
--- a/libCacheSim-python/tests/test_trace_generator.py
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/env python3
-"""
-Tests for trace generator module.
-"""
-
-import libcachesim as lcs
-
-
-class TestTraceGeneration:
- """Test trace generation functions."""
-
- # Constants for test readability
- NUM_SAMPLE_REQUESTS = 10 # Number of requests to check in detail
-
- def test_create_zipf_requests_basic(self):
- """Test basic Zipf request creation."""
- generator = lcs.create_zipf_requests(num_objects=100, num_requests=1000, alpha=1.0, obj_size=4000, seed=42)
-
- # Test iteration
- requests = list(generator)
- assert len(requests) == 1000
-
- for req in requests[: self.NUM_SAMPLE_REQUESTS]: # Check first NUM_SAMPLE_REQUESTS
- assert isinstance(req, lcs.Request)
- assert 0 <= req.obj_id < 100
- assert req.obj_size == 4000
- assert req.clock_time >= 0
-
- def test_create_uniform_requests_basic(self):
- """Test basic uniform request creation."""
- generator = lcs.create_uniform_requests(num_objects=100, num_requests=1000, obj_size=4000, seed=42)
-
- # Test iteration
- requests = list(generator)
- assert len(requests) == 1000
-
- for req in requests[: self.NUM_SAMPLE_REQUESTS]: # Check first NUM_SAMPLE_REQUESTS
- assert isinstance(req, lcs.Request)
- assert 0 <= req.obj_id < 100
- assert req.obj_size == 4000
- assert req.clock_time >= 0
-
- def test_zipf_reproducibility(self):
- """Test reproducibility with seed."""
- gen1 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42)
- gen2 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42)
-
- requests1 = list(gen1)
- requests2 = list(gen2)
-
- assert len(requests1) == len(requests2)
- for req1, req2 in zip(requests1, requests2):
- assert req1.obj_id == req2.obj_id
-
- def test_uniform_reproducibility(self):
- """Test reproducibility with seed."""
- gen1 = lcs.create_uniform_requests(10, 100, seed=42)
- gen2 = lcs.create_uniform_requests(10, 100, seed=42)
-
- requests1 = list(gen1)
- requests2 = list(gen2)
-
- assert len(requests1) == len(requests2)
- for req1, req2 in zip(requests1, requests2):
- assert req1.obj_id == req2.obj_id
-
- def test_different_seeds(self):
- """Test that different seeds produce different results."""
- gen1 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42)
- gen2 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=43)
-
- requests1 = [req.obj_id for req in gen1]
- requests2 = [req.obj_id for req in gen2]
-
- assert requests1 != requests2
-
- def test_zipf_with_cache(self):
- """Test Zipf generator with cache simulation."""
- cache = lcs.LRU(cache_size=50 * 1024) # 50KB cache
- generator = lcs.create_zipf_requests(
- num_objects=100,
- num_requests=1000,
- alpha=1.0,
- obj_size=1000, # 1KB objects
- seed=42,
- )
-
- hit_count = 0
- for req in generator:
- if cache.get(req):
- hit_count += 1
-
- # Should have some hits and some misses
- assert 0 <= hit_count <= 1000
- assert hit_count > 0 # Should have some hits
-
- def test_uniform_with_cache(self):
- """Test uniform generator with cache simulation."""
- cache = lcs.LRU(cache_size=50 * 1024) # 50KB cache
- generator = lcs.create_uniform_requests(
- num_objects=100,
- num_requests=1000,
- obj_size=1000, # 1KB objects
- seed=42,
- )
-
- hit_count = 0
- for req in generator:
- if cache.get(req):
- hit_count += 1
-
- # Should have some hits and some misses
- assert 0 <= hit_count <= 1000
- assert hit_count > 0 # Should have some hits
-
- def test_custom_parameters(self):
- """Test generators with custom parameters."""
- generator = lcs.create_zipf_requests(
- num_objects=50,
- num_requests=200,
- alpha=1.5,
- obj_size=2048,
- time_span=3600, # 1 hour
- start_obj_id=1000,
- seed=123,
- )
-
- requests = list(generator)
- assert len(requests) == 200
-
- # Check custom parameters
- for req in requests[: self.NUM_SAMPLE_REQUESTS // 2]: # Check fewer for shorter test
- assert 1000 <= req.obj_id < 1050 # start_obj_id + num_objects
- assert req.obj_size == 2048
- assert req.clock_time <= 3600
diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py
deleted file mode 100644
index a2c7c8c2..00000000
--- a/libCacheSim-python/tests/test_unified_interface.py
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test the unified interface for all cache policies.
-"""
-
-import sys
-import os
-import pytest
-
-# Add the parent directory to the Python path for development testing
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-try:
- import libcachesim as lcs
-except ImportError as e:
- pytest.skip(f"libcachesim not available: {e}", allow_module_level=True)
-
-from collections import OrderedDict
-
-
-def create_trace_reader():
- """Helper function to create a trace reader.
-
- Returns:
- Reader or None: A trace reader instance, or None if trace file not found.
- """
- data_file = os.path.join(
- os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin"
- )
- if not os.path.exists(data_file):
- return None
- return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE)
-
-
-def create_test_lru_hooks():
- """Create LRU hooks for testing.
-
- Returns:
- tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- """
-
- def init_hook(cache_size):
- """Initialize LRU data structure."""
- return OrderedDict()
-
- def hit_hook(lru_dict, obj_id, obj_size):
- """Handle cache hit by moving to end (most recently used)."""
- if obj_id in lru_dict:
- lru_dict.move_to_end(obj_id)
-
- def miss_hook(lru_dict, obj_id, obj_size):
- """Handle cache miss by adding new object."""
- lru_dict[obj_id] = obj_size
-
- def eviction_hook(lru_dict, obj_id, obj_size):
- """Return the least recently used object ID for eviction."""
- if lru_dict:
- return next(iter(lru_dict))
- return obj_id
-
- def remove_hook(lru_dict, obj_id):
- """Remove object from LRU structure."""
- lru_dict.pop(obj_id, None)
-
- return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook
-
-
-def test_unified_process_trace_interface():
- """Test that all cache policies have the same process_trace interface."""
-
- cache_size = 1024 * 1024 # 1MB
- max_requests = 100
-
- # Create trace reader
- reader = create_trace_reader()
- if not reader:
- pytest.skip("Skipping test: Trace file not available")
-
- # Test different cache policies
- caches = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "ARC": lcs.ARC(cache_size),
- }
-
- # Add Python hook cache
- python_cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU")
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks()
- python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- caches["Python Hook LRU"] = python_cache
-
- results = {}
- for name, cache in caches.items():
- # Create fresh reader for each test
- test_reader = create_trace_reader()
- if not test_reader:
- pytest.skip(f"Cannot create reader for {name} test")
-
- # Test process_trace method exists
- assert hasattr(cache, "process_trace"), f"{name} missing process_trace method"
-
- # Test process_trace functionality
- obj_miss_ratio, byte_miss_ratio = cache.process_trace(test_reader, max_req=max_requests)
- results[name] = obj_miss_ratio
-
- # Verify miss_ratio is valid
- assert 0.0 <= obj_miss_ratio <= 1.0, f"{name} returned invalid miss_ratio: {obj_miss_ratio}"
-
- # Verify we got results for all caches
- assert len(results) == len(caches), "Not all caches were tested"
-
-
-def test_unified_properties_interface():
- """Test that all cache policies have the same properties interface."""
-
- cache_size = 1024 * 1024
-
- # Create different cache types
- caches = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- "Python Hook": lcs.PythonHookCachePolicy(cache_size, "TestCache"),
- }
-
- required_properties = ["cache_size", "n_req", "n_obj", "occupied_byte"]
-
- for name, cache in caches.items():
- # Test all required properties exist
- for prop in required_properties:
- assert hasattr(cache, prop), f"{name} missing {prop} property"
-
- # Test cache_size is correct
- assert cache.cache_size == cache_size, f"{name} cache_size mismatch"
-
-
-def test_get_interface_consistency():
- """Test that get() method works consistently across all cache policies."""
-
- cache_size = 1024 * 1024
-
- # Create caches
- caches = {
- "LRU": lcs.LRU(cache_size),
- "FIFO": lcs.FIFO(cache_size),
- }
-
- # Add Python hook cache
- python_cache = lcs.PythonHookCachePolicy(cache_size, "ConsistencyTest")
- init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks()
- python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook)
- caches["Python Hook"] = python_cache
-
- # Create a test request using the proper request class
- test_req = lcs.Request()
- test_req.obj_id = 1
- test_req.obj_size = 1024
-
- for name, cache in caches.items():
- # Reset cache state for consistent testing
- initial_n_req = cache.n_req
- initial_n_obj = cache.n_obj
- initial_occupied = cache.occupied_byte
-
- # Test get method exists
- assert hasattr(cache, "get"), f"{name} missing get method"
-
- # Test first access (should be miss for new object)
- result = cache.get(test_req)
-
- # Test properties updated correctly
- assert cache.n_req > initial_n_req, f"{name} n_req not updated"
- if not result: # If it was a miss, object should be added
- assert cache.n_obj > initial_n_obj, f"{name} n_obj not updated after miss"
- assert cache.occupied_byte > initial_occupied, f"{name} occupied_byte not updated after miss"
-
- # Test second access to same object (should be hit)
- second_result = cache.get(test_req)
-
- # Second access should be a hit (unless cache is too small)
- if cache.cache_size >= test_req.obj_size:
- assert second_result, f"{name} second access should be a hit"
diff --git a/libCacheSim-python/tests/utils.py b/libCacheSim-python/tests/utils.py
deleted file mode 100644
index 0977cc81..00000000
--- a/libCacheSim-python/tests/utils.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import os
-
-
-def get_reference_data(eviction_algo, cache_size_ratio):
- data_file = os.path.join( # noqa: PTH118
- (os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120
- "tests",
- "reference.csv",
- )
- with open(data_file) as f: # noqa: PTH123
- lines = f.readlines()
- key = "3LCache" if eviction_algo == "ThreeLCache" else eviction_algo
- for line in lines:
- if line.startswith(f"{key},{cache_size_ratio}"):
- return float(line.split(",")[-1])
- return None
diff --git a/scripts/install_python_dev.sh b/scripts/install_python_dev.sh
deleted file mode 100644
index d878d89b..00000000
--- a/scripts/install_python_dev.sh
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-function usage() {
- echo "Usage: $0 [options]"
- echo "Options:"
- echo " -h, --help Show this help message"
- echo " -b, --build-wheels Build the Python wheels"
- exit 1
-}
-# Parse command line arguments
-BUILD_WHEELS=0
-
-while [[ $# -gt 0 ]]; do
- case $1 in
- -h|--help)
- usage
- ;;
- -b|--build-wheels)
- BUILD_WHEELS=1
- shift
- ;;
- *)
- echo "Unknown option: $1"
- usage
- ;;
- esac
-done
-
-
-# Build the main libCacheSim C++ library first
-echo "Building main libCacheSim library..."
-rm -rf ./build
-cmake -G Ninja -B build # -DENABLE_3L_CACHE=ON
-ninja -C build
-
-# Now build and install the Python binding
-echo "Building Python binding..."
-echo "Sync python version..."
-python scripts/sync_python_version.py
-pushd libCacheSim-python
-pip install -e . -vvv
-popd
-
-# Test that the import works
-echo "Testing import..."
-python -c "import libcachesim"
-
-# Run tests
-echo "Running tests..."
-pushd libCacheSim-python
-
-python -m pip install pytest
-python -m pytest .
-popd
-
-# Build wheels if requested
-if [[ $BUILD_WHEELS -eq 1 ]]; then
- echo "--- Building Python wheels for distribution ---"
-
- # --- Environment and dependency checks ---
- echo "Checking dependencies: python3, pip, docker, cibuildwheel..."
-
- if ! command -v python3 &> /dev/null; then
- echo "Error: python3 is not installed. Please install it and run this script again."
- exit 1
- fi
-
- if ! python3 -m pip --version &> /dev/null; then
- echo "Error: pip for python3 is not available. Please install it."
- exit 1
- fi
-
- if ! command -v docker &> /dev/null; then
- echo "Error: docker is not installed. Please install it and ensure the docker daemon is running."
- exit 1
- fi
-
- # Check if user can run docker without sudo, otherwise use sudo
- SUDO_CMD=""
- if ! docker ps &> /dev/null; then
- echo "Warning: Current user cannot run docker. Trying with sudo."
- if sudo docker ps &> /dev/null; then
- SUDO_CMD="sudo"
- else
- echo "Error: Failed to run docker, even with sudo. Please check your docker installation and permissions."
- exit 1
- fi
- fi
-
- if ! python3 -m cibuildwheel --version &> /dev/null; then
- echo "cibuildwheel not found, installing..."
- python3 -m pip install cibuildwheel
- fi
-
- echo "Dependency check completed."
-
- # --- Run cibuildwheel ---
- # The project to build is specified as an argument.
- # cibuildwheel should be run from the repository root.
- # The output directory will be 'wheelhouse/' by default.
- echo "Starting the wheel build process for Linux..."
- ${SUDO_CMD} python3 -m cibuildwheel --platform linux libCacheSim-python
-
- echo "Build process completed successfully. Wheels are in the 'wheelhouse' directory."
-fi
diff --git a/scripts/sync_python_version.py b/scripts/sync_python_version.py
deleted file mode 100644
index 65e51a92..00000000
--- a/scripts/sync_python_version.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python3
-"""
-Script to synchronize version between libCacheSim main project and Python bindings.
-
-This script reads the version from version.txt and updates the pyproject.toml
-in libCacheSim-python to match.
-"""
-
-import json
-import os
-import sys
-import re
-from pathlib import Path
-
-
-def get_project_root():
- """Get the project root directory."""
- script_dir = Path(__file__).parent
- return script_dir.parent
-
-
-def read_main_version():
- """Read version from version.txt."""
- project_root = get_project_root()
- version_file = project_root / "version.txt"
-
- if not version_file.exists():
- print(f"Error: {version_file} not found", file=sys.stderr)
- sys.exit(1)
-
- with open(version_file, 'r') as f:
- version = f.read().strip()
-
- if not version:
- print("Error: version.txt is empty", file=sys.stderr)
- sys.exit(1)
-
- return version
-
-def update_pyproject_toml(version):
- """Update pyproject.toml with the new version."""
- project_root = get_project_root()
- pyproject_toml_path = project_root / "libCacheSim-python" / "pyproject.toml"
-
- if not pyproject_toml_path.exists():
- print(f"Error: {pyproject_toml_path} not found", file=sys.stderr)
- return False
-
- # Read current pyproject.toml
- with open(pyproject_toml_path, 'r') as f:
- pyproject_data = f.read()
-
- # Update the version line in pyproject.toml, make it can match any version in version.txt, like "0.3.1" or "dev"
- match = re.search(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data)
- if not match:
- print("Error: Could not find a valid version line in pyproject.toml", file=sys.stderr)
- return False
- current_version = match.group(1)
- if current_version == version:
- print(f"Python binding version already up to date: {version}")
- return False
- # replace the version line with the new version
- pyproject_data = re.sub(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", f"version = \"{version}\"", pyproject_data)
-
- # Write back to file with proper formatting
- with open(pyproject_toml_path, 'w') as f:
- f.write(pyproject_data)
-
- print(f"Updated Python version: {current_version} → {version}")
- return True
-
-
-def main():
- """Main function."""
- try:
- # Read main project version
- main_version = read_main_version()
- print(f"Main project version: {main_version}")
-
- # Update Python binding version
- updated = update_pyproject_toml(main_version)
-
- if updated:
- print("Python binding version synchronized successfully")
- else:
- print("No changes needed")
- except Exception as e:
- print(f"Error: {e}", file=sys.stderr)
- sys.exit(1)
-
-
-if __name__ == "__main__":
- main()