From 83afbc88480b5196e7be7b20d36b3a41b69987dc Mon Sep 17 00:00:00 2001 From: haochengxia Date: Sun, 3 Aug 2025 06:04:49 +0000 Subject: [PATCH 1/3] [CLEANUP]: Detach python binding code to individual repo --- .github/workflows/pypi-release.yml | 149 -- .github/workflows/python.yml | 41 - CMakeLists.txt | 3 - README.md | 56 +- libCacheSim-python/.gitignore | 59 - libCacheSim-python/CMakeLists.txt | 132 -- libCacheSim-python/MAINFEST.in | 0 libCacheSim-python/README.md | 337 ----- libCacheSim-python/examples/README.md | 280 ---- .../examples/demo_unified_interface.py | 131 -- .../examples/python_hook_cache_example.py | 178 --- .../examples/stream_request_example.py | 154 --- .../examples/zipf_trace_example.py | 243 ---- libCacheSim-python/export/CMakeLists.txt | 38 - libCacheSim-python/export/README.md | 47 - libCacheSim-python/libcachesim/__init__.py | 85 -- libCacheSim-python/libcachesim/__init__.pyi | 293 ---- libCacheSim-python/libcachesim/const.py | 1 - libCacheSim-python/libcachesim/eviction.py | 713 ---------- .../libcachesim/trace_generator.py | 215 --- libCacheSim-python/pyproject.toml | 111 -- libCacheSim-python/requirements.txt | 0 libCacheSim-python/src/pylibcachesim.cpp | 1223 ----------------- libCacheSim-python/tests/conftest.py | 32 - libCacheSim-python/tests/reference.csv | 20 - libCacheSim-python/tests/test_eviction.py | 62 - .../tests/test_process_trace.py | 220 --- .../tests/test_python_hook_cache.py | 205 --- .../tests/test_trace_generator.py | 135 -- .../tests/test_unified_interface.py | 181 --- libCacheSim-python/tests/utils.py | 16 - scripts/install_python_dev.sh | 106 -- scripts/sync_python_version.py | 93 -- 33 files changed, 23 insertions(+), 5536 deletions(-) delete mode 100644 .github/workflows/pypi-release.yml delete mode 100644 .github/workflows/python.yml delete mode 100644 libCacheSim-python/.gitignore delete mode 100644 libCacheSim-python/CMakeLists.txt delete mode 100644 libCacheSim-python/MAINFEST.in delete mode 100644 
libCacheSim-python/README.md delete mode 100644 libCacheSim-python/examples/README.md delete mode 100644 libCacheSim-python/examples/demo_unified_interface.py delete mode 100644 libCacheSim-python/examples/python_hook_cache_example.py delete mode 100644 libCacheSim-python/examples/stream_request_example.py delete mode 100644 libCacheSim-python/examples/zipf_trace_example.py delete mode 100644 libCacheSim-python/export/CMakeLists.txt delete mode 100644 libCacheSim-python/export/README.md delete mode 100644 libCacheSim-python/libcachesim/__init__.py delete mode 100644 libCacheSim-python/libcachesim/__init__.pyi delete mode 100644 libCacheSim-python/libcachesim/const.py delete mode 100644 libCacheSim-python/libcachesim/eviction.py delete mode 100644 libCacheSim-python/libcachesim/trace_generator.py delete mode 100644 libCacheSim-python/pyproject.toml delete mode 100644 libCacheSim-python/requirements.txt delete mode 100644 libCacheSim-python/src/pylibcachesim.cpp delete mode 100644 libCacheSim-python/tests/conftest.py delete mode 100644 libCacheSim-python/tests/reference.csv delete mode 100644 libCacheSim-python/tests/test_eviction.py delete mode 100644 libCacheSim-python/tests/test_process_trace.py delete mode 100644 libCacheSim-python/tests/test_python_hook_cache.py delete mode 100644 libCacheSim-python/tests/test_trace_generator.py delete mode 100644 libCacheSim-python/tests/test_unified_interface.py delete mode 100644 libCacheSim-python/tests/utils.py delete mode 100644 scripts/install_python_dev.sh delete mode 100644 scripts/sync_python_version.py diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml deleted file mode 100644 index 4733a91ab..000000000 --- a/.github/workflows/pypi-release.yml +++ /dev/null @@ -1,149 +0,0 @@ -name: PyPI Release - -on: - release: - types: [published] - workflow_dispatch: # Allow manual triggering - -permissions: - contents: read - actions: read - id-token: write - -jobs: - build-wheels: - name: Build 
wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, macos-latest] - - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Set up Docker Buildx (Linux only) - if: runner.os == 'Linux' - uses: docker/setup-buildx-action@v3 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install cibuildwheel - - - name: Sync Python version - run: python scripts/sync_python_version.py - - - - name: Verify Docker (Linux only) - if: runner.os == 'Linux' - run: | - docker --version - docker info - echo "Docker is ready for cibuildwheel" - - - name: Build wheels - run: python -m cibuildwheel libCacheSim-python --output-dir wheelhouse - - - name: Upload wheels as artifacts - uses: actions/upload-artifact@v4 - with: - name: wheels-${{ matrix.os }} - path: wheelhouse/*.whl - - build-sdist: - name: Build source distribution - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Install build dependencies - run: | - python -m pip install --upgrade pip - python -m pip install build - - - name: Sync Python version - run: python scripts/sync_python_version.py - - - name: Build source distribution - run: python -m build --sdist libCacheSim-python --outdir dist/ - - - name: Upload sdist as artifact - uses: actions/upload-artifact@v4 - with: - name: sdist - path: dist/*.tar.gz - - publish-to-pypi: - name: Publish to PyPI - needs: [build-wheels, build-sdist] - runs-on: ubuntu-latest - if: github.event_name == 'release' && github.event.action == 'published' - environment: - name: pypi - url: https://pypi.org/p/libcachesim - permissions: - id-token: write # IMPORTANT: this permission is mandatory for trusted publishing - - steps: - - name: Download 
all artifacts - uses: actions/download-artifact@v4 - with: - path: dist/ - - - name: Flatten artifacts directory - run: | - mkdir -p final-dist - find dist/ -name "*.whl" -exec cp {} final-dist/ \; - find dist/ -name "*.tar.gz" -exec cp {} final-dist/ \; - ls -la final-dist/ - - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - packages-dir: final-dist/ - skip-existing: true - - publish-to-test-pypi: - name: Publish to TestPyPI - needs: [build-wheels, build-sdist] - runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' - environment: - name: testpypi - url: https://test.pypi.org/p/libcachesim - permissions: - id-token: write # IMPORTANT: this permission is mandatory for trusted publishing - - steps: - - name: Download all artifacts - uses: actions/download-artifact@v4 - with: - path: dist/ - - - name: Flatten artifacts directory - run: | - mkdir -p final-dist - find dist/ -name "*.whl" -exec cp {} final-dist/ \; - find dist/ -name "*.tar.gz" -exec cp {} final-dist/ \; - ls -la final-dist/ - - - name: Publish to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ - packages-dir: final-dist/ - skip-existing: true diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml deleted file mode 100644 index 502cfaa79..000000000 --- a/.github/workflows/python.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Python - -on: [push, pull_request] - -permissions: - contents: read - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - - name: Prepare - run: bash scripts/install_dependency.sh - - - name: Build main libCacheSim project - run: | - cmake -G Ninja -B build - ninja -C build - - - name: Install Python dependencies - run: | - pip install --upgrade pip - pip install -r requirements.txt - pip install pytest - - - name: Build 
libCacheSim-python - run: | - cd libCacheSim-python - pip install -e . - - - name: Run tests - run: | - cd libCacheSim-python - pytest tests/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 395a31d9a..ebe0ba9a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,9 +255,6 @@ else() message(STATUS "Building without test") endif() -# Export variables for scikit-build -> build/export_vars.cmake -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) - # libCacheSim unified library compilation and installation # Create a single library that combines all modular libraries add_library(${PROJECT_NAME} STATIC diff --git a/README.md b/README.md index c176bc73f..9e5bbc13d 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ A high-performance library for building and running cache simulations --- [![build](https://github.com/1a1a11a/libCacheSim/actions/workflows/build.yml/badge.svg)](https://github.com/1a1a11a/libCacheSim/actions/workflows/build.yml) -[![Python Release](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml/badge.svg)](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml) +[![Python Release](https://github.com/cacheMon/libCacheSim-python/actions/workflows/pypi-release.yml/badge.svg)](https://github.com/cacheMon/libCacheSim-python/actions/workflows/pypi-release.yml) [![NPM Release](https://github.com/1a1a11a/libCacheSim/actions/workflows/npm-release.yml/badge.svg)](https://github.com/1a1a11a/libCacheSim/actions/workflows/npm-release.yml) [![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/1a1a11a/libCacheSim/badge)](https://scorecard.dev/viewer/?uri=github.com/1a1a11a/libCacheSim) @@ -291,12 +291,14 @@ If you are not extremely sensitive to the performance, our python binding can of pip install libcachesim ``` + + ### Simulation with python ```python import libcachesim as lcs -reader = lcs.create_zipf_requests(num_objects=1000, num_requests=10000) # synthetic trace +reader = 
lcs.SyntheticReader(num_objects=1000, num_of_req=10000) # synthetic trace # reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin") # real trace cache = lcs.FIFO(cache_size=1024*1024) obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) @@ -310,42 +312,30 @@ With python package, you can extend new algorithm to test your own eviction desi See an example below ```python -import libcachesim as lcs -from collections import deque -from contextlib import suppress - -cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") - -def init_hook(cache_size): - return deque() # Use deque for FIFO order - -def hit_hook(fifo_queue, obj_id, obj_size): - pass # FIFO doesn't reorder on hit - -def miss_hook(fifo_queue, obj_id, obj_size): - fifo_queue.append(obj_id) # Add to end of queue - -def eviction_hook(fifo_queue, obj_id, obj_size): - return fifo_queue[0] # Return first item (oldest) - -def remove_hook(fifo_queue, obj_id): - with suppress(ValueError): - fifo_queue.remove(obj_id) - -# Set the hooks and test -cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - -reader = lcs.open_trace( - trace_path="./data/cloudPhysicsIO.oracleGeneral.bin", - params=lcs.ReaderInitParam(ignore_obj_size=True) +from collections import OrderedDict +from libcachesim import PluginCache, LRU, SyntheticReader + +plugin_lru_cache = PluginCache( + cache_size=128, + cache_name="LRU", + cache_init_hook=lambda _: OrderedDict(), + cache_hit_hook=lambda data, req: data.move_to_end(req.obj_id, last=True) if req.obj_id in data else None, + cache_miss_hook=lambda data, req: data.__setitem__(req.obj_id, req.obj_size), + cache_eviction_hook=lambda data, _: data.popitem(last=False)[0], + cache_remove_hook=lambda data, obj_id: data.pop(obj_id, None), + cache_free_hook=lambda data: data.clear(), ) -obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) -print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}") + +reader = 
SyntheticReader(num_objects=1000, num_of_req=10000, obj_size=1) +req_miss_ratio, byte_miss_ratio = plugin_lru_cache.process_trace(reader) +ref_req_miss_ratio, ref_byte_miss_ratio = LRU(128).process_trace(reader) +print(f"plugin req miss ratio {req_miss_ratio}, ref req miss ratio {ref_req_miss_ratio}") +print(f"plugin byte miss ratio {byte_miss_ratio}, ref byte miss ratio {ref_byte_miss_ratio}") ``` -See more information in [README.md](./libCacheSim-python/README.md) of the Python binding. +See more information in [README.md](https://github.com/cacheMon/libCacheSim-python) of the Python binding. --- ## Open source cache traces diff --git a/libCacheSim-python/.gitignore b/libCacheSim-python/.gitignore deleted file mode 100644 index 34712f29d..000000000 --- a/libCacheSim-python/.gitignore +++ /dev/null @@ -1,59 +0,0 @@ -# Automatically generated by `hgimportsvn` -.svn -.hgsvn - -# Ignore local virtualenvs -lib/ -bin/ -include/ -.Python/ - -# These lines are suggested according to the svn:ignore property -# Feel free to enable them by uncommenting them -*.pyc -*.pyo -*.swp -*.class -*.orig -*~ -.hypothesis/ - -# autogenerated -src/_pytest/_version.py -# setuptools -.eggs/ - -doc/*/_build -doc/*/.doctrees -build/ -dist/ -*.egg-info -htmlcov/ -issue/ -env/ -.env/ -.venv/ -/pythonenv*/ -3rdparty/ -.tox -.cache -.pytest_cache -.mypy_cache -.coverage -.coverage.* -coverage.xml -.ropeproject -.idea -.hypothesis -.pydevproject -.project -.settings -.vscode -__pycache__/ -.python-version - -# generated by pip -pip-wheel-metadata/ - -# pytest debug logs generated via --debug -pytestdebug.log \ No newline at end of file diff --git a/libCacheSim-python/CMakeLists.txt b/libCacheSim-python/CMakeLists.txt deleted file mode 100644 index aebee06c3..000000000 --- a/libCacheSim-python/CMakeLists.txt +++ /dev/null @@ -1,132 +0,0 @@ -cmake_minimum_required(VERSION 3.15...3.27) - -# Include exported variables from cache -if(DEFINED LIBCB_BUILD_DIR) - set(PARENT_BUILD_DIR 
"${LIBCB_BUILD_DIR}") - message(STATUS "Using provided LIBCB_BUILD_DIR: ${LIBCB_BUILD_DIR}") -else() - set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") -endif() -set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") - -if(EXISTS "${EXPORT_FILE}") - include("${EXPORT_FILE}") - message(STATUS "Loaded variables from export_vars.cmake") -else() - message(FATAL_ERROR "export_vars.cmake not found at ${EXPORT_FILE}. Please build the main project first (e.g. cd .. && cmake -G Ninja -B build)") -endif() - -# Force enable -fPIC -set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") - -project(libCacheSim-python VERSION "${LIBCACHESIM_VERSION}") - -if(LOG_LEVEL_LOWER STREQUAL "default") - if(CMAKE_BUILD_TYPE_LOWER MATCHES "debug") - add_compile_definitions(LOGLEVEL=6) - else() - add_compile_definitions(LOGLEVEL=7) - endif() -elseif(LOG_LEVEL_LOWER STREQUAL "verbose") - add_compile_definitions(LOGLEVEL=5) -elseif(LOG_LEVEL_LOWER STREQUAL "debug") - add_compile_definitions(LOGLEVEL=6) -elseif(LOG_LEVEL_LOWER STREQUAL "info") - add_compile_definitions(LOGLEVEL=7) -elseif(LOG_LEVEL_LOWER STREQUAL "warn") - add_compile_definitions(LOGLEVEL=8) -elseif(LOG_LEVEL_LOWER STREQUAL "error") - add_compile_definitions(LOGLEVEL=9) -else() - add_compile_definitions(LOGLEVEL=7) -endif() - -# Find python and pybind11 -find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) -find_package(pybind11 CONFIG REQUIRED) - -# Include directories for dependencies -include_directories(${GLib_INCLUDE_DIRS}) -include_directories(${GLib_CONFIG_INCLUDE_DIR}) -include_directories(${XGBOOST_INCLUDE_DIR}) -include_directories(${LIGHTGBM_PATH}) -include_directories(${ZSTD_INCLUDE_DIR}) -include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin) - -# Find the main libCacheSim library -set(MAIN_PROJECT_BUILD_DIR "${PARENT_BUILD_DIR}") -set(MAIN_PROJECT_LIB_PATH 
"${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a") - -if(EXISTS "${MAIN_PROJECT_LIB_PATH}") - message(STATUS "Found pre-built libCacheSim library at ${MAIN_PROJECT_LIB_PATH}") - - # Import the main library as an imported target - add_library(libCacheSim_main STATIC IMPORTED) - set_target_properties(libCacheSim_main PROPERTIES - IMPORTED_LOCATION "${MAIN_PROJECT_LIB_PATH}" - INTERFACE_INCLUDE_DIRECTORIES "${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/utils/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim" - ) - link_directories(${GLib_LIBRARY_DIRS}) - link_directories(${ZSTD_LIBRARY_DIRS}) - set(LIBCACHESIM_TARGET libCacheSim_main) - -else() - message(FATAL_ERROR "Pre-built libCacheSim library not found. Please build the main project first: cd .. && cmake -G Ninja -B build && ninja -C build") -endif() - -python_add_library(_libcachesim MODULE - src/pylibcachesim.cpp - ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/cli_reader_utils.c - WITH_SOABI -) - -set_target_properties(_libcachesim PROPERTIES - POSITION_INDEPENDENT_CODE ON - INSTALL_RPATH_USE_LINK_PATH TRUE - BUILD_WITH_INSTALL_RPATH TRUE - INSTALL_RPATH "$ORIGIN" -) - -target_compile_definitions(_libcachesim PRIVATE VERSION_INFO=${PROJECT_VERSION}) - -target_link_libraries(_libcachesim PRIVATE - ${LIBCACHESIM_TARGET} - pybind11::headers - pybind11::module - ${GLib_LIBRARIES} - ${ZSTD_LIBRARIES} -) - -# Add platform-specific link options and libraries -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - # GNU ld option, only available on Linux - target_link_options(_libcachesim PRIVATE -Wl,--no-as-needed) - target_link_libraries(_libcachesim PRIVATE dl) -elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - # macOS doesn't need --no-as-needed - # dl functions are part of the system library on macOS - # No need to explicitly link dl - - # Find argp library on macOS - find_library(ARGP_LIBRARY argp PATHS /opt/homebrew/lib /usr/local/lib) - if(ARGP_LIBRARY) - target_link_libraries(_libcachesim PRIVATE 
${ARGP_LIBRARY}) - endif() - - # Find and link other dependencies that might be needed - find_library(INTL_LIBRARY intl PATHS /opt/homebrew/lib /usr/local/lib) - if(INTL_LIBRARY) - target_link_libraries(_libcachesim PRIVATE ${INTL_LIBRARY}) - endif() -else() - # Other platforms - try to link dl if available - find_library(DL_LIBRARY dl) - if(DL_LIBRARY) - target_link_libraries(_libcachesim PRIVATE ${DL_LIBRARY}) - endif() -endif() - -# install to wheel directory -install(TARGETS _libcachesim LIBRARY DESTINATION libcachesim) diff --git a/libCacheSim-python/MAINFEST.in b/libCacheSim-python/MAINFEST.in deleted file mode 100644 index e69de29bb..000000000 diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md deleted file mode 100644 index 23424c3d5..000000000 --- a/libCacheSim-python/README.md +++ /dev/null @@ -1,337 +0,0 @@ -# libCacheSim Python Binding - -[![Python Release](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml/badge.svg)](https://github.com/1a1a11a/libCacheSim/actions/workflows/pypi-release.yml) -[![Python Versions](https://img.shields.io/pypi/pyversions/libcachesim.svg?logo=python&logoColor=white)](https://pypi.org/project/libcachesim) -[![PyPI Version](https://img.shields.io/pypi/v/libcachesim.svg?)](https://pypi.org/project/libcachesim) -[![PyPI - Downloads](https://img.shields.io/pypi/dd/libcachesim)](https://pypistats.org/packages/libcachesim) - -Python bindings for libCacheSim, a high-performance cache simulator and analysis library. - -## Installation - -Binary installers for the latest released version are available at the [Python Package Index (PyPI)](https://pypi.org/project/libcachesim). - -```bash -pip install libcachesim -``` - -### Installation from sources - -If there are no wheels suitable for your environment, consider building from source. 
- -```bash -git clone https://github.com/1a1a11a/libCacheSim.git -cd libCacheSim - -# Build the main libCacheSim library first -cmake -G Ninja -B build -ninja -C build - -# Install Python binding -cd libCacheSim-python -pip install -e . -``` - -### Testing -```bash -# Run all tests -python -m pytest . - -# Test import -python -c "import libcachesim; print('Success!')" -``` - -## Quick Start - -### Basic Usage - -```python -import libcachesim as lcs - -# Create a cache -cache = lcs.LRU(cache_size=1024*1024) # 1MB cache - -# Process requests -req = lcs.Request() -req.obj_id = 1 -req.obj_size = 100 - -print(cache.get(req)) # False (first access) -print(cache.get(req)) # True (second access) -``` - -### Trace Processing - -To simulate with traces, we need to read the request of traces correctly. `open_trace` is an unified interface for trace reading, which accepet three parameters: - -- `trace_path`: trace path, can be relative or absolutive path. -- `type` (optional): if not given, we will automatically infer the type of trace according to the suffix of the trace file. -- `params` (optional): if not given, default params are applied. 
- -```python -import libcachesim as lcs - -# Open trace and process efficiently -reader = lcs.open_trace( - trace_path = "./data/cloudPhysicsIO.oracleGeneral.bin", - type = lcs.TraceType.ORACLE_GENERAL_TRACE, - params = lcs.ReaderInitParam(ignore_obj_size=True) -) -cache = lcs.S3FIFO(cache_size=1024*1024) - -# Process entire trace efficiently (C++ backend) -obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) -print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") - -cache = lcs.S3FIFO(cache_size=1024*1024) -# Process with limits and time ranges -obj_miss_ratio, byte_miss_ratio = cache.process_trace( - reader, - start_req=0, - max_req=1000 -) -print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") -``` - -## Custom Cache Policies - -Implement custom cache replacement algorithms using pure Python functions - **no C/C++ compilation required**. - -### Python Hook Cache Overview - -The `PythonHookCachePolicy` allows you to define custom caching behavior through Python callback functions. 
This is perfect for: -- Prototyping new cache algorithms -- Educational purposes and learning -- Research and experimentation -- Custom business logic implementation - -### Hook Functions - -You need to implement these callback functions: - -- **`init_hook(cache_size: int) -> Any`**: Initialize your data structure -- **`hit_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits -- **`miss_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses -- **`eviction_hook(data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict -- **`remove_hook(data: Any, obj_id: int) -> None`**: Clean up when object removed -- **`free_hook(data: Any) -> None`**: [Optional] Final cleanup - -### Example: Custom LRU Implementation - -```python -import libcachesim as lcs -from collections import OrderedDict - -# Create a Python hook-based cache -cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU") - -# Define LRU policy hooks -def init_hook(cache_size): - return OrderedDict() # Track access order - -def hit_hook(lru_dict, obj_id, obj_size): - lru_dict.move_to_end(obj_id) # Move to most recent - -def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = True # Add to end - -def eviction_hook(lru_dict, obj_id, obj_size): - return next(iter(lru_dict)) # Return least recent - -def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - -# Set the hooks -cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - -# Use it like any other cache -req = lcs.Request() -req.obj_id = 1 -req.obj_size = 100 -hit = cache.get(req) -print(f"Cache hit: {hit}") # Should be False (miss) -``` - -### Example: Custom FIFO Implementation - -```python -import libcachesim as lcs -from collections import deque -from contextlib import suppress - -cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") - -def init_hook(cache_size): - return deque() # Use deque for FIFO order - -def 
hit_hook(fifo_queue, obj_id, obj_size): - pass # FIFO doesn't reorder on hit - -def miss_hook(fifo_queue, obj_id, obj_size): - fifo_queue.append(obj_id) # Add to end of queue - -def eviction_hook(fifo_queue, obj_id, obj_size): - return fifo_queue[0] # Return first item (oldest) - -def remove_hook(fifo_queue, obj_id): - with suppress(ValueError): - fifo_queue.remove(obj_id) - -# Set the hooks and test -cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - -req = lcs.Request(obj_id=1, obj_size=100) -hit = cache.get(req) -print(f"Cache hit: {hit}") # Should be False (miss) -``` - -## Available Algorithms - -### Built-in Cache Algorithms - -#### Basic Algorithms -- **FIFO**: First-In-First-Out -- **LRU**: Least Recently Used -- **LFU**: Least Frequently Used -- **LFUDA**: LFU with Dynamic Aging -- **Clock**: Clock/Second-chance algorithm - -#### Advanced Algorithms -- **QDLP**: Queue Demotion with Lazy Promotion -- **S3FIFO**: Simple, Fast, Fair FIFO (recommended for most workloads) -- **Sieve**: High-performance eviction algorithm -- **ARC**: Adaptive Replacement Cache -- **TwoQ**: Two-Queue algorithm -- **SLRU**: Segmented LRU -- **TinyLFU**: TinyLFU with window -- **WTinyLFU**: Windowed TinyLFU - -#### Research/ML Algorithms -- **LeCaR**: Learning Cache Replacement (adaptive) -- **Cacheus**: Cache replacement policy -- **LRB**: Learning-based cache (if enabled) -- **GLCache**: Machine learning-based cache -- **ThreeLCache**: Three-level cache hierarchy (if enabled) - -#### Optimal Algorithms (for analysis) -- **Belady**: Optimal offline algorithm -- **BeladySize**: Size-aware optimal algorithm - -```python -import libcachesim as lcs - -# All algorithms use the same unified interface -cache_size = 1024 * 1024 # 1MB - -lru_cache = lcs.LRU(cache_size) -s3fifo_cache = lcs.S3FIFO(cache_size) -sieve_cache = lcs.Sieve(cache_size) -arc_cache = lcs.ARC(cache_size) - -# All caches work identically -req = lcs.Request() -req.obj_id = 1 -req.obj_size = 
100 -hit = lru_cache.get(req) -print(hit) -``` - -## Examples and Testing - -### Algorithm Comparison -```python -import libcachesim as lcs - -def compare_algorithms(trace_path): - reader = lcs.open_trace(trace_path, lcs.TraceType.VSCSI_TRACE) - algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC'] - for algo_name in algorithms: - cache = getattr(lcs, algo_name)(cache_size=1024*1024) - obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) - print(f"{algo_name}\t\tObj: {obj_miss_ratio:.4f}, Byte: {byte_miss_ratio:.4f}") - -compare_algorithms("./data/cloudPhysicsIO.vscsi") -``` - -### Performance Benchmarking -```python -import time - -def benchmark_cache(cache, num_requests=100000): - """Benchmark cache performance""" - start_time = time.time() - for i in range(num_requests): - req = lcs.Request() - req.obj_id = i % 1000 # Working set of 1000 objects - req.obj_size = 100 - cache.get(req) - end_time = time.time() - throughput = num_requests / (end_time - start_time) - print(f"Processed {num_requests} requests in {end_time - start_time:.2f}s") - print(f"Throughput: {throughput:.0f} requests/sec") - -# Compare performance -lru_cache = lcs.LRU(cache_size=1024*1024) -s3fifo_cache = lcs.S3FIFO(cache_size=1024*1024) - -print("LRU Performance:") -benchmark_cache(lru_cache) - -print("\nS3FIFO Performance:") -benchmark_cache(s3fifo_cache) -``` - -## Advanced Usage - -### Multi-Format Trace Processing - -```python -import libcachesim as lcs - -# Supported trace types -trace_types = { - "oracle": lcs.TraceType.ORACLE_GENERAL_TRACE, - "csv": lcs.TraceType.CSV_TRACE, - "vscsi": lcs.TraceType.VSCSI_TRACE, - "txt": lcs.TraceType.PLAIN_TXT_TRACE -} - -# Open different trace formats -oracle_reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin", trace_types["oracle"]) -csv_reader = lcs.open_trace("./data/cloudPhysicsIO.txt", trace_types["txt"]) - -# Process traces with different caches -caches = [ - lcs.LRU(cache_size=1024*1024), - lcs.S3FIFO(cache_size=1024*1024), - 
lcs.Sieve(cache_size=1024*1024) -] - -for i, cache in enumerate(caches): - miss_ratio_oracle = cache.process_trace(oracle_reader)[0] - miss_ratio_csv = cache.process_trace(csv_reader)[0] - print(f"Cache {i} miss ratio: {miss_ratio_oracle:.4f}, {miss_ratio_csv:.4f}") -``` - -## Troubleshooting - -### Common Issues - -**Import Error**: Make sure libCacheSim C++ library is built first: -```bash -cmake -G Ninja -B build && ninja -C build -``` - -**Performance Issues**: Use `process_trace()` for large workloads instead of individual `get()` calls for better performance. - -**Memory Usage**: Monitor cache statistics (`cache.occupied_byte`) and ensure proper cache size limits for your system. - -**Custom Cache Issues**: Validate your custom implementation against built-in algorithms using the test functions above. - -**Install with uv**: Since automatically building with `uv` will fail due to incomplete source code, please force install the binary file via `uv pip install libcachesim --only-binary=:all:`. - -### Getting Help - -- Check the [main documentation](../doc/) for detailed guides -- Open issues on [GitHub](https://github.com/1a1a11a/libCacheSim/issues) -- Review [examples](/example) in the main repository diff --git a/libCacheSim-python/examples/README.md b/libCacheSim-python/examples/README.md deleted file mode 100644 index 3b63b7ffb..000000000 --- a/libCacheSim-python/examples/README.md +++ /dev/null @@ -1,280 +0,0 @@ -# libCacheSim Python Examples - -This directory contains examples demonstrating how to use libCacheSim Python bindings for cache simulation and trace generation. - -## Overview - -libCacheSim Python bindings provide a powerful interface for: - -- Cache simulation with various eviction policies (LRU, FIFO, ARC, etc.) 
-- Synthetic trace generation (Zipf and Uniform distributions) -- Real trace analysis and processing -- Custom cache policy implementation with Python hooks -- Unified interface supporting all cache algorithms - -## Example Files - -### 1. Stream Request Generation (`stream_request_example.py`) - -Demonstrates how to generate synthetic request traces and use them for cache simulation: - -```python -import libcachesim as lcs - -# Create Zipf-distributed requests -zipf_generator = lcs.create_zipf_requests( - num_objects=1000, # 1000 unique objects - num_requests=10000, # 10000 requests - alpha=1.0, # Zipf skewness - obj_size=4000, # Object size in bytes - seed=42 # For reproducibility -) - -# Test with LRU cache -cache = lcs.LRU(cache_size=50*1024*1024) # 50MB cache for better hit ratio -miss_count = sum(1 for req in zipf_generator if not cache.get(req)) -print(f"Final miss ratio: {miss_count / 10000:.3f}") -``` - -**Features**: -- Memory efficient: No temporary files created -- Fast: Direct Request object generation -- Reproducible: Support for random seeds -- Flexible: Easy parameter adjustment - -### 2. Unified Interface Demo (`demo_unified_interface.py`) - -Shows the unified interface for all cache policies, including built-in and custom Python hook caches: - -```python -import libcachesim as lcs - -cache_size = 1024 * 1024 # 1MB - -# Create different cache policies -caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - "ARC": lcs.ARC(cache_size), -} - -# Create Python hook cache -python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") -# Set hook functions... 
-caches["Custom Python LRU"] = python_cache - -# Unified interface testing -test_req = lcs.Request() -test_req.obj_id = 1 -test_req.obj_size = 1024 - -for name, cache in caches.items(): - result = cache.get(test_req) - print(f"{name}: {'HIT' if result else 'MISS'}") -``` - -**Benefits of Unified Interface**: -- Same API for all cache policies -- Easy to switch between different algorithms -- Efficient C++ backend trace processing -- Consistent properties and statistics - -### 3. Python Hook Cache (`python_hook_cache_example.py`) - -Demonstrates how to create custom cache policies using Python hooks: - -```python -import libcachesim as lcs -from collections import OrderedDict - -class LRUPolicy: - def __init__(self, cache_size): - self.access_order = OrderedDict() - - def on_hit(self, obj_id, obj_size): - self.access_order.move_to_end(obj_id) - - def on_miss(self, obj_id, obj_size): - self.access_order[obj_id] = True - - def evict(self, obj_id, obj_size): - return next(iter(self.access_order)) - -def create_lru_cache(cache_size): - cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU") - - def init_hook(cache_size): - return LRUPolicy(cache_size) - - # Set other hooks... - cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - return cache -``` - -**Custom Policy Features**: -- Pure Python cache logic implementation -- Support for LRU, FIFO and other policies -- Flexible hook system -- Same interface as built-in policies - -### 4. 
Zipf Trace Examples (`zipf_trace_example.py`) - -Shows synthetic trace generation methods and algorithm comparison: - -```python -import libcachesim as lcs - -# Method 1: Create Zipf-distributed request generator -zipf_generator = lcs.create_zipf_requests( - num_objects=1000, - num_requests=10000, - alpha=1.0, - obj_size=1024, - seed=42 -) - -# Method 2: Create uniform-distributed request generator -uniform_generator = lcs.create_uniform_requests( - num_objects=1000, - num_requests=10000, - obj_size=1024, - seed=42 -) - -# Compare different Zipf parameters -alphas = [0.5, 1.0, 1.5, 2.0] -for alpha in alphas: - generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42) - cache = lcs.LRU(1024*1024) - hit_count = sum(1 for req in generator if cache.get(req)) - hit_ratio = hit_count / 10000 - print(f"α={alpha}: Hit ratio={hit_ratio:.4f}") -``` - -**Synthetic Trace Features**: -- Higher α values create more skewed access patterns -- Memory efficient: No temporary files created -- Request generators for flexible processing -- Suitable for simulating real workloads - -## Key Features - -### Trace Generation -- `create_zipf_requests()`: Create Zipf-distributed request generator -- `create_uniform_requests()`: Create uniform-distributed request generator - -### Cache Algorithms -- **Classic algorithms**: `LRU()`, `FIFO()`, `ARC()`, `Clock()` -- **Modern algorithms**: `S3FIFO()`, `Sieve()`, `TinyLFU()` -- **Custom policies**: `PythonHookCachePolicy()` - -### Trace Processing -- `open_trace()`: Open real trace files -- `process_trace()`: High-performance trace processing - -## Basic Usage Examples - -### 1. 
Compare Cache Algorithms - -```python -import libcachesim as lcs - -# Test different algorithms -algorithms = ['LRU', 'FIFO', 'ARC', 'S3FIFO'] -cache_size = 1024*1024 - -for algo_name in algorithms: - # Create fresh workload for each algorithm - generator = lcs.create_zipf_requests(1000, 10000, alpha=1.0, seed=42) - cache = getattr(lcs, algo_name)(cache_size) - hit_count = sum(1 for req in generator if cache.get(req)) - print(f"{algo_name}: {hit_count/10000:.3f}") -``` - -### 2. Parameter Sensitivity Analysis - -```python -import libcachesim as lcs - -# Test different Zipf parameters -for alpha in [0.5, 1.0, 1.5, 2.0]: - generator = lcs.create_zipf_requests(1000, 10000, alpha=alpha, seed=42) - cache = lcs.LRU(cache_size=512*1024) - - hit_count = sum(1 for req in generator if cache.get(req)) - print(f"α={alpha}: Hit ratio={hit_count/10000:.3f}") -``` - -## Parameters - -### Trace Generation Parameters -- `num_objects`: Number of unique objects -- `num_requests`: Number of requests to generate -- `alpha`: Zipf skewness (α=1.0 for classic Zipf) -- `obj_size`: Object size in bytes (default: 4000) -- `seed`: Random seed for reproducibility - -### Cache Parameters -- `cache_size`: Cache capacity in bytes -- Algorithm-specific parameters (e.g.,`fifo_size_ratio` for S3FIFO) - -## Running Examples - -```bash -# Navigate to examples directory -cd libCacheSim-python/examples - -# Run stream-based trace generation -python stream_request_example.py - -# Run unified interface demo -python demo_unified_interface.py - -# Run Python hook cache example -python python_hook_cache_example.py - -# Run Zipf trace examples -python zipf_trace_example.py - -# Run all tests -python -m pytest ../tests/ -v -``` - -## Performance Tips - -1. **Use appropriate cache and object sizes**: - ```python - # Good: cache can hold multiple objects - cache = lcs.LRU(cache_size=1024*1024) # 1MB - generator = lcs.create_zipf_requests(1000, 10000, obj_size=1024) # 1KB objects - ``` - -2. 
**Use seeds for reproducible experiments**: - ```python - generator = lcs.create_zipf_requests(1000, 10000, seed=42) - ``` - -3. **Process large traces with C++ backend**: - ```python - # Fast: C++ processing - obj_miss_ratio, byte_miss_ratio = lcs.process_trace(cache, reader) - - # Slow: Python loop - for req in reader: - cache.get(req) - ``` - -4. **Understand Zipf parameter effects**: - - α=0.5: Slightly skewed, close to uniform distribution - - α=1.0: Classic Zipf distribution - - α=2.0: Highly skewed, few objects get most accesses - -## Testing - -Run comprehensive tests: - -```bash -python -m pytest ../tests/test_trace_generator.py -v -python -m pytest ../tests/test_eviction.py -v -python -m pytest ../tests/test_process_trace.py -v -``` diff --git a/libCacheSim-python/examples/demo_unified_interface.py b/libCacheSim-python/examples/demo_unified_interface.py deleted file mode 100644 index e435e5826..000000000 --- a/libCacheSim-python/examples/demo_unified_interface.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -""" -Demo script showing the unified interface for all cache policies. -This demonstrates how to use both native and Python hook-based caches -with the same API for seamless algorithm comparison and switching. 
-""" - -import sys -import os - -# Add parent directory for development testing -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -try: - import libcachesim as lcs -except ImportError as e: - print(f"Error importing libcachesim: {e}") - print("Make sure the Python binding is built and installed") - sys.exit(1) - -from collections import OrderedDict - - -def create_trace_reader(): - """Helper function to create a trace reader.""" - data_file = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin" - ) - if not os.path.exists(data_file): - print(f"Warning: Trace file not found at {data_file}") - return None - return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) - - -def create_demo_lru_hooks(): - """Create demo LRU hooks for Python-based cache policy.""" - - def init_hook(cache_size): - print(f" Initializing custom LRU with {cache_size} bytes") - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - if obj_id in lru_dict: - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = obj_size - - def eviction_hook(lru_dict, obj_id, obj_size): - if lru_dict: - return next(iter(lru_dict)) - return obj_id - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook - - -def demo_unified_interface(): - """Demonstrate the unified interface across different cache policies.""" - print("libCacheSim Python Binding - Unified Interface Demo") - print("=" * 60) - - cache_size = 1024 * 1024 # 1MB - - # Create different cache policies - caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - "ARC": lcs.ARC(cache_size), - } - - # Create Python hook-based LRU - python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks() - 
python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - caches["Custom Python LRU"] = python_cache - - print(f"Testing {len(caches)} different cache policies with unified interface:") - - # Demo 1: Single request interface - print("1. Single Request Interface:") - print(" All caches use: cache.get(request)") - - test_req = lcs.Request() - test_req.obj_id = 1 - test_req.obj_size = 1024 - - for name, cache in caches.items(): - result = cache.get(test_req) - print(f" {name:20s}: {'HIT' if result else 'MISS'}") - - # Demo 2: Unified properties interface - print("\n2. Unified Properties Interface:") - print(" All caches provide: cache_size, n_obj, occupied_byte, n_req") - - for name, cache in caches.items(): - print( - f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, " - f"bytes={cache.occupied_byte}, reqs={cache.n_req}" - ) - - # Demo 3: Efficient trace processing - print("\n3. Efficient Trace Processing Interface:") - print(" All caches use: cache.process_trace(reader, max_req=N)") - - max_requests = 1000 - - for name, cache in caches.items(): - # Create fresh reader for each cache - reader = create_trace_reader() - if not reader: - print(f" {name:20s}: trace file not available") - continue - - obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=max_requests) - print(f" {name:20s}: obj_miss_ratio={obj_miss_ratio:.4f}, byte_miss_ratio={byte_miss_ratio:.4f}") - - print("\nKey Benefits of Unified Interface:") - print(" • Same API for all cache policies (built-in + custom)") - print(" • Easy to switch between different algorithms") - print(" • Efficient trace processing in C++ (no Python overhead)") - print(" • Consistent properties and statistics") - print(" • Type-safe and well-documented") - - print("\nDemo completed! 
All cache policies work with the same interface.") - - -if __name__ == "__main__": - demo_unified_interface() diff --git a/libCacheSim-python/examples/python_hook_cache_example.py b/libCacheSim-python/examples/python_hook_cache_example.py deleted file mode 100644 index 06d06c457..000000000 --- a/libCacheSim-python/examples/python_hook_cache_example.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python3 -""" -Example demonstrating how to create custom cache policies using Python hooks. - -This example shows how to implement LRU and FIFO cache policies using the -PythonHookCachePolicy class, which allows users to define cache behavior using -pure Python functions instead of C/C++ plugins. -""" - -import libcachesim as lcs -from collections import OrderedDict, deque -from contextlib import suppress - - -class LRUPolicy: - """LRU (Least Recently Used) cache policy implementation.""" - - def __init__(self, cache_size): - self.cache_size = cache_size - self.access_order = OrderedDict() # obj_id -> True (for ordering) - - def on_hit(self, obj_id, obj_size): - """Move accessed object to end (most recent).""" - if obj_id in self.access_order: - # Move to end (most recent) - self.access_order.move_to_end(obj_id) - - def on_miss(self, obj_id, obj_size): - """Add new object to end (most recent).""" - self.access_order[obj_id] = True - - def evict(self, obj_id, obj_size): - """Return the least recently used object ID.""" - if self.access_order: - # Return first item (least recent) - victim_id = next(iter(self.access_order)) - return victim_id - raise RuntimeError("No objects to evict") - - def on_remove(self, obj_id): - """Remove object from tracking.""" - self.access_order.pop(obj_id, None) - - -class FIFOPolicy: - """FIFO (First In First Out) cache policy implementation.""" - - def __init__(self, cache_size): - self.cache_size = cache_size - self.insertion_order = deque() # obj_id queue - - def on_hit(self, obj_id, obj_size): - """FIFO doesn't change order on hits.""" - 
pass - - def on_miss(self, obj_id, obj_size): - """Add new object to end of queue.""" - self.insertion_order.append(obj_id) - - def evict(self, obj_id, obj_size): - """Return the first inserted object ID.""" - if self.insertion_order: - victim_id = self.insertion_order.popleft() - return victim_id - raise RuntimeError("No objects to evict") - - def on_remove(self, obj_id): - """Remove object from tracking.""" - with suppress(ValueError): - self.insertion_order.remove(obj_id) - - -def create_lru_cache(cache_size): - """Create an LRU cache using Python hooks.""" - cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU") - - def init_hook(cache_size): - return LRUPolicy(cache_size) - - def hit_hook(policy, obj_id, obj_size): - policy.on_hit(obj_id, obj_size) - - def miss_hook(policy, obj_id, obj_size): - policy.on_miss(obj_id, obj_size) - - def eviction_hook(policy, obj_id, obj_size): - return policy.evict(obj_id, obj_size) - - def remove_hook(policy, obj_id): - policy.on_remove(obj_id) - - def free_hook(policy): - # Python garbage collection handles cleanup - pass - - cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) - return cache - - -def create_fifo_cache(cache_size): - """Create a FIFO cache using Python hooks.""" - cache = lcs.PythonHookCachePolicy(cache_size, "PythonFIFO") - - def init_hook(cache_size): - return FIFOPolicy(cache_size) - - def hit_hook(policy, obj_id, obj_size): - policy.on_hit(obj_id, obj_size) - - def miss_hook(policy, obj_id, obj_size): - policy.on_miss(obj_id, obj_size) - - def eviction_hook(policy, obj_id, obj_size): - return policy.evict(obj_id, obj_size) - - def remove_hook(policy, obj_id): - policy.on_remove(obj_id) - - cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - return cache - - -def test_cache_policy(cache, name): - """Test a cache policy with sample requests.""" - print(f"\n=== Testing {name} Cache ===") - - # Test requests: obj_id, obj_size - test_requests = [ - 
(1, 100), - (2, 100), - (3, 100), - (4, 100), - (5, 100), # Fill cache - (1, 100), # Hit - (6, 100), # Miss, should evict something - (2, 100), # Hit or miss depending on policy - (7, 100), # Miss, should evict something - ] - - hits = 0 - misses = 0 - - for obj_id, obj_size in test_requests: - req = lcs.Request() - req.obj_id = obj_id - req.obj_size = obj_size - - hit = cache.get(req) - if hit: - hits += 1 - print(f"Request {obj_id}: HIT") - else: - misses += 1 - print(f"Request {obj_id}: MISS") - - print(f"Total: {hits} hits, {misses} misses") - print(f"Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes occupied") - - -def main(): - """Main example function.""" - cache_size = 500 # Bytes (can hold 5 objects of size 100 each) - - # Test LRU cache - lru_cache = create_lru_cache(cache_size) - test_cache_policy(lru_cache, "LRU") - - # Test FIFO cache - fifo_cache = create_fifo_cache(cache_size) - test_cache_policy(fifo_cache, "FIFO") - - print("\n=== Comparison ===") - print("LRU keeps recently accessed items, evicting least recently used") - print("FIFO keeps items in insertion order, evicting oldest inserted") - - -if __name__ == "__main__": - main() diff --git a/libCacheSim-python/examples/stream_request_example.py b/libCacheSim-python/examples/stream_request_example.py deleted file mode 100644 index eed213b76..000000000 --- a/libCacheSim-python/examples/stream_request_example.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 -""" -Example: Using stream request generators for cache simulation. - -This example demonstrates how to use the stream request generators -to create synthetic traces and run cache simulations without creating -temporary files. -""" - -import libcachesim as lcs - - -def main(): - """Demonstrate stream request generators.""" - print("libCacheSim Stream Request Generation Example") - print("=" * 50) - - # Example 1: Basic Zipf generation with appropriate cache size - print("\n1. 
Basic Zipf Request Generation") - print("-" * 30) - - # Use reasonable cache and object sizes - cache_size = 50 * 1024 * 1024 # 50MB cache - obj_size = 1024 # 1KB objects - num_objects = 1000 - num_requests = 10000 - - # Create a cache - cache = lcs.LRU(cache_size=cache_size) - - # Create a Zipf-distributed request generator - zipf_generator = lcs.create_zipf_requests( - num_objects=num_objects, - num_requests=num_requests, - alpha=1.0, # Zipf skewness - obj_size=obj_size, # Object size in bytes - seed=42, # For reproducibility - ) - - print(f"Cache size: {cache_size // 1024 // 1024}MB") - print(f"Object size: {obj_size}B") - print(f"Generated {num_requests} Zipf requests for {num_objects} objects") - - # Process the requests directly - hit_count = 0 - for i, req in enumerate(zipf_generator): - if cache.get(req): - hit_count += 1 - - # Print progress every 2000 requests - if (i + 1) % 2000 == 0: - current_hit_ratio = hit_count / (i + 1) - print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}") - - final_hit_ratio = hit_count / num_requests - print(f"Final hit ratio: {final_hit_ratio:.3f}") - - # Example 2: Uniform distribution comparison - print("\n2. 
Uniform Request Generation") - print("-" * 30) - - # Create a uniform-distributed request generator - uniform_generator = lcs.create_uniform_requests( - num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42 - ) - - print(f"Generated {num_requests} uniform requests for {num_objects} objects") - - # Reset cache and process uniform requests - cache = lcs.LRU(cache_size=cache_size) - hit_count = 0 - - for i, req in enumerate(uniform_generator): - if cache.get(req): - hit_count += 1 - - if (i + 1) % 2000 == 0: - current_hit_ratio = hit_count / (i + 1) - print(f"Processed {i + 1} requests, hit ratio: {current_hit_ratio:.3f}") - - final_hit_ratio = hit_count / num_requests - print(f"Final hit ratio: {final_hit_ratio:.3f}") - - # Example 3: Compare different Zipf alpha values - print("\n3. Zipf Alpha Parameter Comparison") - print("-" * 30) - - alphas = [0.5, 1.0, 1.5, 2.0] - print(f"{'Alpha':<8} {'Hit Ratio':<12} {'Description'}") - print("-" * 40) - - for alpha in alphas: - generator = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 - ) - - cache = lcs.LRU(cache_size=cache_size) - hit_count = sum(1 for req in generator if cache.get(req)) - hit_ratio = hit_count / num_requests - - # Describe the skewness - if alpha < 0.8: - description = "Low skew (nearly uniform)" - elif alpha < 1.2: - description = "Classic Zipf" - elif alpha < 1.8: - description = "High skew" - else: - description = "Very high skew" - - print(f"{alpha:<8.1f} {hit_ratio:<12.3f} {description}") - - # Example 4: Cache size sensitivity - print("\n4. 
Cache Size Sensitivity") - print("-" * 30) - - # Fixed workload - generator = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42 - ) - - cache_sizes = [ - 1 * 1024 * 1024, # 1MB - 5 * 1024 * 1024, # 5MB - 10 * 1024 * 1024, # 10MB - 50 * 1024 * 1024, # 50MB - ] - - print(f"{'Cache Size':<12} {'Hit Ratio':<12} {'Objects Fit'}") - print("-" * 36) - - for cache_size in cache_sizes: - cache = lcs.LRU(cache_size=cache_size) - - # Create fresh generator for each test - test_generator = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42 - ) - - hit_count = sum(1 for req in test_generator if cache.get(req)) - hit_ratio = hit_count / num_requests - objects_fit = cache_size // obj_size - - print(f"{cache_size // 1024 // 1024}MB{'':<8} {hit_ratio:<12.3f} ~{objects_fit}") - - print("\nNotes:") - print("- Higher α values create more skewed access patterns") - print("- Skewed patterns generally have higher hit ratios") - print("- Cache size affects performance, but beyond a point diminishing returns") - print(f"- Working set: {num_objects} objects × {obj_size}B = {num_objects * obj_size // 1024}KB") - - -if __name__ == "__main__": - main() diff --git a/libCacheSim-python/examples/zipf_trace_example.py b/libCacheSim-python/examples/zipf_trace_example.py deleted file mode 100644 index 662ae0fa8..000000000 --- a/libCacheSim-python/examples/zipf_trace_example.py +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env python3 -""" -Example demonstrating trace generation and cache simulation in libCacheSim Python bindings. - -This example shows how to: -1. Generate synthetic request traces using available APIs -2. Use the generated traces with cache simulations -3. 
Compare different algorithms and parameters -""" - -import libcachesim as lcs - - -def example_basic_trace_generation(): - """Basic example of generating synthetic traces.""" - print("=== Basic Synthetic Trace Generation ===") - - # Generate Zipf requests using available API - num_objects = 1000 - num_requests = 10000 - alpha = 1.0 - obj_size = 1024 # 1KB objects - - # Create Zipf-distributed requests - zipf_requests = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 - ) - - print(f"Generated {num_requests} Zipf requests with α={alpha}") - print(f"Object size: {obj_size}B, Number of unique objects: {num_objects}") - - # Use the requests with a cache - cache = lcs.LRU(cache_size=50 * 1024 * 1024) # 50MB cache - hit_count = sum(1 for req in zipf_requests if cache.get(req)) - hit_ratio = hit_count / num_requests - print(f"LRU cache hit ratio: {hit_ratio:.4f}") - - return hit_ratio - - -def example_compare_zipf_parameters(): - """Compare different Zipf parameters.""" - print("\n=== Comparing Zipf Parameters ===") - - num_objects = 1000 - num_requests = 10000 - cache_size = 50 * 1024 * 1024 # 50MB - obj_size = 1024 # 1KB objects - - alphas = [0.5, 1.0, 1.5, 2.0] - results = {} - - print(f"{'Alpha':<8} {'LRU':<8} {'FIFO':<8} {'ARC':<8} {'Clock':<8}") - print("-" * 40) - - for alpha in alphas: - # Test with different cache policies - policies = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - "ARC": lcs.ARC(cache_size), - "Clock": lcs.Clock(cache_size), - } - - results[alpha] = {} - hit_ratios = [] - for name, cache in policies.items(): - # Create fresh request iterator for each cache - test_requests = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 - ) - hit_count = sum(1 for req in test_requests if cache.get(req)) - hit_ratio = hit_count / num_requests - results[alpha][name] = hit_ratio - 
hit_ratios.append(f"{hit_ratio:.3f}") - - print(f"{alpha:<8.1f} {hit_ratios[0]:<8} {hit_ratios[1]:<8} {hit_ratios[2]:<8} {hit_ratios[3]:<8}") - - return results - - -def example_algorithm_comparison(): - """Compare different cache algorithms.""" - print("\n=== Cache Algorithm Comparison ===") - - # Fixed workload parameters - num_objects = 1000 - num_requests = 10000 - alpha = 1.0 - obj_size = 1024 - cache_size = 10 * 1024 * 1024 # 10MB - - # Available algorithms - algorithms = { - "LRU": lcs.LRU, - "FIFO": lcs.FIFO, - "ARC": lcs.ARC, - "Clock": lcs.Clock, - "S3FIFO": lcs.S3FIFO, - "Sieve": lcs.Sieve, - } - - print(f"Testing with: {num_objects} objects, {num_requests} requests") - print(f"Cache size: {cache_size // 1024 // 1024}MB, Object size: {obj_size}B") - print(f"Zipf alpha: {alpha}") - print() - - print(f"{'Algorithm':<10} {'Hit Ratio':<12} {'Description'}") - print("-" * 45) - - results = {} - for name, cache_class in algorithms.items(): - try: - # Create fresh requests for each algorithm - requests = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 - ) - - cache = cache_class(cache_size) - hit_count = sum(1 for req in requests if cache.get(req)) - hit_ratio = hit_count / num_requests - results[name] = hit_ratio - - # Add descriptions - descriptions = { - "LRU": "Least Recently Used", - "FIFO": "First In First Out", - "ARC": "Adaptive Replacement Cache", - "Clock": "Clock/Second Chance", - "S3FIFO": "Simple Scalable FIFO", - "Sieve": "Lazy Promotion", - } - - print(f"{name:<10} {hit_ratio:<12.4f} {descriptions.get(name, '')}") - - except Exception as e: - print(f"{name:<10} {'ERROR':<12} {str(e)}") - - return results - - -def example_uniform_vs_zipf(): - """Compare uniform vs Zipf distributions.""" - print("\n=== Uniform vs Zipf Distribution Comparison ===") - - num_objects = 1000 - num_requests = 10000 - obj_size = 1024 - cache_size = 10 * 1024 * 1024 - - # Test uniform distribution - 
uniform_requests = lcs.create_uniform_requests( - num_objects=num_objects, num_requests=num_requests, obj_size=obj_size, seed=42 - ) - - cache = lcs.LRU(cache_size) - uniform_hits = sum(1 for req in uniform_requests if cache.get(req)) - uniform_hit_ratio = uniform_hits / num_requests - - # Test Zipf distribution - zipf_requests = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=1.0, obj_size=obj_size, seed=42 - ) - - cache = lcs.LRU(cache_size) - zipf_hits = sum(1 for req in zipf_requests if cache.get(req)) - zipf_hit_ratio = zipf_hits / num_requests - - print(f"{'Distribution':<12} {'Hit Ratio':<12} {'Description'}") - print("-" * 45) - print(f"{'Uniform':<12} {uniform_hit_ratio:<12.4f} {'All objects equally likely'}") - print(f"{'Zipf (α=1.0)':<12} {zipf_hit_ratio:<12.4f} {'Some objects much more popular'}") - - print( - f"\nObservation: Zipf typically shows{'higher' if zipf_hit_ratio > uniform_hit_ratio else 'lower'} hit ratios" - ) - print("due to locality of reference (hot objects get cached)") - - -def example_cache_size_analysis(): - """Analyze the effect of different cache sizes.""" - print("\n=== Cache Size Sensitivity Analysis ===") - - num_objects = 1000 - num_requests = 10000 - alpha = 1.0 - obj_size = 1024 - - cache_sizes = [ - 1 * 1024 * 1024, # 1MB - 5 * 1024 * 1024, # 5MB - 10 * 1024 * 1024, # 10MB - 25 * 1024 * 1024, # 25MB - 50 * 1024 * 1024, # 50MB - ] - - print(f"{'Cache Size':<12} {'Objects Fit':<12} {'Hit Ratio':<12} {'Efficiency'}") - print("-" * 55) - - for cache_size in cache_sizes: - requests = lcs.create_zipf_requests( - num_objects=num_objects, num_requests=num_requests, alpha=alpha, obj_size=obj_size, seed=42 - ) - - cache = lcs.LRU(cache_size) - hit_count = sum(1 for req in requests if cache.get(req)) - hit_ratio = hit_count / num_requests - objects_fit = cache_size // obj_size - efficiency = hit_ratio / (cache_size / (1024 * 1024)) # hit ratio per MB - - print(f"{cache_size // 1024 // 
1024}MB{'':<8} {objects_fit:<12} {hit_ratio:<12.4f} {efficiency:<12.4f}") - - -def main(): - """Run all examples.""" - print("libCacheSim Python Bindings - Trace Generation Examples") - print("=" * 60) - - try: - # Run examples - example_basic_trace_generation() - example_compare_zipf_parameters() - example_algorithm_comparison() - example_uniform_vs_zipf() - example_cache_size_analysis() - - print("\n" + "=" * 60) - print("All examples completed successfully!") - print("\nKey Takeaways:") - print("• Higher Zipf α values create more skewed access patterns") - print("• Skewed patterns generally result in higher cache hit ratios") - print("• Different algorithms perform differently based on workload") - print("• Cache size has diminishing returns beyond working set size") - - except Exception as e: - print(f"Error running examples: {e}") - import traceback - - traceback.print_exc() - - -if __name__ == "__main__": - main() diff --git a/libCacheSim-python/export/CMakeLists.txt b/libCacheSim-python/export/CMakeLists.txt deleted file mode 100644 index 917e83197..000000000 --- a/libCacheSim-python/export/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -# Helper functions are removed since we don't export source files anymore - -set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake") -file(WRITE "${EXPORT_FILE}" "") - -get_filename_component(MAIN_PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR} ABSOLUTE) -file(WRITE ${CMAKE_BINARY_DIR}/export_vars.cmake "set(MAIN_PROJECT_SOURCE_DIR \"${MAIN_PROJECT_SOURCE_DIR}\")\n") -file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(dependency_libs \"${dependency_libs}\")\n") -file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(LIBCACHESIM_VERSION \"${LIBCACHESIM_VERSION}\")\n") - -# ============================================================================== -# Export project metadata -# ============================================================================== -file(APPEND "${EXPORT_FILE}" "set(LIBCACHESIM_VERSION 
\"${${PROJECT_NAME}_VERSION}\")\n") - -# ============================================================================== -# Export essential include directory variables -# ============================================================================== -foreach(var IN ITEMS GLib_INCLUDE_DIRS GLib_CONFIG_INCLUDE_DIR XGBOOST_INCLUDE_DIR LIGHTGBM_PATH ZSTD_INCLUDE_DIR) - file(APPEND "${EXPORT_FILE}" "set(${var} \"${${var}}\")\n") -endforeach() - -# ============================================================================== -# Export dependency library variables -# ============================================================================== -file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARY_DIRS \"${GLib_LIBRARY_DIRS}\")\n") -file(APPEND "${EXPORT_FILE}" "set(GLib_LIBRARIES \"${GLib_LIBRARIES}\")\n") -get_filename_component(ZSTD_LIBRARY_DIR "${ZSTD_LIBRARIES}" DIRECTORY) -file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARY_DIRS \"${ZSTD_LIBRARY_DIRS}\")\n") -file(APPEND "${EXPORT_FILE}" "set(ZSTD_LIBRARIES \"${ZSTD_LIBRARIES}\")\n") -file(APPEND "${EXPORT_FILE}" "set(dependency_libs \"${dependency_libs}\")\n") - -# ============================================================================== -# Export essential build option variables -# ============================================================================== -file(APPEND "${EXPORT_FILE}" "set(LOG_LEVEL_LOWER \"${LOG_LEVEL_LOWER}\")\n") - -message(STATUS "Exported essential variables to ${EXPORT_FILE}") diff --git a/libCacheSim-python/export/README.md b/libCacheSim-python/export/README.md deleted file mode 100644 index 976b1daa8..000000000 --- a/libCacheSim-python/export/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# Python Binding Export System - -Build system bridge for sharing CMake variables between the main libCacheSim project and Python binding. - -## Purpose - -The `export/CMakeLists.txt` exports all necessary build variables (source files, include directories, compiler flags, etc.) 
from the main project to the Python binding, enabling consistent builds without duplicating configuration. - -## How It Works - -1. **Export**: Main project writes variables to `export_vars.cmake` -2. **Import**: Python binding includes this file during CMake configuration -3. **Build**: Python binding uses shared variables for consistent compilation - -## Key Exported Variables - -### Source Files -- Cache algorithms, data structures, trace readers -- Profilers, utilities, analyzers - -### Build Configuration -- Include directories (main, GLib, ZSTD, XGBoost, LightGBM) -- Compiler flags (C/C++) -- Dependency libraries -- Build options (hugepage, tests, optional features) - -## Usage - -**Main Project** (`CMakeLists.txt`): -```cmake -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) -``` - -**Python Binding** (`libCacheSim-python/CMakeLists.txt`): -```cmake -set(EXPORT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../build/export_vars.cmake") -include("${EXPORT_FILE}") -``` - -## For Developers - -This system ensures the Python binding automatically picks up changes to: -- New source files added to the main project -- Updated compiler flags or dependencies -- Modified build options - -No manual synchronization needed between main project and Python binding builds. 
diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py deleted file mode 100644 index 47e693cde..000000000 --- a/libCacheSim-python/libcachesim/__init__.py +++ /dev/null @@ -1,85 +0,0 @@ -"""libCacheSim Python bindings""" - -from __future__ import annotations - -from ._libcachesim import ( - Cache, - Reader, - ReaderInitParam, - Request, - ReqOp, - TraceType, - __doc__, - __version__, - open_trace, - process_trace, - process_trace_python_hook, -) -from .eviction import ( - ARC, - Belady, - BeladySize, - Cacheus, - Clock, - FIFO, - LeCaR, - LFU, - LFUDA, - LRB, - LRU, - PythonHookCachePolicy, - QDLP, - S3FIFO, - Sieve, - SLRU, - ThreeLCache, - TinyLFU, - TwoQ, - WTinyLFU, -) -from .trace_generator import ( - create_zipf_requests, - create_uniform_requests, -) - -__all__ = [ - # Core classes - "Cache", - "Reader", - "Request", - "ReaderInitParam", - # Trace types and operations - "TraceType", - "ReqOp", - # Cache policies - "LRU", - "FIFO", - "ARC", - "Clock", - "LFU", - "LFUDA", - "SLRU", - "S3FIFO", - "Sieve", - "TinyLFU", - "WTinyLFU", - "TwoQ", - "ThreeLCache", - "Belady", - "BeladySize", - "LRB", - "QDLP", - "LeCaR", - "Cacheus", - # Custom cache policy - "PythonHookCachePolicy", - # Functions - "open_trace", - "process_trace", - "process_trace_python_hook", - "create_zipf_requests", - "create_uniform_requests", - # Metadata - "__doc__", - "__version__", -] diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi deleted file mode 100644 index 6992a74ae..000000000 --- a/libCacheSim-python/libcachesim/__init__.pyi +++ /dev/null @@ -1,293 +0,0 @@ -""" -libCacheSim Python bindings --------------------------- - -.. currentmodule:: libcachesim - -.. 
autosummary:: - :toctree: _generate - - open_trace - ARC - Clock - FIFO - LRB - LRU - S3FIFO - Sieve - ThreeLCache - TinyLFU - TwoQ - Cache - Request - Reader - reader_init_param_t - TraceType - PythonHookCachePolicy - process_trace - process_trace_python_hook - create_zipf_requests - create_uniform_requests -""" - -from typing import Any, Callable, Optional, Union, overload -from collections.abc import Iterator - -from _libcachesim import TraceType, ReqOp - -def open_trace( - trace_path: str, - type: Optional[TraceType] = None, - reader_init_param: Optional[Union[dict, reader_init_param_t]] = None, -) -> Reader: ... -def process_trace( - cache: Cache, - reader: Reader, - start_req: int = 0, - max_req: int = -1, -) -> tuple[float, float]: - """ - Process a trace with a cache and return miss ratio. - """ - -def process_trace_python_hook( - cache: PythonHookCache, - reader: Reader, - start_req: int = 0, - max_req: int = -1, -) -> tuple[float, float]: - """ - Process a trace with a Python hook cache and return miss ratio. - """ - -# Trace generation functions -def create_zipf_requests( - num_objects: int, - num_requests: int, - alpha: float = 1.0, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> Iterator[Request]: - """Create a Zipf-distributed request generator. 
- - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - alpha (float): Zipf skewness parameter (alpha >= 0) - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Iterator[Request]: A generator that yields Request objects - """ - -def create_uniform_requests( - num_objects: int, - num_requests: int, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> Iterator[Request]: - """Create a uniform-distributed request generator. - - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Iterator[Request]: A generator that yields Request objects - """ - -class reader_init_param_t: - time_field: int - obj_id_field: int - obj_size_field: int - delimiter: str - has_header: bool - binary_fmt_str: str - -class Cache: - n_req: int - cache_size: int - @property - def n_obj(self) -> int: ... - @property - def occupied_byte(self) -> int: ... - def get(self, req: Request) -> bool: ... - -class Request: - clock_time: int - hv: int - obj_id: int - obj_size: int - op: ReqOp - - @overload - def __init__(self) -> None: ... - @overload - def __init__( - self, obj_id: int, obj_size: int = 1, clock_time: int = 0, hv: int = 0, op: ReqOp = ReqOp.GET - ) -> None: ... - def __init__( - self, obj_id: Optional[int] = None, obj_size: int = 1, clock_time: int = 0, hv: int = 0, op: ReqOp = ReqOp.GET - ) -> None: - """Create a request instance. - - Args: - obj_id (int, optional): The object ID. - obj_size (int): The object size. (default: 1) - clock_time (int): The clock time. 
(default: 0) - hv (int): The hash value. (default: 0) - op (ReqOp): The operation. (default: ReqOp.GET) - - Returns: - Request: A new request instance. - """ - -class Reader: - n_read_req: int - n_total_req: int - trace_path: str - file_size: int - def get_wss(self, ignore_obj_size: bool = False) -> int: ... - def seek(self, offset: int, from_beginning: bool = False) -> None: ... - def __iter__(self) -> Reader: ... - def __next__(self) -> Request: ... - -class PythonHookCache: - n_req: int - n_obj: int - occupied_byte: int - cache_size: int - - def __init__(self, cache_size: int, cache_name: str = "PythonHookCache") -> None: ... - def set_hooks( - self, - init_hook: Callable[[int], Any], - hit_hook: Callable[[Any, int, int], None], - miss_hook: Callable[[Any, int, int], None], - eviction_hook: Callable[[Any, int, int], int], - remove_hook: Callable[[Any, int], None], - free_hook: Optional[Callable[[Any], None]] = None, - ) -> None: ... - def get(self, req: Request) -> bool: ... - -# Base class for all eviction policies -class EvictionPolicyBase: - """Abstract base class for all eviction policies.""" - def get(self, req: Request) -> bool: ... - def process_trace(self, reader: Reader, start_req: int = 0, max_req: int = -1) -> tuple[float, float]: ... - @property - def n_req(self) -> int: ... - @property - def n_obj(self) -> int: ... - @property - def occupied_byte(self) -> int: ... - @property - def cache_size(self) -> int: ... - def __repr__(self) -> str: ... - -# Eviction policy classes -class ARC(EvictionPolicyBase): - """Adaptive Replacement Cache policy.""" - def __init__(self, cache_size: int) -> None: ... - -class Belady(EvictionPolicyBase): - """Belady replacement policy (optimal offline algorithm).""" - def __init__(self, cache_size: int) -> None: ... - -class BeladySize(EvictionPolicyBase): - """BeladySize replacement policy (optimal offline algorithm with size consideration).""" - def __init__(self, cache_size: int) -> None: ... 
- -class Cacheus(EvictionPolicyBase): - """Cacheus replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class Clock(EvictionPolicyBase): - """Clock (Second Chance or FIFO-Reinsertion) replacement policy.""" - def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0) -> None: ... - -class FIFO(EvictionPolicyBase): - """First In First Out replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class LeCaR(EvictionPolicyBase): - """LeCaR (Learning Cache Replacement) adaptive replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class LFU(EvictionPolicyBase): - """LFU (Least Frequently Used) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class LFUDA(EvictionPolicyBase): - """LFUDA (LFU with Dynamic Aging) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class LRB(EvictionPolicyBase): - """LRB (Learning Relaxed Belady) replacement policy.""" - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio") -> None: ... - -class LRU(EvictionPolicyBase): - """Least Recently Used replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class QDLP(EvictionPolicyBase): - """QDLP (Queue Demotion with Lazy Promotion) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class S3FIFO(EvictionPolicyBase): - """S3FIFO replacement policy.""" - def __init__( - self, - cache_size: int, - fifo_size_ratio: float = 0.1, - ghost_size_ratio: float = 0.9, - move_to_main_threshold: int = 2, - ) -> None: ... - -class Sieve(EvictionPolicyBase): - """Sieve replacement policy.""" - def __init__(self, cache_size: int) -> None: ... - -class SLRU(EvictionPolicyBase): - """SLRU (Segmented LRU) replacement policy.""" - def __init__(self, cache_size: int) -> None: ... 
- -class ThreeLCache(EvictionPolicyBase): - """ThreeL cache replacement policy.""" - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio") -> None: ... - -class TinyLFU(EvictionPolicyBase): - """TinyLFU replacement policy.""" - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01) -> None: ... - -class TwoQ(EvictionPolicyBase): - """2Q replacement policy.""" - def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5) -> None: ... - -class WTinyLFU(EvictionPolicyBase): - """WTinyLFU (Windowed TinyLFU) replacement policy.""" - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01) -> None: ... - -class PythonHookCachePolicy(EvictionPolicyBase): - """Python hook-based cache policy.""" - def __init__(self, cache_size: int, cache_name: str = "PythonHookCache") -> None: ... - def set_hooks( - self, - init_hook: Callable[[int], Any], - hit_hook: Callable[[Any, int, int], None], - miss_hook: Callable[[Any, int, int], None], - eviction_hook: Callable[[Any, int, int], int], - remove_hook: Callable[[Any, int], None], - free_hook: Optional[Callable[[Any], None]] = None, - ) -> None: ... 
diff --git a/libCacheSim-python/libcachesim/const.py b/libCacheSim-python/libcachesim/const.py deleted file mode 100644 index 9d48db4f9..000000000 --- a/libCacheSim-python/libcachesim/const.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py deleted file mode 100644 index 63599ec0f..000000000 --- a/libCacheSim-python/libcachesim/eviction.py +++ /dev/null @@ -1,713 +0,0 @@ -"""Registry of eviction policies.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod - -from ._libcachesim import ( - ARC_init, - Belady_init, - BeladySize_init, - Cacheus_init, - Cache, - Clock_init, - FIFO_init, - LeCaR_init, - LFU_init, - LFUDA_init, - LRB_init, - LRU_init, - QDLP_init, - Reader, - Request, - S3FIFO_init, - Sieve_init, - SLRU_init, - ThreeLCache_init, - TinyLFU_init, - TwoQ_init, - WTinyLFU_init, - PythonHookCache, -) - -from .trace_generator import _ZipfRequestGenerator, _UniformRequestGenerator - -# Define generator types once to avoid repeated tuple creation -_GENERATOR_TYPES = (_ZipfRequestGenerator, _UniformRequestGenerator) - - -class EvictionPolicyBase(ABC): - """Abstract base class for all eviction policies.""" - - @abstractmethod - def get(self, req: Request) -> bool: - pass - - @abstractmethod - def __repr__(self) -> str: - pass - - @abstractmethod - def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]: - """Process a trace with this cache and return miss ratio. - - This method processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. 
- - Args: - reader: The trace reader instance - start_req: Start request index (-1 for no limit) - max_req: Number of requests to process (-1 for no limit) - - Returns: - tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0) - """ - pass - - -class EvictionPolicy(EvictionPolicyBase): - """Base class for all eviction policies.""" - - def __init__(self, cache_size: int, **kwargs) -> None: - self.cache: Cache = self.init_cache(cache_size, **kwargs) - - @abstractmethod - def init_cache(self, cache_size: int, **kwargs) -> Cache: - pass - - def get(self, req: Request) -> bool: - return self.cache.get(req) - - def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]: - """Process a trace with this cache and return miss ratio. - - This method processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. - - Args: - reader: The trace reader instance - start_req: Start request index (-1 for no limit) - max_req: Number of requests to process (-1 for no limit) - - Returns: - tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0) - Example: - >>> cache = LRU(1024*1024) - >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}") - """ - obj_miss_ratio = 0.0 - byte_miss_ratio = 0.0 - if not isinstance(reader, Reader): - # streaming generator - if isinstance(reader, _GENERATOR_TYPES): - miss_cnt = 0 - byte_miss_cnt = 0 - total_byte = 0 - for req in reader: - hit = self.get(req) - total_byte += req.obj_size - if not hit: - miss_cnt += 1 - byte_miss_cnt += req.obj_size - obj_miss_ratio = miss_cnt / len(reader) if len(reader) > 0 else 0.0 - byte_miss_ratio = byte_miss_cnt / total_byte if total_byte > 0 else 0.0 - return obj_miss_ratio, byte_miss_ratio - else: - from ._libcachesim import process_trace - 
- obj_miss_ratio, byte_miss_ratio = process_trace(self.cache, reader, start_req, max_req) - - return obj_miss_ratio, byte_miss_ratio - - def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})" - - @property - def n_req(self): - """Number of requests processed.""" - return self.cache.n_req - - @property - def n_obj(self): - """Number of objects currently in cache.""" - return self.cache.n_obj - - @property - def occupied_byte(self): - """Number of bytes currently occupied in cache.""" - return self.cache.occupied_byte - - @property - def cache_size(self): - """Total cache size in bytes.""" - return self.cache.cache_size - - -class FIFO(EvictionPolicy): - """First In First Out replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs) -> Cache: # noqa: ARG002 - return FIFO_init(cache_size) - - -class Clock(EvictionPolicy): - """Clock (Second Chance or FIFO-Reinsertion) replacement policy. - - Args: - cache_size: Size of the cache - n_bit_counter: Number of bits for counter (default: 1) - init_freq: Initial frequency value (default: 0) - """ - - def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0): - super().__init__(cache_size, n_bit_counter=n_bit_counter, init_freq=init_freq) - - def init_cache(self, cache_size: int, **kwargs): - init_freq = kwargs.get("init_freq", 0) - n_bit_counter = kwargs.get("n_bit_counter", 1) - - if n_bit_counter < 1 or n_bit_counter > 32: - msg = "n_bit_counter must be between 1 and 32" - raise ValueError(msg) - if init_freq < 0 or init_freq > 2**n_bit_counter - 1: - msg = "init_freq must be between 0 and 2^n_bit_counter - 1" - raise ValueError(msg) - - self.init_freq = init_freq - self.n_bit_counter = n_bit_counter - - return Clock_init(cache_size, n_bit_counter, init_freq) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"n_bit_counter={self.n_bit_counter}, " 
- f"init_freq={self.init_freq})" - ) - - -class TwoQ(EvictionPolicy): - """2Q replacement policy. - - 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be - inserted into Am otherwise it will be inserted into Ain. - - Args: - cache_size: Total size of the cache - ain_size_ratio: Size ratio for Ain queue (default: 0.25) - aout_size_ratio: Size ratio for Aout queue (default: 0.5) - """ - - def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5): - super().__init__(cache_size, ain_size_ratio=ain_size_ratio, aout_size_ratio=aout_size_ratio) - - def init_cache(self, cache_size: int, **kwargs): - ain_size_ratio = kwargs.get("ain_size_ratio", 0.25) - aout_size_ratio = kwargs.get("aout_size_ratio", 0.5) - - if ain_size_ratio <= 0 or aout_size_ratio <= 0: - msg = "ain_size_ratio and aout_size_ratio must be greater than 0" - raise ValueError(msg) - - self.ain_size_ratio = ain_size_ratio - self.aout_size_ratio = aout_size_ratio - - return TwoQ_init(cache_size, ain_size_ratio, aout_size_ratio) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"ain_size_ratio={self.ain_size_ratio}, " - f"aout_size_ratio={self.aout_size_ratio})" - ) - - -class LRB(EvictionPolicy): - """LRB (Learning Relaxed Belady) replacement policy. - - LRB is a learning-based replacement policy that uses a neural network to - predict the future access patterns of the cache, randomly select one obj - outside the Belady boundary to evict. 
- - Args: - cache_size: Size of the cache - objective: Objective function to optimize (default: "byte-miss-ratio") - """ - - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): - super().__init__(cache_size, objective=objective) - - def init_cache(self, cache_size: int, **kwargs) -> Cache: - objective = kwargs.get("objective", "byte-miss-ratio") - - if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: - msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" - raise ValueError(msg) - - self.objective = objective - - return LRB_init(cache_size, objective) - - def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, objective={self.objective})" - - -class LRU(EvictionPolicy): - """Least Recently Used replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LRU_init(cache_size) - - -class ARC(EvictionPolicy): - """Adaptive Replacement Cache policy. - - ARC is a two-tiered cache with two LRU caches (T1 and T2) and two ghost - lists (B1 and B2). T1 records the obj accessed only once, T2 records - the obj accessed more than once. ARC has an internal parameter `p` to - learn and dynamically control the size of T1 and T2. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return ARC_init(cache_size) - - -class S3FIFO(EvictionPolicy): - """S3FIFO replacement policy. - - S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small - queue gets the obj and records the freq. - When small queue is full, if the obj to evict satisfies the threshold, - it will be moved to main queue. Otherwise, it will be evicted from small - queue and inserted into ghost queue. - When main queue is full, the obj to evict will be evicted and reinserted - like Clock. - If obj hits in the ghost queue, it will be moved to main queue. 
- - Args: - cache_size: Size of the cache - fifo_size_ratio: Size ratio for FIFO queue (default: 0.1) - ghost_size_ratio: Size ratio for ghost queue (default: 0.9) - move_to_main_threshold: Threshold for moving obj from ghost to main (default: 2) - """ - - def __init__( - self, - cache_size: int, - fifo_size_ratio: float = 0.1, - ghost_size_ratio: float = 0.9, - move_to_main_threshold: int = 2, - ): - super().__init__( - cache_size, - fifo_size_ratio=fifo_size_ratio, - ghost_size_ratio=ghost_size_ratio, - move_to_main_threshold=move_to_main_threshold, - ) - - def init_cache(self, cache_size: int, **kwargs): - fifo_size_ratio = kwargs.get("fifo_size_ratio", 0.1) - ghost_size_ratio = kwargs.get("ghost_size_ratio", 0.9) - move_to_main_threshold = kwargs.get("move_to_main_threshold", 2) - - if fifo_size_ratio <= 0 or ghost_size_ratio <= 0: - msg = "fifo_size_ratio and ghost_size_ratio must be greater than 0" - raise ValueError(msg) - if move_to_main_threshold < 0: - msg = "move_to_main_threshold must be greater or equal to 0" - raise ValueError(msg) - - self.fifo_size_ratio = fifo_size_ratio - self.ghost_size_ratio = ghost_size_ratio - self.move_to_main_threshold = move_to_main_threshold - - return S3FIFO_init(cache_size, fifo_size_ratio, ghost_size_ratio, move_to_main_threshold) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"fifo_size_ratio={self.fifo_size_ratio}, " - f"ghost_size_ratio={self.ghost_size_ratio}, " - f"move_to_main_threshold={self.move_to_main_threshold})" - ) - - -class Sieve(EvictionPolicy): - """Sieve replacement policy. - - FIFO-Reinsertion with check pointer. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return Sieve_init(cache_size) - - -class ThreeLCache(EvictionPolicy): - """3L-Cache replacement policy. 
- - Args: - cache_size: Size of the cache - objective: Objective function to optimize (default: "byte-miss-ratio") - """ - - def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): - super().__init__(cache_size, objective=objective) - - def init_cache(self, cache_size: int, **kwargs): - objective = kwargs.get("objective", "byte-miss-ratio") - - if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: - msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" - raise ValueError(msg) - - self.objective = objective - - return ThreeLCache_init(cache_size, objective) - - def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, objective={self.objective})" - - -class TinyLFU(EvictionPolicy): - """TinyLFU replacement policy. - - Args: - cache_size: Size of the cache - main_cache: Main cache to use (default: "SLRU") - window_size: Window size for TinyLFU (default: 0.01) - """ - - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01): - super().__init__(cache_size, main_cache=main_cache, window_size=window_size) - - def init_cache(self, cache_size: int, **kwargs): - main_cache = kwargs.get("main_cache", "SLRU") - window_size = kwargs.get("window_size", 0.01) - - if window_size <= 0: - msg = "window_size must be greater than 0" - raise ValueError(msg) - - self.main_cache = main_cache - self.window_size = window_size - - return TinyLFU_init(cache_size, main_cache, window_size) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"main_cache={self.main_cache}, " - f"window_size={self.window_size})" - ) - - -class LFU(EvictionPolicy): - """LFU (Least Frequently Used) replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LFU_init(cache_size) - - -class LFUDA(EvictionPolicy): - """LFUDA (LFU with Dynamic Aging) replacement policy. 
- - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LFUDA_init(cache_size) - - -class SLRU(EvictionPolicy): - """SLRU (Segmented LRU) replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return SLRU_init(cache_size) - - -class Belady(EvictionPolicy): - """Belady replacement policy (optimal offline algorithm). - - Note: Requires oracle trace with future access information. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return Belady_init(cache_size) - - -class BeladySize(EvictionPolicy): - """BeladySize replacement policy (optimal offline algorithm with size consideration). - - Note: Requires oracle trace with future access information. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return BeladySize_init(cache_size) - - -class QDLP(EvictionPolicy): - """QDLP (Queue Demotion with Lazy Promotion) replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return QDLP_init(cache_size) - - -class LeCaR(EvictionPolicy): - """LeCaR (Learning Cache Replacement) adaptive replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return LeCaR_init(cache_size) - - -class Cacheus(EvictionPolicy): - """Cacheus replacement policy. - - Args: - cache_size: Size of the cache - """ - - def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 - return Cacheus_init(cache_size) - - -class WTinyLFU(EvictionPolicy): - """WTinyLFU (Windowed TinyLFU) replacement policy. 
- - Args: - cache_size: Size of the cache - main_cache: Main cache to use (default: "SLRU") - window_size: Window size for TinyLFU (default: 0.01) - """ - - def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01): - super().__init__(cache_size, main_cache=main_cache, window_size=window_size) - - def init_cache(self, cache_size: int, **kwargs): - main_cache = kwargs.get("main_cache", "SLRU") - window_size = kwargs.get("window_size", 0.01) - - if window_size <= 0: - msg = "window_size must be greater than 0" - raise ValueError(msg) - - self.main_cache = main_cache - self.window_size = window_size - - return WTinyLFU_init(cache_size, main_cache, window_size) - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " - f"main_cache={self.main_cache}, " - f"window_size={self.window_size})" - ) - - -class PythonHookCachePolicy(EvictionPolicyBase): - """Python hook-based cache that allows defining custom policies using Python functions. - - This cache implementation allows users to define custom cache replacement algorithms - using pure Python functions instead of compiling C/C++ plugins. Users provide hook - functions for cache initialization, hit handling, miss handling, eviction decisions, - and cleanup. - - Args: - cache_size: Size of the cache in bytes - cache_name: Optional name for the cache (default: "PythonHookCache") - - Hook Functions Required: - init_hook(cache_size: int) -> Any: - Initialize plugin data structures. Return any object to be passed to other hooks. - - hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: - Handle cache hit events. Update internal state as needed. - - miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: - Handle cache miss events. Update internal state for new object. - - eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int: - Determine which object to evict. Return the object ID to be evicted. 
- - remove_hook(plugin_data: Any, obj_id: int) -> None: - Clean up when objects are removed from cache. - - free_hook(plugin_data: Any) -> None: [Optional] - Clean up plugin resources when cache is destroyed. - - Example: - >>> from collections import OrderedDict - >>> - >>> cache = PythonHookCachePolicy(1024) - >>> - >>> def init_hook(cache_size): - ... return OrderedDict() # LRU tracking - >>> - >>> def hit_hook(lru_dict, obj_id, obj_size): - ... lru_dict.move_to_end(obj_id) # Move to end (most recent) - >>> - >>> def miss_hook(lru_dict, obj_id, obj_size): - ... lru_dict[obj_id] = True # Add to end - >>> - >>> def eviction_hook(lru_dict, obj_id, obj_size): - ... return next(iter(lru_dict)) # Return least recent - >>> - >>> def remove_hook(lru_dict, obj_id): - ... lru_dict.pop(obj_id, None) - >>> - >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - >>> - >>> req = Request() - >>> req.obj_id = 1 - >>> req.obj_size = 100 - >>> hit = cache.get(req) - """ - - def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"): - self._cache_size = cache_size - self.cache_name = cache_name - self.cache = PythonHookCache(cache_size, cache_name) - self._hooks_set = False - - def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): - """Set the hook functions for the cache. - - Args: - init_hook: Function called during cache initialization - hit_hook: Function called on cache hit - miss_hook: Function called on cache miss - eviction_hook: Function called to select eviction candidate - remove_hook: Function called when object is removed - free_hook: Optional function called during cache cleanup - """ - self.cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) - self._hooks_set = True - - def get(self, req: Request) -> bool: - """Process a cache request. 
- - Args: - req: The cache request to process - - Returns: - True if cache hit, False if cache miss - - Raises: - RuntimeError: If hooks have not been set - """ - if not self._hooks_set: - raise RuntimeError("Hooks must be set before using the cache. Call set_hooks() first.") - return self.cache.get(req) - - def process_trace(self, reader, start_req=0, max_req=-1) -> tuple[float, float]: - """Process a trace with this cache and return miss ratio. - - This method processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. - - Args: - reader: The trace reader instance - start_req: Start request index (-1 for no limit) - n_req: Number of requests to process (-1 for no limit) - - Returns: - tuple[float, float]: Object miss ratio (0.0 to 1.0) and byte miss ratio (0.0 to 1.0) - - Raises: - RuntimeError: If hooks have not been set - - Example: - >>> cache = PythonHookCachePolicy(1024*1024) - >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}") - """ - if not self._hooks_set: - raise RuntimeError("Hooks must be set before processing trace. 
Call set_hooks() first.") - obj_miss_ratio = 0.0 - byte_miss_ratio = 0.0 - if not isinstance(reader, Reader): - # streaming generator - if isinstance(reader, _GENERATOR_TYPES): - miss_cnt = 0 - byte_miss_cnt = 0 - total_byte = 0 - for req in reader: - hit = self.get(req) - total_byte += req.obj_size - if not hit: - miss_cnt += 1 - byte_miss_cnt += req.obj_size - obj_miss_ratio = miss_cnt / len(reader) if len(reader) > 0 else 0.0 - byte_miss_ratio = byte_miss_cnt / total_byte if total_byte > 0 else 0.0 - return obj_miss_ratio, byte_miss_ratio - else: - from ._libcachesim import process_trace_python_hook - - obj_miss_ratio, byte_miss_ratio = process_trace_python_hook(self.cache, reader, start_req, max_req) - return obj_miss_ratio, byte_miss_ratio - - @property - def n_req(self): - """Number of requests processed.""" - return self.cache.n_req - - @property - def n_obj(self): - """Number of objects currently in cache.""" - return self.cache.n_obj - - @property - def occupied_byte(self): - """Number of bytes currently occupied in cache.""" - return self.cache.occupied_byte - - @property - def cache_size(self): - """Total cache size in bytes.""" - return self.cache.cache_size - - def __repr__(self): - return ( - f"{self.__class__.__name__}(cache_size={self._cache_size}, " - f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})" - ) diff --git a/libCacheSim-python/libcachesim/trace_generator.py b/libCacheSim-python/libcachesim/trace_generator.py deleted file mode 100644 index 8c5802243..000000000 --- a/libCacheSim-python/libcachesim/trace_generator.py +++ /dev/null @@ -1,215 +0,0 @@ -""" -Trace generator module for libCacheSim Python bindings. - -This module provides functions to generate synthetic traces with different distributions. 
-""" - -import numpy as np -import random -from typing import Optional -from collections.abc import Iterator -from ._libcachesim import Request, ReqOp - - -def _gen_zipf(m: int, alpha: float, n: int, start: int = 0) -> np.ndarray: - """Generate zipf distributed workload (internal function). - - Args: - m (int): The number of objects - alpha (float): The skewness parameter (alpha >= 0) - n (int): The number of requests - start (int, optional): Start object ID. Defaults to 0. - - Returns: - np.ndarray: Array of object IDs following Zipf distribution - """ - if m <= 0 or n <= 0: - raise ValueError("num_objects and num_requests must be positive") - if alpha < 0: - raise ValueError("alpha must be non-negative") - np_tmp = np.power(np.arange(1, m + 1), -alpha) - np_zeta = np.cumsum(np_tmp) - dist_map = np_zeta / np_zeta[-1] - r = np.random.uniform(0, 1, n) - return np.searchsorted(dist_map, r) + start - - -def _gen_uniform(m: int, n: int, start: int = 0) -> np.ndarray: - """Generate uniform distributed workload (internal function). - - Args: - m (int): The number of objects - n (int): The number of requests - start (int, optional): Start object ID. Defaults to 0. - - Returns: - np.ndarray: Array of object IDs following uniform distribution - """ - if m <= 0 or n <= 0: - raise ValueError("num_objects and num_requests must be positive") - return np.random.uniform(0, m, n).astype(int) + start - - -class _ZipfRequestGenerator: - """Zipf-distributed request generator (internal class).""" - - def __init__( - self, - num_objects: int, - num_requests: int, - alpha: float = 1.0, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, - ): - """Initialize Zipf request generator. 
- - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - alpha (float): Zipf skewness parameter (alpha >= 0) - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - """ - self.num_requests = num_requests - self.obj_size = obj_size - self.time_span = time_span - - # Set random seed if provided - if seed is not None: - np.random.seed(seed) - random.seed(seed) - - # Pre-generate object IDs - self.obj_ids = _gen_zipf(num_objects, alpha, num_requests, start_obj_id) - - def __iter__(self) -> Iterator[Request]: - """Iterate over generated requests.""" - for i, obj_id in enumerate(self.obj_ids): - req = Request() - req.clock_time = i * self.time_span // self.num_requests - req.obj_id = obj_id - req.obj_size = self.obj_size - req.op = ReqOp.READ # Default operation - yield req - - def __len__(self) -> int: - """Return the number of requests.""" - return self.num_requests - - -class _UniformRequestGenerator: - """Uniform-distributed request generator (internal class).""" - - def __init__( - self, - num_objects: int, - num_requests: int, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, - ): - """Initialize uniform request generator. 
- - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - """ - self.num_requests = num_requests - self.obj_size = obj_size - self.time_span = time_span - - # Set random seed if provided - if seed is not None: - np.random.seed(seed) - random.seed(seed) - - # Pre-generate object IDs - self.obj_ids = _gen_uniform(num_objects, num_requests, start_obj_id) - - def __iter__(self) -> Iterator[Request]: - """Iterate over generated requests.""" - for i, obj_id in enumerate(self.obj_ids): - req = Request() - req.clock_time = i * self.time_span // self.num_requests - req.obj_id = obj_id - req.obj_size = self.obj_size - req.op = ReqOp.READ # Default operation - yield req - - def __len__(self) -> int: - """Return the number of requests.""" - return self.num_requests - - -def create_zipf_requests( - num_objects: int, - num_requests: int, - alpha: float = 1.0, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> _ZipfRequestGenerator: - """Create a Zipf-distributed request generator. 
- - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - alpha (float): Zipf skewness parameter (alpha >= 0) - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Generator: A generator that yields Request objects - """ - return _ZipfRequestGenerator( - num_objects=num_objects, - num_requests=num_requests, - alpha=alpha, - obj_size=obj_size, - time_span=time_span, - start_obj_id=start_obj_id, - seed=seed, - ) - - -def create_uniform_requests( - num_objects: int, - num_requests: int, - obj_size: int = 4000, - time_span: int = 86400 * 7, - start_obj_id: int = 0, - seed: Optional[int] = None, -) -> _UniformRequestGenerator: - """Create a uniform-distributed request generator. - - Args: - num_objects (int): Number of unique objects - num_requests (int): Number of requests to generate - obj_size (int): Object size in bytes - time_span (int): Time span in seconds - start_obj_id (int): Starting object ID - seed (int, optional): Random seed for reproducibility - - Returns: - Generator: A generator that yields Request objects - """ - return _UniformRequestGenerator( - num_objects=num_objects, - num_requests=num_requests, - obj_size=obj_size, - time_span=time_span, - start_obj_id=start_obj_id, - seed=seed, - ) diff --git a/libCacheSim-python/pyproject.toml b/libCacheSim-python/pyproject.toml deleted file mode 100644 index 45eb26f16..000000000 --- a/libCacheSim-python/pyproject.toml +++ /dev/null @@ -1,111 +0,0 @@ -[build-system] -requires = ["scikit-build-core>=0.10", "pybind11"] -build-backend = "scikit_build_core.build" - - -[project] -name = "libcachesim" -version = "0.3.2" -description="Python bindings for libCacheSim" -readme = "README.md" -requires-python = ">=3.9" -keywords = ["performance", "cache", "simulator"] -classifiers = [ - "Intended Audience :: Developers", - "License :: 
OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", -] -dependencies = [ - "numpy>=1.20.0", -] - -[project.optional-dependencies] -test = ["pytest"] -dev = [ - "pytest", - "pre-commit", - "ruff>=0.7.0", - "mypy>=1.0.0", -] - - -[tool.scikit-build] -wheel.expand-macos-universal-tags = true - -[tool.pytest.ini_options] -minversion = "8.0" -addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config", "-m", "not optional"] -xfail_strict = true -log_cli_level = "INFO" -filterwarnings = [ - "error", - "ignore::pytest.PytestCacheWarning", -] -testpaths = ["tests"] -markers = [ - "optional: mark test as optional", -] -python_files = ["test.py", "test_*.py", "*_test.py"] -python_classes = ["Test*"] -python_functions = ["test_*"] - - -[tool.cibuildwheel] -manylinux-x86_64-image = "quay.io/pypa/manylinux_2_34_x86_64" -manylinux-aarch64-image = "quay.io/pypa/manylinux_2_34_aarch64" - -build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"] -skip = ["*-win32", "*-manylinux_i686", "*-musllinux*", "pp*"] - -# Set the environment variable for the wheel build step. 
-environment = { LCS_BUILD_DIR = "{project}/build", MACOSX_DEPLOYMENT_TARGET = "14.0" } - -# Test that the wheel can be imported -test-command = "python -c 'import libcachesim; print(\"Import successful\")'" - -[tool.cibuildwheel.linux] -before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel" -before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja && cmake --build {project}/build" - -[tool.cibuildwheel.macos] -before-all = "brew install glib google-perftools argp-standalone xxhash llvm wget cmake ninja zstd xgboost lightgbm" -before-build = "rm -rf {project}/build && cmake -S {project} -B {project}/build -G Ninja -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 && cmake --build {project}/build" - -[tool.ruff] -# Allow lines to be as long as 120. -line-length = 120 - -[tool.ruff.lint] -select = [ - # pycodestyle - "E", - # Pyflakes - "F", - # pyupgrade - "UP", - # flake8-bugbear - "B", - # flake8-simplify - "SIM", - # isort - # "I", - # flake8-logging-format - "G", -] -ignore = [ - # star imports - "F405", "F403", - # lambda expression assignment - "E731", - # Loop control variable not used within loop body - "B007", - # f-string format - "UP032", - # Can remove once 3.10+ is the minimum Python version - "UP007", - "UP045" -] diff --git a/libCacheSim-python/requirements.txt b/libCacheSim-python/requirements.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp deleted file mode 100644 index 5ca90ca21..000000000 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ /dev/null @@ -1,1223 +0,0 @@ -#include -#include -#include - -// Suppress visibility warnings for pybind11 types -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wattributes" - -#include -#include -#include - -#include "config.h" -#include "libCacheSim/cache.h" -#include 
"libCacheSim/cacheObj.h" -#include "libCacheSim/const.h" -#include "libCacheSim/enum.h" -#include "libCacheSim/logging.h" -#include "libCacheSim/macro.h" -#include "libCacheSim/reader.h" -#include "libCacheSim/request.h" -#include "libCacheSim/sampling.h" -#include "mystr.h" - -/* admission */ -#include "libCacheSim/admissionAlgo.h" - -/* eviction */ -#include "libCacheSim/evictionAlgo.h" - -/* cache simulator */ -#include "libCacheSim/profilerLRU.h" -#include "libCacheSim/simulator.h" - -/* bin */ -#include "cachesim/cache_init.h" -#include "cli_reader_utils.h" - -#define STRINGIFY(x) #x -#define MACRO_STRINGIFY(x) STRINGIFY(x) - -namespace py = pybind11; - -// Helper functions - -// https://stackoverflow.com/questions/874134/find-out-if-string-ends-with-another-string-in-c -static bool ends_with(std::string_view str, std::string_view suffix) { - return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; -} - -trace_type_e infer_trace_type(const std::string& trace_path) { - // Infer the trace type based on the file extension - if (trace_path.find("oracleGeneral") != std::string::npos) { - return trace_type_e::ORACLE_GENERAL_TRACE; - } else if (ends_with(trace_path, ".csv")) { - return trace_type_e::CSV_TRACE; - } else if (ends_with(trace_path, ".txt")) { - return trace_type_e::PLAIN_TXT_TRACE; - } else if (ends_with(trace_path, ".bin")) { - return trace_type_e::BIN_TRACE; - } else if (ends_with(trace_path, ".vscsi")) { - return trace_type_e::VSCSI_TRACE; - } else if (ends_with(trace_path, ".twr")) { - return trace_type_e::TWR_TRACE; - } else if (ends_with(trace_path, ".twrns")) { - return trace_type_e::TWRNS_TRACE; - } else if (ends_with(trace_path, ".lcs")) { - return trace_type_e::LCS_TRACE; - } else if (ends_with(trace_path, ".valpin")) { - return trace_type_e::VALPIN_TRACE; - } else { - return trace_type_e::UNKNOWN_TRACE; - } -} - -// Python Hook Cache Implementation -class PythonHookCache { - private: - 
uint64_t cache_size_; - std::string cache_name_; - std::unordered_map objects_; // obj_id -> obj_size - py::object plugin_data_; - - // Hook functions - py::function init_hook_; - py::function hit_hook_; - py::function miss_hook_; - py::function eviction_hook_; - py::function remove_hook_; - py::object free_hook_; // Changed to py::object to allow py::none() - - public: - uint64_t n_req = 0; - uint64_t n_obj = 0; - uint64_t occupied_byte = 0; - uint64_t cache_size; - - PythonHookCache(uint64_t cache_size, - const std::string& cache_name = "PythonHookCache") - : cache_size_(cache_size), - cache_name_(cache_name), - cache_size(cache_size), - free_hook_(py::none()) {} - - void set_hooks(py::function init_hook, py::function hit_hook, - py::function miss_hook, py::function eviction_hook, - py::function remove_hook, py::object free_hook = py::none()) { - init_hook_ = init_hook; - hit_hook_ = hit_hook; - miss_hook_ = miss_hook; - eviction_hook_ = eviction_hook; - remove_hook_ = remove_hook; - - // Handle free_hook properly - if (!free_hook.is_none()) { - free_hook_ = free_hook; - } else { - free_hook_ = py::none(); - } - - // Initialize plugin data - plugin_data_ = init_hook_(cache_size_); - } - - bool get(const request_t& req) { - n_req++; - - auto it = objects_.find(req.obj_id); - if (it != objects_.end()) { - // Cache hit - hit_hook_(plugin_data_, req.obj_id, req.obj_size); - return true; - } else { - // Cache miss - call miss hook first - miss_hook_(plugin_data_, req.obj_id, req.obj_size); - - // Check if eviction is needed - while (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) { - // Need to evict - uint64_t victim_id = - eviction_hook_(plugin_data_, req.obj_id, req.obj_size) - .cast(); - auto victim_it = objects_.find(victim_id); - if (victim_it != objects_.end()) { - occupied_byte -= victim_it->second; - objects_.erase(victim_it); - n_obj--; - remove_hook_(plugin_data_, victim_id); - } else { - // Safety check: if eviction hook returns invalid 
ID, break to avoid - // infinite loop - break; - } - } - - // Insert new object if there's space - if (occupied_byte + req.obj_size <= cache_size_) { - objects_[req.obj_id] = req.obj_size; - occupied_byte += req.obj_size; - n_obj++; - } - - return false; - } - } - - ~PythonHookCache() { - if (!free_hook_.is_none()) { - py::function free_func = free_hook_.cast(); - free_func(plugin_data_); - } - } -}; - -// Restore visibility warnings -#pragma GCC diagnostic pop - -struct CacheDeleter { - void operator()(cache_t* ptr) const { - if (ptr != nullptr) ptr->cache_free(ptr); - } -}; - -struct RequestDeleter { - void operator()(request_t* ptr) const { - if (ptr != nullptr) free_request(ptr); - } -}; - -struct ReaderDeleter { - void operator()(reader_t* ptr) const { - if (ptr != nullptr) close_trace(ptr); - } -}; - -PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) - m.doc() = R"pbdoc( - libCacheSim Python bindings - -------------------------- - - .. currentmodule:: libcachesim - - .. 
autosummary:: - :toctree: _generate - - TODO(haocheng): add meaningful methods - )pbdoc"; - - py::enum_(m, "TraceType") - .value("CSV_TRACE", trace_type_e::CSV_TRACE) - .value("BIN_TRACE", trace_type_e::BIN_TRACE) - .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) - .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE) - .value("LCS_TRACE", trace_type_e::LCS_TRACE) - .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) - .value("TWR_TRACE", trace_type_e::TWR_TRACE) - .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE) - .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE) - .value("ORACLE_SYS_TWR_TRACE", trace_type_e::ORACLE_SYS_TWR_TRACE) - .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE) - .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE) - .value("VALPIN_TRACE", trace_type_e::VALPIN_TRACE) - .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE) - .export_values(); - - py::enum_(m, "ReqOp") - .value("NOP", req_op_e::OP_NOP) - .value("GET", req_op_e::OP_GET) - .value("GETS", req_op_e::OP_GETS) - .value("SET", req_op_e::OP_SET) - .value("ADD", req_op_e::OP_ADD) - .value("CAS", req_op_e::OP_CAS) - .value("REPLACE", req_op_e::OP_REPLACE) - .value("APPEND", req_op_e::OP_APPEND) - .value("PREPEND", req_op_e::OP_PREPEND) - .value("DELETE", req_op_e::OP_DELETE) - .value("INCR", req_op_e::OP_INCR) - .value("DECR", req_op_e::OP_DECR) - .value("READ", req_op_e::OP_READ) - .value("WRITE", req_op_e::OP_WRITE) - .value("UPDATE", req_op_e::OP_UPDATE) - .value("INVALID", req_op_e::OP_INVALID) - .export_values(); - - // *************** structs *************** - /** - * @brief Cache structure - */ - py::class_>(m, "Cache") - .def_readwrite("n_req", &cache_t::n_req) - .def_readwrite("cache_size", &cache_t::cache_size) - // Use proper accessor functions for private fields - .def_property_readonly( - "n_obj", [](const cache_t& self) { return self.get_n_obj(&self); }) - .def_property_readonly( - "occupied_byte", - 
[](const cache_t& self) { return self.get_occupied_byte(&self); }) - // methods - .def("get", [](cache_t& self, const request_t& req) { - return self.get(&self, &req); - }); - - /** - * @brief Request structure - */ - py::class_>(m, - "Request") - .def(py::init([]() { return new_request(); })) - .def(py::init([](uint64_t obj_id, uint64_t obj_size, uint64_t clock_time, - uint64_t hv, req_op_e op) { - request_t* req = new_request(); - req->obj_id = obj_id; - req->obj_size = obj_size; - req->clock_time = clock_time; - req->hv = hv; - req->op = op; - return req; - }), - py::arg("obj_id"), py::arg("obj_size") = 1, - py::arg("clock_time") = 0, py::arg("hv") = 0, - py::arg("op") = req_op_e::OP_GET, - R"pbdoc( - Create a request instance. - - Args: - obj_id (int): The object ID. - obj_size (int): The object size. (default: 1) - clock_time (int): The clock time. (default: 0) - hv (int): The hash value. (default: 0) - op (req_op_e): The operation. (default: OP_GET) - - Returns: - Request: A new request instance. - )pbdoc") - .def_readwrite("clock_time", &request_t::clock_time) - .def_readwrite("hv", &request_t::hv) - .def_readwrite("obj_id", &request_t::obj_id) - .def_readwrite("obj_size", &request_t::obj_size) - .def_readwrite("op", &request_t::op); - - /** - * @brief Reader structure - */ - py::class_>(m, "Reader") - .def_readwrite("n_read_req", &reader_t::n_read_req) - .def_readwrite("n_total_req", &reader_t::n_total_req) - .def_readwrite("trace_path", &reader_t::trace_path) - .def_readwrite("file_size", &reader_t::file_size) - .def_readwrite("ignore_obj_size", &reader_t::ignore_obj_size) - // methods - .def( - "get_wss", - [](reader_t& self) { - int64_t wss_obj = 0, wss_byte = 0; - cal_working_set_size(&self, &wss_obj, &wss_byte); - return self.ignore_obj_size ? wss_obj : wss_byte; - }, - R"pbdoc( - Get the working set size of the trace. - - Args: - ignore_obj_size (bool): Whether to ignore the object size. - - Returns: - int: The working set size of the trace. 
- )pbdoc") - .def( - "seek", - [](reader_t& self, int64_t offset, bool from_beginning = false) { - int64_t offset_from_beginning = offset; - if (!from_beginning) { - offset_from_beginning += self.n_read_req; - } - reset_reader(&self); - skip_n_req(&self, offset_from_beginning); - }, - py::arg("offset"), py::arg("from_beginning") = false, - R"pbdoc( - Seek to a specific offset in the trace file. - We only support seeking from current position or from the beginning. - - Can only move forward, not backward. - - Args: - offset (int): The offset to seek to the beginning. - - Raises: - RuntimeError: If seeking fails. - )pbdoc") - .def("__iter__", [](reader_t& self) -> reader_t& { return self; }) - .def("__next__", [](reader_t& self) { - auto req = std::unique_ptr(new_request()); - int ret = read_one_req(&self, req.get()); - if (ret != 0) { - throw py::stop_iteration(); - } - return req; - }); - - // Helper function to apply parameters from dictionary to reader_init_param_t - auto apply_params_from_dict = [](reader_init_param_t& params, - py::dict dict_params) { - // Template field setter with type safety - auto set_if_present = [&](const char* key, auto& field) { - if (dict_params.contains(key)) { - field = - dict_params[key].cast>(); - } - }; - - // Apply all standard fields - set_if_present("time_field", params.time_field); - set_if_present("obj_id_field", params.obj_id_field); - set_if_present("obj_size_field", params.obj_size_field); - set_if_present("has_header", params.has_header); - set_if_present("ignore_obj_size", params.ignore_obj_size); - set_if_present("ignore_size_zero_req", params.ignore_size_zero_req); - set_if_present("obj_id_is_num", params.obj_id_is_num); - set_if_present("obj_id_is_num_set", params.obj_id_is_num_set); - set_if_present("has_header_set", params.has_header_set); - set_if_present("cap_at_n_req", params.cap_at_n_req); - set_if_present("op_field", params.op_field); - set_if_present("ttl_field", params.ttl_field); - 
set_if_present("cnt_field", params.cnt_field); - set_if_present("tenant_field", params.tenant_field); - set_if_present("next_access_vtime_field", params.next_access_vtime_field); - set_if_present("block_size", params.block_size); - set_if_present("trace_start_offset", params.trace_start_offset); - - // Special fields with custom handling - if (dict_params.contains("delimiter")) { - std::string delim = dict_params["delimiter"].cast(); - params.delimiter = delim.empty() ? ',' : delim[0]; - } - - if (dict_params.contains("binary_fmt_str")) { - // Free existing memory first to prevent leaks - if (params.binary_fmt_str) { - free(params.binary_fmt_str); - params.binary_fmt_str = nullptr; - } - std::string fmt = dict_params["binary_fmt_str"].cast(); - if (!fmt.empty()) { - // Note: Using strdup for C-compatible memory allocation - // Memory is managed by reader_init_param_t destructor/cleanup - params.binary_fmt_str = strdup(fmt.c_str()); - if (!params.binary_fmt_str) { - throw std::runtime_error( - "Failed to allocate memory for binary_fmt_str"); - } - } - } - - if (dict_params.contains("feature_fields")) { - auto ff = dict_params["feature_fields"].cast>(); - if (ff.size() > N_MAX_FEATURES) { - throw py::value_error("Too many feature fields (max " + - std::to_string(N_MAX_FEATURES) + ")"); - } - params.n_feature_fields = static_cast(ff.size()); - // Use copy_n for explicit bounds checking - std::copy_n(ff.begin(), params.n_feature_fields, params.feature_fields); - } - }; - - py::class_(m, "ReaderInitParam") - .def(py::init([]() { - reader_init_param_t params; - set_default_reader_init_params(¶ms); - return params; - }), - "Create with default parameters") - - .def(py::init([apply_params_from_dict](py::kwargs kwargs) { - reader_init_param_t params; - set_default_reader_init_params(¶ms); - - // Convert kwargs to dict and apply using shared helper - py::dict dict_params = py::dict(kwargs); - apply_params_from_dict(params, dict_params); - - return params; - }), - "Create 
with keyword arguments") - - .def(py::init([apply_params_from_dict](py::dict dict_params) { - reader_init_param_t params; - set_default_reader_init_params(¶ms); - - // Apply using shared helper function - apply_params_from_dict(params, dict_params); - - return params; - }), - py::arg("params"), "Create from dictionary (backward compatibility)") - .def("__repr__", [](const reader_init_param_t& params) { - std::stringstream ss; - ss << "ReaderInitParam(\n"; - - // Group 1: Core fields - ss << " # Core fields\n"; - ss << " time_field=" << params.time_field << ", "; - ss << "obj_id_field=" << params.obj_id_field << ", "; - ss << "obj_size_field=" << params.obj_size_field << ",\n"; - - // Group 2: Flags and options - ss << " # Flags and options\n"; - ss << " has_header=" << params.has_header << ", "; - ss << "ignore_obj_size=" << params.ignore_obj_size << ", "; - ss << "ignore_size_zero_req=" << params.ignore_size_zero_req << ", "; - ss << "obj_id_is_num=" << params.obj_id_is_num << ",\n"; - - // Group 3: Internal state flags - ss << " # Internal state\n"; - ss << " obj_id_is_num_set=" << params.obj_id_is_num_set << ", "; - ss << "has_header_set=" << params.has_header_set << ",\n"; - - // Group 4: Optional fields - ss << " # Optional fields\n"; - ss << " cap_at_n_req=" << params.cap_at_n_req << ", "; - ss << "op_field=" << params.op_field << ", "; - ss << "ttl_field=" << params.ttl_field << ", "; - ss << "cnt_field=" << params.cnt_field << ",\n"; - ss << " tenant_field=" << params.tenant_field << ", "; - ss << "next_access_vtime_field=" << params.next_access_vtime_field - << ",\n"; - - // Group 5: Miscellaneous - ss << " # Miscellaneous\n"; - ss << " block_size=" << params.block_size << ", "; - ss << "trace_start_offset=" << params.trace_start_offset; - ss << "\n)"; - return ss.str(); - }); - - // *************** functions *************** - /** - * @brief Open a trace file for reading - */ - m.def( - "open_trace", - [apply_params_from_dict](const std::string& 
trace_path, py::object type, - py::object params) { - trace_type_e c_type = UNKNOWN_TRACE; - if (!type.is_none()) { - c_type = type.cast(); - } else { - // If type is None, we can try to infer the type from the file - // extension - c_type = infer_trace_type(trace_path); - if (c_type == UNKNOWN_TRACE) { - throw std::runtime_error("Could not infer trace type from path: " + - trace_path); - } - } - - // Handle different parameter types - reader_init_param_t init_param; - set_default_reader_init_params(&init_param); - - if (py::isinstance(params)) { - // Dictionary parameters - use shared helper function - py::dict dict_params = params.cast(); - apply_params_from_dict(init_param, dict_params); - } else if (!params.is_none()) { - // reader_init_param_t object - direct cast (pybind11 handles - // conversion) - init_param = params.cast(); - } - reader_t* ptr = open_trace(trace_path.c_str(), c_type, &init_param); - return std::unique_ptr(ptr); - }, - py::arg("trace_path"), py::arg("type") = py::none(), - py::arg("params") = py::none(), - R"pbdoc( - Open a trace file for reading. - - Args: - trace_path (str): Path to the trace file. - type (Union[trace_type_e, None]): Type of the trace (e.g., CSV_TRACE). If None, the type will be inferred. - params (Union[dict, reader_init_param_t, None]): Initialization parameters for the reader. - - Returns: - Reader: A new reader instance for the trace. - )pbdoc"); - - /** - * @brief Generic function to create a cache instance. - */ - m.def( - "create_cache", - [](const std::string& eviction_algo, const uint64_t cache_size, - const std::string& eviction_params, - bool consider_obj_metadata) { return nullptr; }, - py::arg("eviction_algo"), py::arg("cache_size"), - py::arg("eviction_params"), py::arg("consider_obj_metadata"), - R"pbdoc( - Create a cache instance. - - Args: - eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random"). - cache_size (int): Size of the cache in bytes. 
- eviction_params (str): Additional parameters for the eviction algorithm. - consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions. - - Returns: - Cache: A new cache instance. - )pbdoc"); - - /* TODO(haocheng): should we support all parameters in the - * common_cache_params_t? (hash_power, etc.) */ - - // Currently supported eviction algorithms with direct initialization: - // - "ARC" - // - "Clock" - // - "FIFO" - // - "LRB" - // - "LRU" - // - "S3FIFO" - // - "Sieve" - // - "ThreeLCache" - // - "TinyLFU" - // - "TwoQ" - - /** - * @brief Create a ARC cache instance. - */ - m.def( - "ARC_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = ARC_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a ARC cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - )pbdoc"); - - /** - * @brief Create a Clock cache instance. - */ - m.def( - "Clock_init", - [](uint64_t cache_size, long int n_bit_counter, long int init_freq) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - // assemble the cache specific parameters - std::string cache_specific_params = - "n-bit-counter=" + std::to_string(n_bit_counter) + "," + - "init-freq=" + std::to_string(init_freq); - - cache_t* ptr = Clock_init(cc_params, cache_specific_params.c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("n_bit_counter") = 1, - py::arg("init_freq") = 0, - R"pbdoc( - Create a Clock cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - n_bit_counter (int): Number of bits for counter (default: 1). - init_freq (int): Initial frequency value (default: 0). - - Returns: - Cache: A new Clock cache instance. - )pbdoc"); - - /** - * @brief Create a FIFO cache instance. 
- */ - m.def( - "FIFO_init", - [](uint64_t cache_size) { - // Construct common cache parameters - common_cache_params_t cc_params = {.cache_size = cache_size}; - // FIFO no specific parameters, so we pass nullptr - cache_t* ptr = FIFO_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a FIFO cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new FIFO cache instance. - )pbdoc"); - -#ifdef ENABLE_LRB - /** - * @brief Create a LRB cache instance. - */ - m.def( - "LRB_init", - [](uint64_t cache_size, std::string objective) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LRB_init(cc_params, ("objective=" + objective).c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", - R"pbdoc( - Create a LRB cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - objective (str): Objective function to optimize (default: "byte-miss-ratio"). - - Returns: - Cache: A new LRB cache instance. - )pbdoc"); -#else - // TODO(haocheng): add a dummy function to avoid the error when LRB is not - // enabled - m.def( - "LRB_init", - [](uint64_t cache_size, std::string objective) { - throw std::runtime_error("LRB is not enabled"); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); -#endif - - /** - * @brief Create a LRU cache instance. - */ - m.def( - "LRU_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LRU_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LRU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LRU cache instance. - )pbdoc"); - - /** - * @brief Create a S3FIFO cache instance. 
- */ - m.def( - "S3FIFO_init", - [](uint64_t cache_size, double fifo_size_ratio, double ghost_size_ratio, - int move_to_main_threshold) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = S3FIFO_init( - cc_params, - ("fifo-size-ratio=" + std::to_string(fifo_size_ratio) + "," + - "ghost-size-ratio=" + std::to_string(ghost_size_ratio) + "," + - "move-to-main-threshold=" + std::to_string(move_to_main_threshold)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("fifo_size_ratio") = 0.10, - py::arg("ghost_size_ratio") = 0.90, py::arg("move_to_main_threshold") = 2, - R"pbdoc( - Create a S3FIFO cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - fifo_size_ratio (float): Ratio of FIFO size to cache size (default: 0.10). - ghost_size_ratio (float): Ratio of ghost size to cache size (default: 0.90). - move_to_main_threshold (int): Threshold for moving to main queue (default: 2). - - Returns: - Cache: A new S3FIFO cache instance. - )pbdoc"); - - /** - * @brief Create a Sieve cache instance. - */ - m.def( - "Sieve_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = Sieve_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a Sieve cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new Sieve cache instance. - )pbdoc"); - -#ifdef ENABLE_3L_CACHE - /** - * @brief Create a ThreeL cache instance. - */ - m.def( - "ThreeLCache_init", - [](uint64_t cache_size, std::string objective) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = - ThreeLCache_init(cc_params, ("objective=" + objective).c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", - R"pbdoc( - Create a ThreeL cache instance. - - Args: - cache_size (int): Size of the cache in bytes. 
- objective (str): Objective function to optimize (default: "byte-miss-ratio"). - - Returns: - Cache: A new ThreeL cache instance. - )pbdoc"); -#else - // TODO(haocheng): add a dummy function to avoid the error when ThreeLCache is - // not enabled - m.def( - "ThreeLCache_init", - [](uint64_t cache_size, std::string objective) { - throw std::runtime_error("ThreeLCache is not enabled"); - }, - py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); -#endif - - /** - * @brief Create a TinyLFU cache instance. - */ - // mark evivtion parsing need change - m.def( - "TinyLFU_init", - [](uint64_t cache_size, std::string main_cache, double window_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = WTinyLFU_init( - cc_params, ("main-cache=" + main_cache + "," + - "window-size=" + std::to_string(window_size)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("main_cache") = "SLRU", - py::arg("window_size") = 0.01, - R"pbdoc( - Create a TinyLFU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - main_cache (str): Main cache to use (default: "SLRU"). - window_size (float): Window size for TinyLFU (default: 0.01). - - Returns: - Cache: A new TinyLFU cache instance. - )pbdoc"); - - /** - * @brief Create a TwoQ cache instance. - */ - m.def( - "TwoQ_init", - [](uint64_t cache_size, double Ain_size_ratio, double Aout_size_ratio) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = TwoQ_init( - cc_params, - ("Ain-size-ratio=" + std::to_string(Ain_size_ratio) + "," + - "Aout-size-ratio=" + std::to_string(Aout_size_ratio)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("Ain_size_ratio") = 0.25, - py::arg("Aout_size_ratio") = 0.5, - R"pbdoc( - Create a TwoQ cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - Ain_size_ratio (float): Ratio of A-in size to cache size (default: 0.25). 
- Aout_size_ratio (float): Ratio of A-out size to cache size (default: 0.5). - - Returns: - Cache: A new TwoQ cache instance. - )pbdoc"); - - /** - * @brief Create a LFU cache instance. - */ - m.def( - "LFU_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LFU_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LFU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LFU cache instance. - )pbdoc"); - - /** - * @brief Create a LFUDA cache instance. - */ - m.def( - "LFUDA_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LFUDA_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LFUDA cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LFUDA cache instance. - )pbdoc"); - - /** - * @brief Create a SLRU cache instance. - */ - m.def( - "SLRU_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = SLRU_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a SLRU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new SLRU cache instance. - )pbdoc"); - - /** - * @brief Create a Belady cache instance. - */ - m.def( - "Belady_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = Belady_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a Belady cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new Belady cache instance. - )pbdoc"); - - /** - * @brief Create a BeladySize cache instance. 
- */ - m.def( - "BeladySize_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = BeladySize_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a BeladySize cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new BeladySize cache instance. - )pbdoc"); - - /** - * @brief Create a QDLP cache instance. - */ - m.def( - "QDLP_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = QDLP_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a QDLP cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new QDLP cache instance. - )pbdoc"); - - /** - * @brief Create a LeCaR cache instance. - */ - m.def( - "LeCaR_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = LeCaR_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a LeCaR cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new LeCaR cache instance. - )pbdoc"); - - /** - * @brief Create a Cacheus cache instance. - */ - m.def( - "Cacheus_init", - [](uint64_t cache_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = Cacheus_init(cc_params, nullptr); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), - R"pbdoc( - Create a Cacheus cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - - Returns: - Cache: A new Cacheus cache instance. - )pbdoc"); - - /** - * @brief Create a WTinyLFU cache instance. 
- */ - m.def( - "WTinyLFU_init", - [](uint64_t cache_size, std::string main_cache, double window_size) { - common_cache_params_t cc_params = {.cache_size = cache_size}; - cache_t* ptr = WTinyLFU_init( - cc_params, ("main-cache=" + main_cache + "," + - "window-size=" + std::to_string(window_size)) - .c_str()); - return std::unique_ptr(ptr); - }, - py::arg("cache_size"), py::arg("main_cache") = "SLRU", - py::arg("window_size") = 0.01, - R"pbdoc( - Create a WTinyLFU cache instance. - - Args: - cache_size (int): Size of the cache in bytes. - main_cache (str): Main cache to use (default: "SLRU"). - window_size (float): Window size for TinyLFU (default: 0.01). - - Returns: - Cache: A new WTinyLFU cache instance. - )pbdoc"); - - /** - * @brief Create a Python hook-based cache instance. - */ - py::class_(m, "PythonHookCache") - .def(py::init(), py::arg("cache_size"), - py::arg("cache_name") = "PythonHookCache") - .def("set_hooks", &PythonHookCache::set_hooks, py::arg("init_hook"), - py::arg("hit_hook"), py::arg("miss_hook"), py::arg("eviction_hook"), - py::arg("remove_hook"), py::arg("free_hook") = py::none(), - R"pbdoc( - Set the hook functions for the cache. - - Args: - init_hook (callable): Function called during cache initialization. - Signature: init_hook(cache_size: int) -> Any - hit_hook (callable): Function called on cache hit. - Signature: hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None - miss_hook (callable): Function called on cache miss. - Signature: miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None - eviction_hook (callable): Function called to select eviction candidate. - Signature: eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int - remove_hook (callable): Function called when object is removed. - Signature: remove_hook(plugin_data: Any, obj_id: int) -> None - free_hook (callable, optional): Function called during cache cleanup. 
- Signature: free_hook(plugin_data: Any) -> None - )pbdoc") - .def("get", &PythonHookCache::get, py::arg("req"), - R"pbdoc( - Process a cache request. - - Args: - req (Request): The cache request to process. - - Returns: - bool: True if cache hit, False if cache miss. - )pbdoc") - .def_readwrite("n_req", &PythonHookCache::n_req) - .def_readwrite("n_obj", &PythonHookCache::n_obj) - .def_readwrite("occupied_byte", &PythonHookCache::occupied_byte) - .def_readwrite("cache_size", &PythonHookCache::cache_size); - - /** - * @brief Process a trace with a cache and return miss ratio. - */ - m.def( - "process_trace", - [](cache_t& cache, reader_t& reader, int64_t start_req = 0, - int64_t max_req = -1) { - reset_reader(&reader); - if (start_req > 0) { - skip_n_req(&reader, start_req); - } - - request_t* req = new_request(); - int64_t n_req = 0, n_hit = 0; - int64_t bytes_req = 0, bytes_hit = 0; - bool hit; - - read_one_req(&reader, req); - while (req->valid) { - n_req += 1; - bytes_req += req->obj_size; - hit = cache.get(&cache, req); - if (hit) { - n_hit += 1; - bytes_hit += req->obj_size; - } - read_one_req(&reader, req); - if (max_req > 0 && n_req >= max_req) { - break; // Stop if we reached the max request limit - } - } - - free_request(req); - // return the miss ratio - double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; - double byte_miss_ratio = - bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0; - return std::make_tuple(obj_miss_ratio, byte_miss_ratio); - }, - py::arg("cache"), py::arg("reader"), py::arg("start_req") = 0, - py::arg("max_req") = -1, - R"pbdoc( - Process a trace with a cache and return miss ratio. - - This function processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. - - Args: - cache (Cache): The cache instance to use for processing. - reader (Reader): The trace reader instance. - start_req (int): The starting request number to process from (default: 0, from the beginning). 
- max_req (int): Maximum number of requests to process (-1 for no limit). - - Returns: - float: Object miss ratio (0.0 to 1.0). - float: Byte miss ratio (0.0 to 1.0). - - Example: - >>> cache = libcachesim.LRU(1024*1024) - >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = libcachesim.process_trace(cache, reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") - )pbdoc"); - - /** - * @brief Process a trace with a Python hook cache and return miss ratio. - */ - m.def( - "process_trace_python_hook", - [](PythonHookCache& cache, reader_t& reader, int64_t start_req = 0, - int64_t max_req = -1) { - reset_reader(&reader); - if (start_req > 0) { - skip_n_req(&reader, start_req); - } - - request_t* req = new_request(); - int64_t n_req = 0, n_hit = 0; - int64_t bytes_req = 0, bytes_hit = 0; - bool hit; - - read_one_req(&reader, req); - while (req->valid) { - n_req += 1; - bytes_req += req->obj_size; - hit = cache.get(*req); - if (hit) { - n_hit += 1; - bytes_hit += req->obj_size; - } - read_one_req(&reader, req); - if (max_req > 0 && n_req >= max_req) { - break; // Stop if we reached the max request limit - } - } - - free_request(req); - // return the miss ratio - double obj_miss_ratio = n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; - double byte_miss_ratio = - bytes_req > 0 ? 1.0 - (double)bytes_hit / bytes_req : 0.0; - return std::make_tuple(obj_miss_ratio, byte_miss_ratio); - }, - py::arg("cache"), py::arg("reader"), py::arg("start_req") = 0, - py::arg("max_req") = -1, - R"pbdoc( - Process a trace with a Python hook cache and return miss ratio. - - This function processes trace data entirely on the C++ side to avoid - data movement overhead between Python and C++. Specifically designed - for PythonHookCache instances. - - Args: - cache (PythonHookCache): The Python hook cache instance to use. - reader (Reader): The trace reader instance. 
- start_req (int): The starting request number to process from (0 for beginning). - max_req (int): Maximum number of requests to process (-1 for no limit). - - Returns: - float: Object miss ratio (0.0 to 1.0). - float: Byte miss ratio (0.0 to 1.0). - - Example: - >>> cache = libcachesim.PythonHookCachePolicy(1024*1024) - >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) - >>> obj_miss_ratio, byte_miss_ratio = libcachesim.process_trace_python_hook(cache.cache, reader) - >>> print(f"Obj miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") - )pbdoc"); - -#ifdef VERSION_INFO - m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); -#else - m.attr("__version__") = "dev"; -#endif -} diff --git a/libCacheSim-python/tests/conftest.py b/libCacheSim-python/tests/conftest.py deleted file mode 100644 index a3e2705e1..000000000 --- a/libCacheSim-python/tests/conftest.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import annotations - -import os -import gc - -import pytest - -from libcachesim import Reader, TraceType, open_trace - - -@pytest.fixture -def mock_reader(): - data_file = os.path.join( # noqa: PTH118 - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 - "data", - "cloudPhysicsIO.oracleGeneral.bin", - ) - reader: Reader = open_trace( - data_file, - type=TraceType.ORACLE_GENERAL_TRACE, - ) - try: - yield reader - finally: - # More careful cleanup - try: - if hasattr(reader, "close"): - reader.close() - except Exception: # Be specific about exception type - pass - # Don't explicitly del reader here, let Python handle it - gc.collect() diff --git a/libCacheSim-python/tests/reference.csv b/libCacheSim-python/tests/reference.csv deleted file mode 100644 index cb569d0c9..000000000 --- a/libCacheSim-python/tests/reference.csv +++ /dev/null @@ -1,20 +0,0 @@ -FIFO,0.01,0.8368 -ARC,0.01,0.8222 -Clock,0.01,0.8328 
-LRB,0.01,0.8339 -LRU,0.01,0.8339 -S3FIFO,0.01,0.8235 -Sieve,0.01,0.8231 -3LCache,0.01,0.8339 -TinyLFU,0.01,0.8262 -TwoQ,0.01,0.8276 -FIFO,0.1,0.8075 -ARC,0.1,0.7688 -Clock,0.1,0.8086 -LRB,0.1,0.8097 -LRU,0.1,0.8097 -S3FIFO,0.1,0.7542 -Sieve,0.1,0.7903 -3LCache,0.1,0.8097 -TinyLFU,0.1,0.7666 -TwoQ,0.1,0.7695 diff --git a/libCacheSim-python/tests/test_eviction.py b/libCacheSim-python/tests/test_eviction.py deleted file mode 100644 index a51aae860..000000000 --- a/libCacheSim-python/tests/test_eviction.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest - -from libcachesim import ( - ARC, - FIFO, - LRU, - S3FIFO, - Clock, - Sieve, - TinyLFU, - TwoQ, -) -from tests.utils import get_reference_data - - -@pytest.mark.parametrize( - "eviction_algo", - [ - FIFO, - ARC, - Clock, - LRU, - S3FIFO, - Sieve, - TinyLFU, - TwoQ, - ], -) -@pytest.mark.parametrize("cache_size_ratio", [0.01]) -def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): - cache = None - try: - # create a cache with the eviction policy - cache = eviction_algo(cache_size=int(mock_reader.get_wss() * cache_size_ratio)) - req_count = 0 - miss_count = 0 - - # Limit the number of requests to avoid long test times - # max_requests = 1000 - for i, req in enumerate(mock_reader): - # if i >= max_requests: - # break - hit = cache.get(req) - if not hit: - miss_count += 1 - req_count += 1 - - if req_count == 0: - pytest.skip("No requests processed") - - miss_ratio = miss_count / req_count - reference_miss_ratio = get_reference_data(eviction_algo.__name__, cache_size_ratio) - if reference_miss_ratio is None: - pytest.skip(f"No reference data for {eviction_algo.__name__} with cache size ratio {cache_size_ratio}") - assert abs(miss_ratio - reference_miss_ratio) < 0.01, ( - f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" - ) - - except Exception as e: - pytest.fail(f"Error in test_eviction_algo: {e}") - finally: - pass diff --git a/libCacheSim-python/tests/test_process_trace.py 
b/libCacheSim-python/tests/test_process_trace.py deleted file mode 100644 index 1dbfb486f..000000000 --- a/libCacheSim-python/tests/test_process_trace.py +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env python3 -""" -Test file for process_trace functionality. -""" - -import sys -import os -import pytest - -# Add the parent directory to the Python path for development testing -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -try: - import libcachesim as lcs -except ImportError as e: - pytest.skip(f"libcachesim not available: {e}", allow_module_level=True) - -from collections import OrderedDict - - -def create_trace_reader(): - """Helper function to create a trace reader with binary trace file.""" - data_file = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin" - ) - if not os.path.exists(data_file): - return None - return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) - - -def test_process_trace_native(): - """Test process_trace with native LRU cache.""" - - # Open trace - reader = create_trace_reader() - if reader is None: - pytest.skip("Test trace file not found, skipping test") - - # Create LRU cache - cache = lcs.LRU(1024 * 1024) # 1MB cache - - # Process trace and get miss ratio - obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, max_req=1000) - - # Verify miss ratio is reasonable (should be between 0 and 1) - assert 0.0 <= obj_miss_ratio <= 1.0, f"Invalid miss ratio: {obj_miss_ratio}" - - -def test_process_trace_python_hook(): - """Test process_trace with Python hook cache.""" - - # Open trace - reader = create_trace_reader() - if reader is None: - pytest.skip("Test trace file not found, skipping test") - - # Create Python hook LRU cache - cache = lcs.PythonHookCachePolicy(1024 * 1024, "TestLRU") - - # Define LRU hooks - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - lru_dict.move_to_end(obj_id) - 
- def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = True - - def eviction_hook(lru_dict, obj_id, obj_size): - return next(iter(lru_dict)) - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - # Set hooks - cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Test both methods - # Method 1: Direct function call - miss_ratio1 = lcs.process_trace_python_hook(cache.cache, reader, max_req=1000)[0] - - # Need to reopen the trace for second test - reader2 = create_trace_reader() - if reader2 is None: - pytest.skip("Warning: Cannot reopen trace file, skipping second test") - # Continue with just the first test result - assert miss_ratio1 is not None and 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio: {miss_ratio1}" - return - - # Reset cache for fair comparison - cache2 = lcs.PythonHookCachePolicy(1024 * 1024, "TestLRU2") - cache2.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Method 2: Convenience method - miss_ratio2 = cache2.process_trace(reader2, max_req=1000)[0] - - # Verify both methods give the same result and miss ratios are reasonable - assert 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio 1: {miss_ratio1}" - assert 0.0 <= miss_ratio2 <= 1.0, f"Invalid miss ratio 2: {miss_ratio2}" - assert abs(miss_ratio1 - miss_ratio2) < 0.001, ( - f"Different results from the two methods: {miss_ratio1} vs {miss_ratio2}" - ) - - -def test_compare_native_vs_python_hook(): - """Compare native LRU vs Python hook LRU using process_trace.""" - - cache_size = 512 * 1024 # 512KB cache - max_requests = 500 - - # Test native LRU - native_cache = lcs.LRU(cache_size) - reader1 = create_trace_reader() - if reader1 is None: - pytest.skip("Test trace file not found, skipping test") - - native_obj_miss_ratio, native_byte_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) - - # Test Python hook LRU - hook_cache = lcs.PythonHookCachePolicy(cache_size, "HookLRU") - - def 
init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = True - - def eviction_hook(lru_dict, obj_id, obj_size): - return next(iter(lru_dict)) - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - reader2 = create_trace_reader() - if reader2 is None: - pytest.skip("Warning: Cannot reopen trace file, skipping comparison") - return # Skip test - - hook_obj_miss_ratio, hook_byte_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) - - # They should be very similar (allowing for some small differences due to implementation details) - assert abs(native_obj_miss_ratio - hook_obj_miss_ratio) < 0.05, ( - f"Too much difference: {abs(native_obj_miss_ratio - hook_obj_miss_ratio):.4f}" - ) - - -def test_error_handling(): - """Test error handling for process_trace.""" - - cache = lcs.PythonHookCachePolicy(1024) - - reader = create_trace_reader() - if reader is None: - pytest.skip("Test trace file not found, skipping error test") - - # Try to process trace without setting hooks - should raise RuntimeError - with pytest.raises(RuntimeError, match="Hooks must be set before processing trace"): - cache.process_trace(reader) - - -def test_lru_implementation_accuracy(): - """Test that Python hook LRU implementation matches native LRU closely.""" - - cache_size = 1024 * 1024 # 1MB - max_requests = 100 - - # Create readers - reader1 = create_trace_reader() - reader2 = create_trace_reader() - - if not reader1 or not reader2: - pytest.skip("Cannot open trace files for LRU accuracy test") - - # Test native LRU - native_cache = lcs.LRU(cache_size) - native_obj_miss_ratio, native_byte_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) - - # Test Python hook LRU - hook_cache = lcs.PythonHookCachePolicy(cache_size, 
"AccuracyTestLRU") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_optimized_lru_hooks() - hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - hook_obj_miss_ratio, hook_byte_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) - - # Calculate difference - difference = abs(native_obj_miss_ratio - hook_obj_miss_ratio) - percentage_diff = (difference / native_obj_miss_ratio) * 100 if native_obj_miss_ratio > 0 else 0 - - # Assert that the difference is small (< 5%) - assert percentage_diff < 5.0, f"LRU implementation difference too large: {percentage_diff:.4f}%" - - -def create_optimized_lru_hooks(): - """Create optimized LRU hooks that closely match native LRU behavior.""" - - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - if obj_id in lru_dict: - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = obj_size - - def eviction_hook(lru_dict, obj_id, obj_size): - if lru_dict: - return next(iter(lru_dict)) - return obj_id - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py deleted file mode 100644 index 7af8873dc..000000000 --- a/libCacheSim-python/tests/test_python_hook_cache.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env python3 -""" -Test file for PythonHookCachePolicy functionality. -""" - -import pytest -import libcachesim as lcs -from dataclasses import dataclass -from collections import OrderedDict - - -@dataclass -class CacheTestCase: - """Represents a single test case for cache operations.""" - - request: tuple[int, int] # (obj_id, obj_size) - expected_hit: bool - expected_obj_count: int - description: str = "" - - -def create_lru_hooks(): - """Create standard LRU hooks for testing. 
- - Returns: - tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - """ - - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - lru_dict[obj_id] = True - - def eviction_hook(lru_dict, obj_id, obj_size): - return next(iter(lru_dict)) - - def remove_hook(lru_dict, obj_id): - lru_dict.pop(obj_id, None) - - return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook - - -def create_test_request(obj_id: int, obj_size: int) -> lcs.Request: - """Create a test request with given parameters. - - Args: - obj_id: Object ID - obj_size: Object size in bytes - - Returns: - Request: A configured request object - """ - req = lcs.Request() - req.obj_id = obj_id - req.obj_size = obj_size - return req - - -def test_python_hook_cache(): - """Test the Python hook cache implementation.""" - cache_size = 300 # 3 objects of size 100 each - cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") - - # Set up hooks - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks() - cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Define test sequence - test_cases = [ - CacheTestCase((1, 100), False, 1, "Miss - insert 1"), - CacheTestCase((2, 100), False, 2, "Miss - insert 2"), - CacheTestCase((3, 100), False, 3, "Miss - insert 3 (cache full)"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((4, 100), False, 3, "Miss - should evict 2 (LRU), insert 4"), - CacheTestCase((2, 100), False, 3, "Miss - should evict 3, insert 2"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - ] - - # Execute test sequence - for i, test_case in enumerate(test_cases): - obj_id, obj_size = test_case.request - req = create_test_request(obj_id, obj_size) - - result = cache.get(req) - assert result == test_case.expected_hit, f"Request {i + 1} (obj_id={obj_id}):" - f"Expected 
{'hit' if test_case.expected_hit else 'miss'} - {test_case.description}" - assert cache.n_obj == test_case.expected_obj_count, ( - f"Request {i + 1}: Expected {test_case.expected_obj_count} objects - {test_case.description}" - ) - assert cache.occupied_byte <= cache_size, f"Request {i + 1}: Cache size exceeded" - - -def test_error_handling(): - """Test error handling for uninitialized cache.""" - cache = lcs.PythonHookCachePolicy(1000) - - # Try to use cache without setting hooks - req = create_test_request(1, 100) - - with pytest.raises(RuntimeError): - cache.get(req) - - -def test_lru_comparison(): - """Test Python hook LRU against native LRU to verify identical behavior.""" - cache_size = 300 # 3 objects of size 100 each - - # Create native LRU cache - native_lru = lcs.LRU(cache_size) - - # Create Python hook LRU cache - hook_lru = lcs.PythonHookCachePolicy(cache_size, "TestLRU") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks() - hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Define test sequence with various access patterns - test_cases = [ - CacheTestCase((1, 100), False, 1, "Miss - insert 1"), - CacheTestCase((2, 100), False, 2, "Miss - insert 2"), - CacheTestCase((3, 100), False, 3, "Miss - insert 3 (cache full)"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((4, 100), False, 3, "Miss - should evict 2 (LRU), insert 4"), - CacheTestCase((2, 100), False, 3, "Miss - should evict 3, insert 2"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((3, 100), False, 3, "Miss - should evict 4, insert 3"), - CacheTestCase((5, 100), False, 3, "Miss - should evict 2, insert 5"), - CacheTestCase((1, 100), True, 3, "Hit - move 1 to end"), - CacheTestCase((3, 100), True, 3, "Hit - move 3 to end"), - CacheTestCase((6, 100), False, 3, "Miss - should evict 5, insert 6"), - ] - - # Test both caches with identical requests - for i, test_case in 
enumerate(test_cases): - obj_id, obj_size = test_case.request - - # Test native LRU - req_native = create_test_request(obj_id, obj_size) - native_result = native_lru.get(req_native) - - # Test hook LRU - req_hook = create_test_request(obj_id, obj_size) - hook_result = hook_lru.get(req_hook) - - # Compare results - assert native_result == hook_result, ( - f"Request {i + 1} (obj_id={obj_id}): Native and hook LRU differ - {test_case.description}" - ) - - # Compare cache statistics - assert native_lru.n_obj == hook_lru.n_obj, f"Request {i + 1}: Object count differs - {test_case.description}" - assert native_lru.occupied_byte == hook_lru.occupied_byte, ( - f"Request {i + 1}: Occupied bytes differ - {test_case.description}" - ) - - -def test_lru_comparison_variable_sizes(): - """Test Python hook LRU vs Native LRU with variable object sizes.""" - cache_size = 1000 # Total cache capacity - - # Create caches - native_lru = lcs.LRU(cache_size) - hook_lru = lcs.PythonHookCachePolicy(cache_size, "VariableSizeLRU") - - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_lru_hooks() - hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - - # Define test sequence with variable object sizes - test_cases = [ - CacheTestCase((1, 200), False, 1, "Miss - insert 1 (200 bytes)"), - CacheTestCase((2, 300), False, 2, "Miss - insert 2 (300 bytes)"), - CacheTestCase((3, 400), False, 3, "Miss - insert 3 (400 bytes) - total 900 bytes"), - CacheTestCase((4, 200), False, 3, "Miss - should evict 1, insert 4 (total would be 1100, over limit)"), - CacheTestCase((1, 200), False, 3, "Miss - should evict 2, insert 1"), - CacheTestCase((5, 100), False, 3, "Miss - should evict 3, insert 5"), - CacheTestCase((4, 200), True, 3, "Hit - access 4"), - CacheTestCase((6, 500), False, 2, "Miss - should evict multiple objects to fit"), - CacheTestCase((4, 200), False, 3, "Miss - 4 was evicted"), - ] - - # Test both caches with identical requests - for i, test_case in 
enumerate(test_cases): - obj_id, obj_size = test_case.request - - # Test native LRU - req_native = create_test_request(obj_id, obj_size) - native_result = native_lru.get(req_native) - - # Test hook LRU - req_hook = create_test_request(obj_id, obj_size) - hook_result = hook_lru.get(req_hook) - - # Compare results - assert native_result == hook_result, ( - f"Request {i + 1} (obj_id={obj_id}, size={obj_size}): Results differ - {test_case.description}" - ) - - # Compare cache statistics - assert native_lru.n_obj == hook_lru.n_obj, f"Request {i + 1}: Object count differs - {test_case.description}" - assert native_lru.occupied_byte == hook_lru.occupied_byte, ( - f"Request {i + 1}: Occupied bytes differ - {test_case.description}" - ) diff --git a/libCacheSim-python/tests/test_trace_generator.py b/libCacheSim-python/tests/test_trace_generator.py deleted file mode 100644 index 37040026e..000000000 --- a/libCacheSim-python/tests/test_trace_generator.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for trace generator module. 
-""" - -import libcachesim as lcs - - -class TestTraceGeneration: - """Test trace generation functions.""" - - # Constants for test readability - NUM_SAMPLE_REQUESTS = 10 # Number of requests to check in detail - - def test_create_zipf_requests_basic(self): - """Test basic Zipf request creation.""" - generator = lcs.create_zipf_requests(num_objects=100, num_requests=1000, alpha=1.0, obj_size=4000, seed=42) - - # Test iteration - requests = list(generator) - assert len(requests) == 1000 - - for req in requests[: self.NUM_SAMPLE_REQUESTS]: # Check first NUM_SAMPLE_REQUESTS - assert isinstance(req, lcs.Request) - assert 0 <= req.obj_id < 100 - assert req.obj_size == 4000 - assert req.clock_time >= 0 - - def test_create_uniform_requests_basic(self): - """Test basic uniform request creation.""" - generator = lcs.create_uniform_requests(num_objects=100, num_requests=1000, obj_size=4000, seed=42) - - # Test iteration - requests = list(generator) - assert len(requests) == 1000 - - for req in requests[: self.NUM_SAMPLE_REQUESTS]: # Check first NUM_SAMPLE_REQUESTS - assert isinstance(req, lcs.Request) - assert 0 <= req.obj_id < 100 - assert req.obj_size == 4000 - assert req.clock_time >= 0 - - def test_zipf_reproducibility(self): - """Test reproducibility with seed.""" - gen1 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42) - gen2 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42) - - requests1 = list(gen1) - requests2 = list(gen2) - - assert len(requests1) == len(requests2) - for req1, req2 in zip(requests1, requests2): - assert req1.obj_id == req2.obj_id - - def test_uniform_reproducibility(self): - """Test reproducibility with seed.""" - gen1 = lcs.create_uniform_requests(10, 100, seed=42) - gen2 = lcs.create_uniform_requests(10, 100, seed=42) - - requests1 = list(gen1) - requests2 = list(gen2) - - assert len(requests1) == len(requests2) - for req1, req2 in zip(requests1, requests2): - assert req1.obj_id == req2.obj_id - - def test_different_seeds(self): - 
"""Test that different seeds produce different results.""" - gen1 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=42) - gen2 = lcs.create_zipf_requests(10, 100, alpha=1.0, seed=43) - - requests1 = [req.obj_id for req in gen1] - requests2 = [req.obj_id for req in gen2] - - assert requests1 != requests2 - - def test_zipf_with_cache(self): - """Test Zipf generator with cache simulation.""" - cache = lcs.LRU(cache_size=50 * 1024) # 50KB cache - generator = lcs.create_zipf_requests( - num_objects=100, - num_requests=1000, - alpha=1.0, - obj_size=1000, # 1KB objects - seed=42, - ) - - hit_count = 0 - for req in generator: - if cache.get(req): - hit_count += 1 - - # Should have some hits and some misses - assert 0 <= hit_count <= 1000 - assert hit_count > 0 # Should have some hits - - def test_uniform_with_cache(self): - """Test uniform generator with cache simulation.""" - cache = lcs.LRU(cache_size=50 * 1024) # 50KB cache - generator = lcs.create_uniform_requests( - num_objects=100, - num_requests=1000, - obj_size=1000, # 1KB objects - seed=42, - ) - - hit_count = 0 - for req in generator: - if cache.get(req): - hit_count += 1 - - # Should have some hits and some misses - assert 0 <= hit_count <= 1000 - assert hit_count > 0 # Should have some hits - - def test_custom_parameters(self): - """Test generators with custom parameters.""" - generator = lcs.create_zipf_requests( - num_objects=50, - num_requests=200, - alpha=1.5, - obj_size=2048, - time_span=3600, # 1 hour - start_obj_id=1000, - seed=123, - ) - - requests = list(generator) - assert len(requests) == 200 - - # Check custom parameters - for req in requests[: self.NUM_SAMPLE_REQUESTS // 2]: # Check fewer for shorter test - assert 1000 <= req.obj_id < 1050 # start_obj_id + num_objects - assert req.obj_size == 2048 - assert req.clock_time <= 3600 diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py deleted file mode 100644 index a2c7c8c26..000000000 
--- a/libCacheSim-python/tests/test_unified_interface.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -""" -Test the unified interface for all cache policies. -""" - -import sys -import os -import pytest - -# Add the parent directory to the Python path for development testing -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -try: - import libcachesim as lcs -except ImportError as e: - pytest.skip(f"libcachesim not available: {e}", allow_module_level=True) - -from collections import OrderedDict - - -def create_trace_reader(): - """Helper function to create a trace reader. - - Returns: - Reader or None: A trace reader instance, or None if trace file not found. - """ - data_file = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "cloudPhysicsIO.oracleGeneral.bin" - ) - if not os.path.exists(data_file): - return None - return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) - - -def create_test_lru_hooks(): - """Create LRU hooks for testing. 
- - Returns: - tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - """ - - def init_hook(cache_size): - """Initialize LRU data structure.""" - return OrderedDict() - - def hit_hook(lru_dict, obj_id, obj_size): - """Handle cache hit by moving to end (most recently used).""" - if obj_id in lru_dict: - lru_dict.move_to_end(obj_id) - - def miss_hook(lru_dict, obj_id, obj_size): - """Handle cache miss by adding new object.""" - lru_dict[obj_id] = obj_size - - def eviction_hook(lru_dict, obj_id, obj_size): - """Return the least recently used object ID for eviction.""" - if lru_dict: - return next(iter(lru_dict)) - return obj_id - - def remove_hook(lru_dict, obj_id): - """Remove object from LRU structure.""" - lru_dict.pop(obj_id, None) - - return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook - - -def test_unified_process_trace_interface(): - """Test that all cache policies have the same process_trace interface.""" - - cache_size = 1024 * 1024 # 1MB - max_requests = 100 - - # Create trace reader - reader = create_trace_reader() - if not reader: - pytest.skip("Skipping test: Trace file not available") - - # Test different cache policies - caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - "ARC": lcs.ARC(cache_size), - } - - # Add Python hook cache - python_cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() - python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - caches["Python Hook LRU"] = python_cache - - results = {} - for name, cache in caches.items(): - # Create fresh reader for each test - test_reader = create_trace_reader() - if not test_reader: - pytest.skip(f"Cannot create reader for {name} test") - - # Test process_trace method exists - assert hasattr(cache, "process_trace"), f"{name} missing process_trace method" - - # Test process_trace functionality - obj_miss_ratio, 
byte_miss_ratio = cache.process_trace(test_reader, max_req=max_requests) - results[name] = obj_miss_ratio - - # Verify miss_ratio is valid - assert 0.0 <= obj_miss_ratio <= 1.0, f"{name} returned invalid miss_ratio: {obj_miss_ratio}" - - # Verify we got results for all caches - assert len(results) == len(caches), "Not all caches were tested" - - -def test_unified_properties_interface(): - """Test that all cache policies have the same properties interface.""" - - cache_size = 1024 * 1024 - - # Create different cache types - caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - "Python Hook": lcs.PythonHookCachePolicy(cache_size, "TestCache"), - } - - required_properties = ["cache_size", "n_req", "n_obj", "occupied_byte"] - - for name, cache in caches.items(): - # Test all required properties exist - for prop in required_properties: - assert hasattr(cache, prop), f"{name} missing {prop} property" - - # Test cache_size is correct - assert cache.cache_size == cache_size, f"{name} cache_size mismatch" - - -def test_get_interface_consistency(): - """Test that get() method works consistently across all cache policies.""" - - cache_size = 1024 * 1024 - - # Create caches - caches = { - "LRU": lcs.LRU(cache_size), - "FIFO": lcs.FIFO(cache_size), - } - - # Add Python hook cache - python_cache = lcs.PythonHookCachePolicy(cache_size, "ConsistencyTest") - init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() - python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) - caches["Python Hook"] = python_cache - - # Create a test request using the proper request class - test_req = lcs.Request() - test_req.obj_id = 1 - test_req.obj_size = 1024 - - for name, cache in caches.items(): - # Reset cache state for consistent testing - initial_n_req = cache.n_req - initial_n_obj = cache.n_obj - initial_occupied = cache.occupied_byte - - # Test get method exists - assert hasattr(cache, "get"), f"{name} missing get 
method" - - # Test first access (should be miss for new object) - result = cache.get(test_req) - - # Test properties updated correctly - assert cache.n_req > initial_n_req, f"{name} n_req not updated" - if not result: # If it was a miss, object should be added - assert cache.n_obj > initial_n_obj, f"{name} n_obj not updated after miss" - assert cache.occupied_byte > initial_occupied, f"{name} occupied_byte not updated after miss" - - # Test second access to same object (should be hit) - second_result = cache.get(test_req) - - # Second access should be a hit (unless cache is too small) - if cache.cache_size >= test_req.obj_size: - assert second_result, f"{name} second access should be a hit" diff --git a/libCacheSim-python/tests/utils.py b/libCacheSim-python/tests/utils.py deleted file mode 100644 index 0977cc815..000000000 --- a/libCacheSim-python/tests/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -import os - - -def get_reference_data(eviction_algo, cache_size_ratio): - data_file = os.path.join( # noqa: PTH118 - (os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 - "tests", - "reference.csv", - ) - with open(data_file) as f: # noqa: PTH123 - lines = f.readlines() - key = "3LCache" if eviction_algo == "ThreeLCache" else eviction_algo - for line in lines: - if line.startswith(f"{key},{cache_size_ratio}"): - return float(line.split(",")[-1]) - return None diff --git a/scripts/install_python_dev.sh b/scripts/install_python_dev.sh deleted file mode 100644 index d878d89b9..000000000 --- a/scripts/install_python_dev.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/bin/bash -set -euo pipefail - -function usage() { - echo "Usage: $0 [options]" - echo "Options:" - echo " -h, --help Show this help message" - echo " -b, --build-wheels Build the Python wheels" - exit 1 -} -# Parse command line arguments -BUILD_WHEELS=0 - -while [[ $# -gt 0 ]]; do - case $1 in - -h|--help) - usage - ;; - -b|--build-wheels) - BUILD_WHEELS=1 - shift - ;; - *) - echo "Unknown option: $1" - usage - ;; - 
esac -done - - -# Build the main libCacheSim C++ library first -echo "Building main libCacheSim library..." -rm -rf ./build -cmake -G Ninja -B build # -DENABLE_3L_CACHE=ON -ninja -C build - -# Now build and install the Python binding -echo "Building Python binding..." -echo "Sync python version..." -python scripts/sync_python_version.py -pushd libCacheSim-python -pip install -e . -vvv -popd - -# Test that the import works -echo "Testing import..." -python -c "import libcachesim" - -# Run tests -echo "Running tests..." -pushd libCacheSim-python - -python -m pip install pytest -python -m pytest . -popd - -# Build wheels if requested -if [[ $BUILD_WHEELS -eq 1 ]]; then - echo "--- Building Python wheels for distribution ---" - - # --- Environment and dependency checks --- - echo "Checking dependencies: python3, pip, docker, cibuildwheel..." - - if ! command -v python3 &> /dev/null; then - echo "Error: python3 is not installed. Please install it and run this script again." - exit 1 - fi - - if ! python3 -m pip --version &> /dev/null; then - echo "Error: pip for python3 is not available. Please install it." - exit 1 - fi - - if ! command -v docker &> /dev/null; then - echo "Error: docker is not installed. Please install it and ensure the docker daemon is running." - exit 1 - fi - - # Check if user can run docker without sudo, otherwise use sudo - SUDO_CMD="" - if ! docker ps &> /dev/null; then - echo "Warning: Current user cannot run docker. Trying with sudo." - if sudo docker ps &> /dev/null; then - SUDO_CMD="sudo" - else - echo "Error: Failed to run docker, even with sudo. Please check your docker installation and permissions." - exit 1 - fi - fi - - if ! python3 -m cibuildwheel --version &> /dev/null; then - echo "cibuildwheel not found, installing..." - python3 -m pip install cibuildwheel - fi - - echo "Dependency check completed." - - # --- Run cibuildwheel --- - # The project to build is specified as an argument. 
- # cibuildwheel should be run from the repository root. - # The output directory will be 'wheelhouse/' by default. - echo "Starting the wheel build process for Linux..." - ${SUDO_CMD} python3 -m cibuildwheel --platform linux libCacheSim-python - - echo "Build process completed successfully. Wheels are in the 'wheelhouse' directory." -fi diff --git a/scripts/sync_python_version.py b/scripts/sync_python_version.py deleted file mode 100644 index 65e51a92f..000000000 --- a/scripts/sync_python_version.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python3 -""" -Script to synchronize version between libCacheSim main project and Python bindings. - -This script reads the version from version.txt and updates the pyproject.toml -in libCacheSim-python to match. -""" - -import json -import os -import sys -import re -from pathlib import Path - - -def get_project_root(): - """Get the project root directory.""" - script_dir = Path(__file__).parent - return script_dir.parent - - -def read_main_version(): - """Read version from version.txt.""" - project_root = get_project_root() - version_file = project_root / "version.txt" - - if not version_file.exists(): - print(f"Error: {version_file} not found", file=sys.stderr) - sys.exit(1) - - with open(version_file, 'r') as f: - version = f.read().strip() - - if not version: - print("Error: version.txt is empty", file=sys.stderr) - sys.exit(1) - - return version - -def update_pyproject_toml(version): - """Update pyproject.toml with the new version.""" - project_root = get_project_root() - pyproject_toml_path = project_root / "libCacheSim-python" / "pyproject.toml" - - if not pyproject_toml_path.exists(): - print(f"Error: {pyproject_toml_path} not found", file=sys.stderr) - return False - - # Read current pyproject.toml - with open(pyproject_toml_path, 'r') as f: - pyproject_data = f.read() - - # Update the version line in pyproject.toml, make it can match any version in version.txt, like "0.3.1" or "dev" - match = re.search(r"version = 
\"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data) - if not match: - print("Error: Could not find a valid version line in pyproject.toml", file=sys.stderr) - return False - current_version = match.group(1) - if current_version == version: - print(f"Python binding version already up to date: {version}") - return False - # replace the version line with the new version - pyproject_data = re.sub(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", f"version = \"{version}\"", pyproject_data) - - # Write back to file with proper formatting - with open(pyproject_toml_path, 'w') as f: - f.write(pyproject_data) - - print(f"Updated Python version: {current_version} → {version}") - return True - - -def main(): - """Main function.""" - try: - # Read main project version - main_version = read_main_version() - print(f"Main project version: {main_version}") - - # Update Python binding version - updated = update_pyproject_toml(main_version) - - if updated: - print("Python binding version synchronized successfully") - else: - print("No changes needed") - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - main() From d95637dc21b9a03034ea1be22d880c61f98d3112 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Sun, 3 Aug 2025 06:17:18 +0000 Subject: [PATCH 2/3] Refine LRU plugin example --- README.md | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 9e5bbc13d..71a2a02f0 100644 --- a/README.md +++ b/README.md @@ -296,11 +296,11 @@ pip install libcachesim ### Simulation with python ```python -import libcachesim as lcs +from libcachesim import SyntheticReader, TraceReader, FIFO -reader = lcs.SyntheticReader(num_objects=1000, num_of_req=10000) # synthetic trace -# reader = lcs.open_trace("./data/cloudPhysicsIO.oracleGeneral.bin") # real trace -cache = lcs.FIFO(cache_size=1024*1024) +reader = SyntheticReader(num_objects=1000, num_of_req=10000) # 
synthetic trace +# reader = TraceReader("./data/cloudPhysicsIO.oracleGeneral.bin") # real trace +cache = FIFO(cache_size=1024*1024) obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) print(f"Obj miss ratio: {obj_miss_ratio:.4f}, byte miss ratio: {byte_miss_ratio:.4f}") ``` @@ -313,17 +313,38 @@ With python package, you can extend new algorithm to test your own eviction desi ```python from collections import OrderedDict -from libcachesim import PluginCache, LRU +from typing import Any + +from libcachesim import PluginCache, LRU, CommonCacheParams, Request + +def init_hook(_: CommonCacheParams) -> Any: + return OrderedDict() + +def hit_hook(data: Any, req: Request) -> None: + data.move_to_end(req.obj_id, last=True) + +def miss_hook(data: Any, req: Request) -> None: + data.__setitem__(req.obj_id, req.obj_size) + +def eviction_hook(data: Any, _: Request) -> int: + return data.popitem(last=False)[0] + +def remove_hook(data: Any, obj_id: int) -> None: + data.pop(obj_id, None) + +def free_hook(data: Any) -> None: + data.clear() + plugin_lru_cache = PluginCache( cache_size=128, - cache_name="LRU", - cache_init_hook=lambda _: OrderedDict(), - cache_hit_hook=lambda data, req: data.move_to_end(req.obj_id, last=True) if req.obj_id in data else None, - cache_miss_hook=lambda data, req: data.__setitem__(req.obj_id, req.obj_size), - cache_eviction_hook=lambda data, _: data.popitem(last=False)[0], - cache_remove_hook=lambda data, obj_id: data.pop(obj_id, None), - cache_free_hook=lambda data: data.clear(), + cache_init_hook=init_hook, + cache_hit_hook=hit_hook, + cache_miss_hook=miss_hook, + cache_eviction_hook=eviction_hook, + cache_remove_hook=remove_hook, + cache_free_hook=free_hook, + cache_name="Plugin_LRU", ) reader = lcs.SyntheticReader(num_objects=1000, num_of_req=10000, obj_size=1) From b72873ca3e39259043540b8cb77725280b2c2e2e Mon Sep 17 00:00:00 2001 From: Percy Date: Mon, 4 Aug 2025 01:54:14 -0400 Subject: [PATCH 3/3] Add dist info in examples using synthetic 
reader --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 71a2a02f0..fb03d36ab 100644 --- a/README.md +++ b/README.md @@ -298,7 +298,7 @@ pip install libcachesim ```python from libcachesim import SyntheticReader, TraceReader, FIFO -reader = SyntheticReader(num_objects=1000, num_of_req=10000) # synthetic trace +reader = SyntheticReader(num_objects=1000, num_of_req=10000, alpha=1.0, dist="zipf") # synthetic trace # reader = TraceReader("./data/cloudPhysicsIO.oracleGeneral.bin") # real trace cache = FIFO(cache_size=1024*1024) obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) @@ -347,7 +347,7 @@ plugin_lru_cache = PluginCache( cache_name="Plugin_LRU", ) -reader = lcs.SyntheticReader(num_objects=1000, num_of_req=10000, obj_size=1) +reader = lcs.SyntheticReader(num_objects=1000, num_of_req=10000, obj_size=1, alpha=1.0, dist="zipf") req_miss_ratio, byte_miss_ratio = plugin_lru_cache.process_trace(reader) ref_req_miss_ratio, ref_byte_miss_ratio = LRU(128).process_trace(reader) print(f"plugin req miss ratio {req_miss_ratio}, ref req miss ratio {ref_req_miss_ratio}")