zilliztech
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/README.md‎
Lines changed: 27 additions & 0 deletions b/‎examples/README.md‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎examples/eviction/wtinylfu_eviction.py‎
Lines changed: 92 additions & 0 deletions b/‎examples/eviction/wtinylfu_eviction.py‎
Lines changed: 92 additions & 0 deletions
diff --git a/‎gptcache/manager/eviction/count_min_sketch.py‎
Lines changed: 113 additions & 0 deletions b/‎gptcache/manager/eviction/count_min_sketch.py‎
Lines changed: 113 additions & 0 deletions
diff --git a/‎gptcache/manager/eviction/doorkeeper.py‎
Lines changed: 66 additions & 0 deletions b/‎gptcache/manager/eviction/doorkeeper.py‎
Lines changed: 66 additions & 0 deletions
diff --git a/‎gptcache/manager/eviction/manager.py‎
Lines changed: 7 additions & 2 deletions b/‎gptcache/manager/eviction/manager.py‎
Lines changed: 7 additions & 2 deletions
@@ -372,7 +372,7 @@ The **Cache Manager** is responsible for controlling the operation of both the *
     - [x] Support FIFO eviction policy.
     - [x] Support LFU eviction policy.
     - [x] Support RR eviction policy.
-    - [ ] Support more complicated eviction policies.
+    - [x] Support W-TinyLFU eviction policy with cost-aware admission.
   - **Distributed Caching**
 
   If you were to scale your GPTCache deployment horizontally using in-memory caching, it won't be possible. Since the cached information would be limited to the single pod.
 
@@ -16,6 +16,7 @@
     - [Start server](#start-server)
   - [Benchmark](#benchmark)
   - [How to use post-process function](#how-to-use-post-process-function)
+  - [How to set the eviction policy](#how-to-set-the-eviction-policy)
 
 ## How to run Visual Question Answering with MiniGPT-4
 
@@ -715,3 +716,29 @@ cache.init(
 ```
 
 See [processor/post_example.py](./processor/post_example.py) for a runnable example.
+
+## How to set the `eviction` policy
+
+GPTCache supports several eviction policies: LRU (default), FIFO, LFU, RR, and W-TinyLFU.
+
+### W-TinyLFU eviction
+
+The W-TinyLFU policy combines a TinyLFU admission filter with a segmented LRU, achieving near-optimal hit rates. It optionally supports cost-aware admission for LLM workloads where response regeneration costs vary.
+
+See [eviction/wtinylfu_eviction.py](./eviction/wtinylfu_eviction.py) for full examples.
+
+```python
+from gptcache.embedding import Onnx
+from gptcache.manager import get_data_manager, CacheBase, VectorBase
+from gptcache.manager.eviction import EvictionBase
+
+onnx = Onnx()
+
+data_manager = get_data_manager(
+    cache_base=CacheBase("sqlite"),
+    vector_base=VectorBase("faiss", dimension=onnx.dimension),
+    eviction_base=EvictionBase(
+        "wtinylfu",
+        maxsize=200,
+        clean_size=50,
+        cost_aware=True,      # weight admission by response token count
+    ),
+)
+```
@@ -0,0 +1,92 @@
+from gptcache import Cache
+from gptcache.embedding import Onnx
+from gptcache.manager import get_data_manager, CacheBase, VectorBase
+from gptcache.manager.eviction import EvictionBase
+
+
+def wtinylfu_basic_example():
+    """
+    Basic W-TinyLFU eviction example.
+
+    Uses the default settings: 1% window, 20/80 probation/protected split,
+    cost-aware admission enabled. The policy combines frequency-based
+    admission filtering (TinyLFU) with cost-weighted eviction decisions,
+    preferring to retain expensive-to-regenerate cache entries.
+    """
+    onnx = Onnx()
+    data_manager = get_data_manager(
+        cache_base=CacheBase("sqlite"),
+        vector_base=VectorBase("faiss", dimension=onnx.dimension),
+        eviction_base=EvictionBase(
+            "wtinylfu",
+            maxsize=200,
+            clean_size=50,
+        ),
+    )
+
+    cache = Cache()
+    cache.init(data_manager=data_manager)
+    question = "What is github?"
+    answer = "Online platform for version control and code collaboration."
+    embedding = onnx.to_embeddings(question)
+    cache.import_data([question], [answer], [embedding])
+
+
+def wtinylfu_custom_params_example():
+    """
+    W-TinyLFU with custom parameters.
+
+    Tunable parameters:
+    - window_pct: window cache as % of total capacity (default: 1.0)
+    - probation_pct: probation segment as % of main cache (default: 20.0)
+    - cost_aware: enable cost-weighted admission (default: True)
+    - cms_width_multiplier: Count-Min Sketch width scaling (default: 1)
+    - reset_multiplier: CMS aging interval as multiple of capacity (default: 10)
+    """
+    onnx = Onnx()
+    data_manager = get_data_manager(
+        cache_base=CacheBase("sqlite"),
+        vector_base=VectorBase("faiss", dimension=onnx.dimension),
+        eviction_base=EvictionBase(
+            "wtinylfu",
+            maxsize=500,
+            clean_size=100,
+            window_pct=2.0,
+            probation_pct=25.0,
+            cost_aware=True,
+        ),
+    )
+
+    cache = Cache()
+    cache.init(data_manager=data_manager)
+    question = "Explain quantum computing"
+    answer = "Quantum computing uses quantum bits (qubits) that can exist in superposition..."
+    embedding = onnx.to_embeddings(question)
+    cache.import_data([question], [answer], [embedding])
+
+
+def wtinylfu_no_cost_example():
+    """
+    W-TinyLFU without cost awareness (pure frequency-based admission).
+
+    When cost_aware=False, the admission decision uses only the TinyLFU
+    frequency estimate, equivalent to Caffeine's default policy.
+    """
+    onnx = Onnx()
+    data_manager = get_data_manager(
+        cache_base=CacheBase("sqlite"),
+        vector_base=VectorBase("faiss", dimension=onnx.dimension),
+        eviction_base=EvictionBase(
+            "wtinylfu",
+            maxsize=200,
+            clean_size=50,
+            cost_aware=False,
+        ),
+    )
+
+    cache = Cache()
+    cache.init(data_manager=data_manager)
+    question = "What is machine learning?"
+    answer = "A subset of AI that enables systems to learn from data."
+    embedding = onnx.to_embeddings(question)
+    cache.import_data([question], [answer], [embedding])
@@ -0,0 +1,113 @@
+"""4-bit packed Count-Min Sketch for frequency estimation.
+
+Uses the same design as Caffeine/Theine: 4 hash functions, 4-bit counters
+packed 16 per uint64 word, periodic halving for aging.
+"""
+
+import numpy as np
+
+
+def _next_power_of_2(n: int) -> int:
+    if n <= 0:
+        return 1
+    n -= 1
+    n |= n >> 1
+    n |= n >> 2
+    n |= n >> 4
+    n |= n >> 8
+    n |= n >> 16
+    n |= n >> 32
+    return n + 1
+
+
+def _rehash(h: int) -> int:
+    h = (h ^ (h >> 32)) & 0xFFFFFFFFFFFFFFFF
+    h = (h * 0x94D049BB133111EB) & 0xFFFFFFFFFFFFFFFF
+    h = (h ^ (h >> 32)) & 0xFFFFFFFFFFFFFFFF
+    return h
+
+
+# 0x7 in every nibble: AND-ed in after the one-bit right shift in
+# CountMinSketch.reset() so each 4-bit counter loses the bit that was
+# shifted in from its neighbour.
+_RESET_MASK = np.uint64(0x7777777777777777)
+# Saturation value of a 4-bit counter; _inc_counter stops incrementing here.
+_MAX_COUNT = 15
+
+
+class CountMinSketch:
+    """4-bit packed Count-Min Sketch with 4 hash functions.
+
+    Each counter is 4 bits (max value 15). 16 counters are packed into
+    one uint64 word. The sketch uses 4 independent hash functions derived
+    via iterative rehashing.
+
+    :param capacity: expected max number of tracked items (determines width)
+    :param width_multiplier: width = next_power_of_2(capacity * multiplier)
+    :param sample_size_multiplier: reset after this * capacity increments
+    """
+
+    def __init__(
+        self,
+        capacity: int,
+        width_multiplier: int = 1,
+        sample_size_multiplier: int = 10,
+    ):
+        self._width = _next_power_of_2(max(capacity * width_multiplier, 16))
+        self._mask = self._width - 1
+        # 4 rows, each row has width counters, packed 16 per uint64
+        words_per_row = max(self._width // 16, 1)
+        self._table = np.zeros(4 * words_per_row, dtype=np.uint64)
+        self._words_per_row = words_per_row
+        self._additions = 0
+        self._sample_size = max(capacity * sample_size_multiplier, 16)
+
+    def increment(self, key_hash: int) -> bool:
+        """Increment counters for the given hash. Returns True if any counter changed."""
+        h0 = _rehash(key_hash)
+        h1 = _rehash(h0)
+        h2 = _rehash(h1)
+        h3 = _rehash(h2)
+
+        added = self._inc_counter(0, h0 & self._mask)
+        added |= self._inc_counter(1, h1 & self._mask)
+        added |= self._inc_counter(2, h2 & self._mask)
+        added |= self._inc_counter(3, h3 & self._mask)
+
+        if added:
+            self._additions += 1
+
+        return added
+
+    def estimate(self, key_hash: int) -> int:
+        """Return the estimated frequency (minimum across all rows)."""
+        h0 = _rehash(key_hash)
+        h1 = _rehash(h0)
+        h2 = _rehash(h1)
+        h3 = _rehash(h2)
+
+        c0 = self._read_counter(0, h0 & self._mask)
+        c1 = self._read_counter(1, h1 & self._mask)
+        c2 = self._read_counter(2, h2 & self._mask)
+        c3 = self._read_counter(3, h3 & self._mask)
+
+        return min(c0, c1, c2, c3)
+
+    def reset(self):
+        """Halve all counters (aging / decay)."""
+        self._table = (self._table >> np.uint64(1)) & _RESET_MASK
+        self._additions = self._additions // 2
+
+    def _inc_counter(self, row: int, index: int) -> bool:
+        word_idx = row * self._words_per_row + index // 16
+        nibble_pos = np.uint64((index % 16) * 4)
+        current = int((self._table[word_idx] >> nibble_pos) & np.uint64(0xF))
+        if current < _MAX_COUNT:
+            self._table[word_idx] += np.uint64(1) << nibble_pos
+            return True
+        return False
+
+    def _read_counter(self, row: int, index: int) -> int:
+        word_idx = row * self._words_per_row + index // 16
+        nibble_pos = np.uint64((index % 16) * 4)
+        return int((self._table[word_idx] >> nibble_pos) & np.uint64(0xF))
+
+    @property
+    def additions(self) -> int:
+        return self._additions
@@ -0,0 +1,66 @@
+"""Bloom filter doorkeeper for TinyLFU admission control.
+
+Filters out one-hit-wonders: only items seen at least twice get their
+Count-Min Sketch counters incremented. This prevents long-tail pollution.
+"""
+
+import math
+
+import numpy as np
+
+
+class Doorkeeper:
+    """Simple Bloom filter that tracks whether an item has been seen before.
+
+    :param capacity: expected number of insertions before reset
+    :param fp_rate: target false positive rate (default 1%)
+    """
+
+    def __init__(self, capacity: int = 10000, fp_rate: float = 0.01):
+        # Optimal sizing: m = -n*ln(p) / (ln2)^2, k = (m/n)*ln2
+        if capacity <= 0:
+            capacity = 16
+        m = int(-capacity * math.log(fp_rate) / (math.log(2) ** 2))
+        m = max(m, 64)
+        self._num_bits = m
+        self._num_hashes = max(int((m / capacity) * math.log(2)), 1)
+        # Bit array stored as uint64 words
+        self._words = np.zeros((m + 63) // 64, dtype=np.uint64)
+
+    def allow(self, key_hash: int) -> bool:
+        """Check if key was seen before, then add it.
+
+        Returns True if the key was already present (second+ access).
+        Always adds the key regardless.
+        """
+        already_present = self.contains(key_hash)
+        self.add(key_hash)
+        return already_present
+
+    def contains(self, key_hash: int) -> bool:
+        """Check membership without modifying the filter."""
+        for i in range(self._num_hashes):
+            bit_pos = self._hash_pos(key_hash, i)
+            word_idx = bit_pos >> 6  # bit_pos // 64
+            bit_idx = np.uint64(bit_pos & 63)
+            if not (self._words[word_idx] & (np.uint64(1) << bit_idx)):
+                return False
+        return True
+
+    def add(self, key_hash: int):
+        """Add a key to the filter."""
+        for i in range(self._num_hashes):
+            bit_pos = self._hash_pos(key_hash, i)
+            word_idx = bit_pos >> 6
+            bit_idx = np.uint64(bit_pos & 63)
+            self._words[word_idx] |= np.uint64(1) << bit_idx
+
+    def clear(self):
+        """Reset the filter (remove all entries)."""
+        self._words[:] = np.uint64(0)
+
+    def _hash_pos(self, key_hash: int, i: int) -> int:
+        # Double hashing: h(i) = (h1 + i*h2) mod m
+        h1 = key_hash & 0xFFFFFFFF
+        h2 = (key_hash >> 32) & 0xFFFFFFFF
+        return ((h1 + i * h2) & 0xFFFFFFFFFFFFFFFF) % self._num_bits
@@ -43,6 +43,11 @@ def get(
             from gptcache.manager.eviction.distributed_cache import NoOpEviction
             eviction_base = NoOpEviction()
             return eviction_base
+        if name == "wtinylfu":
+            from gptcache.manager.eviction.wtinylfu_eviction import WTinyLFUEviction
+            eviction_base = WTinyLFUEviction(
+                maxsize=maxsize, clean_size=clean_size, on_evict=on_evict, **kwargs
+            )
+            return eviction_base
 
-        else:
-            raise NotFoundError("eviction base", name)
+        raise NotFoundError("eviction base", name)