|
| 1 | +""" |
| 2 | +This module implements the S4FIFO cache replacement policy. |
| 3 | +
|
| 4 | +It is an online-learned variant of the S3FIFO policy that uses a simple heuristic to choose its |
| 5 | +hyperparameters. |
| 6 | +""" |
| 7 | + |
| 8 | +import time |
| 9 | +from collections import defaultdict |
| 10 | +from enum import Enum |
| 11 | + |
class Phase(Enum):
    """Lifecycle stages of the S4FIFO policy, in the order they are entered."""

    WARMUP = 1  # cache is filling; no statistics are recorded
    FEATURE_COLLECTION = 2  # the access trace is recorded for feature extraction
    ONLINE_PREDICTION = 3  # hyperparameters are fixed; scored eviction is used


class S4FIFO:
    """
    S4FIFO cache replacement policy with online learning capabilities.

    The policy moves through three phases (see ``Phase``): it fills the cache,
    then records ``feature_collection_requests`` accesses, then derives its
    hyperparameters from that trace and switches to score-based eviction.

    The cache is a plain list ordered oldest-first: misses append at the tail
    and hits move the item to the tail, so index 0 is always the least
    recently used entry.

    NOTE: ``frequency_threshold`` is predicted and reported, but the eviction
    code in this class does not consult it.
    NOTE: hit/miss counters and item frequencies are only updated while the
    policy is in ``Phase.FEATURE_COLLECTION``.
    """

    # Number of trailing accesses used for the working-set-size estimate.
    _WORKING_SET_WINDOW = 1000
    # Maximum look-back window used for the temporal-locality estimate.
    _LOCALITY_WINDOW = 100

    def __init__(self, cache_size: int, feature_collection_requests: int = 10000,
                 warmup_threshold: float = 0.95):
        """
        Initialize the S4FIFO policy with the given cache size.

        :param cache_size: The maximum number of items held by the cache.
        :param feature_collection_requests: Number of requests for feature collection phase.
        :param warmup_threshold: Threshold for cache fullness to end warmup phase.
        :raises ValueError: If ``cache_size`` is not positive or
            ``feature_collection_requests`` is negative.
        """
        # A non-positive size would otherwise surface later as a
        # ZeroDivisionError inside the fullness computation.
        if cache_size <= 0:
            raise ValueError(f"cache_size must be positive, got {cache_size!r}")
        if feature_collection_requests < 0:
            raise ValueError(
                "feature_collection_requests must be non-negative, "
                f"got {feature_collection_requests!r}"
            )

        self.cache_size = cache_size
        self.cache = []
        self.access_count = 0

        # Phase management
        self.current_phase = Phase.WARMUP
        self.warmup_threshold = warmup_threshold
        self.feature_collection_requests = feature_collection_requests
        self.feature_collection_count = 0

        # Feature collection
        self.features = {
            'hit_rate': 0.0,
            'miss_rate': 0.0,
            'access_pattern_entropy': 0.0,
            'temporal_locality': 0.0,
            'working_set_size_estimate': 0.0
        }
        self.hit_count = 0
        self.miss_count = 0
        self.access_history = []
        self.item_frequencies = defaultdict(int)
        self.last_access_times = {}

        # Hyperparameters (can be adjusted by model prediction)
        self.eviction_ratio = 0.1  # Fraction of cache to evict when full
        self.frequency_threshold = 2  # Minimum frequency for item promotion

    def warmup(self, items) -> None:
        """
        Warm up the cache by accessing every item in order.

        :param items: An iterable of items to feed through ``access``.
        """
        for item in items:
            self.access(item)

    def observe(self, item) -> bool:
        """
        Observe an item without affecting cache state (for prediction/analysis).

        :param item: The item to observe.
        :return: True if the item is currently cached, False otherwise.
        """
        return item in self.cache

    def _check_phase_transition(self) -> None:
        """Advance ``current_phase`` when the exit condition of the current phase is met."""
        if self.current_phase == Phase.WARMUP:
            # Leave warmup once the cache is sufficiently full.
            cache_fullness = len(self.cache) / self.cache_size
            if cache_fullness >= self.warmup_threshold:
                self.current_phase = Phase.FEATURE_COLLECTION
                self.feature_collection_count = 0
                print(f"Phase transition: WARMUP -> FEATURE_COLLECTION (cache {cache_fullness:.2%} full)")

        elif self.current_phase == Phase.FEATURE_COLLECTION:
            # Freeze the trace and derive hyperparameters once enough
            # requests have been recorded.
            if self.feature_collection_count >= self.feature_collection_requests:
                self.current_phase = Phase.ONLINE_PREDICTION
                self._extract_features()
                self._predict_hyperparameters()
                print("Phase transition: FEATURE_COLLECTION -> ONLINE_PREDICTION")
                print(f"Extracted features: {self.features}")

    def _extract_features(self) -> None:
        """Populate ``self.features`` from the data recorded during feature collection."""
        from collections import Counter
        import math

        total_requests = self.hit_count + self.miss_count
        if total_requests > 0:
            self.features['hit_rate'] = self.hit_count / total_requests
            self.features['miss_rate'] = self.miss_count / total_requests

        history = self.access_history
        if history:
            # Shannon entropy (in bits) of the empirical item distribution.
            total_accesses = len(history)
            entropy = 0.0
            for count in Counter(history).values():
                p = count / total_accesses
                entropy -= p * math.log2(p)
            self.features['access_pattern_entropy'] = entropy

            # Distinct items among the most recent accesses, relative to capacity.
            unique_items = len(set(history[-self._WORKING_SET_WINDOW:]))
            self.features['working_set_size_estimate'] = unique_items / self.cache_size

        if len(history) > 1:
            # Fraction of the last `window_size` accesses whose item also
            # appeared within the `window_size` accesses preceding it.
            window_size = min(self._LOCALITY_WINDOW, len(history))
            first = len(history) - window_size
            reaccesses = sum(
                1
                for i in range(first, len(history))
                if history[i] in history[max(0, i - window_size):i]
            )
            self.features['temporal_locality'] = reaccesses / window_size

    def _predict_hyperparameters(self) -> None:
        """
        Use extracted features to choose ``eviction_ratio`` and ``frequency_threshold``.

        This is a simple heuristic - in practice, you would use a trained ML model.
        """
        hit_rate = self.features['hit_rate']
        temporal_locality = self.features['temporal_locality']

        if hit_rate < 0.3:  # Low hit rate - more aggressive eviction
            self.eviction_ratio = 0.2
            self.frequency_threshold = 1
        elif hit_rate > 0.7:  # High hit rate - conservative eviction
            self.eviction_ratio = 0.05
            self.frequency_threshold = 3
        else:  # Medium hit rate - balanced approach
            self.eviction_ratio = 0.1
            self.frequency_threshold = 2

        # Strong temporal locality lowers the threshold by one (never below 1).
        if temporal_locality > 0.5:
            self.frequency_threshold = max(1, self.frequency_threshold - 1)

        print(f"Updated hyperparameters: eviction_ratio={self.eviction_ratio}, "
              f"frequency_threshold={self.frequency_threshold}")

    def _collect_features(self, item, is_hit) -> None:
        """
        Record one access; a no-op outside the feature collection phase.

        :param item: The accessed item.
        :param is_hit: Whether the access was a cache hit.
        """
        if self.current_phase != Phase.FEATURE_COLLECTION:
            return

        self.access_history.append(item)
        self.item_frequencies[item] += 1
        self.last_access_times[item] = self.access_count

        if is_hit:
            self.hit_count += 1
        else:
            self.miss_count += 1

        self.feature_collection_count += 1

    def access(self, item) -> bool:
        """
        Access an item in the cache with phase-aware behavior.

        :param item: The item to access.
        :return: True if hit, False if miss.
        """
        self.access_count += 1
        is_hit = item in self.cache

        # The phase check runs before recording, so the access that triggers
        # WARMUP -> FEATURE_COLLECTION is itself recorded as a feature sample.
        self._check_phase_transition()
        self._collect_features(item, is_hit)

        if is_hit:
            # Cache hit - move item to end (LRU behavior)
            self.cache.remove(item)
            self.cache.append(item)
        else:
            # Cache miss - make room if needed, then insert at the tail
            if len(self.cache) >= self.cache_size:
                self._evict_items()
            self.cache.append(item)

        return is_hit

    def _evict_items(self) -> None:
        """
        Evict items from cache based on current hyperparameters and phase.

        During WARMUP and FEATURE_COLLECTION a single item is removed from the
        head of the list. During ONLINE_PREDICTION a batch of
        ``max(1, int(cache_size * eviction_ratio))`` items is removed: the
        oldest ``2 * batch`` entries are scored by recorded frequency and list
        position, and the lowest-scoring ones are evicted.
        """
        if self.current_phase != Phase.ONLINE_PREDICTION:
            del self.cache[:1]
            return

        num_to_evict = max(1, int(self.cache_size * self.eviction_ratio))

        if not self.item_frequencies:
            # No frequency data was recorded - fall back to simple FIFO.
            del self.cache[:num_to_evict]
            return

        cache_len = len(self.cache)
        scored = []
        for position, item in enumerate(self.cache[:num_to_evict * 2]):
            freq = self.item_frequencies.get(item, 1)
            # Index 0 is the oldest entry, so the score must grow with the
            # position for recently used items to be favoured.
            recency_score = (position + 1) / cache_len
            scored.append((freq * 0.7 + recency_score * 0.3, position))

        # (score, position) pairs are always comparable, so arbitrary item
        # types never take part in the ordering; ties go to the older entry.
        scored.sort()

        # Delete from the back so earlier indices stay valid.
        victims = sorted((position for _, position in scored[:num_to_evict]), reverse=True)
        for position in victims:
            del self.cache[position]

    def get_cache_info(self) -> dict:
        """
        Get information about current cache state and phase.

        ``current_hit_rate`` is computed from the hit/miss counters, which are
        only updated during the feature collection phase, whereas
        ``total_requests`` counts every call to ``access``.

        :return: Dictionary with cache information.
        """
        total_requests = self.hit_count + self.miss_count
        current_hit_rate = self.hit_count / total_requests if total_requests > 0 else 0

        if self.current_phase == Phase.WARMUP:
            # Collection has not started yet.
            progress = 0.0
        elif (self.current_phase == Phase.FEATURE_COLLECTION
              and self.feature_collection_requests > 0):
            progress = min(1.0, self.feature_collection_count / self.feature_collection_requests)
        else:
            progress = 1.0

        return {
            'phase': self.current_phase.name,
            'cache_size': len(self.cache),
            'max_cache_size': self.cache_size,
            'total_requests': self.access_count,
            'current_hit_rate': current_hit_rate,
            'feature_collection_progress': progress,
            'hyperparameters': {
                'eviction_ratio': self.eviction_ratio,
                'frequency_threshold': self.frequency_threshold
            }
        }
0 commit comments