Skip to content

Commit c4a9196

Browse files
committed
Init online
1 parent 0289c08 commit c4a9196

4 files changed

Lines changed: 1060 additions & 0 deletions

File tree

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
"""
2+
This module implements the S4FIFO cache replacement policy.
3+
4+
It is an online-learned S3FIFO policy that uses a simple heuristic to decide the
5+
hyperparameters.
6+
"""
7+
8+
import time
9+
from collections import defaultdict
10+
from enum import Enum
11+
12+
class Phase(Enum):
    """Lifecycle stage of an S4FIFO cache; stages are entered in ascending order."""

    WARMUP = 1
    FEATURE_COLLECTION = 2
    ONLINE_PREDICTION = 3


class S4FIFO:
    """
    S4FIFO cache replacement policy with online learning capabilities.

    The policy moves through three phases (see ``Phase``):

    * ``WARMUP`` - the cache fills up; eviction is plain FIFO.
    * ``FEATURE_COLLECTION`` - eviction is still FIFO, but every access is
      recorded (history, per-item frequency, hit/miss counters).
    * ``ONLINE_PREDICTION`` - the recorded data is condensed into
      ``self.features`` once, hyperparameters are derived from them, and
      eviction switches to a frequency/recency score.

    ``self.cache`` is a list ordered from oldest (index 0) to most recently
    inserted or hit (last index).
    """

    # Number of most recent recorded accesses used for the working-set estimate.
    _WORKING_SET_WINDOW = 1000
    # Number of most recent recorded accesses scanned for temporal locality.
    _LOCALITY_WINDOW = 100
    # Relative weights of frequency and recency in the eviction score.
    _FREQUENCY_WEIGHT = 0.7
    _RECENCY_WEIGHT = 0.3

    def __init__(self, cache_size: int, feature_collection_requests: int = 10000,
                 warmup_threshold: float = 0.95):
        """
        Initialize the S4FIFO policy with the given cache size.

        :param cache_size: The size of the cache (maximum number of items).
        :param feature_collection_requests: Number of requests for feature collection phase.
        :param warmup_threshold: Threshold for cache fullness to end warmup phase.
        :raises ValueError: If ``cache_size`` is not positive.
        """
        # A non-positive size would otherwise surface later as a
        # ZeroDivisionError when the cache fullness is computed.
        if cache_size <= 0:
            raise ValueError(f"cache_size must be positive, got {cache_size!r}")

        self.cache_size = cache_size
        self.cache = []
        self.access_count = 0

        # Phase management
        self.current_phase = Phase.WARMUP
        self.warmup_threshold = warmup_threshold
        self.feature_collection_requests = feature_collection_requests
        self.feature_collection_count = 0

        # Feature collection
        self.features = {
            'hit_rate': 0.0,
            'miss_rate': 0.0,
            'access_pattern_entropy': 0.0,
            'temporal_locality': 0.0,
            'working_set_size_estimate': 0.0
        }
        self.hit_count = 0
        self.miss_count = 0
        self.access_history = []
        self.item_frequencies = defaultdict(int)
        self.last_access_times = {}

        # Hyperparameters (can be adjusted by model prediction)
        self.eviction_ratio = 0.1  # Fraction of cache to evict when full
        # Minimum frequency for item promotion. NOTE: set by
        # _predict_hyperparameters and reported by get_cache_info, but not
        # consulted by the eviction logic in this class.
        self.frequency_threshold = 2

    def warmup(self, items):
        """
        Warm up the cache with a list of items.

        Each item goes through ``access``, so counters and phase transitions
        advance exactly as for regular requests.

        :param items: A list of items to warm up the cache.
        """
        for item in items:
            self.access(item)

    def observe(self, item):
        """
        Observe an item without affecting cache state (for prediction/analysis).

        :param item: The item to observe.
        :return: True if the item is currently cached (would be a hit), else False.
        """
        return item in self.cache

    def _check_phase_transition(self):
        """Check if we need to transition to the next phase."""
        if self.current_phase == Phase.WARMUP:
            # Transition to feature collection when cache is sufficiently full
            cache_fullness = len(self.cache) / self.cache_size
            if cache_fullness >= self.warmup_threshold:
                self.current_phase = Phase.FEATURE_COLLECTION
                self.feature_collection_count = 0
                print(f"Phase transition: WARMUP -> FEATURE_COLLECTION (cache {cache_fullness:.2%} full)")

        elif self.current_phase == Phase.FEATURE_COLLECTION:
            # Transition to online prediction after collecting enough features
            if self.feature_collection_count >= self.feature_collection_requests:
                self.current_phase = Phase.ONLINE_PREDICTION
                self._extract_features()
                self._predict_hyperparameters()
                print("Phase transition: FEATURE_COLLECTION -> ONLINE_PREDICTION")
                print(f"Extracted features: {self.features}")

    def _extract_features(self):
        """
        Extract features from the collected data during feature collection phase.

        Fills ``self.features`` with the hit/miss rates, the Shannon entropy
        (in bits) of the recorded access distribution, the number of distinct
        items among the most recent recorded accesses relative to the cache
        size, and the fraction of recent accesses that repeat an item seen
        shortly before.
        """
        from collections import Counter
        import math

        total_requests = self.hit_count + self.miss_count
        if total_requests > 0:
            self.features['hit_rate'] = self.hit_count / total_requests
            self.features['miss_rate'] = self.miss_count / total_requests

        history = self.access_history
        if history:
            # Calculate access pattern entropy
            total_accesses = len(history)
            entropy = 0
            for count in Counter(history).values():
                p = count / total_accesses  # always > 0: Counter holds seen items only
                entropy -= p * math.log2(p)
            self.features['access_pattern_entropy'] = entropy

            # Estimate working set size from the most recent accesses
            unique_items = len(set(history[-self._WORKING_SET_WINDOW:]))
            self.features['working_set_size_estimate'] = unique_items / self.cache_size

        # Calculate temporal locality
        if len(history) > 1:
            window_size = min(self._LOCALITY_WINDOW, len(history))
            first = len(history) - window_size
            # An access counts as a re-access when the same item occurs in
            # the (up to) window_size entries immediately preceding it.
            reaccesses = sum(
                1
                for i in range(first, len(history))
                if history[i] in history[max(0, i - window_size):i]
            )
            self.features['temporal_locality'] = reaccesses / window_size

    def _predict_hyperparameters(self):
        """
        Use extracted features to predict optimal hyperparameters.
        This is a simple heuristic - in practice, you would use a trained ML model.
        """
        hit_rate = self.features['hit_rate']
        temporal_locality = self.features['temporal_locality']

        # Simple heuristic-based hyperparameter adjustment
        if hit_rate < 0.3:  # Low hit rate - more aggressive eviction
            self.eviction_ratio = 0.2
            self.frequency_threshold = 1
        elif hit_rate > 0.7:  # High hit rate - conservative eviction
            self.eviction_ratio = 0.05
            self.frequency_threshold = 3
        else:  # Medium hit rate - balanced approach
            self.eviction_ratio = 0.1
            self.frequency_threshold = 2

        # Adjust based on temporal locality
        if temporal_locality > 0.5:
            self.frequency_threshold = max(1, self.frequency_threshold - 1)

        print(f"Updated hyperparameters: eviction_ratio={self.eviction_ratio}, "
              f"frequency_threshold={self.frequency_threshold}")

    def _collect_features(self, item, is_hit):
        """
        Collect features during the feature collection phase.

        Does nothing in any other phase.

        :param item: The item that was requested.
        :param is_hit: Whether the request hit the cache.
        """
        if self.current_phase != Phase.FEATURE_COLLECTION:
            return

        self.access_history.append(item)
        self.item_frequencies[item] += 1
        self.last_access_times[item] = self.access_count

        if is_hit:
            self.hit_count += 1
        else:
            self.miss_count += 1

        self.feature_collection_count += 1

    def access(self, item):
        """
        Access an item in the cache with phase-aware behavior.

        :param item: The item to access.
        :return: True if hit, False if miss.
        """
        self.access_count += 1
        is_hit = item in self.cache

        # Check for phase transitions
        self._check_phase_transition()

        # Collect features if in feature collection phase
        self._collect_features(item, is_hit)

        if is_hit:
            # Cache hit - move item to end (LRU behavior)
            self.cache.remove(item)
        elif len(self.cache) >= self.cache_size:
            # Cache miss on a full cache - make room first
            self._evict_items()
        self.cache.append(item)

        return is_hit

    def _evict_items(self):
        """
        Evict items from cache based on current hyperparameters and phase.

        Before ``ONLINE_PREDICTION`` exactly one item, the oldest, is removed.
        In ``ONLINE_PREDICTION`` ``max(1, int(cache_size * eviction_ratio))``
        items are removed: the oldest ``2 * num_to_evict`` entries are scored
        by recorded frequency and by recency, and the lowest scores go first.
        """
        if self.current_phase != Phase.ONLINE_PREDICTION:
            # Simple FIFO eviction during warmup and feature collection
            self.cache.pop(0)
            return

        num_to_evict = max(1, int(self.cache_size * self.eviction_ratio))

        if not self.item_frequencies:
            # No statistics were recorded - fall back to simple FIFO
            del self.cache[:num_to_evict]
            return

        cache_len = len(self.cache)
        scored = []
        for position, item in enumerate(self.cache[:num_to_evict * 2]):
            freq = self.item_frequencies.get(item, 1)
            # Index 0 is the oldest entry, so recency grows with the index.
            recency_score = (position + 1) / cache_len
            combined_score = (freq * self._FREQUENCY_WEIGHT
                              + recency_score * self._RECENCY_WEIGHT)
            scored.append((combined_score, position))

        # Lowest scores are evicted first; the position breaks ties so the
        # cached items themselves never need to be comparable.
        scored.sort()
        victims = sorted((position for _, position in scored[:num_to_evict]), reverse=True)
        # Delete from the back so earlier positions stay valid.
        for position in victims:
            del self.cache[position]

    def get_cache_info(self):
        """
        Get information about current cache state and phase.

        ``current_hit_rate`` is computed from the hit/miss counters, which are
        only updated during the feature collection phase, whereas
        ``total_requests`` counts every call to ``access``.
        ``feature_collection_progress`` is 0.0 during warmup, the collected
        fraction during feature collection and 1.0 afterwards.

        :return: Dictionary with cache information.
        """
        total_requests = self.hit_count + self.miss_count
        current_hit_rate = self.hit_count / total_requests if total_requests > 0 else 0

        if self.current_phase == Phase.WARMUP:
            progress = 0.0
        elif (self.current_phase == Phase.FEATURE_COLLECTION
              and self.feature_collection_requests > 0):
            progress = min(1.0, self.feature_collection_count / self.feature_collection_requests)
        else:
            # Collection finished, or nothing was requested to be collected.
            progress = 1.0

        return {
            'phase': self.current_phase.name,
            'cache_size': len(self.cache),
            'max_cache_size': self.cache_size,
            'total_requests': self.access_count,
            'current_hit_rate': current_hit_rate,
            'feature_collection_progress': progress,
            'hyperparameters': {
                'eviction_ratio': self.eviction_ratio,
                'frequency_threshold': self.frequency_threshold
            }
        }

0 commit comments

Comments
 (0)