99// libCacheSim
1010//
1111// Modified by Haocheng at 08/23/2025
12+ // Feature collection added at 12/06/2025
1213
1314#include "dataStructure/hashtable/hashtable.h"
1415#include "libCacheSim/evictionAlgo.h"
16+ #include "S4FIFO_features.h"
1517
1618#ifdef __cplusplus
1719extern "C" {
@@ -56,11 +58,17 @@ typedef struct {
5658 S4FIFO_phase_callback_t
5759 phase_callback ; // Optional callback when phase changes
5860 void * callback_user_data ; // User data passed to callback
61+
62+ // ==== Feature collection ====
63+ int32_t feature_num_buckets ; // Number of buckets for hit position histograms
64+ S4FIFO_feature_collector_t * feature_collector ; // NULL if not collecting
65+ S4FIFO_feature_vector_t last_features ; // Most recent feature vector
5966} S4FIFO_params_t ;
6067
// Default S4FIFO parameter string: comma-separated key=value pairs.
// feature-num-buckets (16 by default) is the number of buckets used for the
// hit position histograms (see the feature_num_buckets field of the params).
6168static const char * DEFAULT_CACHE_PARAMS =
6269 "small-size-ratio=0.10,ghost-size-ratio=0.90,move-to-main-threshold=2,"
63- "small-skip-ratio=0,ghost-to-main-threshold=0,feature-collect-reqs=10000" ;
70+ "small-skip-ratio=0,ghost-to-main-threshold=0,feature-collect-reqs=10000,"
71+ "feature-num-buckets=16" ;
6472
6573// ***********************************************************************
6674// **** ****
@@ -156,6 +164,16 @@ cache_t *S4FIFO_init(const common_cache_params_t ccache_params,
156164 params -> phase_callback = NULL ;
157165 params -> callback_user_data = NULL ;
158166
167+ /* Initialize feature collector if collect_features is enabled */
168+ params -> feature_collector = NULL ;
169+ if (params -> collect_features ) {
170+ params -> feature_collector = malloc (sizeof (S4FIFO_feature_collector_t ));
171+ feature_collector_init (params -> feature_collector , ccache_params .cache_size ,
172+ params -> feature_collect_reqs ,
173+ params -> feature_num_buckets );
174+ }
175+ memset (& params -> last_features , 0 , sizeof (S4FIFO_feature_vector_t ));
176+
159177 return cache ;
160178}
161179
@@ -172,6 +190,9 @@ static void S4FIFO_free(cache_t *cache) {
172190 params -> ghost_fifo -> cache_free (params -> ghost_fifo );
173191 }
174192 params -> main_fifo -> cache_free (params -> main_fifo );
193+ if (params -> feature_collector != NULL ) {
194+ free (params -> feature_collector );
195+ }
175196 free (cache -> eviction_params );
176197 cache_struct_free (cache );
177198}
@@ -230,9 +251,24 @@ static bool S4FIFO_get(cache_t *cache, const request_t *req) {
230251 if (params -> phase_callback != NULL ) {
231252 params -> phase_callback (cache , old_phase , new_phase );
232253 }
254+ /* Extract features at phase transition (for ML training) */
255+ if (params -> feature_collector != NULL ) {
256+ feature_collector_get_features (
257+ params -> feature_collector , & params -> last_features ,
258+ params -> small_fifo -> get_occupied_byte (params -> small_fifo ),
259+ params -> main_fifo -> get_occupied_byte (params -> main_fifo ),
260+ params -> ghost_fifo
261+ ? params -> ghost_fifo -> get_occupied_byte (params -> ghost_fifo )
262+ : 0 );
263+ }
233264 }
234265 }
235266
267+ /* Feature collection: record request */
268+ if (params -> feature_collector != NULL ) {
269+ feature_collector_record_request (params -> feature_collector , cache -> n_req );
270+ }
271+
236272 bool cache_hit = cache_get_base (cache , req );
237273
238274 return cache_hit ;
@@ -272,13 +308,21 @@ static cache_obj_t *S4FIFO_find(cache_t *cache, const request_t *req,
272308
273309 /* update cache is true from now */
274310 params -> hit_on_ghost = false;
311+ S4FIFO_feature_collector_t * fc = params -> feature_collector ;
312+
275313 cache_obj_t * obj = params -> small_fifo -> find (params -> small_fifo , req , true);
276314 if (obj != NULL ) {
277315 /* S4FIFO: update the frequency */
278316 if ((int64_t )(- obj -> time_stamp + params -> s_counter ) >=
279317 (int64_t )(params -> small_skip_ratio * params -> small_fifo -> cache_size )) {
280318 obj -> S4FIFO .freq += 1 ;
281319 }
320+ /* Feature collection: hit in small FIFO */
321+ if (fc != NULL ) {
322+ feature_collector_record_hit_small (fc , obj -> S4FIFO .insertion_time ,
323+ cache -> n_req );
324+ feature_collector_record_repeat (fc );
325+ }
282326 return obj ;
283327 }
284328
@@ -292,6 +336,15 @@ static cache_obj_t *S4FIFO_find(cache_t *cache, const request_t *req,
292336 int64_t ghost_freq = ghost_obj -> S4FIFO .freq ;
293337
294338 if (ghost_freq >= params -> ghost_to_main_threshold ) {
339+ /* Feature collection: ghost hit with hole-adjusted position */
340+ if (fc != NULL ) {
341+ feature_collector_record_hit_ghost (fc , ghost_obj -> S4FIFO .insertion_time ,
342+ ghost_obj -> S4FIFO .insert_bucket ,
343+ cache -> n_req );
344+ // Record middle removal since we're promoting this object
345+ feature_collector_record_ghost_removal (fc , cache -> n_req );
346+ }
347+
295348 params -> ghost_fifo -> remove (params -> ghost_fifo , req -> obj_id );
296349 params -> hit_on_ghost = true;
297350 params -> hit_on_ghost_freq = ghost_freq ;
@@ -304,6 +357,12 @@ static cache_obj_t *S4FIFO_find(cache_t *cache, const request_t *req,
304357 obj = params -> main_fifo -> find (params -> main_fifo , req , true);
305358 if (obj != NULL ) {
306359 obj -> S4FIFO .freq += 1 ;
360+ /* Feature collection: hit in main FIFO */
361+ if (fc != NULL ) {
362+ feature_collector_record_hit_main (fc , obj -> S4FIFO .insertion_time ,
363+ cache -> n_req );
364+ feature_collector_record_repeat (fc );
365+ }
307366 }
308367
309368 return obj ;
@@ -322,6 +381,7 @@ static cache_obj_t *S4FIFO_find(cache_t *cache, const request_t *req,
322381 */
323382static cache_obj_t * S4FIFO_insert (cache_t * cache , const request_t * req ) {
324383 S4FIFO_params_t * params = (S4FIFO_params_t * )cache -> eviction_params ;
384+ S4FIFO_feature_collector_t * fc = params -> feature_collector ;
325385 cache_obj_t * obj = NULL ;
326386
327387 cache_t * small = params -> small_fifo ;
@@ -332,6 +392,12 @@ static cache_obj_t *S4FIFO_insert(cache_t *cache, const request_t *req) {
332392 params -> hit_on_ghost = false;
333393 params -> hit_on_ghost_freq = 0 ;
334394 obj = main -> insert (main , req );
395+ /* Feature collection: record insertion to main */
396+ if (fc != NULL && obj != NULL ) {
397+ obj -> S4FIFO .insertion_time = cache -> n_req ;
398+ obj -> S4FIFO .insert_bucket =
399+ (int32_t )feature_collector_record_insert_main (fc , cache -> n_req );
400+ }
335401 } else {
336402 /* insert into small fifo */
337403 if (req -> obj_size >= small -> cache_size ) {
@@ -341,16 +407,31 @@ static cache_obj_t *S4FIFO_insert(cache_t *cache, const request_t *req) {
341407 if (!params -> has_evicted &&
342408 small -> get_occupied_byte (small ) >= small -> cache_size ) {
343409 obj = main -> insert (main , req );
410+ /* Feature collection: record insertion to main */
411+ if (fc != NULL && obj != NULL ) {
412+ obj -> S4FIFO .insertion_time = cache -> n_req ;
413+ obj -> S4FIFO .insert_bucket =
414+ (int32_t )feature_collector_record_insert_main (fc , cache -> n_req );
415+ }
344416 } else {
345417 obj = small -> insert (small , req );
346418 params
347419 -> s_counter ++ ; // only increase s_counter when insert into small fifo
348420 obj -> time_stamp = params -> s_counter ;
421+ /* Feature collection: record insertion to small and unique object */
422+ if (fc != NULL && obj != NULL ) {
423+ obj -> S4FIFO .insertion_time = cache -> n_req ;
424+ obj -> S4FIFO .insert_bucket =
425+ (int32_t )feature_collector_record_insert_small (fc , cache -> n_req );
426+ feature_collector_record_unique (fc );
427+ }
349428 }
350429 }
351430
352431 // if an object is inserted from ghost to main, we also set it to zero
353- obj -> S4FIFO .freq = 0 ;
432+ if (obj != NULL ) {
433+ obj -> S4FIFO .freq = 0 ;
434+ }
354435
355436 return obj ;
356437}
@@ -375,6 +456,7 @@ static void S4FIFO_evict_small(cache_t *cache, const request_t *req) {
375456 cache_t * small = params -> small_fifo ;
376457 cache_t * ghost = params -> ghost_fifo ;
377458 cache_t * main = params -> main_fifo ;
459+ S4FIFO_feature_collector_t * fc = params -> feature_collector ;
378460
379461 bool has_evicted = false;
380462 while (!has_evicted && small -> get_occupied_byte (small ) > 0 ) {
@@ -384,16 +466,38 @@ static void S4FIFO_evict_small(cache_t *cache, const request_t *req) {
384466 copy_cache_obj_to_request (params -> req_local , obj_to_evict );
385467
386468 if (obj_to_evict -> S4FIFO .freq >= params -> move_to_main_threshold ) {
387- main -> insert (main , params -> req_local );
469+ cache_obj_t * main_obj = main -> insert (main , params -> req_local );
470+ /* Feature collection: promoted to main, update insertion time */
471+ if (fc != NULL && main_obj != NULL ) {
472+ main_obj -> S4FIFO .insertion_time = cache -> n_req ;
473+ main_obj -> S4FIFO .insert_bucket =
474+ (int32_t )feature_collector_record_insert_main (fc , cache -> n_req );
475+ }
388476 } else {
389477 // insert to ghost
390478 if (ghost != NULL ) {
391479 int64_t small_freq = obj_to_evict -> S4FIFO .freq ;
480+
481+ /* Feature collection: check if ghost will evict (tail removal) */
482+ int64_t ghost_n_obj_before = ghost -> get_n_obj (ghost );
483+
392484 ghost -> get (ghost , params -> req_local );
485+
486+ /* Note: ghost tail eviction is NOT a middle removal, so we don't
487+ * record it. Middle removal only happens when ghost hit promotes
488+ * object to main. */
489+ (void )ghost_n_obj_before ; // Unused now
490+
393491 // let the obj inherit the freq from small fifo (exact value)
394492 cache_obj_t * ghost_obj = ghost -> find (ghost , params -> req_local , false);
395493 if (ghost_obj != NULL ) {
396494 ghost_obj -> S4FIFO .freq = small_freq ;
495+ /* Store insertion time and bucket for hit position tracking */
496+ ghost_obj -> S4FIFO .insertion_time = cache -> n_req ;
497+ if (fc != NULL ) {
498+ ghost_obj -> S4FIFO .insert_bucket =
499+ (int32_t )feature_collector_record_insert_ghost (fc , cache -> n_req );
500+ }
397501 }
398502 }
399503 has_evicted = true;
@@ -407,6 +511,7 @@ static void S4FIFO_evict_small(cache_t *cache, const request_t *req) {
407511
408512static void S4FIFO_evict_main (cache_t * cache , const request_t * req ) {
409513 S4FIFO_params_t * params = (S4FIFO_params_t * )cache -> eviction_params ;
514+ S4FIFO_feature_collector_t * fc = params -> feature_collector ;
410515 cache_t * main = params -> main_fifo ;
411516
412517 bool has_evicted = false;
@@ -423,6 +528,12 @@ static void S4FIFO_evict_main(cache_t *cache, const request_t *req) {
423528 cache_obj_t * new_obj = main -> insert (main , params -> req_local );
424529 // clock with 2-bit counter
425530 new_obj -> S4FIFO .freq = MIN (freq , 3 ) - 1 ;
531+ /* Feature collection: update insertion time for reinserted object */
532+ if (fc != NULL ) {
533+ new_obj -> S4FIFO .insertion_time = cache -> n_req ;
534+ new_obj -> S4FIFO .insert_bucket =
535+ (int32_t )feature_collector_record_insert_main (fc , cache -> n_req );
536+ }
426537
427538 } else {
428539 bool removed = main -> remove (main , obj_to_evict -> obj_id );
@@ -546,6 +657,11 @@ static void S4FIFO_parse_params(cache_t *cache,
546657 params -> ghost_to_main_threshold = atoi (value );
547658 } else if (strcasecmp (key , "feature-collect-reqs" ) == 0 ) {
548659 params -> feature_collect_reqs = strtoll (value , NULL , 10 );
660+ } else if (strcasecmp (key , "feature-num-buckets" ) == 0 ) {
661+ params -> feature_num_buckets = atoi (value );
662+ } else if (strcasecmp (key , "collect-features" ) == 0 ) {
663+ params -> collect_features =
664+ (strcasecmp (value , "true" ) == 0 || atoi (value ) == 1 );
549665 } else if (strcasecmp (key , "print" ) == 0 ) {
550666 printf ("parameters: %s\n" , S4FIFO_current_params (params ));
551667 exit (0 );
@@ -634,6 +750,96 @@ const char *S4FIFO_get_phase_name(S4FIFO_phase_t phase) {
634750 }
635751}
636752
753+ // ***********************************************************************
754+ // **** ****
755+ // **** Feature collection API ****
756+ // **** ****
757+ // ***********************************************************************
758+
759+ /**
760+ * @brief Enable feature collection (must be called before first request)
761+ *
762+ * @param cache the cache
763+ * @param window_size window size for feature collection (0 = use default)
764+ * @param num_buckets number of buckets for histograms (0 = use default 16)
765+ */
766+ void S4FIFO_enable_feature_collection (cache_t * cache , int64_t window_size ,
767+ int32_t num_buckets ) {
768+ S4FIFO_params_t * params = (S4FIFO_params_t * )cache -> eviction_params ;
769+ if (params -> feature_collector == NULL ) {
770+ params -> feature_collector = malloc (sizeof (S4FIFO_feature_collector_t ));
771+ feature_collector_init (params -> feature_collector , cache -> cache_size ,
772+ window_size > 0 ? window_size
773+ : params -> feature_collect_reqs ,
774+ num_buckets > 0 ? num_buckets
775+ : params -> feature_num_buckets );
776+ }
777+ params -> collect_features = true;
778+ }
779+
780+ /**
781+ * @brief Disable feature collection and free resources
782+ *
783+ * @param cache the cache
784+ */
785+ void S4FIFO_disable_feature_collection (cache_t * cache ) {
786+ S4FIFO_params_t * params = (S4FIFO_params_t * )cache -> eviction_params ;
787+ if (params -> feature_collector != NULL ) {
788+ free (params -> feature_collector );
789+ params -> feature_collector = NULL ;
790+ }
791+ params -> collect_features = false;
792+ }
793+
794+ /**
795+ * @brief Get the current feature vector
796+ *
797+ * @param cache the cache
798+ * @param fv output feature vector (caller allocated)
799+ * @return true if features are available, false otherwise
800+ */
801+ bool S4FIFO_get_features (cache_t * cache , S4FIFO_feature_vector_t * fv ) {
802+ S4FIFO_params_t * params = (S4FIFO_params_t * )cache -> eviction_params ;
803+ if (params -> feature_collector == NULL ) {
804+ return false;
805+ }
806+
807+ feature_collector_get_features (
808+ params -> feature_collector , fv ,
809+ params -> small_fifo -> get_occupied_byte (params -> small_fifo ),
810+ params -> main_fifo -> get_occupied_byte (params -> main_fifo ),
811+ params -> ghost_fifo
812+ ? params -> ghost_fifo -> get_occupied_byte (params -> ghost_fifo )
813+ : 0 );
814+ return true;
815+ }
816+
817+ /**
818+ * @brief Get the last cached feature vector (computed at phase transitions)
819+ *
820+ * @param cache the cache
821+ * @return pointer to last feature vector (valid until next phase transition)
822+ */
823+ const S4FIFO_feature_vector_t * S4FIFO_get_last_features (cache_t * cache ) {
824+ S4FIFO_params_t * params = (S4FIFO_params_t * )cache -> eviction_params ;
825+ return & params -> last_features ;
826+ }
827+
828+ /**
829+ * @brief Print current features to file (for debugging/logging)
830+ *
831+ * @param cache the cache
832+ * @param fp output file pointer
833+ */
834+ void S4FIFO_print_features (cache_t * cache , FILE * fp ) {
835+ S4FIFO_feature_vector_t fv ;
836+ if (S4FIFO_get_features (cache , & fv )) {
837+ feature_vector_print (& fv , fp );
838+ } else {
839+ fprintf (fp , "Feature collection not enabled\n" );
840+ }
841+ }
842+
637843#ifdef __cplusplus
638844}
639845#endif
0 commit comments