@@ -151,7 +151,7 @@ class EmbeddingModel(Enum):
151151 E5_LARGE_V2 = ("emb_e5_large_v2" , "E5_Large_v2" , "float16" , 512 )
152152 E5_LARGE_V2_FT = ("emb_e5_large_v2_ft" , "E5_Large_v2" , "float16" , 512 )
153153 OPENAI_TEXT_EMBEDDING_SMALL = (
154- "emb_openai_text_embedding_small " ,
154+ "emb_text-embedding-3-small " ,
155155 "text-embedding-3-small" ,
156156 "float16" ,
157157 1536 ,
@@ -177,7 +177,7 @@ class LargeLanguageModel(Enum):
177177 None ,
178178 )
179179 GPT_4O_MINI = ("response_gpt-4o-mini" , "GPT-4o-mini" , "float16" , None )
180- GPT_4O_NANO = ("response_gpt-4.1-nano" , "GPT-4.1-nano" , "float16" , None )
180+ GPT_4_1_NANO = ("response_gpt-4.1-nano" , "GPT-4.1-nano" , "float16" , None )
181181 GPT_4_1 = ("response_gpt-4.1" , "gpt-4.1-2025-04-14" , "float16" , None )
182182
183183
@@ -219,6 +219,8 @@ class Dataset(Enum):
219219 SEM_BENCHMARK_ARENA = "vCache/SemBenchmarkLmArena"
220220 # HuggingFace: https://huggingface.co/datasets/vCache/SemBenchmarkSearchQueries
221221 SEM_BENCHMARK_SEARCH_QUERIES = "vCache/SemBenchmarkSearchQueries"
222+ # HuggingFace: https://huggingface.co/datasets/vCache/SemBenchmarkCombo
223+ SEM_BENCHMARK_COMBO = "vCache/SemBenchmarkCombo"
222224 # Example for custom dataset. The path is relative to 'benchmarks/your_datasets/'
223225 CUSTOM_EXAMPLE = "your_datasets/your_custom_dataset.parquet"
224226
@@ -238,7 +240,7 @@ class GeneratePlotsOnly(Enum):
238240### Benchmark Config ###################################################################################################
239241########################################################################################################################
240242
241- CONFIDENCE_INTERVALS_ITERATIONS : int = 3
243+ CONFIDENCE_INTERVALS_ITERATIONS : int = 1
242244DISABLE_PROGRESS_BAR : bool = False
243245KEEP_SPLIT : int = 100
244246MAX_VECTOR_DB_CAPACITY : int = 150000
@@ -299,6 +301,26 @@ class GeneratePlotsOnly(Enum):
299301 MRUEvictionPolicy (max_size = 2000 , watermark = 0.99 , eviction_percentage = 0.1 ),
300302 50 ,
301303 ),
304+ # vCache Paper: Figure X (Third embedding model ablation)
305+ (
306+ EmbeddingModel .OPENAI_TEXT_EMBEDDING_SMALL ,
307+ LargeLanguageModel .GPT_4_1_NANO ,
308+ Dataset .SEM_BENCHMARK_ARENA ,
309+ GeneratePlotsOnly .NO ,
310+ BenchmarkComparisonSimilarityEvaluator (),
311+ MRUEvictionPolicy (max_size = 100000 , watermark = 0.99 , eviction_percentage = 0.1 ),
312+ 60000 ,
313+ ),
314+ # vCache Paper: Figure X (SemBenchmarkCombo)
315+ (
316+ EmbeddingModel .GTE ,
317+ LargeLanguageModel .LLAMA_3_8B ,
318+ Dataset .SEM_BENCHMARK_COMBO ,
319+ GeneratePlotsOnly .NO ,
320+ BenchmarkComparisonSimilarityEvaluator (),
321+ MRUEvictionPolicy (max_size = 100000 , watermark = 0.99 , eviction_percentage = 0.1 ),
322+ 27500 ,
323+ ),
302324]
303325
304326BASELINES_TO_RUN : List [Baseline ] = [
@@ -1433,4 +1455,4 @@ def main():
14331455
14341456
14351457if __name__ == "__main__" :
1436- main ()
1458+ main ()
0 commit comments