Skip to content

Commit 2198367

Browse files
Added SemBenchmarkCombo to benchmark script
1 parent ce2ed72 commit 2198367

1 file changed

Lines changed: 26 additions & 4 deletions

File tree

benchmarks/benchmark.py

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ class EmbeddingModel(Enum):
151151
E5_LARGE_V2 = ("emb_e5_large_v2", "E5_Large_v2", "float16", 512)
152152
E5_LARGE_V2_FT = ("emb_e5_large_v2_ft", "E5_Large_v2", "float16", 512)
153153
OPENAI_TEXT_EMBEDDING_SMALL = (
154-
"emb_openai_text_embedding_small",
154+
"emb_text-embedding-3-small",
155155
"text-embedding-3-small",
156156
"float16",
157157
1536,
@@ -177,7 +177,7 @@ class LargeLanguageModel(Enum):
177177
None,
178178
)
179179
GPT_4O_MINI = ("response_gpt-4o-mini", "GPT-4o-mini", "float16", None)
180-
GPT_4O_NANO = ("response_gpt-4.1-nano", "GPT-4.1-nano", "float16", None)
180+
GPT_4_1_NANO = ("response_gpt-4.1-nano", "GPT-4.1-nano", "float16", None)
181181
GPT_4_1 = ("response_gpt-4.1", "gpt-4.1-2025-04-14", "float16", None)
182182

183183

@@ -219,6 +219,8 @@ class Dataset(Enum):
219219
SEM_BENCHMARK_ARENA = "vCache/SemBenchmarkLmArena"
220220
# HuggingFace: https://huggingface.co/datasets/vCache/SemBenchmarkSearchQueries
221221
SEM_BENCHMARK_SEARCH_QUERIES = "vCache/SemBenchmarkSearchQueries"
222+
# HuggingFace: https://huggingface.co/datasets/vCache/SemBenchmarkCombo
223+
SEM_BENCHMARK_COMBO = "vCache/SemBenchmarkCombo"
222224
# Example for custom dataset. The path is relative to 'benchmarks/your_datasets/'
223225
CUSTOM_EXAMPLE = "your_datasets/your_custom_dataset.parquet"
224226

@@ -238,7 +240,7 @@ class GeneratePlotsOnly(Enum):
238240
### Benchmark Config ###################################################################################################
239241
########################################################################################################################
240242

241-
CONFIDENCE_INTERVALS_ITERATIONS: int = 3
243+
CONFIDENCE_INTERVALS_ITERATIONS: int = 1
242244
DISABLE_PROGRESS_BAR: bool = False
243245
KEEP_SPLIT: int = 100
244246
MAX_VECTOR_DB_CAPACITY: int = 150000
@@ -299,6 +301,26 @@ class GeneratePlotsOnly(Enum):
299301
MRUEvictionPolicy(max_size=2000, watermark=0.99, eviction_percentage=0.1),
300302
50,
301303
),
304+
# vCache Paper: Figure X (Third embedding model ablation)
305+
(
306+
EmbeddingModel.OPENAI_TEXT_EMBEDDING_SMALL,
307+
LargeLanguageModel.GPT_4_1_NANO,
308+
Dataset.SEM_BENCHMARK_ARENA,
309+
GeneratePlotsOnly.NO,
310+
BenchmarkComparisonSimilarityEvaluator(),
311+
MRUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
312+
60000,
313+
),
314+
# vCache Paper: Figure X (SemBenchmarkCombo)
315+
(
316+
EmbeddingModel.GTE,
317+
LargeLanguageModel.LLAMA_3_8B,
318+
Dataset.SEM_BENCHMARK_COMBO,
319+
GeneratePlotsOnly.NO,
320+
BenchmarkComparisonSimilarityEvaluator(),
321+
MRUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
322+
27500,
323+
),
302324
]
303325

304326
BASELINES_TO_RUN: List[Baseline] = [
@@ -1433,4 +1455,4 @@ def main():
14331455

14341456

14351457
if __name__ == "__main__":
1436-
main()
1458+
main()

0 commit comments

Comments
 (0)