Skip to content

Commit 4679b26

Browse files
committed
Refactor benchmark scripts: update naming conventions to explicit key=value format for consistency across files
1 parent 15d1fde commit 4679b26

4 files changed

Lines changed: 188 additions & 72 deletions

File tree

bindings/python/examples/benchmark-vector/benchmark_vector_params.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -479,19 +479,17 @@ def run_benchmark():
479479
global arcadedb
480480
import arcadedb_embedded as arcadedb
481481

482-
# Create config suffix for naming
483-
config_suffix = f"xmx{args.xmx}_loc{args.location_cache_size}_graph{args.graph_build_cache_size}_mut{args.mutations_before_rebuild}"
484-
if args.quantization != "NONE":
485-
config_suffix += f"_quant{args.quantization}"
486-
if args.store_vectors_in_graph:
487-
config_suffix += "_storeVectors"
488-
489-
db_base_path = f"./jvector_{args.dataset}_size_{args.dataset_size}_{config_suffix}"
482+
# Create config suffix for naming with explicit key=value format
483+
config_suffix = f"xmx={args.xmx}_loccache={args.location_cache_size}_graphcache={args.graph_build_cache_size}_mutations={args.mutations_before_rebuild}_quant={args.quantization}_store={'ON' if args.store_vectors_in_graph else 'OFF'}"
484+
485+
db_base_path = (
486+
f"./jvector_dataset={args.dataset}_size={args.dataset_size}_{config_suffix}"
487+
)
490488
k_values = [10]
491489

492-
# Output file
490+
# Output file with explicit key=value naming
493491
md_file = (
494-
f"benchmark_jvector_{args.dataset}_size_{args.dataset_size}_{config_suffix}.md"
492+
f"benchmark_dataset={args.dataset}_size={args.dataset_size}_{config_suffix}.md"
495493
)
496494

497495
all_dataset_sizes = {
@@ -554,11 +552,7 @@ def run_benchmark():
554552
# quantization passed via args now
555553

556554
# Unique DB path for this build config
557-
db_path = f"{db_base_path}_{max_connections}_{beam_width}"
558-
if args.quantization != "NONE":
559-
db_path += f"_{args.quantization}"
560-
if args.store_vectors_in_graph:
561-
db_path += "_graphstore"
555+
db_path = f"{db_base_path}_maxconn={max_connections}_beam={beam_width}"
562556

563557
print(
564558
f"\n [Build Config] max_connections={max_connections}, beam={beam_width}, quant={args.quantization}, graph_store={args.store_vectors_in_graph}"

bindings/python/examples/benchmark-vector/plot_benchmark_results.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,21 @@ def plot_dataset(dataset_key, dataset_name):
110110
if "jvector" in filename:
111111
algo_name = "JVector"
112112
is_jvector = True
113-
log_pattern = f"jvector-*-full_*_memory.log" # Simplified pattern matching
114-
# Need to be more specific to match dataset
115-
if "euclidean" in dataset_key:
116-
log_pattern = "jvector-euclidean-full_*_memory.log"
117-
else:
118-
log_pattern = "jvector-angular-full_*_memory.log"
113+
114+
# Try matching new explicit key=value format log files
115+
# Pattern: jvector-dataset={dataset_key}_size=full_..._memory.log
116+
# We use glob with wildcards for other params
117+
# Note: Now uses xmx= instead of heap= and includes mutations=
118+
log_pattern = f"jvector-dataset=*{dataset_key}*_size=full_*_memory.log"
119+
120+
# Fall back to the older log-file naming format when no new-format logs match
121+
if not glob.glob(os.path.join(LOGS_DIR, log_pattern)):
122+
log_pattern = f"jvector-*-full_*_memory.log"
123+
# Need to be more specific to match dataset
124+
if "euclidean" in dataset_key:
125+
log_pattern = "jvector-euclidean-full_*_memory.log"
126+
else:
127+
log_pattern = "jvector-angular-full_*_memory.log"
119128

120129
elif "faiss" in filename:
121130
dataset_type = "euclidean" if "euclidean" in dataset_key else "angular"
@@ -231,11 +240,9 @@ def plot_dataset(dataset_key, dataset_name):
231240
plt.xlim(0.0, 1.025)
232241

233242
# Save plot
234-
output_png = os.path.join(FIGURES_DIR, f"plot_{dataset_key}.png")
235-
output_pdf = os.path.join(FIGURES_DIR, f"plot_{dataset_key}.pdf")
236-
plt.savefig(output_png)
243+
output_pdf = os.path.join(FIGURES_DIR, f"plot_dataset={dataset_key}.pdf")
237244
plt.savefig(output_pdf)
238-
print(f"Saved plots to {output_png} and {output_pdf}")
245+
print(f"Saved plot to {output_pdf}")
239246

240247

241248
def main():

0 commit comments

Comments
 (0)