Skip to content

Commit b6290bd

Browse files
committed
feat(benchmark): Add CPU logging to benchmark script and enhance JVM thread settings
1 parent 981e2e4 commit b6290bd

3 files changed

Lines changed: 35 additions & 3 deletions

File tree

bindings/python/examples/benchmark-vector/benchmark_arcadedb_msmarco.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import json
2828
import re
2929
import shutil
30+
import threading
3031
import time
3132
from pathlib import Path
3233
from typing import Dict, List, Tuple
@@ -66,6 +67,27 @@ def timed_section(name: str, fn):
6667
return result, dur, start_rss, end_rss
6768

6869

70+
def start_cpu_logger(interval_sec: int = 2):
    """Start a background sampler that prints this process's CPU% and RSS.

    A daemon thread wakes up every ``interval_sec`` seconds and prints one
    ``[cpu]`` line, so the caller's own work is never blocked by sampling.

    Returns:
        The ``threading.Event`` that stops the sampler once it is set, or
        ``None`` when ``psutil`` is falsy and no sampler was started.
    """
    if psutil:
        me = psutil.Process()
        # Priming call: the result is discarded; it only gives the later
        # non-blocking cpu_percent() readings a baseline to compare against.
        me.cpu_percent(None)
        halt = threading.Event()

        def _sample_until_halted():
            while True:
                # wait() doubles as the sleep: it returns True only when
                # the event has been set, which ends the loop.
                if halt.wait(interval_sec):
                    break
                cpu = me.cpu_percent(None)
                rss = me.memory_info().rss / (1024 * 1024)  # bytes -> MiB
                print(f"[cpu] {cpu:5.1f}% | rss={rss:8.1f} MB")

        # Daemon thread, so a forgotten stop never keeps the process alive.
        threading.Thread(target=_sample_until_halted, daemon=True).start()
        return halt

    return None
89+
90+
6991
# -------------------------
7092
# dataset helpers (reuse MSMARCO shard layout)
7193
# -------------------------
@@ -435,6 +457,8 @@ def main():
435457
)
436458
args = ap.parse_args()
437459

460+
stop_cpu = start_cpu_logger(2)
461+
438462
np.random.seed(args.seed)
439463
eval_k = 50
440464

@@ -670,6 +694,8 @@ def record(name: str, result, dur, rss_start, rss_end):
670694
record("close_db_final", {}, dur, r0, r1)
671695
except Exception:
672696
pass
697+
if stop_cpu:
698+
stop_cpu.set()
673699

674700
db_size = dir_size_mb(db_path)
675701
rss_after_vals = [

bindings/python/examples/benchmark-vector/run_arcadedb_sweep.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ fi
4545
THREAD_ENV=""
4646
if [[ -n "$THREADS_PER_TASK" ]]; then
4747
THREAD_ENV="OMP_NUM_THREADS=${THREADS_PER_TASK} MKL_NUM_THREADS=${THREADS_PER_TASK} OPENBLAS_NUM_THREADS=${THREADS_PER_TASK} VECLIB_MAXIMUM_THREADS=${THREADS_PER_TASK} BLIS_NUM_THREADS=${THREADS_PER_TASK} NUMEXPR_NUM_THREADS=${THREADS_PER_TASK}"
48-
# Restrict JVM ForkJoinPool for JVector parallelism
49-
export ARCADEDB_JVM_ARGS="${ARCADEDB_JVM_ARGS:-} -Djava.util.concurrent.ForkJoinPool.common.parallelism=${THREADS_PER_TASK}"
48+
# Restrict JVM pools for JVector: common pool, physical core count, and reported processors
49+
JVM_THREAD_FLAGS="-Djava.util.concurrent.ForkJoinPool.common.parallelism=${THREADS_PER_TASK} -Djvector.physical_core_count=${THREADS_PER_TASK} -XX:ActiveProcessorCount=${THREADS_PER_TASK}"
50+
export ARCADEDB_JVM_ARGS="${ARCADEDB_JVM_ARGS:-} ${JVM_THREAD_FLAGS}"
5051
fi
5152

5253
BASE="${THREAD_ENV} python \"${BENCH_PY}\" --dataset-dir \"${DATASET_DIR}\" --db-root \"${DB_ROOT}\""

bindings/python/jar_exclusions.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,10 @@
66
# Format: One pattern per line, comments start with #
77
# Empty lines are ignored
88

9-
# gRPC wire protocol (~40MB) - not commonly needed for Python use cases
109
arcadedb-grpcw-*.jar
10+
js-language-*.jar
11+
icu4j-*.jar
12+
truffle-*.jar
13+
regex-*.jar
14+
arcadedb-metrics-*.jar
15+
arcadedb-mongodbw-*.jar

0 commit comments

Comments
 (0)