Skip to content

Commit 38eb0c5

Browse files
committed
Enhance version metadata collection and processing in matrix scripts
- Updated various matrix scripts to include detailed version metadata collection for databases and backends.
- Introduced functions to format and summarize version information for better reporting.
- Adjusted dataset configurations for improved performance and resource allocation.
- Enhanced error handling in the async executor to ensure robust processing checks.
- Added tests to validate async executor behavior under load conditions.
1 parent 6f9148d commit 38eb0c5

27 files changed

Lines changed: 1459 additions & 103 deletions

.github/workflows/test-python-examples.yml

Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -274,9 +274,6 @@ jobs:
274274
- name: Run all examples
275275
id: run_examples
276276
shell: bash
277-
env:
278-
# Increase JVM heap for large CSV imports (example 04)
279-
ARCADEDB_JVM_ARGS: "-Xmx8g -Xms8g"
280277
run: |
281278
cd bindings/python/examples
282279
@@ -318,41 +315,49 @@ jobs:
318315
example_args="--dataset movielens-small --export"
319316
example_name="$example (movielens-small dataset with export)"
320317
timeout_duration=900 # 15 minutes
318+
example_jvm_args="-Xmx8g -Xms8g"
321319
;;
322320
"05_csv_import_graph.py")
323321
example_args="--dataset movielens-small --method sql --import-jsonl ./exports/movielens_small_db.jsonl.tgz --export"
324322
example_name="$example (movielens-small dataset, embedded sql method, import/export)"
325323
timeout_duration=900 # 15 minutes
324+
example_jvm_args=""
326325
;;
327326
"06_vector_search_recommendations.py")
328327
example_args="--import-jsonl ./exports/movielens_graph_small_db.jsonl.tgz"
329328
example_name="$example (vector search, import from JSONL)"
330329
timeout_duration=900 # 15 minutes
330+
example_jvm_args=""
331331
;;
332332
"07_stackoverflow_tables_oltp.py")
333333
example_args="--dataset stackoverflow-tiny --db arcadedb --threads 1 --transactions 1000 --batch-size 500 --mem-limit 2g --run-label ci07"
334334
example_name="$example (stackoverflow-tiny, arcadedb, minimal oltp)"
335335
timeout_duration=900
336+
example_jvm_args=""
336337
;;
337338
"08_stackoverflow_tables_olap.py")
338339
example_args="--dataset stackoverflow-tiny --db arcadedb --threads 1 --batch-size 500 --query-runs 1 --query-order fixed --mem-limit 2g --run-label ci08"
339340
example_name="$example (stackoverflow-tiny, arcadedb, minimal olap)"
340341
timeout_duration=900
342+
example_jvm_args=""
341343
;;
342344
"09_stackoverflow_graph_oltp.py")
343345
example_args="--dataset stackoverflow-tiny --db arcadedb --arcadedb-oltp-language cypher --threads 1 --transactions 1000 --batch-size 500 --mem-limit 2g --run-label ci09"
344346
example_name="$example (stackoverflow-tiny graph oltp, arcadedb cypher, minimal)"
345347
timeout_duration=900
348+
example_jvm_args=""
346349
;;
347350
"10_stackoverflow_graph_olap.py")
348351
example_args="--dataset stackoverflow-tiny --db arcadedb_cypher --threads 1 --batch-size 500 --query-runs 1 --query-order fixed --mem-limit 2g --run-label ci10"
349352
example_name="$example (stackoverflow-tiny graph olap, arcadedb cypher, minimal)"
350353
timeout_duration=900
354+
example_jvm_args=""
351355
;;
352356
"11_vector_index_build.py")
353357
example_args="--backend arcadedb --dataset stackoverflow-tiny --threads 1 --mem-limit 2g --batch-size 500 --max-connections 16 --beam-width 100 --quantization NONE --run-label ci11_arcadedb"
354358
example_name="$example (vector build, arcadedb backend, minimal)"
355359
timeout_duration=1200
360+
example_jvm_args=""
356361
;;
357362
"12_vector_search.py")
358363
db_path=$(find ./my_test_databases -maxdepth 1 -type d -name 'backend=arcadedb_dataset=stackoverflow-tiny_*run=ci11_arcadedb' | head -n 1)
@@ -365,31 +370,37 @@ jobs:
365370
example_args="--backend arcadedb --dataset stackoverflow-tiny --db-path $db_path --overquery-factors 1 --k 10 --query-limit 100 --query-runs 1 --query-order fixed --threads 1 --mem-limit 2g --run-label ci12_arcadedb"
366371
example_name="$example (vector search, arcadedb backend, minimal)"
367372
timeout_duration=1200
373+
example_jvm_args=""
368374
;;
369375
"13_stackoverflow_hybrid_queries.py")
370376
example_args="--dataset stackoverflow-tiny --batch-size 500 --encode-batch-size 64 --top-k 5 --candidate-limit 100 --min-reputation 100 --infer-sample-limit 5000 --run-label ci13"
371377
example_name="$example (hybrid pipeline, tiny + minimal limits)"
372378
timeout_duration=1800
379+
example_jvm_args=""
373380
;;
374381
"14_lifecycle_timing.py")
375-
example_args="--runs 1 --table-records 2000 --graph-vertices 500 --vector-records 500 --vector-dimensions 32 --query-runs 10 --jvm-heap 1g"
382+
example_args="--runs 1 --table-records 2000 --graph-vertices 500 --vector-records 500 --vector-dimensions 32 --query-runs 10 --jvm-heap 4g"
376383
example_name="$example (lifecycle benchmark, minimal)"
377384
timeout_duration=900
385+
example_jvm_args=""
378386
;;
379387
"15_import_database_vs_transactional_table_ingest.py")
380-
example_args="--rows-per-table 2000 --tables 2 --columns 6 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 2g --work-dir ./my_test_databases/import_vs_txn_dummy_ci"
388+
example_args="--rows-per-table 2000 --tables 2 --columns 6 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 4g --work-dir ./my_test_databases/import_vs_txn_dummy_ci"
381389
example_name="$example (table ingest benchmark, minimal)"
382390
timeout_duration=1200
391+
example_jvm_args=""
383392
;;
384393
"16_import_database_vs_transactional_graph_ingest.py")
385-
example_args="--vertices 2000 --edges 4000 --vertex-int-props 3 --vertex-str-props 2 --edge-int-props 1 --edge-str-props 1 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 2g --work-dir ./my_test_databases/import_vs_txn_graph_ci"
394+
example_args="--vertices 2000 --edges 4000 --vertex-int-props 3 --vertex-str-props 2 --edge-int-props 1 --edge-str-props 1 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 4g --work-dir ./my_test_databases/import_vs_txn_graph_ci"
386395
example_name="$example (graph ingest benchmark, minimal)"
387396
timeout_duration=1200
397+
example_jvm_args=""
388398
;;
389399
*)
390400
example_args=""
391401
example_name="$example"
392402
timeout_duration=900 # 15 minutes default
403+
example_jvm_args=""
393404
;;
394405
esac
395406
@@ -400,11 +411,23 @@ jobs:
400411
echo "----------------------------------------"
401412
402413
# Run the example with appropriate parameters
403-
if $TIMEOUT_CMD $timeout_duration python "$example" $example_args > "$log_file" 2>&1; then
414+
if [ -n "$example_jvm_args" ]; then
415+
if ARCADEDB_JVM_ARGS="$example_jvm_args" JAVA_TOOL_OPTIONS="" _JAVA_OPTIONS="" $TIMEOUT_CMD $timeout_duration python "$example" $example_args > "$log_file" 2>&1; then
416+
exit_code=0
417+
else
418+
exit_code=$?
419+
fi
420+
else
421+
if ARCADEDB_JVM_ARGS="" JAVA_TOOL_OPTIONS="" _JAVA_OPTIONS="" $TIMEOUT_CMD $timeout_duration python "$example" $example_args > "$log_file" 2>&1; then
422+
exit_code=0
423+
else
424+
exit_code=$?
425+
fi
426+
fi
427+
if [ $exit_code -eq 0 ]; then
404428
echo "✅ PASSED: $example_name" | tee -a $results_file
405429
passed=$((passed + 1))
406430
else
407-
exit_code=$?
408431
if [ $exit_code -eq 124 ]; then
409432
echo "⏱️ TIMEOUT: $example_name (exceeded $((timeout_duration/60)) minutes)" | tee -a $results_file
410433
failed=$((failed + 1))

bindings/python/examples/07_stackoverflow_tables_oltp.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2004,10 +2004,16 @@ def run_in_docker(args):
20042004

20052005
docker = shutil.which("docker")
20062006
if not docker:
2007-
raise RuntimeError("docker not found in PATH")
2007+
return False
20082008

20092009
repo_root = Path(__file__).resolve().parents[3]
2010-
user_spec = f"{os.getuid()}:{os.getgid()}"
2010+
host_uid = os.getuid() if hasattr(os, "getuid") else None
2011+
host_gid = os.getgid() if hasattr(os, "getgid") else None
2012+
user_spec = (
2013+
f"{host_uid}:{host_gid}"
2014+
if host_uid is not None and host_gid is not None
2015+
else None
2016+
)
20112017

20122018
filtered_args = []
20132019
skip_next = False
@@ -2066,10 +2072,13 @@ def run_in_docker(args):
20662072
docker_image = "postgres:latest"
20672073

20682074
cmd = [docker, "run", "--rm"]
2069-
if args.db != "postgresql":
2075+
if args.db != "postgresql" and user_spec is not None:
20702076
cmd.extend(["--user", user_spec])
2071-
else:
2072-
cmd.extend(["-e", f"HOST_UID={os.getuid()}", "-e", f"HOST_GID={os.getgid()}"])
2077+
elif args.db == "postgresql":
2078+
if host_uid is not None:
2079+
cmd.extend(["-e", f"HOST_UID={host_uid}"])
2080+
if host_gid is not None:
2081+
cmd.extend(["-e", f"HOST_GID={host_gid}"])
20732082

20742083
cmd.extend(
20752084
[

bindings/python/examples/08_stackoverflow_tables_olap.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1312,10 +1312,16 @@ def run_in_docker(args):
13121312

13131313
docker = shutil.which("docker")
13141314
if not docker:
1315-
raise RuntimeError("docker not found in PATH")
1315+
return False
13161316

13171317
repo_root = Path(__file__).resolve().parents[3]
1318-
user_spec = f"{os.getuid()}:{os.getgid()}"
1318+
host_uid = os.getuid() if hasattr(os, "getuid") else None
1319+
host_gid = os.getgid() if hasattr(os, "getgid") else None
1320+
user_spec = (
1321+
f"{host_uid}:{host_gid}"
1322+
if host_uid is not None and host_gid is not None
1323+
else None
1324+
)
13191325

13201326
filtered_args = []
13211327
skip_next = False
@@ -1373,10 +1379,13 @@ def run_in_docker(args):
13731379
"--rm",
13741380
]
13751381

1376-
if args.db != "postgresql":
1382+
if args.db != "postgresql" and user_spec is not None:
13771383
cmd.extend(["--user", user_spec])
1378-
else:
1379-
cmd.extend(["-e", f"HOST_UID={os.getuid()}", "-e", f"HOST_GID={os.getgid()}"])
1384+
elif args.db == "postgresql":
1385+
if host_uid is not None:
1386+
cmd.extend(["-e", f"HOST_UID={host_uid}"])
1387+
if host_gid is not None:
1388+
cmd.extend(["-e", f"HOST_GID={host_gid}"])
13801389

13811390
cmd.extend(
13821391
[

bindings/python/examples/09_stackoverflow_graph_oltp.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7719,9 +7719,13 @@ def run_in_docker(args) -> bool:
77197719
return False
77207720

77217721
repo_root = Path(__file__).resolve().parents[3]
7722-
host_uid = os.getuid()
7723-
host_gid = os.getgid()
7724-
user_spec = f"{host_uid}:{host_gid}"
7722+
host_uid = os.getuid() if hasattr(os, "getuid") else None
7723+
host_gid = os.getgid() if hasattr(os, "getgid") else None
7724+
user_spec = (
7725+
f"{host_uid}:{host_gid}"
7726+
if host_uid is not None and host_gid is not None
7727+
else None
7728+
)
77257729
filtered_args = []
77267730
skip_next = False
77277731
for arg in sys.argv[1:]:
@@ -7763,8 +7767,6 @@ def run_in_docker(args) -> bool:
77637767
docker,
77647768
"run",
77657769
"--rm",
7766-
"-u",
7767-
user_spec,
77687770
"--memory",
77697771
args.mem_limit,
77707772
"--cpus",
@@ -7783,6 +7785,9 @@ def run_in_docker(args) -> bool:
77837785
inner_cmd,
77847786
]
77857787

7788+
if user_spec is not None:
7789+
cmd[3:3] = ["-u", user_spec]
7790+
77867791
print("Launching Docker container...")
77877792
subprocess.run(cmd, check=True)
77887793
return True

bindings/python/examples/10_stackoverflow_graph_olap.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4906,7 +4906,13 @@ def run_in_docker(args) -> bool:
49064906
return False
49074907

49084908
repo_root = Path(__file__).resolve().parents[3]
4909-
user_spec = f"{os.getuid()}:{os.getgid()}"
4909+
host_uid = os.getuid() if hasattr(os, "getuid") else None
4910+
host_gid = os.getgid() if hasattr(os, "getgid") else None
4911+
user_spec = (
4912+
f"{host_uid}:{host_gid}"
4913+
if host_uid is not None and host_gid is not None
4914+
else None
4915+
)
49104916

49114917
filtered_args = []
49124918
skip_next = False
@@ -4952,8 +4958,6 @@ def run_in_docker(args) -> bool:
49524958
docker,
49534959
"run",
49544960
"--rm",
4955-
"-u",
4956-
user_spec,
49574961
"--memory",
49584962
args.mem_limit,
49594963
"--cpus",
@@ -4972,6 +4976,9 @@ def run_in_docker(args) -> bool:
49724976
inner_cmd,
49734977
]
49744978

4979+
if user_spec is not None:
4980+
cmd[3:3] = ["-u", user_spec]
4981+
49754982
print("Launching Docker container...")
49764983
subprocess.run(cmd, check=True)
49774984
return True

bindings/python/examples/11_vector_index_build.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1351,10 +1351,16 @@ def run_in_docker(args) -> bool:
13511351

13521352
docker = shutil.which("docker")
13531353
if not docker:
1354-
raise RuntimeError("docker not found in PATH")
1354+
return False
13551355

13561356
repo_root = Path(__file__).resolve().parents[3]
1357-
user_spec = f"{os.getuid()}:{os.getgid()}"
1357+
host_uid = os.getuid() if hasattr(os, "getuid") else None
1358+
host_gid = os.getgid() if hasattr(os, "getgid") else None
1359+
user_spec = (
1360+
f"{host_uid}:{host_gid}"
1361+
if host_uid is not None and host_gid is not None
1362+
else None
1363+
)
13581364
workspace_mount_src = str(repo_root)
13591365
workspace_mount_dst = "/workspace"
13601366
work_dir = "/workspace/bindings/python/examples"
@@ -1425,7 +1431,7 @@ def run_in_docker(args) -> bool:
14251431
f"python -u 11_vector_index_build.py {' '.join(filtered_args)}",
14261432
]
14271433
)
1428-
run_user_args = ["-u", user_spec]
1434+
run_user_args = ["-u", user_spec] if user_spec is not None else []
14291435
elif args.backend == "faiss":
14301436
inner_cmd = " && ".join(
14311437
[
@@ -1437,7 +1443,7 @@ def run_in_docker(args) -> bool:
14371443
f"python -u 11_vector_index_build.py {' '.join(filtered_args)}",
14381444
]
14391445
)
1440-
run_user_args = ["-u", user_spec]
1446+
run_user_args = ["-u", user_spec] if user_spec is not None else []
14411447
elif args.backend == "lancedb":
14421448
inner_cmd = " && ".join(
14431449
[
@@ -1449,7 +1455,7 @@ def run_in_docker(args) -> bool:
14491455
f"python -u 11_vector_index_build.py {' '.join(filtered_args)}",
14501456
]
14511457
)
1452-
run_user_args = ["-u", user_spec]
1458+
run_user_args = ["-u", user_spec] if user_spec is not None else []
14531459
elif args.backend == "qdrant":
14541460
inner_cmd = " && ".join(
14551461
[
@@ -1490,6 +1496,8 @@ def run_in_docker(args) -> bool:
14901496
)
14911497
run_user_args = []
14921498
else:
1499+
if host_uid is None or host_gid is None:
1500+
return False
14931501
bench_user = "benchusr"
14941502
user_inner_cmd = " && ".join(
14951503
[

bindings/python/examples/12_vector_search.py

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1667,10 +1667,16 @@ def run_in_docker(args) -> bool:
16671667

16681668
docker = shutil.which("docker")
16691669
if not docker:
1670-
raise RuntimeError("docker not found in PATH")
1670+
return False
16711671

16721672
repo_root = Path(__file__).resolve().parents[3]
1673-
user_spec = f"{os.getuid()}:{os.getgid()}"
1673+
host_uid = os.getuid() if hasattr(os, "getuid") else None
1674+
host_gid = os.getgid() if hasattr(os, "getgid") else None
1675+
user_spec = (
1676+
f"{host_uid}:{host_gid}"
1677+
if host_uid is not None and host_gid is not None
1678+
else None
1679+
)
16741680
workspace_mount_src = str(repo_root)
16751681
workspace_mount_dst = "/workspace"
16761682
work_dir = "/workspace/bindings/python/examples"
@@ -1738,7 +1744,7 @@ def run_in_docker(args) -> bool:
17381744
f"python -u 12_vector_search.py {' '.join(filtered_args)}",
17391745
]
17401746
)
1741-
run_user_args = ["-u", user_spec]
1747+
run_user_args = ["-u", user_spec] if user_spec is not None else []
17421748
elif args.backend == "faiss":
17431749
inner_cmd = " && ".join(
17441750
[
@@ -1750,7 +1756,7 @@ def run_in_docker(args) -> bool:
17501756
f"python -u 12_vector_search.py {' '.join(filtered_args)}",
17511757
]
17521758
)
1753-
run_user_args = ["-u", user_spec]
1759+
run_user_args = ["-u", user_spec] if user_spec is not None else []
17541760
elif args.backend == "lancedb":
17551761
inner_cmd = " && ".join(
17561762
[
@@ -1762,7 +1768,7 @@ def run_in_docker(args) -> bool:
17621768
f"python -u 12_vector_search.py {' '.join(filtered_args)}",
17631769
]
17641770
)
1765-
run_user_args = ["-u", user_spec]
1771+
run_user_args = ["-u", user_spec] if user_spec is not None else []
17661772
elif args.backend == "bruteforce":
17671773
inner_cmd = " && ".join(
17681774
[
@@ -1774,7 +1780,7 @@ def run_in_docker(args) -> bool:
17741780
f"python -u 12_vector_search.py {' '.join(filtered_args)}",
17751781
]
17761782
)
1777-
run_user_args = ["-u", user_spec]
1783+
run_user_args = ["-u", user_spec] if user_spec is not None else []
17781784
elif args.backend == "qdrant":
17791785
inner_cmd = " && ".join(
17801786
[
@@ -1815,6 +1821,8 @@ def run_in_docker(args) -> bool:
18151821
)
18161822
run_user_args = []
18171823
else:
1824+
if host_uid is None or host_gid is None:
1825+
return False
18181826
user_inner_cmd = " && ".join(
18191827
[
18201828
"python3 -m venv /tmp/bench-venv",

0 commit comments

Comments
 (0)