Skip to content

Commit e739721

Browse files
committed
Refactor vector parameter tests to use SQL commands for schema creation
- Updated test cases in `test_vector_params_verification.py` to replace schema creation methods with SQL commands for creating vertex types and properties.
- Modified test cases in `test_vector_sql.py` to utilize SQL commands for creating vertex types and properties, ensuring consistency across tests.
- Improved index verification by querying the schema directly via SQL instead of relying on schema methods.
1 parent 2816b03 commit e739721

65 files changed

Lines changed: 33591 additions & 4009 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/test-python-bindings.yml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ jobs:
126126
- '3.11'
127127
- '3.12'
128128
- '3.13'
129-
# - '3.14' # Temporarily disabled to reduce wheel storage usage
129+
- '3.14'
130130
# Temporarily limit to four platforms (skip macOS x86_64, Windows ARM64)
131131
# platform: ['linux/amd64', 'linux/arm64', 'darwin/amd64', 'darwin/arm64', 'windows/amd64', 'windows/arm64']
132132
platform: ['linux/amd64', 'linux/arm64', 'darwin/arm64', 'windows/amd64']
@@ -433,7 +433,6 @@ jobs:
433433
echo "**Package**: arcadedb-embedded" >> $GITHUB_STEP_SUMMARY
434434
echo "" >> $GITHUB_STEP_SUMMARY
435435
echo "ℹ️ **Note**: Some platform/Python combinations are excluded from testing:" >> $GITHUB_STEP_SUMMARY
436-
echo "- Python 3.14 is temporarily disabled to reduce wheel storage usage" >> $GITHUB_STEP_SUMMARY
437436
echo "- Windows ARM64 (no GitHub-hosted runners available)" >> $GITHUB_STEP_SUMMARY
438437
echo "- macOS x86_64 (temporarily disabled)" >> $GITHUB_STEP_SUMMARY
439438
else

.github/workflows/test-python-examples.yml

Lines changed: 98 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ on:
2525
description: "Glob pattern(s) for examples to run (space-separated, relative to bindings/python/examples)."
2626
required: false
2727
type: string
28-
default: "0[1-7]_*.py"
28+
default: "0[1-9]_*.py 1[0-6]_*.py"
2929
build-version:
3030
description: "Override package version (PEP 440) for build.sh"
3131
required: false
@@ -37,10 +37,10 @@ on:
3737
examples:
3838
description: "Glob pattern(s) for examples to run (space-separated, relative to bindings/python/examples)."
3939
required: false
40-
default: "0[1-7]_*.py"
40+
default: "0[1-9]_*.py 1[0-6]_*.py"
4141

4242
env:
43-
EXAMPLES: ${{ inputs.examples || '0[1-7]_*.py' }}
43+
EXAMPLES: ${{ inputs.examples || '0[1-9]_*.py 1[0-6]_*.py' }}
4444

4545
permissions:
4646
contents: read
@@ -135,7 +135,7 @@ jobs:
135135
- '3.11'
136136
- '3.12'
137137
- '3.13'
138-
# - '3.14' # Temporarily disabled to reduce wheel storage usage
138+
- '3.14'
139139
# Temporarily limit to four platforms (skip macOS x86_64, Windows ARM64)
140140
# platform: ['linux/amd64', 'linux/arm64', 'darwin/amd64', 'darwin/arm64', 'windows/amd64', 'windows/arm64']
141141
platform: ['linux/amd64', 'linux/arm64', 'darwin/arm64', 'windows/amd64']
@@ -247,20 +247,15 @@ jobs:
247247
python3 download_data.py movielens-small
248248
fi
249249
250-
if echo "$examples" | grep -qE '(^|[[:space:]])(08_stackoverflow_tables_oltp|09_stackoverflow_tables_olap|10_stackoverflow_graph_oltp|11_stackoverflow_graph_olap|14_stackoverflow_hybrid_queries)\.py([[:space:]]|$)'; then
251-
echo "📥 Downloading Stack Overflow Small dataset..."
252-
python3 download_data.py stackoverflow-small --no-vectors
250+
if echo "$examples" | grep -qE '(^|[[:space:]])(07_stackoverflow_tables_oltp|08_stackoverflow_tables_olap|09_stackoverflow_graph_oltp|10_stackoverflow_graph_olap|13_stackoverflow_hybrid_queries)\.py([[:space:]]|$)'; then
251+
echo "📥 Downloading Stack Overflow Tiny dataset (no vectors)..."
252+
python3 download_data.py stackoverflow-tiny --no-vectors
253253
fi
254254
255-
- name: Download datasets
256-
shell: bash
257-
run: |
258-
uv pip install --system tqdm py7zr lxml
259-
cd bindings/python/examples
260-
echo "📥 Downloading MovieLens Small dataset..."
261-
python3 download_data.py movielens-small
262-
echo "📥 Downloading Stack Overflow Small dataset..."
263-
python3 download_data.py stackoverflow-small --no-vectors
255+
if echo "$examples" | grep -qE '(^|[[:space:]])(11_vector_index_build|12_vector_search)\.py([[:space:]]|$)'; then
256+
echo "📥 Downloading Stack Overflow Tiny dataset (with vectors)..."
257+
python3 download_data.py stackoverflow-tiny
258+
fi
264259
265260
- name: Install timeout command (macOS only)
266261
if: matrix.platform == 'darwin/amd64' || matrix.platform == 'darwin/arm64'
@@ -279,9 +274,6 @@ jobs:
279274
- name: Run all examples
280275
id: run_examples
281276
shell: bash
282-
env:
283-
# Increase JVM heap for large CSV imports (example 04)
284-
ARCADEDB_JVM_ARGS: "-Xmx8g -Xms8g"
285277
run: |
286278
cd bindings/python/examples
287279
@@ -323,21 +315,92 @@ jobs:
323315
example_args="--dataset movielens-small --export"
324316
example_name="$example (movielens-small dataset with export)"
325317
timeout_duration=900 # 15 minutes
318+
example_jvm_args="-Xmx8g -Xms8g"
326319
;;
327320
"05_csv_import_graph.py")
328-
example_args="--dataset movielens-small --method java --import-jsonl ./exports/movielens_small_db.jsonl.tgz --export"
329-
example_name="$example (movielens-small dataset, embedded java method, import/export)"
321+
example_args="--dataset movielens-small --method sql --import-jsonl ./exports/movielens_small_db.jsonl.tgz --export"
322+
example_name="$example (movielens-small dataset, embedded sql method, import/export)"
330323
timeout_duration=900 # 15 minutes
324+
example_jvm_args=""
331325
;;
332326
"06_vector_search_recommendations.py")
333327
example_args="--import-jsonl ./exports/movielens_graph_small_db.jsonl.tgz"
334328
example_name="$example (vector search, import from JSONL)"
335329
timeout_duration=900 # 15 minutes
330+
example_jvm_args=""
331+
;;
332+
"07_stackoverflow_tables_oltp.py")
333+
example_args="--dataset stackoverflow-tiny --db arcadedb_sql --threads 1 --transactions 1000 --batch-size 500 --mem-limit 2g --run-label ci07"
334+
example_name="$example (stackoverflow-tiny, arcadedb_sql, minimal oltp)"
335+
timeout_duration=900
336+
example_jvm_args=""
337+
;;
338+
"08_stackoverflow_tables_olap.py")
339+
example_args="--dataset stackoverflow-tiny --db arcadedb_sql --threads 1 --batch-size 500 --query-runs 1 --query-order fixed --mem-limit 2g --run-label ci08"
340+
example_name="$example (stackoverflow-tiny, arcadedb_sql, minimal olap)"
341+
timeout_duration=900
342+
example_jvm_args=""
343+
;;
344+
"09_stackoverflow_graph_oltp.py")
345+
example_args="--dataset stackoverflow-tiny --db arcadedb_cypher --threads 1 --transactions 1000 --batch-size 500 --mem-limit 2g --run-label ci09"
346+
example_name="$example (stackoverflow-tiny graph oltp, arcadedb cypher, minimal)"
347+
timeout_duration=900
348+
example_jvm_args=""
349+
;;
350+
"10_stackoverflow_graph_olap.py")
351+
example_args="--dataset stackoverflow-tiny --db arcadedb_cypher --threads 1 --batch-size 500 --query-runs 1 --query-order fixed --mem-limit 2g --run-label ci10"
352+
example_name="$example (stackoverflow-tiny graph olap, arcadedb cypher, minimal)"
353+
timeout_duration=900
354+
example_jvm_args=""
355+
;;
356+
"11_vector_index_build.py")
357+
example_args="--backend arcadedb_sql --dataset stackoverflow-tiny --threads 1 --mem-limit 2g --batch-size 500 --max-connections 16 --beam-width 100 --quantization NONE --run-label ci11_arcadedb_sql"
358+
example_name="$example (vector build, arcadedb_sql backend, minimal)"
359+
timeout_duration=1200
360+
example_jvm_args=""
361+
;;
362+
"12_vector_search.py")
363+
db_path=$(find ./my_test_databases -maxdepth 1 -type d -name 'backend=arcadedb_sql_dataset=stackoverflow-tiny_*run=ci11_arcadedb_sql' | head -n 1)
364+
if [ -z "$db_path" ]; then
365+
echo "❌ Missing build output for 12_vector_search.py. Ensure 11_vector_index_build.py ran first." | tee -a $results_file
366+
failed=$((failed + 1))
367+
echo ""
368+
continue
369+
fi
370+
example_args="--backend arcadedb_sql --dataset stackoverflow-tiny --db-path $db_path --overquery-factors 1 --k 10 --query-limit 100 --query-runs 1 --query-order fixed --threads 1 --mem-limit 2g --run-label ci12_arcadedb_sql"
371+
example_name="$example (vector search, arcadedb_sql backend, minimal)"
372+
timeout_duration=1200
373+
example_jvm_args=""
374+
;;
375+
"13_stackoverflow_hybrid_queries.py")
376+
example_args="--dataset stackoverflow-tiny --batch-size 500 --encode-batch-size 64 --top-k 5 --candidate-limit 100 --min-reputation 100 --infer-sample-limit 5000 --run-label ci13"
377+
example_name="$example (hybrid pipeline, tiny + minimal limits)"
378+
timeout_duration=1800
379+
example_jvm_args=""
380+
;;
381+
"14_lifecycle_timing.py")
382+
example_args="--runs 1 --table-records 2000 --graph-vertices 500 --vector-records 500 --vector-dimensions 32 --query-runs 10 --jvm-heap 4g"
383+
example_name="$example (lifecycle benchmark, minimal)"
384+
timeout_duration=900
385+
example_jvm_args=""
386+
;;
387+
"15_import_database_vs_transactional_table_ingest.py")
388+
example_args="--rows-per-table 2000 --tables 2 --columns 6 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 4g --work-dir ./my_test_databases/import_vs_txn_dummy_ci"
389+
example_name="$example (table ingest benchmark, minimal)"
390+
timeout_duration=1200
391+
example_jvm_args=""
392+
;;
393+
"16_import_database_vs_transactional_graph_ingest.py")
394+
example_args="--vertices 2000 --edges 4000 --vertex-int-props 3 --vertex-str-props 2 --edge-int-props 1 --edge-str-props 1 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 4g --work-dir ./my_test_databases/import_vs_txn_graph_ci"
395+
example_name="$example (graph ingest benchmark, minimal)"
396+
timeout_duration=1200
397+
example_jvm_args=""
336398
;;
337399
*)
338400
example_args=""
339401
example_name="$example"
340402
timeout_duration=900 # 15 minutes default
403+
example_jvm_args=""
341404
;;
342405
esac
343406
@@ -348,11 +411,23 @@ jobs:
348411
echo "----------------------------------------"
349412
350413
# Run the example with appropriate parameters
351-
if $TIMEOUT_CMD $timeout_duration python "$example" $example_args > "$log_file" 2>&1; then
414+
if [ -n "$example_jvm_args" ]; then
415+
if ARCADEDB_JVM_ARGS="$example_jvm_args" JAVA_TOOL_OPTIONS="" _JAVA_OPTIONS="" $TIMEOUT_CMD $timeout_duration python "$example" $example_args > "$log_file" 2>&1; then
416+
exit_code=0
417+
else
418+
exit_code=$?
419+
fi
420+
else
421+
if ARCADEDB_JVM_ARGS="" JAVA_TOOL_OPTIONS="" _JAVA_OPTIONS="" $TIMEOUT_CMD $timeout_duration python "$example" $example_args > "$log_file" 2>&1; then
422+
exit_code=0
423+
else
424+
exit_code=$?
425+
fi
426+
fi
427+
if [ $exit_code -eq 0 ]; then
352428
echo "✅ PASSED: $example_name" | tee -a $results_file
353429
passed=$((passed + 1))
354430
else
355-
exit_code=$?
356431
if [ $exit_code -eq 124 ]; then
357432
echo "⏱️ TIMEOUT: $example_name (exceeded $((timeout_duration/60)) minutes)" | tee -a $results_file
358433
failed=$((failed + 1))

bindings/python/Dockerfile.build

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ ARG TARGET_PLATFORM=linux-x64
1616
# When set to 1, prefer jars provided in bindings/python/local-jars/lib from the build context.
1717
# If no local jars are present, the build fails fast to avoid silently falling back.
1818
ARG USE_LOCAL_JARS=0
19+
ARG LOCAL_JARS_HASH=none
1920

2021
# Stage 1: Use prebuilt ArcadeDB image to obtain compiled JARs
2122
# JARs are filtered based on jar_exclusions.txt in later stages
@@ -31,6 +32,7 @@ RUN yum -y install findutils binutils && yum clean all
3132

3233
ARG TARGET_PLATFORM
3334
ARG USE_LOCAL_JARS
35+
ARG LOCAL_JARS_HASH
3436

3537
WORKDIR /build
3638

@@ -41,6 +43,9 @@ COPY --from=java-builder /home/arcadedb/lib /build/upstream-jars/
4143
# Optionally bring in locally built jars from the repo (bindings/python/local-jars/lib)
4244
COPY bindings/python/local-jars/lib/ /build/local-jars/
4345

46+
# Make local-jar content changes part of Docker's cache key for this stage.
47+
RUN echo "LOCAL_JARS_HASH=${LOCAL_JARS_HASH}"
48+
4449
# Select jar source: local when requested and available; otherwise fall back to upstream image
4550
RUN if [ "$USE_LOCAL_JARS" = "1" ]; then \
4651
if [ -d /build/local-jars ] && [ "$(ls -1 /build/local-jars | wc -l)" -gt 0 ]; then \
@@ -73,27 +78,41 @@ RUN echo "🗑️ Removing excluded JARs..." && \
7378
# We exclude jboss/wildfly JARs because they have broken module descriptors that fail analysis.
7479
# We also do NOT provide a classpath, forcing jdeps to ignore all missing dependencies (intra-jar or external).
7580
RUN echo "🔍 Analyzing JARs with jdeps..." && \
76-
DETECTED_MODULES=$(find /build/jars -name "*.jar" | grep -v "jboss" | grep -v "wildfly" | grep -v "smallrye" | xargs jdeps --print-module-deps --ignore-missing-deps --multi-release 25 | grep -v "Warning" | tr ',' '\n' | grep -v "Warning" | grep -v ":" | grep -v "/" | sort -u | paste -sd "," -) && \
77-
REQUIRED_MODULES="${DETECTED_MODULES},jdk.zipfs,jdk.unsupported" && \
81+
JMODS_DIR="${JAVA_HOME}/jmods" && \
82+
if [ ! -d "$JMODS_DIR" ]; then JMODS_DIR="${JAVA_HOME}/lib/jmods"; fi && \
83+
DETECTED_MODULES=$(find /build/jars -name "*.jar" | grep -v "jboss" | grep -v "wildfly" | grep -v "smallrye" | xargs jdeps --print-module-deps --ignore-missing-deps --multi-release 25 | tr ',' '\n' | sed 's/^ *//;s/ *$//' | grep -E '^[a-zA-Z0-9_.]+$' | sort -u | paste -sd "," -) && \
84+
if [ -d "$JMODS_DIR" ]; then \
85+
AVAILABLE_MODULES=$(find "$JMODS_DIR" -name "*.jmod" -printf "%f\n" | sed 's/\.jmod$//' | sort -u | paste -sd "|" -) ; \
86+
FILTERED_MODULES=$(echo "${DETECTED_MODULES}" | tr ',' '\n' | sed '/^$/d' | grep -E "^(${AVAILABLE_MODULES})$" | sort -u | paste -sd "," -) ; \
87+
else \
88+
FILTERED_MODULES="${DETECTED_MODULES}" ; \
89+
fi && \
90+
if [ -n "${FILTERED_MODULES}" ]; then \
91+
REQUIRED_MODULES="${FILTERED_MODULES},java.se,jdk.management,jdk.zipfs,jdk.unsupported,jdk.incubator.vector" ; \
92+
else \
93+
REQUIRED_MODULES="java.se,jdk.management,jdk.zipfs,jdk.unsupported,jdk.incubator.vector" ; \
94+
fi && \
7895
echo "🔨 Building minimal JRE for platform: ${TARGET_PLATFORM}" && \
79-
echo "📦 Detected modules: ${DETECTED_MODULES}" && \
96+
echo "📦 Detected modules (raw): ${DETECTED_MODULES}" && \
97+
echo "📦 Detected modules (filtered): ${FILTERED_MODULES}" && \
8098
echo "📦 Final modules list: ${REQUIRED_MODULES}" && \
8199
echo "📦 Required modules:" && \
82100
echo "$REQUIRED_MODULES" | tr ',' '\n' | sed 's/^/ - /' && \
83-
JMODS_DIR="${JAVA_HOME}/jmods" && \
84-
if [ ! -d "$JMODS_DIR" ]; then JMODS_DIR="${JAVA_HOME}/lib/jmods"; fi && \
85101
if [ -d "$JMODS_DIR" ]; then \
86102
echo "" ; \
87103
echo "🔨 Running jlink..." ; \
88-
jlink \
104+
if ! jlink \
89105
--module-path "$JMODS_DIR" \
90106
--add-modules "${REQUIRED_MODULES}" \
91107
--ignore-signing-information \
92108
--strip-debug \
93109
--no-man-pages \
94110
--no-header-files \
95111
--compress zip-9 \
96-
--output /build/jre ; \
112+
--output /build/jre ; then \
113+
echo "❌ jlink failed" ; \
114+
exit 1 ; \
115+
fi ; \
97116
echo "" ; \
98117
echo "✅ JRE build complete!" ; \
99118
else \

0 commit comments

Comments
(0)