Skip to content

Update Python bindings, docs, and release workflows #258

Update Python bindings, docs, and release workflows

Update Python bindings, docs, and release workflows #258

name: Test Python Examples

# Events that start this workflow.
on:
  # Pushes that touch the Python bindings or this workflow file
  push:
    paths:
      - "bindings/python/**"
      - ".github/workflows/test-python-examples.yml"

  # Pull requests that touch the Python bindings or this workflow file
  pull_request:
    paths:
      - "bindings/python/**"
      - ".github/workflows/test-python-examples.yml"

  # After the "Release" workflow has completed
  workflow_run:
    workflows:
      - "Release"
    types:
      - completed

  # Reusable entry point for other workflows (e.g., the release workflow)
  workflow_call:
    inputs:
      examples:
        description: "Glob pattern(s) for examples to run (space-separated, relative to bindings/python/examples)."
        required: false
        type: string
        default: "0[1-9]_*.py 1[0-6]_*.py"
      build-version:
        description: "Override package version (PEP 440) for build.sh"
        required: false
        type: string

  # Manual trigger
  workflow_dispatch:
    inputs:
      examples:
        description: "Glob pattern(s) for examples to run (space-separated, relative to bindings/python/examples)."
        required: false
        default: "0[1-9]_*.py 1[0-6]_*.py"

# Workflow-wide environment: the example glob(s) come from the `examples`
# input when one is supplied, otherwise the same default pattern is used.
env:
  EXAMPLES: ${{ inputs.examples || '0[1-9]_*.py 1[0-6]_*.py' }}

# Token scope: read-only access to repository contents.
permissions:
  contents: read

jobs:
# First job: Download ArcadeDB JARs (platform-agnostic)
  # Copies the JARs out of the arcadedata/arcadedb Docker image, removes the
  # ones matched by jar_exclusions.txt, and publishes the rest as the
  # `arcadedb-jars-examples` artifact consumed by the test matrix.
  download-jars:
    name: Download ArcadeDB JARs
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd  # v4.0.0

      - name: Download JARs from ArcadeDB Docker image
        shell: bash
        run: |
          cd bindings/python

          # Detect ArcadeDB version from pom.xml (Docker format: X.Y.Z-SNAPSHOT)
          ARCADEDB_TAG=$(python3 extract_version.py --format=docker)
          echo "📌 ArcadeDB version: $ARCADEDB_TAG"

          # Download JARs from official Docker image
          echo "📦 Downloading JARs from arcadedata/arcadedb:$ARCADEDB_TAG..."

          # Create output directory
          mkdir -p src/arcadedb_embedded/jars

          # Create a temporary container and copy JARs from it
          CONTAINER_ID=$(docker create "arcadedata/arcadedb:$ARCADEDB_TAG")
          docker cp "$CONTAINER_ID:/home/arcadedb/lib/." src/arcadedb_embedded/jars/
          docker rm "$CONTAINER_ID"

          # Count via a glob array instead of `ls | wc -l`, so an empty
          # directory yields 0 rather than an `ls` error.
          shopt -s nullglob
          downloaded_jars=(src/arcadedb_embedded/jars/*.jar)
          shopt -u nullglob
          echo "✅ Downloaded ${#downloaded_jars[@]} JARs"

      - name: Remove excluded JARs
        shell: bash
        run: |
          cd bindings/python
          JARS_DIR="src/arcadedb_embedded/jars"
          EXCLUSIONS_FILE="jar_exclusions.txt"

          if [[ -f "$EXCLUSIONS_FILE" ]]; then
            echo "🗑️ Removing excluded JARs from jar_exclusions.txt..."
            EXCLUSION_COUNT=0

            # `|| [[ -n "$pattern" ]]` keeps a final line that lacks a newline
            while IFS= read -r pattern || [[ -n "$pattern" ]]; do
              # Skip empty lines and comments
              if [[ -n "$pattern" ]] && [[ ! "$pattern" =~ ^# ]]; then
                echo " Processing pattern: $pattern"
                # Remove matching JARs ($pattern is left unquoted on purpose
                # so the shell expands it as a glob)
                for jar in "$JARS_DIR"/$pattern; do
                  if [[ -f "$jar" ]]; then
                    rm -f "$jar"
                    echo " - Removed: $(basename "$jar")"
                    EXCLUSION_COUNT=$((EXCLUSION_COUNT + 1))
                  fi
                done
              fi
            done < "$EXCLUSIONS_FILE"

            # With `bash -eo pipefail`, `VAR=$(ls *.jar | wc -l)` aborts the
            # step when nothing matches; a nullglob array count cannot fail.
            shopt -s nullglob
            remaining_jars=("$JARS_DIR"/*.jar)
            shopt -u nullglob
            JAR_COUNT_AFTER=${#remaining_jars[@]}
            echo "✅ Removed $EXCLUSION_COUNT JAR(s), $JAR_COUNT_AFTER remaining"
          else
            echo "⚠️ No jar_exclusions.txt found, skipping exclusions"
          fi

      - name: Upload filtered JARs as artifact
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f  # v7.0.0
        with:
          name: arcadedb-jars-examples
          path: bindings/python/src/arcadedb_embedded/jars/*.jar
          # Fail here rather than in every downstream matrix job when no JAR
          # was produced.
          if-no-files-found: error
          retention-days: 1
# Second job: Test examples on each platform
  # For every (platform, Python version) pair: build the arcadedb-embedded
  # wheel from the JARs artifact, install it, download the datasets needed by
  # the selected examples, run each example under a timeout, then publish a
  # step summary, the logs and (on failure) the example databases.
  test-examples:
    name: Test Python Examples (${{ matrix.platform }}, Python ${{ matrix.python-version }})
    runs-on: ${{ matrix.runs-on }}
    needs: download-jars
    env:
      BUILD_VERSION: ${{ inputs.build-version }}
    strategy:
      fail-fast: false
      matrix:
        python-version:
          - '3.10'
          - '3.11'
          - '3.12'
          - '3.13'
          - '3.14'
        # Temporarily limit to four platforms (skip macOS x86_64, Windows ARM64)
        # platform: ['linux/amd64', 'linux/arm64', 'darwin/amd64', 'darwin/arm64', 'windows/amd64', 'windows/arm64']
        platform: ['linux/amd64', 'linux/arm64', 'darwin/arm64', 'windows/amd64']
        include:
          - platform: linux/amd64
            runs-on: ubuntu-24.04
          - platform: linux/arm64
            runs-on: ubuntu-24.04-arm
          - platform: darwin/arm64
            runs-on: macos-15
          - platform: windows/amd64
            runs-on: windows-2025
          # - platform: darwin/amd64
          #   runs-on: macos-15-intel
          # - platform: windows/arm64
          #   runs-on: windows-11-arm
          # macOS x86_64 and Windows ARM64 temporarily disabled
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Download JARs artifact
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
        with:
          name: arcadedb-jars-examples
          path: bindings/python/src/arcadedb_embedded/jars

      - name: Set up Java (for native builds on macOS/Windows)
        if: matrix.platform != 'linux/amd64' && matrix.platform != 'linux/arm64'
        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654  # v5.2.0
        with:
          distribution: 'corretto'
          java-version: '25'

      - name: Set up Docker Buildx (Linux only)
        if: matrix.platform == 'linux/amd64' || matrix.platform == 'linux/arm64'
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd  # v4.0.0

      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup UV package manager
        shell: bash
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          # Make uv available to later steps and to the rest of this one
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          export PATH="$HOME/.local/bin:$PATH"
          uv --version

      - name: Install Python build dependencies
        shell: bash
        run: |
          uv pip install --system build wheel setuptools

      # Windows currently enabled, no symlink needed
      # - name: Create python3 symlink (Windows only)
      #   if: matrix.platform == 'windows/amd64' || matrix.platform == 'windows/arm64'
      #   shell: bash
      #   run: |
      #     PYTHON_DIR=$(dirname "$(which python)")
      #     ln -s "$PYTHON_DIR/python.exe" "$PYTHON_DIR/python3.exe" || true
      #     python3 --version

      - name: Build arcadedb-embedded (${{ matrix.platform }})
        shell: bash
        run: |
          cd bindings/python
          echo "🔨 Building arcadedb-embedded for ${{ matrix.platform }} with Python ${{ matrix.python-version }}..."
          ./build.sh ${{ matrix.platform }} ${{ matrix.python-version }}

      # Note: Java is NOT required - arcadedb-embedded has bundled JRE!
      - name: Install ArcadeDB Python bindings
        shell: bash
        run: |
          cd bindings/python
          uv pip install --system dist/*embed*.whl

      - name: Install example dependencies
        shell: bash
        run: |
          # Install dependencies needed by examples
          # PyTorch is required by sentence-transformers, even on macOS
          # For Python 3.13+, wheels might only be on PyTorch official index
          if [[ "$OSTYPE" == "darwin"* ]]; then
            # macOS: Install CPU-only PyTorch (supports MPS acceleration)
            uv pip install --system torch --index-url https://download.pytorch.org/whl/cpu
            uv pip install --system "numpy<2.0" requests sentence-transformers
          else
            # Linux & Windows: Install CPU-only PyTorch to save space (avoid CUDA)
            uv pip install --system torch --index-url https://download.pytorch.org/whl/cpu
            uv pip install --system numpy requests sentence-transformers
          fi

      - name: Download datasets
        shell: bash
        run: |
          uv pip install --system tqdm py7zr lxml
          cd bindings/python/examples

          # Determine which examples are selected
          examples=$(ls $EXAMPLES 2>/dev/null || true)

          if echo "$examples" | grep -qE '(^|[[:space:]])(04_csv_import_documents|05_csv_import_graph|06_vector_search_recommendations)\.py([[:space:]]|$)'; then
            echo "📥 Downloading MovieLens Small dataset..."
            python3 download_data.py movielens-small
          fi

          if echo "$examples" | grep -qE '(^|[[:space:]])(07_stackoverflow_tables_oltp|08_stackoverflow_tables_olap|09_stackoverflow_graph_oltp|10_stackoverflow_graph_olap|13_stackoverflow_hybrid_queries)\.py([[:space:]]|$)'; then
            echo "📥 Downloading Stack Overflow Tiny dataset (no vectors)..."
            python3 download_data.py stackoverflow-tiny --no-vectors
          fi

          if echo "$examples" | grep -qE '(^|[[:space:]])(11_vector_index_build|12_vector_search)\.py([[:space:]]|$)'; then
            echo "📥 Downloading Stack Overflow Tiny dataset (with vectors)..."
            python3 download_data.py stackoverflow-tiny
          fi

      - name: Install timeout command (macOS only)
        if: matrix.platform == 'darwin/amd64' || matrix.platform == 'darwin/arm64'
        shell: bash
        run: |
          # macOS doesn't have timeout command by default, use coreutils
          brew install coreutils

      - name: Set UTF-8 encoding (Windows only)
        if: matrix.platform == 'windows/amd64' || matrix.platform == 'windows/arm64'
        shell: bash
        run: |
          echo "PYTHONIOENCODING=utf-8" >> $GITHUB_ENV
          echo "PYTHONUTF8=1" >> $GITHUB_ENV

      - name: Run all examples
        id: run_examples
        shell: bash
        run: |
          cd bindings/python/examples
          echo "🚀 Running Python Examples..."
          echo ""

          # Initialize counters
          total=0
          passed=0
          failed=0
          skipped=0

          # Create (or truncate) results file
          results_file="example-results.txt"
          : > "$results_file"

          # Select example files by pattern(s) in EXAMPLES (space-separated globs).
          # `ls` exits non-zero as soon as one glob has no match; because this
          # step runs under `bash -eo pipefail`, the failed assignment would
          # abort the script before the check below, hence `|| true`.
          examples=$(ls $EXAMPLES 2>/dev/null | sort || true)
          if [ -z "$examples" ]; then
            echo "❌ No example files found!"
            exit 1
          fi

          # Detect timeout command (macOS uses gtimeout, Linux uses timeout)
          if command -v gtimeout &> /dev/null; then
            TIMEOUT_CMD="gtimeout"
          else
            TIMEOUT_CMD="timeout"
          fi

          # Run each example
          for example in $examples; do
            total=$((total + 1))

            # Set example-specific parameters and timeout
            case "$example" in
              "04_csv_import_documents.py")
                example_args="--dataset movielens-small --export"
                example_name="$example (movielens-small dataset with export)"
                timeout_duration=900 # 15 minutes
                example_jvm_args="-Xmx8g -Xms8g"
                ;;
              "05_csv_import_graph.py")
                example_args="--dataset movielens-small --method sql --import-jsonl ./exports/movielens_small_db.jsonl.tgz --export"
                example_name="$example (movielens-small dataset, embedded sql method, import/export)"
                timeout_duration=900 # 15 minutes
                example_jvm_args=""
                ;;
              "06_vector_search_recommendations.py")
                example_args="--import-jsonl ./exports/movielens_graph_small_db.jsonl.tgz"
                example_name="$example (vector search, import from JSONL)"
                timeout_duration=900 # 15 minutes
                example_jvm_args=""
                ;;
              "07_stackoverflow_tables_oltp.py")
                example_args="--dataset stackoverflow-tiny --db arcadedb_sql --threads 1 --transactions 1000 --batch-size 500 --mem-limit 2g --run-label ci07"
                example_name="$example (stackoverflow-tiny, arcadedb_sql, minimal oltp)"
                timeout_duration=900
                example_jvm_args=""
                ;;
              "08_stackoverflow_tables_olap.py")
                example_args="--dataset stackoverflow-tiny --db arcadedb_sql --threads 1 --batch-size 500 --query-runs 1 --query-order fixed --mem-limit 2g --run-label ci08"
                example_name="$example (stackoverflow-tiny, arcadedb_sql, minimal olap)"
                timeout_duration=900
                example_jvm_args=""
                ;;
              "09_stackoverflow_graph_oltp.py")
                example_args="--dataset stackoverflow-tiny --db arcadedb_cypher --threads 1 --transactions 1000 --batch-size 500 --mem-limit 2g --run-label ci09"
                example_name="$example (stackoverflow-tiny graph oltp, arcadedb cypher, minimal)"
                timeout_duration=900
                example_jvm_args=""
                ;;
              "10_stackoverflow_graph_olap.py")
                example_args="--dataset stackoverflow-tiny --db arcadedb_cypher --threads 1 --batch-size 500 --query-runs 1 --query-order fixed --mem-limit 2g --run-label ci10"
                example_name="$example (stackoverflow-tiny graph olap, arcadedb cypher, minimal)"
                timeout_duration=900
                example_jvm_args=""
                ;;
              "11_vector_index_build.py")
                example_args="--backend arcadedb_sql --dataset stackoverflow-tiny --threads 1 --mem-limit 2g --batch-size 500 --max-connections 16 --beam-width 100 --quantization NONE --run-label ci11_arcadedb_sql"
                example_name="$example (vector build, arcadedb_sql backend, minimal)"
                timeout_duration=1200
                example_jvm_args=""
                ;;
              "12_vector_search.py")
                # Reuse the database written by 11_vector_index_build.py.
                # `|| true`: a missing ./my_test_databases makes `find` fail,
                # which under `-eo pipefail` would abort the step before the
                # explicit error below is reported.
                db_path=$(find ./my_test_databases -maxdepth 1 -type d -name 'backend=arcadedb_sql_dataset=stackoverflow-tiny_*run=ci11_arcadedb_sql' | head -n 1 || true)
                if [ -z "$db_path" ]; then
                  echo "❌ Missing build output for 12_vector_search.py. Ensure 11_vector_index_build.py ran first." | tee -a "$results_file"
                  failed=$((failed + 1))
                  echo ""
                  continue
                fi
                example_args="--backend arcadedb_sql --dataset stackoverflow-tiny --db-path $db_path --overquery-factors 1 --k 10 --query-limit 100 --query-runs 1 --query-order fixed --threads 1 --mem-limit 2g --run-label ci12_arcadedb_sql"
                example_name="$example (vector search, arcadedb_sql backend, minimal)"
                timeout_duration=1200
                example_jvm_args=""
                ;;
              "13_stackoverflow_hybrid_queries.py")
                example_args="--dataset stackoverflow-tiny --batch-size 500 --encode-batch-size 64 --top-k 5 --candidate-limit 100 --min-reputation 100 --infer-sample-limit 5000 --run-label ci13"
                example_name="$example (hybrid pipeline, tiny + minimal limits)"
                timeout_duration=1800
                example_jvm_args=""
                ;;
              "14_lifecycle_timing.py")
                example_args="--runs 1 --table-records 2000 --graph-vertices 500 --vector-records 500 --vector-dimensions 32 --query-runs 10 --jvm-heap 4g"
                example_name="$example (lifecycle benchmark, minimal)"
                timeout_duration=900
                example_jvm_args=""
                ;;
              "15_import_database_vs_transactional_table_ingest.py")
                example_args="--rows-per-table 2000 --tables 2 --columns 6 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 4g --work-dir ./my_test_databases/import_vs_txn_dummy_ci"
                example_name="$example (table ingest benchmark, minimal)"
                timeout_duration=1200
                example_jvm_args=""
                ;;
              "16_import_database_vs_transactional_graph_ingest.py")
                example_args="--vertices 2000 --edges 4000 --vertex-int-props 3 --vertex-str-props 2 --edge-int-props 1 --edge-str-props 1 --string-size 32 --batch-size 500 --async-parallel 1 --parallel 1 --heap-size 4g --work-dir ./my_test_databases/import_vs_txn_graph_ci"
                example_name="$example (graph ingest benchmark, minimal)"
                timeout_duration=1200
                example_jvm_args=""
                ;;
              *)
                example_args=""
                example_name="$example"
                timeout_duration=900 # 15 minutes default
                example_jvm_args=""
                ;;
            esac

            log_file="${example%.py}.log"
            echo "----------------------------------------"
            echo "📝 Running: $example_name"
            echo "----------------------------------------"

            # Run the example with appropriate parameters. ARCADEDB_JVM_ARGS is
            # the per-example value (possibly empty); JAVA_TOOL_OPTIONS and
            # _JAVA_OPTIONS are always blanked. $example_args is unquoted on
            # purpose so it splits into separate arguments. Running inside
            # `if` keeps a failing example from tripping `set -e`.
            if ARCADEDB_JVM_ARGS="$example_jvm_args" JAVA_TOOL_OPTIONS="" _JAVA_OPTIONS="" $TIMEOUT_CMD $timeout_duration python "$example" $example_args > "$log_file" 2>&1; then
              exit_code=0
            else
              exit_code=$?
            fi

            if [ $exit_code -eq 0 ]; then
              echo "✅ PASSED: $example_name" | tee -a "$results_file"
              passed=$((passed + 1))
            else
              failed=$((failed + 1))
              # Exit status 124 is how the timeout command reports expiry
              if [ $exit_code -eq 124 ]; then
                echo "⏱️ TIMEOUT: $example_name (exceeded $((timeout_duration/60)) minutes)" | tee -a "$results_file"
              else
                echo "❌ FAILED: $example_name (exit code: $exit_code)" | tee -a "$results_file"
              fi
              # Show last 20 lines of error log
              echo "Last 20 lines of output:"
              tail -n 20 "$log_file"
            fi
            echo ""
          done

          # Print summary
          echo "========================================"
          echo "📊 EXAMPLE TEST SUMMARY"
          echo "========================================"
          echo "Total: $total"
          echo "Passed: $passed ✅"
          echo "Failed: $failed ❌"
          echo "Skipped: $skipped ⏭️"
          echo "========================================"
          echo ""
          echo "Examples pattern(s): $EXAMPLES"

          # Output to GitHub Actions
          echo "total=$total" >> $GITHUB_OUTPUT
          echo "passed=$passed" >> $GITHUB_OUTPUT
          echo "failed=$failed" >> $GITHUB_OUTPUT
          echo "skipped=$skipped" >> $GITHUB_OUTPUT

          # Show detailed results
          echo "Detailed Results:"
          cat "$results_file"

          # Save list of examples for summary. This must happen before the
          # failure exit below, otherwise the summary step has no list to
          # show for exactly the runs that failed.
          echo "$examples" > examples-ran.txt

          # Exit with error if any failed
          if [ $failed -gt 0 ]; then
            echo "❌ Some examples failed!"
            exit 1
          else
            echo "✅ All examples passed!"
          fi

      - name: Generate test summary
        if: always()
        shell: bash
        run: |
          cd bindings/python/examples
          echo "## 🎮 Python Examples Test Results (${{ matrix.platform }})" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Counters fall back to 0 when the run step produced no outputs
          total="${{ steps.run_examples.outputs.total || '0' }}"
          passed="${{ steps.run_examples.outputs.passed || '0' }}"
          failed="${{ steps.run_examples.outputs.failed || '0' }}"

          if [ "${{ steps.run_examples.outcome }}" = "success" ]; then
            echo "✅ **Status**: ALL EXAMPLES PASSED ($passed/$total)" >> $GITHUB_STEP_SUMMARY
          else
            echo "❌ **Status**: SOME EXAMPLES FAILED ($passed/$total passed)" >> $GITHUB_STEP_SUMMARY
          fi

          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Metric | Count |" >> $GITHUB_STEP_SUMMARY
          echo "|--------|------:|" >> $GITHUB_STEP_SUMMARY
          echo "| 📝 Total | $total |" >> $GITHUB_STEP_SUMMARY
          echo "| ✅ Passed | $passed |" >> $GITHUB_STEP_SUMMARY
          echo "| ❌ Failed | $failed |" >> $GITHUB_STEP_SUMMARY
          echo "| ⏭️ Skipped | ${{ steps.run_examples.outputs.skipped || '0' }} |" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Add detailed results if available
          if [ -f example-results.txt ]; then
            echo "### Detailed Results" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
            cat example-results.txt >> $GITHUB_STEP_SUMMARY
            echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
          fi

          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Examples Tested" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Pattern(s)**: $EXAMPLES" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          if [ -f examples-ran.txt ]; then
            while IFS= read -r example; do
              [ -n "$example" ] && echo "- **$example**" >> $GITHUB_STEP_SUMMARY
            done < examples-ran.txt
          fi
          echo "" >> $GITHUB_STEP_SUMMARY

      # Artifact names map the platform to a dash-separated form
      # (e.g. linux/amd64 -> linux-amd64).
      - name: Upload example logs
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f  # v7.0.0
        with:
          name: example-logs-${{ matrix.platform == 'linux/amd64' && 'linux-amd64' || matrix.platform == 'linux/arm64' && 'linux-arm64' || matrix.platform == 'darwin/amd64' && 'darwin-amd64' || matrix.platform == 'darwin/arm64' && 'darwin-arm64' || matrix.platform == 'windows/amd64' && 'windows-amd64' || 'windows-arm64' }}-py${{ matrix.python-version }}
          path: |
            bindings/python/examples/*.log
            bindings/python/examples/example-results.txt
          retention-days: 7

      - name: Upload example databases
        if: failure()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f  # v7.0.0
        with:
          name: example-databases-${{ matrix.platform == 'linux/amd64' && 'linux-amd64' || matrix.platform == 'linux/arm64' && 'linux-arm64' || matrix.platform == 'darwin/amd64' && 'darwin-amd64' || matrix.platform == 'darwin/arm64' && 'darwin-arm64' || matrix.platform == 'windows/amd64' && 'windows-amd64' || 'windows-arm64' }}-py${{ matrix.python-version }}
          path: bindings/python/examples/my_test_databases/
          retention-days: 3
# Summary job that checks all platforms
  # Always runs after the test matrix, writes an overall verdict to the step
  # summary, and fails unless the matrix result is exactly "success".
  test-examples-summary:
    name: Examples Test Summary
    needs: test-examples
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Check test results
        shell: bash
        run: |
          matrix_result="${{ needs.test-examples.result }}"

          # Heading plus a blank line, shared by both outcomes
          {
            echo "## 🎯 Overall Examples Test Summary"
            echo ""
          } >> $GITHUB_STEP_SUMMARY

          # Anything other than "success" is reported as a failure
          if [ "$matrix_result" != "success" ]; then
            {
              echo "❌ **Some platforms failed example testing**"
              echo ""
              echo "Please check the individual platform jobs for details."
            } >> $GITHUB_STEP_SUMMARY
            exit 1
          fi

          {
            echo "✅ **All platforms passed example testing!**"
            echo ""
            echo "All examples ran successfully across all enabled platforms."
            echo ""
            echo "**Platforms tested**: linux/amd64, linux/arm64, darwin/arm64, windows/amd64"
          } >> $GITHUB_STEP_SUMMARY