Skip to content

Run performance benchmark tests #47

Run performance benchmark tests

Run performance benchmark tests #47

Workflow file for this run

name: Run performance benchmark tests

# Triggers:
#   * workflow_run      - fires when a run of the listed workflow completes.
#   * workflow_dispatch - manual run, optionally pinning the version to benchmark.
on:
  workflow_run:
    workflows:
      - Build docker images
    types: [completed]
  # Allow manual triggering with custom version
  workflow_dispatch:
    inputs:
      version:
        description: "Data Caterer version to benchmark (e.g., 0.17.0)"
        type: string
        required: false
      skip_existence_check:
        description: "Skip check for existing benchmark results"
        type: boolean
        required: false
        default: false
jobs:
  # Benchmarks a Data Caterer Docker image and opens a pull request containing
  # the results.
  #
  # NOTE(review): no explicit `permissions:` block is visible in this file; the
  # final step opens a pull request, so confirm the token used has write access
  # to contents and pull requests.
  build:
    runs-on: ubuntu-latest
    # Only run if:
    # 1. Manual trigger, OR
    # 2. The upstream workflow run completed successfully and was itself started
    #    by a push event (tag vs. branch is told apart in the first step).
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event.workflow_run.conclusion == 'success' &&
       github.event.workflow_run.event == 'push')
    steps:
      # Resolves two outputs: `base_branch` (PR base) and `checkout_ref`.
      # Context values are passed through `env` rather than interpolated into
      # the script so that a crafted branch name cannot inject shell commands.
      - name: Get branch and checkout ref
        id: branch
        env:
          EVENT_NAME: ${{ github.event_name }}
          REF_NAME: ${{ github.ref_name }}
          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
        run: |
          if [ "$EVENT_NAME" == "workflow_dispatch" ]; then
            # For manual trigger, use the current branch for both
            echo "base_branch=$REF_NAME" >> "$GITHUB_OUTPUT"
            echo "checkout_ref=$REF_NAME" >> "$GITHUB_OUTPUT"
          else
            # For workflow_run trigger, inspect the head branch of the upstream run.
            # If it looks like a tag (matches a semantic version pattern), use
            # 'main' both as the PR base branch and as the ref to check out.
            if [[ "$HEAD_BRANCH" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
              echo "Detected tag: $HEAD_BRANCH, using 'main' as base branch"
              echo "base_branch=main" >> "$GITHUB_OUTPUT"
              echo "checkout_ref=main" >> "$GITHUB_OUTPUT"
            else
              echo "base_branch=$HEAD_BRANCH" >> "$GITHUB_OUTPUT"
              echo "checkout_ref=$HEAD_BRANCH" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Checkout monorepo
        uses: actions/checkout@v4
        with:
          # Checkout the branch, not the commit, to avoid detached HEAD
          ref: ${{ steps.branch.outputs.checkout_ref }}
          fetch-depth: 2

      # Output `value`: the manually supplied version when given, otherwise the
      # `version=` entry of ../gradle.properties (relative to `example`).
      - name: Determine version to benchmark
        id: benchmark_version
        working-directory: example
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_VERSION: ${{ inputs.version }}
        run: |
          if [ "$EVENT_NAME" == "workflow_dispatch" ] && [ -n "$REQUESTED_VERSION" ]; then
            version="$REQUESTED_VERSION"
            echo "Using manually specified version: ${version}"
          else
            version=$(grep -E "^version=" ../gradle.properties | cut -d= -f2)
            echo "Using version from gradle.properties: ${version}"
          fi
          # Fail fast instead of polling for an image with an empty tag later on.
          if [ -z "$version" ]; then
            echo "ERROR: Could not determine version to benchmark"
            exit 1
          fi
          echo "value=${version}" >> "$GITHUB_OUTPUT"

      # Fails the job when a results file for this version already exists,
      # unless the manual `skip_existence_check` input is true.
      - name: Check if benchmark has already run
        working-directory: example
        env:
          SKIP_CHECK: ${{ inputs.skip_existence_check }}
          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
        run: |
          if [ "$SKIP_CHECK" == "true" ]; then
            echo "Skipping existence check as requested"
            exit 0
          fi
          if [ ! -f "benchmark/results/benchmark_results_${BENCHMARK_VERSION}.txt" ]; then
            echo "No benchmark results for version: $BENCHMARK_VERSION, starting to run benchmarks"
          else
            echo "Benchmarks already run for version: $BENCHMARK_VERSION!"
            echo "Set 'skip_existence_check' to true to re-run anyway"
            exit 1
          fi

      # Polls `docker pull` up to 10 times, 30 seconds apart, then gives up.
      - name: Wait for Docker image to be available
        working-directory: example
        env:
          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
        run: |
          image="datacatering/data-caterer:${BENCHMARK_VERSION}"
          echo "Waiting for Docker image ${image} to be available..."
          max_attempts=10
          attempt=1
          while [ "$attempt" -le "$max_attempts" ]; do
            if docker pull "$image" 2>/dev/null; then
              echo "Docker image is available!"
              exit 0
            fi
            echo "Attempt $attempt/$max_attempts: Image not yet available, waiting 30 seconds..."
            sleep 30
            # Plain arithmetic assignment: always exits 0, so safe under `bash -e`.
            attempt=$((attempt + 1))
          done
          echo "ERROR: Docker image not available after $max_attempts attempts"
          exit 1

      - name: Checkout datafusion-comet repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 2
          repository: apache/datafusion-comet
          path: example/benchmark/build/datafusion-comet

      - name: Get Spark query engine jars
        working-directory: example
        run: bash benchmark/setup_query_engine_jars.sh

      # Runs the benchmark, then the comparison script for the same version.
      # BENCHMARK_VERSION is exported to both scripts via the step environment.
      - name: Run benchmark script
        working-directory: example
        env:
          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
        run: |
          bash benchmark/run_benchmark.sh
          bash benchmark/compare_benchmark_results.sh "$BENCHMARK_VERSION"

      # Opens a PR from `benchmark-results-<version>` into the resolved base branch.
      - name: Create pull request
        uses: peter-evans/create-pull-request@v6
        with:
          title: Add benchmark results for version ${{ steps.benchmark_version.outputs.value }}
          body: |
            Automated benchmark results for Data Caterer version ${{ steps.benchmark_version.outputs.value }}
            This PR adds benchmark performance metrics comparing different configurations.
            Triggered by: ${{ github.event_name }}
          branch: benchmark-results-${{ steps.benchmark_version.outputs.value }}
          base: ${{ steps.branch.outputs.base_branch }}
          commit-message: Add benchmark results for version ${{ steps.benchmark_version.outputs.value }}