Run performance benchmark tests #47

Workflow file for this run

.github/workflows/benchmark.yml at 176baa3

	# Benchmark workflow: after the "Build docker images" workflow completes (or on
	# manual dispatch), run the benchmark scripts under example/ and open a pull
	# request containing the changes they leave in the working tree.
	name: Run performance benchmark tests

	on:
	  # Fires whenever the build workflow completes; success/failure is filtered
	  # by the job-level `if` below.
	  workflow_run:
	    workflows: ["Build docker images"]
	    types:
	      - completed
	  # Allow manual triggering with custom version
	  workflow_dispatch:
	    inputs:
	      version:
	        description: 'Data Caterer version to benchmark (e.g., 0.17.0)'
	        required: false
	        type: string
	      skip_existence_check:
	        description: 'Skip check for existing benchmark results'
	        required: false
	        type: boolean
	        default: false

	jobs:
	  build:
	    runs-on: ubuntu-latest
	    # Only run if:
	    # 1. Manual trigger, OR
	    # 2. Build workflow completed successfully and was itself started by a
	    #    push event
	    if: |
	      github.event_name == 'workflow_dispatch' ||
	      (github.event.workflow_run.conclusion == 'success' &&
	      github.event.workflow_run.event == 'push')
	    steps:
	      # Outputs: base_branch (PR target) and checkout_ref (ref to check out).
	      - name: Get branch and checkout ref
	        id: branch
	        # Context values are handed to the script through the environment
	        # rather than interpolated into the script text, so a crafted branch
	        # name cannot be executed as shell code.
	        env:
	          EVENT_NAME: ${{ github.event_name }}
	          REF_NAME: ${{ github.ref_name }}
	          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
	        run: |
	          if [ "$EVENT_NAME" == "workflow_dispatch" ]; then
	            # For manual trigger, use the current branch for both
	            echo "base_branch=$REF_NAME" >> "$GITHUB_OUTPUT"
	            echo "checkout_ref=$REF_NAME" >> "$GITHUB_OUTPUT"
	          else
	            # For workflow_run trigger, get the head branch
	            head_branch="$HEAD_BRANCH"
	            # If the head_branch looks like a tag (matches semantic version pattern),
	            # use main as the base branch for the PR and checkout from main
	            if [[ "$head_branch" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
	              echo "Detected tag: $head_branch, using 'main' as base branch"
	              echo "base_branch=main" >> "$GITHUB_OUTPUT"
	              echo "checkout_ref=main" >> "$GITHUB_OUTPUT"
	            else
	              echo "base_branch=$head_branch" >> "$GITHUB_OUTPUT"
	              echo "checkout_ref=$head_branch" >> "$GITHUB_OUTPUT"
	            fi
	          fi
	      - name: Checkout monorepo
	        uses: actions/checkout@v4
	        with:
	          # Checkout the branch, not the commit, to avoid detached HEAD
	          ref: ${{ steps.branch.outputs.checkout_ref }}
	          fetch-depth: 2
	      # Output: value (the version string used by every later step).
	      - name: Determine version to benchmark
	        id: benchmark_version
	        working-directory: example
	        env:
	          EVENT_NAME: ${{ github.event_name }}
	          REQUESTED_VERSION: ${{ inputs.version }}
	        run: |
	          if [ "$EVENT_NAME" == "workflow_dispatch" ] && [ -n "$REQUESTED_VERSION" ]; then
	            echo "value=$REQUESTED_VERSION" >> "$GITHUB_OUTPUT"
	            echo "Using manually specified version: $REQUESTED_VERSION"
	          else
	            version=$(grep -E "^version=" ../gradle.properties | cut -d= -f2)
	            # Fail here instead of letting an empty version reach the
	            # docker pull retry loop further down.
	            if [ -z "$version" ]; then
	              echo "ERROR: no 'version=' entry found in ../gradle.properties" >&2
	              exit 1
	            fi
	            echo "value=${version}" >> "$GITHUB_OUTPUT"
	            echo "Using version from gradle.properties: ${version}"
	          fi
	      # Fails the job (exit 1) when a results file for this version already
	      # exists, unless skip_existence_check is true.
	      - name: Check if benchmark has already run
	        working-directory: example
	        env:
	          # Empty on workflow_run triggers, where no inputs exist.
	          SKIP_CHECK: ${{ inputs.skip_existence_check }}
	          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
	        run: |
	          skip_check="$SKIP_CHECK"
	          version="$BENCHMARK_VERSION"

	          if [ "$skip_check" == "true" ]; then
	            echo "Skipping existence check as requested"
	            exit 0
	          fi

	          if [ ! -f "benchmark/results/benchmark_results_${version}.txt" ]; then
	            echo "No benchmark results for version: $version, starting to run benchmarks"
	          else
	            echo "Benchmarks already run for version: $version!"
	            echo "Set 'skip_existence_check' to true to re-run anyway"
	            exit 1
	          fi
	      # Polls `docker pull` up to 10 times, 30 seconds apart.
	      - name: Wait for Docker image to be available
	        working-directory: example
	        env:
	          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
	        run: |
	          version="$BENCHMARK_VERSION"
	          echo "Waiting for Docker image datacatering/data-caterer:${version} to be available..."
	          max_attempts=10
	          attempt=1
	          while [ $attempt -le $max_attempts ]; do
	            if docker pull "datacatering/data-caterer:${version}" 2>/dev/null; then
	              echo "Docker image is available!"
	              break
	            else
	              echo "Attempt $attempt/$max_attempts: Image not yet available, waiting 30 seconds..."
	              sleep 30
	              ((attempt++))
	            fi
	          done

	          # The counter only passes max_attempts when every pull failed.
	          if [ $attempt -gt $max_attempts ]; then
	            echo "ERROR: Docker image not available after $max_attempts attempts"
	            exit 1
	          fi
	      - name: Checkout datafusion-comet repo
	        uses: actions/checkout@v4
	        with:
	          fetch-depth: 2
	          repository: apache/datafusion-comet
	          path: example/benchmark/build/datafusion-comet
	      - name: Get Spark query engine jars
	        working-directory: example
	        run: bash benchmark/setup_query_engine_jars.sh
	      - name: Run benchmark script
	        working-directory: example
	        env:
	          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
	        run: |
	          bash benchmark/run_benchmark.sh
	          bash benchmark/compare_benchmark_results.sh "$BENCHMARK_VERSION"
	      - name: Create pull request
	        uses: peter-evans/create-pull-request@v6
	        with:
	          title: Add benchmark results for version ${{ steps.benchmark_version.outputs.value }}
	          body: |
	            Automated benchmark results for Data Caterer version ${{ steps.benchmark_version.outputs.value }}

	            This PR adds benchmark performance metrics comparing different configurations.

	            Triggered by: ${{ github.event_name }}
	          branch: benchmark-results-${{ steps.benchmark_version.outputs.value }}
	          base: ${{ steps.branch.outputs.base_branch }}
	          commit-message: Add benchmark results for version ${{ steps.benchmark_version.outputs.value }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Run performance benchmark tests #47

Workflow file

Run performance benchmark tests #47

Uh oh!

Workflow file for this run