# Run performance benchmark tests #47
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Run performance benchmark tests

on:
  # Start automatically whenever a run of the "Build docker images" workflow
  # completes; the job-level `if` decides whether that run qualifies.
  workflow_run:
    workflows:
      - "Build docker images"
    types:
      - completed
  # Allow manual triggering with custom version
  workflow_dispatch:
    inputs:
      # Optional: when left empty the job determines the version itself.
      version:
        description: "Data Caterer version to benchmark (e.g., 0.17.0)"
        required: false
        type: string
      # Optional: set to true to re-run even if a results file already exists.
      skip_existence_check:
        description: "Skip check for existing benchmark results"
        required: false
        type: boolean
        default: false
jobs:
  # Benchmarks a published Data Caterer Docker image and opens a pull request
  # with the changes produced by the benchmark scripts.
  build:
    runs-on: ubuntu-latest
    # Only run if:
    # 1. Manual trigger, OR
    # 2. Build workflow completed successfully and was itself started by a
    #    push event (version-like refs are mapped to 'main' in the first step)
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event.workflow_run.conclusion == 'success' &&
       github.event.workflow_run.event == 'push')
    steps:
      # Resolves two outputs: `base_branch` (PR target) and `checkout_ref`.
      # Context values are handed to the script through `env` rather than
      # being interpolated into the script text, so that a crafted branch
      # name cannot be executed as shell code.
      - name: Get branch and checkout ref
        id: branch
        env:
          EVENT_NAME: ${{ github.event_name }}
          REF_NAME: ${{ github.ref_name }}
          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
        run: |
          if [ "$EVENT_NAME" == "workflow_dispatch" ]; then
            # For manual trigger, use the current branch for both
            echo "base_branch=${REF_NAME}" >> "$GITHUB_OUTPUT"
            echo "checkout_ref=${REF_NAME}" >> "$GITHUB_OUTPUT"
          else
            # For workflow_run trigger, get the head branch
            head_branch="$HEAD_BRANCH"
            # If the head_branch looks like a tag (matches semantic version
            # pattern), use main both as the PR base branch and as the
            # checkout ref
            if [[ "$head_branch" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
              echo "Detected tag: $head_branch, using 'main' as base branch"
              echo "base_branch=main" >> "$GITHUB_OUTPUT"
              echo "checkout_ref=main" >> "$GITHUB_OUTPUT"
            else
              echo "base_branch=$head_branch" >> "$GITHUB_OUTPUT"
              echo "checkout_ref=$head_branch" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Checkout monorepo
        uses: actions/checkout@v4
        with:
          # Checkout the branch, not the commit, to avoid detached HEAD
          ref: ${{ steps.branch.outputs.checkout_ref }}
          fetch-depth: 2

      # Output `value`: the manually supplied version when one was given on a
      # manual run, otherwise the `version=` entry of ../gradle.properties.
      - name: Determine version to benchmark
        id: benchmark_version
        working-directory: example
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_VERSION: ${{ inputs.version }}
        run: |
          if [ "$EVENT_NAME" == "workflow_dispatch" ] && [ -n "$REQUESTED_VERSION" ]; then
            echo "value=${REQUESTED_VERSION}" >> "$GITHUB_OUTPUT"
            echo "Using manually specified version: ${REQUESTED_VERSION}"
          else
            version=$(grep -E "^version=" ../gradle.properties | cut -d= -f2)
            # Fail fast: an empty version would otherwise flow into the
            # results file name and the Docker image tag used below.
            if [ -z "$version" ]; then
              echo "ERROR: No 'version=' entry found in gradle.properties"
              exit 1
            fi
            echo "value=${version}" >> "$GITHUB_OUTPUT"
            echo "Using version from gradle.properties: ${version}"
          fi

      # Fails the job when a results file for this version already exists,
      # unless the `skip_existence_check` input is true.
      - name: Check if benchmark has already run
        working-directory: example
        env:
          SKIP_CHECK: ${{ inputs.skip_existence_check }}
          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
        run: |
          if [ "$SKIP_CHECK" == "true" ]; then
            echo "Skipping existence check as requested"
            exit 0
          fi
          if [ ! -f "benchmark/results/benchmark_results_${BENCHMARK_VERSION}.txt" ]; then
            echo "No benchmark results for version: $BENCHMARK_VERSION, starting to run benchmarks"
          else
            echo "Benchmarks already run for version: $BENCHMARK_VERSION!"
            echo "Set 'skip_existence_check' to true to re-run anyway"
            exit 1
          fi

      # Tries `docker pull` up to 10 times, sleeping 30 seconds after each
      # failed attempt, and fails the job if the image never becomes pullable.
      - name: Wait for Docker image to be available
        working-directory: example
        env:
          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
        run: |
          image="datacatering/data-caterer:${BENCHMARK_VERSION}"
          echo "Waiting for Docker image ${image} to be available..."
          max_attempts=10
          attempt=1
          while [ "$attempt" -le "$max_attempts" ]; do
            if docker pull "$image" 2>/dev/null; then
              echo "Docker image is available!"
              break
            else
              echo "Attempt $attempt/$max_attempts: Image not yet available, waiting 30 seconds..."
              sleep 30
              attempt=$((attempt + 1))
            fi
          done
          # The counter only exceeds max_attempts when every pull failed.
          if [ "$attempt" -gt "$max_attempts" ]; then
            echo "ERROR: Docker image not available after $max_attempts attempts"
            exit 1
          fi

      - name: Checkout datafusion-comet repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 2
          repository: apache/datafusion-comet
          path: example/benchmark/build/datafusion-comet

      - name: Get Spark query engine jars
        working-directory: example
        run: bash benchmark/setup_query_engine_jars.sh

      # BENCHMARK_VERSION is exported to both scripts; the comparison script
      # additionally receives it as its first argument.
      - name: Run benchmark script
        working-directory: example
        env:
          BENCHMARK_VERSION: ${{ steps.benchmark_version.outputs.value }}
        run: |
          bash benchmark/run_benchmark.sh
          bash benchmark/compare_benchmark_results.sh "$BENCHMARK_VERSION"

      # Opens a pull request against the base branch resolved in the first step.
      - name: Create pull request
        uses: peter-evans/create-pull-request@v6
        with:
          title: Add benchmark results for version ${{ steps.benchmark_version.outputs.value }}
          body: |
            Automated benchmark results for Data Caterer version ${{ steps.benchmark_version.outputs.value }}
            This PR adds benchmark performance metrics comparing different configurations.
            Triggered by: ${{ github.event_name }}
          branch: benchmark-results-${{ steps.benchmark_version.outputs.value }}
          base: ${{ steps.branch.outputs.base_branch }}
          commit-message: Add benchmark results for version ${{ steps.benchmark_version.outputs.value }}