CI - Build & Benchmark #66
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity, triggers and run-concurrency policy.
name: CI - Build & Benchmark

# Runs on pushes and pull requests targeting the two long-lived branches,
# and can also be started manually from the Actions tab.
on:
  push:
    branches: [master, develop]
  pull_request:
    branches: [master, develop]
  workflow_dispatch:

# One concurrency group per workflow + PR head branch. `github.head_ref` is
# only populated for pull_request events; for every other event the group
# falls back to the unique run id, so those runs never share a group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Builds Arrow/ORC into /home/runner/dependencies (cached), configures and
  # builds the project with CMake, and uploads build/tpch_benchmark as an
  # artifact named after the matrix entry.
  build-matrix:
    name: Build (${{ matrix.artifact_name }})
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        include:
          - artifact_name: tpch-benchmark-orc-async
    steps:
      - name: Checkout code with submodules
        uses: actions/checkout@v4
        with:
          submodules: recursive
          fetch-depth: 1

      - name: Install dependencies
        run: bash scripts/ci_install_deps.sh

      - name: Create cache directories
        run: |
          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

      # The key is derived from the build script instead of the commit SHA.
      # A SHA key can never produce an exact hit on a new commit, and a
      # prefix (restore-keys) match leaves `cache-hit` non-true, so the
      # from-source build below used to run on every single commit.
      # No restore-keys here on purpose: either the exact cache is restored
      # or the libraries are built into a clean prefix, never on top of a
      # stale restore. The `arrow-orc-cache-` prefix is kept so consumers
      # that restore by prefix keep matching.
      # NOTE(review): if the Arrow/ORC sources or versions are pinned
      # somewhere other than this script (e.g. a submodule), add that input
      # to hashFiles() as well - confirm against the script.
      - name: Restore Arrow and ORC cache
        id: cache-arrow-orc
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: arrow-orc-cache-ubuntu-22.04-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh') }}

      - name: Build Arrow and ORC from source
        if: steps.cache-arrow-orc.outputs.cache-hit != 'true'
        run: bash scripts/ci_build_arrow_and_orc.sh ON /home/runner/dependencies

      # Explicit save right after the dependency build, so the result is
      # cached even when a later step (configure/build of the project)
      # fails. Replaces a second `actions/cache` step that used the same key
      # as the restore step and only competed with its post-job save.
      - name: Cache Arrow and ORC libraries
        if: steps.cache-arrow-orc.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: ${{ steps.cache-arrow-orc.outputs.cache-primary-key }}

      - name: Configure CMake
        run: |
          cmake -B build \
            -DCMAKE_BUILD_TYPE=RelWithDebInfo \
            -DCMAKE_PREFIX_PATH=/home/runner/dependencies \
            -DTPCH_ENABLE_ORC=ON \
            -DTPCH_ENABLE_ASYNC_IO=ON \
            -DTPCH_ENABLE_ASAN=OFF \
            -DTPCH_BUILD_TESTS=OFF

      - name: Build project
        run: cmake --build build -j$(nproc)

      - name: Verify executable
        run: test -f build/tpch_benchmark && echo "✓ Executable created successfully"

      # Short retention: the binary is only consumed by later jobs of the
      # same workflow run.
      - name: Upload build artifact
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.artifact_name }}
          path: build/tpch_benchmark
          retention-days: 1
          if-no-files-found: error
# ---- job: benchmark-suite (entry of the top-level `jobs:` mapping) ----
  # Runs the prebuilt tpch_benchmark binary once per (format, table) matrix
  # entry at scale factor 1 and uploads the filtered log of each run.
  benchmark-suite:
    name: Benchmark Suite
    runs-on: ubuntu-22.04
    needs: build-matrix
    timeout-minutes: 20
    strategy:
      fail-fast: false
      matrix:
        include:
          - {format: csv, table: lineitem}
          - {format: csv, table: orders}
          - {format: csv, table: customer}
          - {format: csv, table: part}
          - {format: csv, table: partsupp}
          - {format: csv, table: supplier}
          - {format: csv, table: nation}
          - {format: csv, table: region}
          - {format: parquet, table: lineitem}
          - {format: parquet, table: orders}
          - {format: parquet, table: customer}
          - {format: parquet, table: part}
          - {format: parquet, table: partsupp}
          - {format: parquet, table: supplier}
          - {format: parquet, table: nation}
          - {format: parquet, table: region}
          - {format: orc, table: lineitem}
    steps:
      - name: Checkout code (for dists.dss and scripts)
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Checkout tpch submodule (for dists.dss)
        run: git submodule update --init --depth 1 -- third_party/tpch

      - name: Install dependencies
        run: bash scripts/ci_install_deps.sh

      - name: Create cache directories
        run: |
          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

      # Restore-only: this job consumes the libraries and must never save a
      # cache of its own. The primary key is derived from the dependency
      # build script; restore-keys falls back to the newest cache sharing
      # the `arrow-orc-cache-` prefix when no exact match exists.
      - name: Restore Arrow and ORC cache
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: arrow-orc-cache-ubuntu-22.04-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh') }}
          restore-keys: arrow-orc-cache-

      - name: Download build artifact
        uses: actions/download-artifact@v4
        with:
          name: tpch-benchmark-orc-async
          path: .

      - name: Setup benchmark executable
        run: |
          chmod +x tpch_benchmark
          mkdir -p benchmark-results
          # Setup library path to use cached dependencies. The ${VAR:+...}
          # form avoids a trailing ':' when LD_LIBRARY_PATH is unset; an
          # empty entry would make the loader search the current directory.
          lib_path="/home/runner/dependencies/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
          echo "LD_LIBRARY_PATH=$lib_path" >> "$GITHUB_ENV"

      - name: Verify library installation
        run: |
          echo "=== Current LD_LIBRARY_PATH ==="
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
          for lib in libparquet liborc; do
            echo ""
            echo "=== Searching for ${lib} ==="
            # Test the captured output: the exit status of `find | head`
            # is head's, which is 0 even when nothing was found.
            matches=$(find /home/runner/dependencies -name "${lib}*" 2>/dev/null | head -5)
            if [ -n "$matches" ]; then
              echo "$matches"
            else
              echo "${lib} not found"
            fi
          done

      # continue-on-error keeps the job green so the log is always uploaded
      # and the rest of the matrix is unaffected; the step itself now
      # reports failures instead of hiding them.
      - name: Run format coverage benchmark
        continue-on-error: true
        run: |
          log="benchmark-results/${{ matrix.format }}_${{ matrix.table }}_baseline.log"
          timeout 600 ./tpch_benchmark \
            --use-dbgen \
            --scale-factor 1 \
            --format ${{ matrix.format }} \
            --table ${{ matrix.table }} \
            --output-dir benchmark-results/ \
            2>&1 | grep -v "^DEBUG:" | tee "$log"
          # The pipeline's own status is tee's (default shell has no
          # pipefail), so read the benchmark's status from PIPESTATUS
          # immediately after the pipeline.
          status=${PIPESTATUS[0]}
          # Fail if process dumped core
          if grep -q "dumped core" "$log"; then
            echo "ERROR: Benchmark crashed with core dump"
            exit 1
          fi
          # Also fail on timeouts (124) and any other non-zero exit, e.g. a
          # crash that did not produce a core dump message.
          if [ "$status" -ne 0 ]; then
            if [ "$status" -eq 124 ]; then
              echo "ERROR: Benchmark timed out after 600 seconds"
            else
              echo "ERROR: Benchmark exited with status $status"
            fi
            exit "$status"
          fi

      - name: Upload benchmark logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-logs-suite-${{ matrix.format }}-${{ matrix.table }}
          path: benchmark-results/${{ matrix.format }}_${{ matrix.table }}_baseline.log
          retention-days: 30
          if-no-files-found: ignore
# ---- job: optimization-benchmarks (entry of the top-level `jobs:` mapping) ----
  # Runs the prebuilt tpch_benchmark binary on Parquet output for every
  # (mode, table) combination and uploads the filtered log of each run.
  optimization-benchmarks:
    name: Optimization Benchmarks (${{ matrix.mode }})
    runs-on: ubuntu-22.04
    needs: build-matrix
    timeout-minutes: 20
    strategy:
      fail-fast: false
      matrix:
        mode: [baseline, zero-copy, true-zero-copy]
        table: [lineitem, orders, part]
    steps:
      - name: Checkout code (for dists.dss and scripts)
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Checkout tpch submodule (for dists.dss)
        run: git submodule update --init --depth 1 -- third_party/tpch

      - name: Install dependencies
        run: bash scripts/ci_install_deps.sh

      - name: Create cache directories
        run: |
          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

      # Restore-only: this job consumes the libraries and must never save a
      # cache of its own. The primary key is derived from the dependency
      # build script; restore-keys falls back to the newest cache sharing
      # the `arrow-orc-cache-` prefix when no exact match exists.
      - name: Restore Arrow and ORC cache
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: arrow-orc-cache-ubuntu-22.04-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh') }}
          restore-keys: arrow-orc-cache-

      - name: Download build artifact
        uses: actions/download-artifact@v4
        with:
          name: tpch-benchmark-orc-async
          path: .

      - name: Setup benchmark executable
        run: |
          chmod +x tpch_benchmark
          mkdir -p benchmark-results
          # Setup library path to use cached dependencies. The ${VAR:+...}
          # form avoids a trailing ':' when LD_LIBRARY_PATH is unset; an
          # empty entry would make the loader search the current directory.
          lib_path="/home/runner/dependencies/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
          echo "LD_LIBRARY_PATH=$lib_path" >> "$GITHUB_ENV"

      - name: Verify library installation
        run: |
          echo "=== Current LD_LIBRARY_PATH ==="
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
          for lib in libparquet liborc; do
            echo ""
            echo "=== Searching for ${lib} ==="
            # Test the captured output: the exit status of `find | head`
            # is head's, which is 0 even when nothing was found.
            matches=$(find /home/runner/dependencies -name "${lib}*" 2>/dev/null | head -5)
            if [ -n "$matches" ]; then
              echo "$matches"
            else
              echo "${lib} not found"
            fi
          done

      # continue-on-error keeps the job green so the log is always uploaded
      # and the rest of the matrix is unaffected; the step itself now
      # reports failures instead of hiding them.
      - name: Run optimization benchmark
        continue-on-error: true
        run: |
          # Map the matrix mode to the benchmark flag; "baseline" adds none.
          case "${{ matrix.mode }}" in
            zero-copy)      MODE_FLAGS="--zero-copy" ;;
            true-zero-copy) MODE_FLAGS="--true-zero-copy" ;;
            *)              MODE_FLAGS="" ;;
          esac
          log="benchmark-results/parquet_${{ matrix.table }}_${{ matrix.mode }}.log"
          # $MODE_FLAGS is intentionally unquoted so an empty value expands
          # to no argument at all.
          timeout 600 ./tpch_benchmark \
            --use-dbgen \
            --scale-factor 1 \
            --format parquet \
            --table ${{ matrix.table }} \
            --output-dir benchmark-results/ \
            $MODE_FLAGS \
            2>&1 | grep -v "^DEBUG:" | tee "$log"
          # The pipeline's own status is tee's (default shell has no
          # pipefail), so read the benchmark's status from PIPESTATUS
          # immediately after the pipeline.
          status=${PIPESTATUS[0]}
          # Fail if process dumped core
          if grep -q "dumped core" "$log"; then
            echo "ERROR: Benchmark crashed with core dump"
            exit 1
          fi
          # Also fail on timeouts (124) and any other non-zero exit, e.g. a
          # crash that did not produce a core dump message.
          if [ "$status" -ne 0 ]; then
            if [ "$status" -eq 124 ]; then
              echo "ERROR: Benchmark timed out after 600 seconds"
            else
              echo "ERROR: Benchmark exited with status $status"
            fi
            exit "$status"
          fi

      - name: Upload benchmark logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-logs-optimization-${{ matrix.mode }}-${{ matrix.table }}
          path: benchmark-results/parquet_${{ matrix.table }}_${{ matrix.mode }}.log
          retention-days: 30
          if-no-files-found: ignore
# ---- job: results-aggregation (entry of the top-level `jobs:` mapping) ----
  # Collects the per-run benchmark logs, turns them into a JSON summary and
  # an HTML report, and publishes everything as one artifact. Runs even when
  # the benchmark jobs failed or were skipped (`if: always()`).
  results-aggregation:
    name: Aggregate Results
    runs-on: ubuntu-22.04
    needs: [benchmark-suite, optimization-benchmarks]
    if: always()
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Only the log artifacts are needed here; the pattern skips the build
      # artifact that holds the benchmark binary. Each matching artifact is
      # extracted into its own sub-directory of all-results/.
      - name: Download all benchmark artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: benchmark-logs-*
          path: all-results

      - name: Prepare benchmark results directory
        run: |
          # all-results/ may be absent when no log artifact exists; create
          # it so `find` does not fail the step.
          mkdir -p all-results benchmark-results
          find all-results -name "*.log" -exec cp {} benchmark-results/ \;

      # Best effort: a parser failure must not fail the job. The summary is
      # written to a temporary file first so that a failed run never leaves
      # an empty or truncated ci_summary.json for the later steps to consume.
      - name: Generate summary report
        run: |
          summary_tmp="$RUNNER_TEMP/ci_summary.json"
          if python3 scripts/parse_benchmark_logs.py benchmark-results > "$summary_tmp"; then
            mv "$summary_tmp" benchmark-results/ci_summary.json
          else
            echo "WARNING: parse_benchmark_logs.py failed; no summary generated"
            rm -f "$summary_tmp"
          fi

      - name: Generate HTML visualization
        if: always()
        run: |
          if [ -f benchmark-results/ci_summary.json ]; then
            python3 scripts/visualize_benchmark_results.py benchmark-results/ci_summary.json benchmark-results/report.html
          else
            echo "No summary JSON found, skipping visualization"
          fi

      # always(): publish whatever was produced (at least the raw logs) even
      # if report generation above failed.
      - name: Upload aggregated results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: |
            benchmark-results/*.log
            benchmark-results/*.json
            benchmark-results/*.html
          retention-days: 30
          if-no-files-found: ignore

      - name: Print summary
        if: always()
        run: |
          if [ -f benchmark-results/ci_summary.json ]; then
            echo "=== Benchmark Summary ==="
            python3 -m json.tool benchmark-results/ci_summary.json || cat benchmark-results/ci_summary.json
          else
            echo "No summary generated (logs may not exist yet)"
          fi
# ---- job: status-check (entry of the top-level `jobs:` mapping) ----
  # Single pass/fail gate that mirrors the result of the build job. It runs
  # regardless of that result (`if: always()`) and exits non-zero for
  # anything other than "success".
  status-check:
    name: Status Check
    runs-on: ubuntu-22.04
    needs:
      - build-matrix
    if: always()
    steps:
      - name: Check build status
        run: |
          case "${{ needs.build-matrix.result }}" in
            success)
              echo "✓ All builds passed"
              exit 0
              ;;
            *)
              echo "✗ Some builds failed"
              exit 1
              ;;
          esac