# CI - Build & Benchmark
# Workflow file for run #66 of "CI - Build & Benchmark"
# (copied from the GitHub Actions web UI).

# Builds the TPC-H benchmark binary and runs the benchmark jobs defined under
# `jobs`.
name: CI - Build & Benchmark

# Runs on pushes and pull requests targeting master/develop, and on manual
# dispatch.
on:
  push:
    branches:
      - master
      - develop
  pull_request:
    branches:
      - master
      - develop
  workflow_dispatch:

# Least privilege: the jobs only check out the repository and use the cache and
# artifact services, none of which need write access through GITHUB_TOKEN.
permissions:
  contents: read

# One active run per pull-request branch; runs without a head_ref (push,
# manual dispatch) get a unique group via run_id and are never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Builds the benchmark executable (restoring or building Arrow/ORC first) and
  # publishes it as an artifact consumed by the benchmark jobs.
  build-matrix:
    name: Build (${{ matrix.artifact_name }})
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        include:
          - artifact_name: tpch-benchmark-orc-async
    steps:
      - name: Checkout code with submodules
        uses: actions/checkout@v4
        with:
          submodules: recursive
          fetch-depth: 1

      - name: Install dependencies
        run: bash scripts/ci_install_deps.sh

      - name: Create cache directories
        run: |
          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

      # The key is derived from the CI scripts rather than the commit SHA, so
      # an unchanged dependency build yields an exact hit on later commits.
      # (With a per-SHA key plus restore-keys, `cache-hit` is never 'true' on a
      # new commit and Arrow/ORC would be rebuilt every time.)
      # NOTE(review): if the Arrow/ORC versions are pinned somewhere other than
      # these scripts (e.g. a submodule), add that input to the key.
      - name: Restore Arrow and ORC cache
        id: cache-arrow-orc
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: arrow-orc-cache-${{ runner.os }}-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh', 'scripts/ci_install_deps.sh') }}

      - name: Build Arrow and ORC from source
        if: steps.cache-arrow-orc.outputs.cache-hit != 'true'
        run: bash scripts/ci_build_arrow_and_orc.sh ON /home/runner/dependencies

      # Save immediately after the dependency build so the result is kept even
      # if a later step (configure/build of the project) fails.
      - name: Cache Arrow and ORC libraries
        if: steps.cache-arrow-orc.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: ${{ steps.cache-arrow-orc.outputs.cache-primary-key }}

      - name: Configure CMake
        run: |
          cmake -B build \
            -DCMAKE_BUILD_TYPE=RelWithDebInfo \
            -DCMAKE_PREFIX_PATH=/home/runner/dependencies \
            -DTPCH_ENABLE_ORC=ON \
            -DTPCH_ENABLE_ASYNC_IO=ON \
            -DTPCH_ENABLE_ASAN=OFF \
            -DTPCH_BUILD_TESTS=OFF

      - name: Build project
        run: cmake --build build -j"$(nproc)"

      - name: Verify executable
        run: test -f build/tpch_benchmark && echo "✓ Executable created successfully"

      - name: Upload build artifact
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.artifact_name }}
          path: build/tpch_benchmark
          retention-days: 1
          if-no-files-found: error

  # Runs the built executable once per (format, table) pair and uploads the
  # filtered log of each run.
  benchmark-suite:
    name: Benchmark Suite
    runs-on: ubuntu-22.04
    needs: build-matrix
    timeout-minutes: 20
    strategy:
      fail-fast: false
      matrix:
        include:
          - format: csv
            table: lineitem
          - format: csv
            table: orders
          - format: csv
            table: customer
          - format: csv
            table: part
          - format: csv
            table: partsupp
          - format: csv
            table: supplier
          - format: csv
            table: nation
          - format: csv
            table: region
          - format: parquet
            table: lineitem
          - format: parquet
            table: orders
          - format: parquet
            table: customer
          - format: parquet
            table: part
          - format: parquet
            table: partsupp
          - format: parquet
            table: supplier
          - format: parquet
            table: nation
          - format: parquet
            table: region
          - format: orc
            table: lineitem
    steps:
      - name: Checkout code (for dists.dss and scripts)
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Checkout tpch submodule (for dists.dss)
        run: git submodule update --init --depth 1 -- third_party/tpch

      - name: Install dependencies
        run: bash scripts/ci_install_deps.sh

      - name: Create cache directories
        run: |
          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

      # Restore-only: this job consumes the libraries saved by build-matrix and
      # must use the same key expression as that job.
      - name: Restore Arrow and ORC cache
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: arrow-orc-cache-${{ runner.os }}-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh', 'scripts/ci_install_deps.sh') }}

      - name: Download build artifact
        uses: actions/download-artifact@v4
        with:
          name: tpch-benchmark-orc-async
          path: .

      - name: Setup benchmark executable
        run: |
          chmod +x tpch_benchmark
          mkdir -p benchmark-results
          # Setup library path to use cached dependencies. The ${VAR:+...} form
          # avoids a trailing ':' (an empty entry means the current directory)
          # when LD_LIBRARY_PATH is not already set.
          echo "LD_LIBRARY_PATH=/home/runner/dependencies/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

      - name: Verify library installation
        run: |
          echo "=== Current LD_LIBRARY_PATH ==="
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
          for lib in libparquet liborc; do
            echo ""
            echo "=== Searching for ${lib} ==="
            # `head` exits 0 even on empty input, so test the captured output
            # instead of relying on `|| echo`.
            matches=$(find /home/runner/dependencies -name "${lib}*" 2>/dev/null | head -5)
            if [ -n "$matches" ]; then
              echo "$matches"
            else
              echo "${lib} not found"
            fi
          done

      # Best-effort: a failing benchmark marks this step as failed but, because
      # of continue-on-error, does not fail the job.
      - name: Run format coverage benchmark
        continue-on-error: true
        env:
          FORMAT: ${{ matrix.format }}
          TABLE: ${{ matrix.table }}
        run: |
          log_file="benchmark-results/${FORMAT}_${TABLE}_baseline.log"
          timeout 600 ./tpch_benchmark \
            --use-dbgen \
            --scale-factor 1 \
            --format "$FORMAT" \
            --table "$TABLE" \
            --output-dir benchmark-results/ \
            2>&1 | grep -v "^DEBUG:" | tee "$log_file"
          # The default `bash -e` shell runs without pipefail, so the pipeline
          # status is tee's; take the benchmark's own status from PIPESTATUS.
          status=${PIPESTATUS[0]}
          # Fail if process dumped core
          if grep -q "dumped core" "$log_file"; then
            echo "ERROR: Benchmark crashed with core dump"
            exit 1
          fi
          # 124 is what `timeout` returns when the 600 s limit is hit.
          if [ "$status" -ne 0 ]; then
            echo "ERROR: Benchmark exited with status $status"
            exit "$status"
          fi

      - name: Upload benchmark logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-logs-suite-${{ matrix.format }}-${{ matrix.table }}
          path: benchmark-results/${{ matrix.format }}_${{ matrix.table }}_baseline.log
          retention-days: 30
          if-no-files-found: ignore

  # Runs the parquet benchmark for every (mode, table) combination, passing
  # the mode-specific flag to the executable.
  optimization-benchmarks:
    name: Optimization Benchmarks (${{ matrix.mode }})
    runs-on: ubuntu-22.04
    needs: build-matrix
    timeout-minutes: 20
    strategy:
      fail-fast: false
      matrix:
        mode: [baseline, zero-copy, true-zero-copy]
        table: [lineitem, orders, part]
    steps:
      - name: Checkout code (for dists.dss and scripts)
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Checkout tpch submodule (for dists.dss)
        run: git submodule update --init --depth 1 -- third_party/tpch

      - name: Install dependencies
        run: bash scripts/ci_install_deps.sh

      - name: Create cache directories
        run: |
          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

      # Restore-only: this job consumes the libraries saved by build-matrix and
      # must use the same key expression as that job.
      - name: Restore Arrow and ORC cache
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/dependencies/lib/libarrow*
            /home/runner/dependencies/lib/libparquet*
            /home/runner/dependencies/lib/liborc*
            /home/runner/dependencies/lib/cmake/arrow
            /home/runner/dependencies/lib/cmake/orc
            /home/runner/dependencies/lib/pkgconfig/arrow*
            /home/runner/dependencies/lib/pkgconfig/liborc*
            /home/runner/dependencies/include/arrow
            /home/runner/dependencies/include/parquet
            /home/runner/dependencies/include/orc
          key: arrow-orc-cache-${{ runner.os }}-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh', 'scripts/ci_install_deps.sh') }}

      - name: Download build artifact
        uses: actions/download-artifact@v4
        with:
          name: tpch-benchmark-orc-async
          path: .

      - name: Setup benchmark executable
        run: |
          chmod +x tpch_benchmark
          mkdir -p benchmark-results
          # Setup library path to use cached dependencies. The ${VAR:+...} form
          # avoids a trailing ':' (an empty entry means the current directory)
          # when LD_LIBRARY_PATH is not already set.
          echo "LD_LIBRARY_PATH=/home/runner/dependencies/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

      - name: Verify library installation
        run: |
          echo "=== Current LD_LIBRARY_PATH ==="
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
          for lib in libparquet liborc; do
            echo ""
            echo "=== Searching for ${lib} ==="
            # `head` exits 0 even on empty input, so test the captured output
            # instead of relying on `|| echo`.
            matches=$(find /home/runner/dependencies -name "${lib}*" 2>/dev/null | head -5)
            if [ -n "$matches" ]; then
              echo "$matches"
            else
              echo "${lib} not found"
            fi
          done

      # Best-effort: a failing benchmark marks this step as failed but, because
      # of continue-on-error, does not fail the job.
      - name: Run optimization benchmark
        continue-on-error: true
        env:
          MODE: ${{ matrix.mode }}
          TABLE: ${{ matrix.table }}
        run: |
          log_file="benchmark-results/parquet_${TABLE}_${MODE}.log"
          # "baseline" adds no flag; the other modes map to one flag each.
          mode_flags=()
          case "$MODE" in
            zero-copy) mode_flags+=(--zero-copy) ;;
            true-zero-copy) mode_flags+=(--true-zero-copy) ;;
          esac
          timeout 600 ./tpch_benchmark \
            --use-dbgen \
            --scale-factor 1 \
            --format parquet \
            --table "$TABLE" \
            --output-dir benchmark-results/ \
            "${mode_flags[@]}" \
            2>&1 | grep -v "^DEBUG:" | tee "$log_file"
          # The default `bash -e` shell runs without pipefail, so the pipeline
          # status is tee's; take the benchmark's own status from PIPESTATUS.
          status=${PIPESTATUS[0]}
          # Fail if process dumped core
          if grep -q "dumped core" "$log_file"; then
            echo "ERROR: Benchmark crashed with core dump"
            exit 1
          fi
          # 124 is what `timeout` returns when the 600 s limit is hit.
          if [ "$status" -ne 0 ]; then
            echo "ERROR: Benchmark exited with status $status"
            exit "$status"
          fi

      - name: Upload benchmark logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-logs-optimization-${{ matrix.mode }}-${{ matrix.table }}
          path: benchmark-results/parquet_${{ matrix.table }}_${{ matrix.mode }}.log
          retention-days: 30
          if-no-files-found: ignore

  # Collects every benchmark log, derives a JSON summary and an HTML report,
  # and uploads them together. Runs even when benchmark jobs failed.
  results-aggregation:
    name: Aggregate Results
    runs-on: ubuntu-22.04
    needs: [benchmark-suite, optimization-benchmarks]
    if: always()
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Only the log artifacts are needed here; the pattern skips the build
      # artifact that holds the executable.
      - name: Download all benchmark artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: benchmark-logs-*
          path: all-results

      - name: Prepare benchmark results directory
        run: |
          # all-results may be absent when no log artifact was produced.
          mkdir -p all-results benchmark-results
          find all-results -name "*.log" -exec cp {} benchmark-results/ \;

      - name: Generate summary report
        run: |
          # Write to a temp file first so a failing parser cannot leave an
          # empty or truncated ci_summary.json for the later steps to pick up.
          summary_tmp="$RUNNER_TEMP/ci_summary.json"
          if python3 scripts/parse_benchmark_logs.py benchmark-results > "$summary_tmp"; then
            mv "$summary_tmp" benchmark-results/ci_summary.json
          else
            echo "::warning::parse_benchmark_logs.py failed; no summary generated"
            rm -f "$summary_tmp"
          fi

      - name: Generate HTML visualization
        if: always()
        run: |
          if [ -f benchmark-results/ci_summary.json ]; then
            python3 scripts/visualize_benchmark_results.py benchmark-results/ci_summary.json benchmark-results/report.html
          else
            echo "No summary JSON found, skipping visualization"
          fi

      # always(): keep the collected logs even if report generation failed.
      - name: Upload aggregated results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: |
            benchmark-results/*.log
            benchmark-results/*.json
            benchmark-results/*.html
          retention-days: 30
          if-no-files-found: ignore

      - name: Print summary
        if: always()
        run: |
          if [ -f benchmark-results/ci_summary.json ]; then
            echo "=== Benchmark Summary ==="
            python3 -m json.tool benchmark-results/ci_summary.json || cat benchmark-results/ci_summary.json
          else
            echo "No summary generated (logs may not exist yet)"
          fi

  # Single pass/fail gate that reflects only the build job's result.
  status-check:
    name: Status Check
    runs-on: ubuntu-22.04
    needs: [build-matrix]
    if: always()
    steps:
      - name: Check build status
        env:
          BUILD_RESULT: ${{ needs.build-matrix.result }}
        run: |
          if [ "$BUILD_RESULT" = "success" ]; then
            echo "✓ All builds passed"
            exit 0
          else
            echo "✗ Some builds failed"
            exit 1
          fi