CI - Build & Benchmark #66

Workflow file for this run

	# CI workflow: builds the TPC-H benchmark binary, then runs the benchmark jobs.
	name: CI - Build & Benchmark

	# Triggers: pushes and pull requests targeting master/develop, plus manual
	# dispatch from the Actions UI.
	on:
	  push:
	    branches:
	      - master
	      - develop
	  pull_request:
	    branches:
	      - master
	      - develop
	  workflow_dispatch:

	# github.head_ref is only populated for pull_request events, so runs for the
	# same PR branch share one group and a newer run cancels the older one.
	# Other events fall back to the unique run id and are never cancelled.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
	  cancel-in-progress: true

	jobs:
	  # Builds Arrow/ORC (unless an exact cache entry exists) and the
	  # tpch_benchmark executable, then publishes the executable as an artifact
	  # consumed by the benchmark jobs below.
	  build-matrix:
	    name: Build (${{ matrix.artifact_name }})
	    runs-on: ubuntu-22.04
	    timeout-minutes: 30

	    strategy:
	      fail-fast: false
	      matrix:
	        include:
	          - artifact_name: tpch-benchmark-orc-async

	    steps:
	      - name: Checkout code with submodules
	        uses: actions/checkout@v4
	        with:
	          submodules: recursive
	          fetch-depth: 1

	      - name: Install dependencies
	        run: bash scripts/ci_install_deps.sh

	      - name: Create cache directories
	        run: |
	          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
	          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

	      # The key is derived from the build script instead of github.sha: a
	      # per-commit key never produces an exact hit on a new commit, and a
	      # restore-keys match reports cache-hit as false, so the source build
	      # below used to run on every commit.
	      # NOTE(review): assumes the Arrow/ORC versions are pinned by the build
	      # script; add further inputs to hashFiles() if other files affect it.
	      - name: Restore Arrow and ORC cache
	        id: cache-arrow-orc
	        uses: actions/cache/restore@v4
	        with:
	          path: |
	            /home/runner/dependencies/lib/libarrow*
	            /home/runner/dependencies/lib/libparquet*
	            /home/runner/dependencies/lib/liborc*
	            /home/runner/dependencies/lib/cmake/arrow
	            /home/runner/dependencies/lib/cmake/orc
	            /home/runner/dependencies/lib/pkgconfig/arrow*
	            /home/runner/dependencies/lib/pkgconfig/liborc*
	            /home/runner/dependencies/include/arrow
	            /home/runner/dependencies/include/parquet
	            /home/runner/dependencies/include/orc
	          key: arrow-orc-cache-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh') }}
	          restore-keys: arrow-orc-cache-

	      - name: Build Arrow and ORC from source
	        if: steps.cache-arrow-orc.outputs.cache-hit != 'true'
	        run: bash scripts/ci_build_arrow_and_orc.sh ON /home/runner/dependencies

	      # Explicit save right after the source build, so the libraries are
	      # cached even when a later step of this job fails. Replaces a second
	      # actions/cache step that duplicated the restore step's key.
	      - name: Cache Arrow and ORC libraries
	        if: steps.cache-arrow-orc.outputs.cache-hit != 'true'
	        uses: actions/cache/save@v4
	        with:
	          path: |
	            /home/runner/dependencies/lib/libarrow*
	            /home/runner/dependencies/lib/libparquet*
	            /home/runner/dependencies/lib/liborc*
	            /home/runner/dependencies/lib/cmake/arrow
	            /home/runner/dependencies/lib/cmake/orc
	            /home/runner/dependencies/lib/pkgconfig/arrow*
	            /home/runner/dependencies/lib/pkgconfig/liborc*
	            /home/runner/dependencies/include/arrow
	            /home/runner/dependencies/include/parquet
	            /home/runner/dependencies/include/orc
	          key: ${{ steps.cache-arrow-orc.outputs.cache-primary-key }}

	      - name: Configure CMake
	        run: |
	          cmake -B build \
	            -DCMAKE_BUILD_TYPE=RelWithDebInfo \
	            -DCMAKE_PREFIX_PATH=/home/runner/dependencies \
	            -DTPCH_ENABLE_ORC=ON \
	            -DTPCH_ENABLE_ASYNC_IO=ON \
	            -DTPCH_ENABLE_ASAN=OFF \
	            -DTPCH_BUILD_TESTS=OFF

	      - name: Build project
	        run: cmake --build build -j$(nproc)

	      - name: Verify executable
	        run: test -f build/tpch_benchmark && echo "✓ Executable created successfully"

	      - name: Upload build artifact
	        uses: actions/upload-artifact@v4
	        with:
	          name: ${{ matrix.artifact_name }}
	          path: build/tpch_benchmark
	          retention-days: 1
	          if-no-files-found: error

	  # Runs the benchmark once per (format, table) pair using the executable
	  # built by build-matrix.
	  benchmark-suite:
	    name: Benchmark Suite
	    runs-on: ubuntu-22.04
	    needs: build-matrix
	    timeout-minutes: 20

	    strategy:
	      fail-fast: false
	      matrix:
	        # csv and parquet cover all eight tables; the include entry cannot be
	        # merged into any of those combinations, so it adds orc/lineitem as
	        # a seventeenth combination.
	        format: [csv, parquet]
	        table:
	          - lineitem
	          - orders
	          - customer
	          - part
	          - partsupp
	          - supplier
	          - nation
	          - region
	        include:
	          - format: orc
	            table: lineitem

	    steps:
	      - name: Checkout code (for dists.dss and scripts)
	        uses: actions/checkout@v4
	        with:
	          fetch-depth: 1

	      - name: Checkout tpch submodule (for dists.dss)
	        run: git submodule update --init --depth 1 -- third_party/tpch

	      - name: Install dependencies
	        run: bash scripts/ci_install_deps.sh

	      - name: Create cache directories
	        run: |
	          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
	          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

	      # Restore only: this job consumes the libraries and must not save a
	      # cache entry of its own from each matrix leg.
	      - name: Restore Arrow and ORC cache
	        uses: actions/cache/restore@v4
	        with:
	          path: |
	            /home/runner/dependencies/lib/libarrow*
	            /home/runner/dependencies/lib/libparquet*
	            /home/runner/dependencies/lib/liborc*
	            /home/runner/dependencies/lib/cmake/arrow
	            /home/runner/dependencies/lib/cmake/orc
	            /home/runner/dependencies/lib/pkgconfig/arrow*
	            /home/runner/dependencies/lib/pkgconfig/liborc*
	            /home/runner/dependencies/include/arrow
	            /home/runner/dependencies/include/parquet
	            /home/runner/dependencies/include/orc
	          key: arrow-orc-cache-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh') }}
	          restore-keys: arrow-orc-cache-

	      - name: Download build artifact
	        uses: actions/download-artifact@v4
	        with:
	          name: tpch-benchmark-orc-async
	          path: .

	      - name: Setup benchmark executable
	        run: |
	          chmod +x tpch_benchmark
	          mkdir -p benchmark-results
	          # Setup library path to use cached dependencies. The ":+" expansion
	          # avoids a trailing ":" (an empty entry, i.e. the current directory)
	          # when LD_LIBRARY_PATH is not already set.
	          echo "LD_LIBRARY_PATH=/home/runner/dependencies/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

	      - name: Verify library installation
	        run: |
	          # Prints up to five matches for a library prefix, or a "not found"
	          # line. The result is captured first because the exit status of
	          # "find | head" is head's, which is 0 even with no matches.
	          report_lib() {
	            local found
	            found="$(find /home/runner/dependencies -name "$1*" 2>/dev/null | head -5)"
	            if [ -n "$found" ]; then
	              echo "$found"
	            else
	              echo "$1 not found"
	            fi
	          }

	          echo "=== Current LD_LIBRARY_PATH ==="
	          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
	          echo ""
	          echo "=== Searching for libparquet ==="
	          report_lib libparquet
	          echo ""
	          echo "=== Searching for liborc ==="
	          report_lib liborc

	      # continue-on-error keeps the job green so logs are always uploaded;
	      # a failing step is still flagged in the run UI.
	      - name: Run format coverage benchmark
	        continue-on-error: true
	        run: |
	          LOG_FILE="benchmark-results/${{ matrix.format }}_${{ matrix.table }}_baseline.log"

	          # The step's default shell has no pipefail, so the pipeline reports
	          # tee's status; the benchmark's own status is read from PIPESTATUS.
	          timeout 600 ./tpch_benchmark \
	            --use-dbgen \
	            --scale-factor 1 \
	            --format ${{ matrix.format }} \
	            --table ${{ matrix.table }} \
	            --output-dir benchmark-results/ \
	            2>&1 | grep -v "^DEBUG:" | tee "$LOG_FILE"
	          BENCH_STATUS=${PIPESTATUS[0]}

	          # Fail if process dumped core or was killed by a signal (status > 128)
	          if grep -q "dumped core" "$LOG_FILE" || [ "$BENCH_STATUS" -gt 128 ]; then
	            echo "ERROR: Benchmark crashed (exit status $BENCH_STATUS)"
	            exit 1
	          fi
	          # Other failures (including the 124 timeout status) stay non-fatal,
	          # as before, but are now visible in the step output.
	          if [ "$BENCH_STATUS" -ne 0 ]; then
	            echo "WARNING: Benchmark exited with status $BENCH_STATUS"
	          fi

	      - name: Upload benchmark logs
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark-logs-suite-${{ matrix.format }}-${{ matrix.table }}
	          path: benchmark-results/${{ matrix.format }}_${{ matrix.table }}_baseline.log
	          retention-days: 30
	          if-no-files-found: ignore

	  # Runs the parquet benchmark for each (mode, table) pair; the mode selects
	  # an optional command-line flag passed to the executable.
	  optimization-benchmarks:
	    name: Optimization Benchmarks (${{ matrix.mode }})
	    runs-on: ubuntu-22.04
	    needs: build-matrix
	    timeout-minutes: 20

	    strategy:
	      fail-fast: false
	      matrix:
	        mode: [baseline, zero-copy, true-zero-copy]
	        table: [lineitem, orders, part]

	    steps:
	      - name: Checkout code (for dists.dss and scripts)
	        uses: actions/checkout@v4
	        with:
	          fetch-depth: 1

	      - name: Checkout tpch submodule (for dists.dss)
	        run: git submodule update --init --depth 1 -- third_party/tpch

	      - name: Install dependencies
	        run: bash scripts/ci_install_deps.sh

	      - name: Create cache directories
	        run: |
	          mkdir -p /home/runner/dependencies/{lib,include}/{cmake,pkgconfig}
	          mkdir -p /home/runner/dependencies/include/{arrow,parquet,orc}

	      # Restore only: this job consumes the libraries and must not save a
	      # cache entry of its own from each matrix leg.
	      - name: Restore Arrow and ORC cache
	        uses: actions/cache/restore@v4
	        with:
	          path: |
	            /home/runner/dependencies/lib/libarrow*
	            /home/runner/dependencies/lib/libparquet*
	            /home/runner/dependencies/lib/liborc*
	            /home/runner/dependencies/lib/cmake/arrow
	            /home/runner/dependencies/lib/cmake/orc
	            /home/runner/dependencies/lib/pkgconfig/arrow*
	            /home/runner/dependencies/lib/pkgconfig/liborc*
	            /home/runner/dependencies/include/arrow
	            /home/runner/dependencies/include/parquet
	            /home/runner/dependencies/include/orc
	          key: arrow-orc-cache-${{ hashFiles('scripts/ci_build_arrow_and_orc.sh') }}
	          restore-keys: arrow-orc-cache-

	      - name: Download build artifact
	        uses: actions/download-artifact@v4
	        with:
	          name: tpch-benchmark-orc-async
	          path: .

	      - name: Setup benchmark executable
	        run: |
	          chmod +x tpch_benchmark
	          mkdir -p benchmark-results
	          # Setup library path to use cached dependencies. The ":+" expansion
	          # avoids a trailing ":" (an empty entry, i.e. the current directory)
	          # when LD_LIBRARY_PATH is not already set.
	          echo "LD_LIBRARY_PATH=/home/runner/dependencies/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

	      - name: Verify library installation
	        run: |
	          # Prints up to five matches for a library prefix, or a "not found"
	          # line. The result is captured first because the exit status of
	          # "find | head" is head's, which is 0 even with no matches.
	          report_lib() {
	            local found
	            found="$(find /home/runner/dependencies -name "$1*" 2>/dev/null | head -5)"
	            if [ -n "$found" ]; then
	              echo "$found"
	            else
	              echo "$1 not found"
	            fi
	          }

	          echo "=== Current LD_LIBRARY_PATH ==="
	          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
	          echo ""
	          echo "=== Searching for libparquet ==="
	          report_lib libparquet
	          echo ""
	          echo "=== Searching for liborc ==="
	          report_lib liborc

	      # continue-on-error keeps the job green so logs are always uploaded;
	      # a failing step is still flagged in the run UI.
	      - name: Run optimization benchmark
	        continue-on-error: true
	        run: |
	          LOG_FILE="benchmark-results/parquet_${{ matrix.table }}_${{ matrix.mode }}.log"

	          # baseline passes no extra flag; the other modes map to one flag each.
	          MODE_FLAGS=""
	          case "${{ matrix.mode }}" in
	            zero-copy) MODE_FLAGS="--zero-copy" ;;
	            true-zero-copy) MODE_FLAGS="--true-zero-copy" ;;
	          esac

	          # The step's default shell has no pipefail, so the pipeline reports
	          # tee's status; the benchmark's own status is read from PIPESTATUS.
	          # $MODE_FLAGS is intentionally unquoted so an empty value adds no
	          # argument.
	          timeout 600 ./tpch_benchmark \
	            --use-dbgen \
	            --scale-factor 1 \
	            --format parquet \
	            --table ${{ matrix.table }} \
	            --output-dir benchmark-results/ \
	            $MODE_FLAGS \
	            2>&1 | grep -v "^DEBUG:" | tee "$LOG_FILE"
	          BENCH_STATUS=${PIPESTATUS[0]}

	          # Fail if process dumped core or was killed by a signal (status > 128)
	          if grep -q "dumped core" "$LOG_FILE" || [ "$BENCH_STATUS" -gt 128 ]; then
	            echo "ERROR: Benchmark crashed (exit status $BENCH_STATUS)"
	            exit 1
	          fi
	          # Other failures (including the 124 timeout status) stay non-fatal,
	          # as before, but are now visible in the step output.
	          if [ "$BENCH_STATUS" -ne 0 ]; then
	            echo "WARNING: Benchmark exited with status $BENCH_STATUS"
	          fi

	      - name: Upload benchmark logs
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark-logs-optimization-${{ matrix.mode }}-${{ matrix.table }}
	          path: benchmark-results/parquet_${{ matrix.table }}_${{ matrix.mode }}.log
	          retention-days: 30
	          if-no-files-found: ignore

	  # Collects the per-leg log artifacts, derives a JSON summary and an HTML
	  # report from them, and publishes everything as one artifact. Runs even
	  # when benchmark jobs failed.
	  results-aggregation:
	    name: Aggregate Results
	    runs-on: ubuntu-22.04
	    needs: [benchmark-suite, optimization-benchmarks]
	    if: always()

	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v4

	      # Only the log artifacts are needed here; without a pattern the build
	      # executable artifact would be downloaded as well.
	      - name: Download all benchmark artifacts
	        uses: actions/download-artifact@v4
	        with:
	          pattern: benchmark-logs-*
	          path: all-results

	      - name: Prepare benchmark results directory
	        run: |
	          # all-results may be absent when no log artifact was uploaded.
	          mkdir -p benchmark-results all-results
	          find all-results -name "*.log" -exec cp {} benchmark-results/ \;

	      - name: Generate summary report
	        run: |
	          # Write to a temp file first so a failing parser never leaves an
	          # empty or partial ci_summary.json for the later steps to pick up.
	          SUMMARY_TMP="$RUNNER_TEMP/ci_summary.json"
	          if python3 scripts/parse_benchmark_logs.py benchmark-results > "$SUMMARY_TMP"; then
	            mv "$SUMMARY_TMP" benchmark-results/ci_summary.json
	          else
	            echo "WARNING: summary generation failed; no ci_summary.json produced"
	            rm -f "$SUMMARY_TMP"
	          fi

	      - name: Generate HTML visualization
	        if: always()
	        run: |
	          if [ -f benchmark-results/ci_summary.json ]; then
	            python3 scripts/visualize_benchmark_results.py benchmark-results/ci_summary.json benchmark-results/report.html
	          else
	            echo "No summary JSON found, skipping visualization"
	          fi

	      # always() so the logs and summary are still published when the
	      # visualization step fails.
	      - name: Upload aggregated results
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark-results
	          path: |
	            benchmark-results/*.log
	            benchmark-results/*.json
	            benchmark-results/*.html
	          retention-days: 30
	          if-no-files-found: ignore

	      - name: Print summary
	        if: always()
	        run: |
	          if [ -f benchmark-results/ci_summary.json ]; then
	            echo "=== Benchmark Summary ==="
	            python3 -m json.tool benchmark-results/ci_summary.json || cat benchmark-results/ci_summary.json
	          else
	            echo "No summary generated (logs may not exist yet)"
	          fi

	  # Single pass/fail gate that depends only on the build job's result;
	  # benchmark outcomes do not influence it.
	  status-check:
	    name: Status Check
	    runs-on: ubuntu-22.04
	    needs: [build-matrix]
	    if: always()

	    steps:
	      - name: Check build status
	        run: |
	          if [ "${{ needs.build-matrix.result }}" = "success" ]; then
	            echo "✓ All builds passed"
	            exit 0
	          else
	            echo "✗ Some builds failed"
	            exit 1
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

CI - Build & Benchmark #66

Workflow file

CI - Build & Benchmark #66

Uh oh!

Workflow file for this run