Skip to content

Commit e6d0e08

Browse files
authored
Use R2 bucket for duckdb libraries (#8486)
- Use the ci-builds.vortex.dev R2 bucket as the source for DuckDB release and commit builds. - Mirror release builds from the DuckDB GitHub releases page; build commits from source. - For commit builds, also try to download from R2 (useful for testing pre-releases in CI). - Gate test_geometry for DuckDB to release builds only: running it for commit builds would require bundling the "spatial" extension, which is hard on macOS due to openssl-dev symbols. Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent df3e829 commit e6d0e08

7 files changed

Lines changed: 323 additions & 19 deletions

File tree

.github/workflows/ci.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,25 @@ env:
2525
NIGHTLY_TOOLCHAIN: nightly-2026-02-05
2626

2727
jobs:
28+
# Calls the reusable DuckDB R2 mirror workflow. Only pull_request events run
# it; on every other event this job is skipped.
duckdb-mirror:
  name: Mirror DuckDB to R2
  if: ${{ github.event_name == 'pull_request' }}
  uses: ./.github/workflows/duckdb-r2.yml
  secrets: inherit
33+
34+
# Gate job that DuckDB-dependent jobs list in `needs`. `!cancelled()` lets it
# run even when the mirror job was skipped; it fails only when the mirror job
# itself reported a failure.
duckdb-ready:
  name: DuckDB libraries available in R2
  needs: [duckdb-mirror]
  if: ${{ !cancelled() }}
  runs-on: ubuntu-latest
  timeout-minutes: 5
  steps:
    - name: Verify DuckDB mirror
      if: needs.duckdb-mirror.result == 'failure'
      run: |
        echo "DuckDB mirror failed; downstream builds would 404"
        exit 1
46+
2847
lint-toml:
2948
runs-on: ubuntu-latest
3049
timeout-minutes: 10
@@ -115,6 +134,7 @@ jobs:
115134

116135
rust-docs:
117136
name: "Rust (docs)"
137+
needs: duckdb-ready
118138
timeout-minutes: 30
119139
runs-on: >-
120140
${{ github.repository == 'vortex-data/vortex'
@@ -204,6 +224,7 @@ jobs:
204224

205225
rust-lint:
206226
name: "Rust (lint)"
227+
needs: duckdb-ready
207228
timeout-minutes: 30
208229
runs-on: >-
209230
${{ github.repository == 'vortex-data/vortex'
@@ -301,6 +322,7 @@ jobs:
301322
302323
rust-test-other:
303324
name: "Rust tests (${{ matrix.os }})"
325+
needs: duckdb-ready
304326
timeout-minutes: 30
305327
strategy:
306328
fail-fast: false
@@ -422,6 +444,7 @@ jobs:
422444

423445
sqllogic-test:
424446
name: "SQL logic tests"
447+
needs: duckdb-ready
425448
runs-on: >-
426449
${{ github.repository == 'vortex-data/vortex'
427450
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=sql-logic-test', github.run_id)

.github/workflows/duckdb-r2.yml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
name: DuckDB R2 mirror

# Reusable workflow that makes sure the DuckDB archives for the pinned version
# exist in R2.
#
# For each expected archive:
#   - if it is already present in R2, nothing is built or uploaded;
#   - otherwise, if the version is a tag, the release archive is downloaded
#     from the DuckDB GitHub releases page and uploaded to R2;
#   - otherwise (a commit), DuckDB is built from source and uploaded to R2.
on:
  workflow_call: {}

# Runs for the same PR (or ref) share one group and are never cancelled
# mid-flight.
concurrency:
  group: duckdb-r2-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: false

permissions:
  contents: read

env:
  PUBLIC_BASE_URL: "https://ci-builds.vortex.dev"
  R2_BUCKET: "duckdb-builds"
  R2_ENDPOINT_URL: "https://52bdeab5651e1584747feefd051fd566.r2.cloudflarestorage.com"

jobs:
  # Resolves the DuckDB version and reports which archives are missing in R2.
  check:
    name: "Resolve DuckDB version and check R2"
    runs-on: ubuntu-latest
    timeout-minutes: 10
    outputs:
      version: ${{ steps.resolve.outputs.version }}
      ref_dir: ${{ steps.resolve.outputs.ref_dir }}
      release: ${{ steps.resolve.outputs.release }}
      matrix: ${{ steps.resolve.outputs.matrix }}
      any_missing: ${{ steps.resolve.outputs.any_missing }}
    steps:
      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
      - name: Resolve version and check R2
        id: resolve
        run: sh scripts/duckdb-r2-resolve.sh

  # One matrix entry per missing archive: download or build it, then upload.
  mirror:
    name: "Mirror DuckDB ${{ matrix.archive }} to R2"
    needs: check
    # Only runs in the upstream repository and for PRs whose head branch lives
    # in that same repository (i.e. not for PRs from forks).
    if: >-
      needs.check.outputs.any_missing == 'true' &&
      github.repository == 'vortex-data/vortex' &&
      github.event.pull_request.head.repo.full_name == github.repository
    environment: duckdb-build
    timeout-minutes: 120
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.check.outputs.matrix) }}
    runs-on: ${{ matrix.runner }}
    steps:
      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6

      # Build tools are only needed when building a commit from source.
      - name: Install build dependencies (Linux)
        if: needs.check.outputs.release != 'true' && runner.os == 'Linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y ninja-build libcurl4-openssl-dev zip unzip

      # MacOS already has ninja and p7zip

      - name: Prepare ${{ matrix.archive }}
        env:
          ARCHIVE: ${{ matrix.archive }}
          REF_DIR: ${{ needs.check.outputs.ref_dir }}
          RELEASE: ${{ needs.check.outputs.release }}
          PLATFORM_OS: ${{ matrix.os }}
        run: sh scripts/duckdb-r2-prepare.sh

      - name: Upload to R2
        # The script relies on bash-only options (-E, pipefail).
        shell: bash
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_R2_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_R2_SECRET_ACCESS_KEY }}
          AWS_REGION: "us-east-1"
          AWS_ENDPOINT_URL: ${{ env.R2_ENDPOINT_URL }}
          # Passed through the environment rather than interpolated into the
          # script text, so the values are never parsed as shell syntax.
          ARCHIVE: ${{ matrix.archive }}
          REF_DIR: ${{ needs.check.outputs.ref_dir }}
        run: |
          set -Eeuo pipefail
          python3 scripts/s3-upload.py \
            --bucket "$R2_BUCKET" \
            --key "${REF_DIR}/${ARCHIVE}" \
            --body "$ARCHIVE" \
            --checksum-algorithm CRC32

.github/workflows/rust-instrumented.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,28 @@ env:
2222
NIGHTLY_TOOLCHAIN: nightly-2026-02-05
2323

2424
jobs:
25+
# Calls the reusable DuckDB R2 mirror workflow. Only pull_request events run
# it; on every other event this job is skipped.
duckdb-mirror:
  name: Mirror DuckDB to R2
  if: ${{ github.event_name == 'pull_request' }}
  uses: ./.github/workflows/duckdb-r2.yml
  secrets: inherit
30+
31+
# Gate job that the coverage job lists in `needs`. `!cancelled()` lets it run
# even when the mirror job was skipped; it fails only when the mirror job
# itself reported a failure.
duckdb-ready:
  name: DuckDB libraries available in R2
  needs: [duckdb-mirror]
  if: ${{ !cancelled() }}
  runs-on: ubuntu-latest
  timeout-minutes: 5
  steps:
    - name: Verify DuckDB mirror
      if: needs.duckdb-mirror.result == 'failure'
      run: |
        echo "DuckDB mirror failed"
        exit 1
43+
2544
rust-coverage:
2645
name: "Rust tests (coverage) (${{ matrix.suite }})"
46+
needs: duckdb-ready
2747
timeout-minutes: 30
2848
permissions:
2949
id-token: write

scripts/duckdb-r2-prepare.sh

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/bin/sh

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors
#
# Produce $ARCHIVE for upload to R2: either download the DuckDB release for
# $REF_DIR, or build DuckDB from source at commit $REF_DIR and pack the
# libraries and headers.
#
# Required env vars:
#   ARCHIVE      name of the zip to produce (also the release asset name)
#   REF_DIR      release tag or commit to fetch
#   RELEASE      "true" to mirror a release, anything else to build from source
#   PLATFORM_OS  "osx" selects the macOS-specific extract and build options

set -eu

# Fail up front, naming the variable, when an input is unset OR empty.
# `set -u` alone only trips on first use and lets empty values through, which
# would produce confusing download URLs further down.
: "${ARCHIVE:?ARCHIVE must be set}"
: "${REF_DIR:?REF_DIR must be set}"
: "${RELEASE:?RELEASE must be set}"
: "${PLATFORM_OS:?PLATFORM_OS must be set}"

if [ "$RELEASE" = "true" ]; then
    echo "Mirroring DuckDB release ${REF_DIR}/${ARCHIVE}"
    curl -fSL --retry 3 -o "$ARCHIVE" \
        "https://github.com/duckdb/duckdb/releases/download/${REF_DIR}/${ARCHIVE}"
else
    echo "Building DuckDB commit ${REF_DIR} from source"

    curl -fSL --retry 3 -o duckdb-src.zip \
        "https://github.com/duckdb/duckdb/archive/${REF_DIR}.zip"

    # On macOS, unzip fails on a source file with a non-ASCII name:
    #   cannot create <...>/issue2628_<non-ASCII name>.csv: Illegal byte sequence
    # so 7z is used to extract there instead.
    if [ "$PLATFORM_OS" = "osx" ]; then
        7z x duckdb-src.zip
    else
        unzip -q duckdb-src.zip
    fi

    src_dir="duckdb-${REF_DIR}"
    extra=""
    if [ "$PLATFORM_OS" = "osx" ]; then
        extra="OSX_BUILD_UNIVERSAL=1"
    fi

    # $extra is intentionally unquoted: when empty it must expand to no
    # argument at all rather than to an empty-string argument.
    make -C "$src_dir" \
        GEN=ninja \
        DISABLE_SANITIZER=1 \
        THREADSAN=0 \
        BUILD_SHELL=false \
        BUILD_UNITTESTS=false \
        ENABLE_UNITTEST_CPP_TESTS=false \
        BUILD_EXTENSIONS="parquet;tpch;tpcds" \
        $extra

    lib_dir="${src_dir}/build/release/src"
    stage="stage"
    mkdir -p "$stage"

    # The shared library name differs by platform and the headers are copied
    # best-effort, so those copies tolerate a missing file. The static library
    # is required: its copy is allowed to fail the script.
    cp -a "${lib_dir}/libduckdb.so" "$stage/" 2>/dev/null || true
    cp -a "${lib_dir}/libduckdb.dylib" "$stage/" 2>/dev/null || true
    cp -a "${lib_dir}/libduckdb_static.a" "$stage/"
    cp -a "${src_dir}/src/include/duckdb.h" "$stage/" 2>/dev/null || true
    cp -a "${src_dir}/src/include/duckdb.hpp" "$stage/" 2>/dev/null || true

    # Zip from inside the staging dir so the archive has no leading directory.
    ( cd "$stage" && zip -r "../${ARCHIVE}" . )
fi

# Show the result; with `set -e` this also fails the script if no archive
# was produced.
ls -la "$ARCHIVE"

scripts/duckdb-r2-resolve.sh

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/bin/sh

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors
#
# Resolve the DuckDB version pinned in vortex-duckdb/build.rs and check which
# archives are present in R2. Writes "version", "ref_dir", "release",
# "matrix", and "any_missing" to $GITHUB_OUTPUT.
#
# Required env vars: PUBLIC_BASE_URL, GITHUB_OUTPUT

set -eu

# Fail up front, naming the variable, when an input is unset or empty.
: "${PUBLIC_BASE_URL:?PUBLIC_BASE_URL must be set}"
: "${GITHUB_OUTPUT:?GITHUB_OUTPUT must be set}"

# Note: -P (PCRE, needed for \K) is a GNU grep feature.
version=$(grep -oP 'DEFAULT_DUCKDB_VERSION:\s*&str\s*=\s*"\K[^"]+' \
    vortex-duckdb/build.rs) || {
    echo "could not find DEFAULT_DUCKDB_VERSION in vortex-duckdb/build.rs" >&2
    exit 1
}

# vortex-duckdb/build.rs: >=2 dot-separated numbers are a
# tagged release (ref dir "vX.Y.Z"), anything else is a commit.
ref=${version#v}
if printf '%s' "$ref" | grep -Eq '^[0-9]+(\.[0-9]+)+$'; then
    release=true
    ref_dir="v$ref"
else
    release=false
    ref_dir="$ref"
fi

echo "DuckDB $version release=$release"

# One JSON object per missing archive is appended here, one per line.
entries=$(mktemp)
trap 'rm -f "$entries"' EXIT

for archive in \
    libduckdb-linux-amd64.zip \
    libduckdb-linux-arm64.zip \
    libduckdb-osx-universal.zip; do

    url="${PUBLIC_BASE_URL}/${ref_dir}/${archive}"
    # On a transport failure curl still prints "000" for %{http_code} before
    # exiting non-zero, so only override the variable instead of echoing a
    # second "000" into the captured output.
    code=$(curl -o /dev/null -s -w '%{http_code}' --head "$url") || code=000
    if [ "$code" = "200" ]; then
        echo "present in R2: $archive"
        continue
    fi

    echo "missing in R2 (HTTP $code): $archive"
    case "$archive" in
        *linux-amd64*)   runner=ubuntu-latest;    os=linux; arch=amd64 ;;
        *linux-arm64*)   runner=ubuntu-24.04-arm; os=linux; arch=arm64 ;;
        *osx-universal*) runner=macos-14;         os=osx;   arch=universal ;;
        *)
            # Without this arm an unmapped archive would silently reuse the
            # runner/os/arch left over from the previous iteration.
            echo "no runner mapping for archive: $archive" >&2
            exit 1
            ;;
    esac
    jq -nc \
        --arg archive "$archive" \
        --arg runner "$runner" \
        --arg os "$os" \
        --arg arch "$arch" \
        '{archive: $archive, runner: $runner, os: $os, arch: $arch}' >> "$entries"
done

if [ -s "$entries" ]; then
    # Slurp the per-archive objects into {"include": [...]}.
    matrix=$(jq -sc '{include: .}' < "$entries")
    any_missing=true
else
    matrix='{"include":[]}'
    any_missing=false
fi

echo "any_missing=$any_missing"

{
    echo "version=$version"
    echo "ref_dir=$ref_dir"
    echo "release=$release"
    echo "matrix=$matrix"
    echo "any_missing=$any_missing"
} >> "$GITHUB_OUTPUT"

0 commit comments

Comments
 (0)