Skip to content

Commit 7b99986

Browse files
committed
ci: parallelize image builds with matrix strategy and add reprovision step
1 parent 4f21936 commit 7b99986

1 file changed

Lines changed: 150 additions & 47 deletions

File tree

.github/workflows/build-images.yml

Lines changed: 150 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,31 @@ on:
99

1010
workflow_dispatch:
1111
inputs:
12-
toolchain:
13-
description: 'Toolchain to build'
12+
toolchains:
13+
description: 'Toolchains to build (comma-separated, or "all")'
1414
required: true
15-
default: 'base'
16-
type: choice
17-
options:
18-
- base
19-
- node-22
20-
- python-3.12
21-
- go-1.22
15+
default: 'all'
16+
type: string
17+
reprovision:
18+
description: 'Trigger reprovision on nodes after upload'
19+
required: false
20+
default: true
21+
type: boolean
2222

2323
concurrency:
2424
group: build-images
2525
cancel-in-progress: false
2626

2727
env:
2828
CARGO_TERM_COLOR: always
29+
ALL_TOOLCHAINS: base,node-22,bun,python-3.12,go-1.22
2930

3031
jobs:
31-
build:
32-
name: Build Images
32+
# Build the guest agent once, shared across all toolchain builds
33+
build-agent:
34+
name: Build Guest Agent
3335
runs-on: ubuntu-latest
34-
timeout-minutes: 30
36+
timeout-minutes: 15
3537
steps:
3638
- uses: actions/checkout@v4
3739

@@ -41,18 +43,68 @@ jobs:
4143

4244
- uses: Swatinem/rust-cache@v2
4345

44-
- name: Install build dependencies
45-
run: sudo apt-get update && sudo apt-get install -y protobuf-compiler musl-tools debootstrap e2fsprogs
46+
- name: Install protobuf compiler and musl tools
47+
run: sudo apt-get update && sudo apt-get install -y protobuf-compiler musl-tools
4648

4749
- name: Build guest agent (static musl binary)
50+
run: cargo build --release --package sandchest-agent --target x86_64-unknown-linux-musl --features vsock
51+
52+
- name: Verify vsock support
4853
run: |
49-
cargo clean -p sandchest-agent --target x86_64-unknown-linux-musl --release
50-
cargo build --release --package sandchest-agent --target x86_64-unknown-linux-musl --features vsock
54+
VSOCK_COUNT=$(strings target/x86_64-unknown-linux-musl/release/sandchest-agent | grep -c vsock || true)
55+
echo "vsock string count: $VSOCK_COUNT"
56+
if [ "$VSOCK_COUNT" -lt 10 ]; then
57+
echo "ERROR: agent binary missing vsock support"
58+
exit 1
59+
fi
5160
52-
- name: Verify agent has vsock support
61+
- uses: actions/upload-artifact@v4
62+
with:
63+
name: guest-agent
64+
path: target/x86_64-unknown-linux-musl/release/sandchest-agent
65+
retention-days: 1
66+
67+
# Compute the matrix of toolchains to build
68+
matrix:
69+
name: Compute Matrix
70+
runs-on: ubuntu-latest
71+
outputs:
72+
toolchains: ${{ steps.set.outputs.toolchains }}
73+
steps:
74+
- id: set
5375
run: |
54-
strings target/x86_64-unknown-linux-musl/release/sandchest-agent | grep -c vsock || { echo "ERROR: agent binary missing vsock support"; exit 1; }
55-
ls -la target/x86_64-unknown-linux-musl/release/sandchest-agent
76+
# Resolve which toolchains to build and publish them as a JSON array in the
# `toolchains` step output, for use as the build matrix.
#
# NOTE(review): the dispatch input is expanded into the script text before
# bash runs it; passing it through a step-level `env:` entry would avoid
# shell injection — confirm who is allowed to dispatch this workflow.
INPUT="${{ github.event.inputs.toolchains || 'all' }}"
if [ "$INPUT" = "all" ]; then
# Single source of truth: the workflow-level ALL_TOOLCHAINS env var,
# instead of a second hard-coded copy of the list that can drift.
LIST="${ALL_TOOLCHAINS:?ALL_TOOLCHAINS is not set}"
else
LIST="$INPUT"
fi
# Convert comma-separated to a compact JSON array. Whitespace around each
# entry is trimmed and empty entries are dropped, so "base, node-22," yields
# ["base","node-22"] rather than matrix values containing spaces or "".
JSON=$(printf '%s' "$LIST" | jq -Rsc 'split(",") | map(sub("^\\s+"; "") | sub("\\s+$"; "")) | map(select(length > 0))')
# An empty matrix makes the dependent job fail with an opaque error; stop here
# with a clear message instead.
if [ "$JSON" = "[]" ]; then
echo "ERROR: no toolchains selected (input: '$INPUT')"
exit 1
fi
echo "toolchains=$JSON" >> "$GITHUB_OUTPUT"
echo "Building: $JSON"
87+
88+
# Build each toolchain in parallel
89+
build-image:
90+
name: Build ${{ matrix.toolchain }}
91+
needs: [build-agent, matrix]
92+
runs-on: ubuntu-latest
93+
timeout-minutes: 30
94+
strategy:
95+
fail-fast: false
96+
matrix:
97+
toolchain: ${{ fromJSON(needs.matrix.outputs.toolchains) }}
98+
steps:
99+
- uses: actions/checkout@v4
100+
101+
- uses: actions/download-artifact@v4
102+
with:
103+
name: guest-agent
104+
path: agent-bin/
105+
106+
- name: Install build dependencies
107+
run: sudo apt-get update && sudo apt-get install -y debootstrap e2fsprogs
56108

57109
- name: Fetch kernel
58110
working-directory: images
@@ -61,69 +113,120 @@ jobs:
61113
- name: Build base rootfs with guest agent
62114
working-directory: images
63115
run: |
116+
chmod +x ../agent-bin/sandchest-agent
64117
sudo make rootfs \
65-
AGENT_BIN=../target/x86_64-unknown-linux-musl/release/sandchest-agent
118+
TOOLCHAIN=${{ matrix.toolchain }} \
119+
AGENT_BIN=../agent-bin/sandchest-agent
66120
67121
- name: Install toolchain
68-
if: ${{ github.event.inputs.toolchain && github.event.inputs.toolchain != 'base' }}
122+
if: matrix.toolchain != 'base'
69123
working-directory: images
70-
run: |
71-
sudo make toolchain \
72-
TOOLCHAIN=${{ github.event.inputs.toolchain }}
124+
run: sudo make toolchain TOOLCHAIN=${{ matrix.toolchain }}
73125

74-
- name: Verify agent inside rootfs has vsock
126+
- name: Verify agent inside rootfs
75127
run: |
76-
ROOTFS="images/output/ubuntu-22.04/${{ github.event.inputs.toolchain || 'base' }}/rootfs.ext4"
128+
ROOTFS="images/output/ubuntu-22.04/${{ matrix.toolchain }}/rootfs.ext4"
77129
MNT=$(mktemp -d)
78130
sudo mount -o loop,ro "$ROOTFS" "$MNT"
79131
AGENT="$MNT/usr/local/bin/sandchest-guest-agent"
80-
echo "Agent binary inside rootfs:"
81132
ls -la "$AGENT"
82133
VSOCK_COUNT=$(strings "$AGENT" | grep -c vsock || true)
83134
echo "vsock string count: $VSOCK_COUNT"
84135
sudo umount "$MNT" && rmdir "$MNT"
85136
if [ "$VSOCK_COUNT" -lt 10 ]; then
86-
echo "ERROR: agent binary inside rootfs is missing vsock support ($VSOCK_COUNT strings)"
137+
echo "ERROR: agent binary inside rootfs is missing vsock support"
87138
exit 1
88139
fi
89140
90141
- name: Validate image
91142
working-directory: images
92-
run: ./scripts/validate-image.sh --output output/ubuntu-22.04/${{ github.event.inputs.toolchain || 'base' }}
143+
run: ./scripts/validate-image.sh --output output/ubuntu-22.04/${{ matrix.toolchain }}
93144

94-
- name: Upload kernel to R2
145+
- name: Upload rootfs to R2
95146
env:
96147
AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
97148
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
98149
AWS_DEFAULT_REGION: auto
99150
run: |
100-
aws s3 cp images/kernel/vmlinux-5.10 \
101-
"s3://${{ secrets.R2_BUCKET }}/binaries/vmlinux/latest/vmlinux" \
151+
TOOLCHAIN=${{ matrix.toolchain }}
152+
153+
# Upload to provisioning path (used by auto-provision)
154+
aws s3 cp "images/output/ubuntu-22.04/${TOOLCHAIN}/rootfs.ext4" \
155+
"s3://${{ secrets.R2_BUCKET }}/images/ubuntu-22.04/${TOOLCHAIN}/rootfs.ext4" \
102156
--endpoint-url "${{ secrets.R2_ENDPOINT }}"
103157
104-
- name: Upload rootfs to R2
158+
# Also upload to versioned path for rollback
159+
aws s3 cp "images/output/ubuntu-22.04/${TOOLCHAIN}/rootfs.ext4" \
160+
"s3://${{ secrets.R2_BUCKET }}/images/ubuntu-22.04/${TOOLCHAIN}/${{ github.sha }}/rootfs.ext4" \
161+
--endpoint-url "${{ secrets.R2_ENDPOINT }}"
162+
163+
- name: Upload kernel to R2
164+
if: matrix.toolchain == 'base'
105165
env:
106166
AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
107167
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
108168
AWS_DEFAULT_REGION: auto
109169
run: |
110-
TOOLCHAIN=${{ github.event.inputs.toolchain || 'base' }}
111-
aws s3 cp "images/output/ubuntu-22.04/${TOOLCHAIN}/rootfs.ext4" \
112-
"s3://${{ secrets.R2_BUCKET }}/binaries/rootfs/latest/rootfs.ext4" \
113-
--endpoint-url "${{ secrets.R2_ENDPOINT }}"
114-
aws s3 cp "images/output/ubuntu-22.04/${TOOLCHAIN}/rootfs.ext4" \
115-
"s3://${{ secrets.R2_BUCKET }}/binaries/rootfs/${{ github.sha }}/rootfs.ext4" \
170+
aws s3 cp images/kernel/vmlinux-5.10 \
171+
"s3://${{ secrets.R2_BUCKET }}/images/vmlinux-5.10" \
116172
--endpoint-url "${{ secrets.R2_ENDPOINT }}"
117173
118-
- name: Upload image artifacts
119-
uses: actions/upload-artifact@v4
174+
- uses: actions/upload-artifact@v4
120175
with:
121-
name: vm-images-${{ github.sha }}
176+
name: rootfs-${{ matrix.toolchain }}-${{ github.sha }}
122177
path: |
123-
images/kernel/vmlinux-5.10
124-
images/output/ubuntu-22.04/*/rootfs.ext4
125-
images/output/ubuntu-22.04/*/rootfs.sha256
178+
images/output/ubuntu-22.04/${{ matrix.toolchain }}/rootfs.ext4
179+
images/output/ubuntu-22.04/${{ matrix.toolchain }}/rootfs.sha256
126180
retention-days: 30
127181

128-
# Images are deployed to servers via the admin "Redeploy Daemon" button,
129-
# which pulls from R2 and patches the rootfs in-place. No direct SSH needed.
182+
# Upload manifest and trigger reprovision after all images are built
finalize:
  name: Upload Manifest & Reprovision
  needs: [build-image, matrix]
  runs-on: ubuntu-latest
  # Run only when every matrix build succeeded. The previous
  # `always() && !contains(..., 'failure')` check also passed when
  # build-image was skipped (e.g. the agent build failed) or cancelled,
  # which would reprovision nodes without freshly uploaded images.
  if: needs.build-image.result == 'success'
  steps:
    - uses: actions/checkout@v4

    - name: Upload manifest to R2
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
        AWS_DEFAULT_REGION: auto
      run: |
        aws s3 cp images/manifest.json \
          "s3://${{ secrets.R2_BUCKET }}/images/manifest.json" \
          --endpoint-url "${{ secrets.R2_ENDPOINT }}"

    - name: Trigger reprovision on all nodes
      # Compared against the string 'false' so that an empty or unset input
      # still reprovisions; only an explicit opt-out skips this step.
      if: github.event.inputs.reprovision != 'false'
      env:
        ADMIN_API_URL: ${{ secrets.API_URL }}
        ADMIN_API_TOKEN: ${{ secrets.ADMIN_API_TOKEN }}
      run: |
        # Reprovisioning is best-effort: missing credentials, an unreachable
        # API, or a failed per-node request never fail the job.
        if [ -z "$ADMIN_API_URL" ] || [ -z "$ADMIN_API_TOKEN" ]; then
          echo "API_URL or ADMIN_API_TOKEN not set, skipping reprovision"
          exit 0
        fi

        # Get list of online nodes. The request is checked separately from
        # the jq filter so a failed request is reported as such instead of
        # being mistaken for "no online nodes".
        if ! RESPONSE=$(curl -sf \
          -H "Authorization: Bearer $ADMIN_API_TOKEN" \
          "$ADMIN_API_URL/v1/admin/nodes"); then
          echo "::warning::Failed to list nodes from admin API, skipping reprovision"
          exit 0
        fi
        NODES=$(printf '%s' "$RESPONSE" | jq -r '.nodes[] | select(.status == "online") | .id')

        if [ -z "$NODES" ]; then
          echo "No online nodes found, skipping reprovision"
          exit 0
        fi

        # Trigger reprovision on each node; a failed request is replaced by
        # a placeholder error object so the loop continues with the rest.
        for NODE_ID in $NODES; do
          echo "Reprovisioning node $NODE_ID..."
          RESULT=$(curl -sf \
            -X POST \
            -H "Authorization: Bearer $ADMIN_API_TOKEN" \
            -H "Content-Type: application/json" \
            -d '{"image_refs": []}' \
            "$ADMIN_API_URL/v1/admin/nodes/$NODE_ID/reprovision" || echo '{"error": "request failed"}')
          echo "$RESULT" | jq .
        done

0 commit comments

Comments
 (0)