Skip to content

Commit 4da207a

Browse files
avrabe and claude authored
fix(ci): relocate zephyr-tests west workspace to /mnt — fix intermittent ENOSPC (#71)
Fixes the intermittent zephyr-tests CI failures, which had two CI-infrastructure causes (NOT test/code bugs): (1) the unauthenticated `west sdk install` exhausting GitHub's 60-requests-per-hour anonymous API limit under ~59 concurrent jobs (fixed by passing --personal-access-token), and (2) build-time ENOSPC on the ~14 GB root filesystem (fixed by relocating the west workspace and builds to /mnt). The zephyr-tests matrix is now fully green. Known remaining issues (pre-existing on main, to be handled in a separate follow-up): llvm-lto hits ENOSPC during the docker pull of the large ci image (the image is unpacked to / before the container starts; this needs de-containerizing or a smaller image), and smp_spinlock (continue-on-error, non-blocking). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent bc1bff2 commit 4da207a

2 files changed

Lines changed: 105 additions & 59 deletions

File tree

.github/workflows/llvm-lto.yml

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,23 @@ defaults:
3131
env:
3232
HOME: /root
3333
DOCKER_CONFIG: /tmp/.docker
34+
# Authenticate `west sdk install`'s GitHub API release-list fetch (anonymous
35+
# 60 req/hr -> 5000/hr) so concurrent jobs don't 403 and fail to install the
36+
# SDK. Same root cause fixed in zephyr-tests.yml. Workflow-level -> all jobs.
37+
# Uses github.token, which is functionally equivalent to secrets.GITHUB_TOKEN.
38+
GITHUB_TOKEN: ${{ github.token }}
3439

3540
jobs:
3641
llvm-lto-build:
3742
name: "LLVM LTO build + size comparison"
3843
runs-on: ubuntu-22.04
3944
container:
4045
image: ghcr.io/zephyrproject-rtos/ci:v0.29.0
41-
options: --user root
46+
# Mount the runner's large ephemeral scratch disk (/mnt, ~70 GB). These jobs
47+
# run multiple west builds on the full `ci` image — more disk than a
48+
# single-build job — so the west workspace + build dirs go on /mnt to avoid
49+
# ENOSPC on the ~14 GB root fs (same fix as zephyr-tests.yml).
50+
options: --user root --volume /mnt:/mnt
4251
env:
4352
HOME: /root
4453
DOCKER_CONFIG: /tmp/.docker
@@ -74,10 +83,10 @@ jobs:
7483
- name: Initialize west workspace
7584
run: |
7685
pip3 install west
77-
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement zephyr-workspace
78-
cd zephyr-workspace
86+
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement /mnt/zephyr-workspace
87+
cd /mnt/zephyr-workspace
7988
west update --narrow -o=--depth=1
80-
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi && break || sleep 5; done
89+
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi --personal-access-token "${GITHUB_TOKEN}" && break || sleep 5; done
8190
8291
- name: Setup SDK paths
8392
run: |
@@ -116,8 +125,8 @@ jobs:
116125
- name: "Build baseline (GCC, no Gale)"
117126
run: |
118127
. /root/.cargo/env
119-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
120-
cd zephyr-workspace
128+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
129+
cd /mnt/zephyr-workspace
121130
west build -b qemu_cortex_m3 \
122131
-s zephyr/tests/kernel/semaphore/semaphore \
123132
-d build-gcc-baseline
@@ -127,9 +136,9 @@ jobs:
127136
- name: "Build GCC + Gale"
128137
run: |
129138
. /root/.cargo/env
130-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
139+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
131140
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
132-
cd zephyr-workspace
141+
cd /mnt/zephyr-workspace
133142
west build -b qemu_cortex_m3 \
134143
-s zephyr/tests/kernel/semaphore/semaphore \
135144
-d build-gcc-gale \
@@ -154,9 +163,9 @@ jobs:
154163
- name: "Build LLVM + Gale (no LTO)"
155164
run: |
156165
. /root/.cargo/env
157-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
166+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
158167
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
159-
cd zephyr-workspace
168+
cd /mnt/zephyr-workspace
160169
west build -b qemu_cortex_m3 \
161170
-s zephyr/tests/kernel/semaphore/semaphore \
162171
-d build-llvm-gale \
@@ -180,9 +189,9 @@ jobs:
180189
- name: "Build LLVM + Gale + LTO"
181190
run: |
182191
. /root/.cargo/env
183-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
192+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
184193
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
185-
cd zephyr-workspace
194+
cd /mnt/zephyr-workspace
186195
west build -b qemu_cortex_m3 \
187196
-s zephyr/tests/kernel/semaphore/semaphore \
188197
-d build-llvm-lto \
@@ -201,7 +210,7 @@ jobs:
201210
- name: "Run semaphore tests (LLVM+LTO)"
202211
run: |
203212
. /root/.cargo/env
204-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
213+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
205214
# QEMU may not be found by LLVM toolchain variant. Use SDK QEMU directly.
206215
# SDK 1.0.x: hosttools/sysroots/... (Poky hosttools layout).
207216
# SDK 0.x: sysroots/... (flat layout).
@@ -214,7 +223,7 @@ jobs:
214223
QEMU=$(which qemu-system-arm 2>/dev/null || echo "")
215224
fi
216225
if [ -n "${QEMU}" ]; then
217-
cd zephyr-workspace
226+
cd /mnt/zephyr-workspace
218227
timeout 120 "${QEMU}" -cpu cortex-m3 -machine lm3s6965evb -nographic \
219228
-vga none -net none -icount shift=6,align=off,sleep=off \
220229
-rtc clock=vm -kernel build-llvm-lto/zephyr/zephyr.elf 2>&1 | tee test-llvm-lto.log &
@@ -233,7 +242,7 @@ jobs:
233242
- name: "Check inlining + compare sizes"
234243
if: always()
235244
run: |
236-
cd zephyr-workspace
245+
cd /mnt/zephyr-workspace
237246
echo "=== Symbol check ==="
238247
# Count + list gale_ FFI symbols that survived the link. The goal
239248
# of cross-language LTO is for these to be inlined into the C
@@ -296,7 +305,11 @@ jobs:
296305
runs-on: ubuntu-22.04
297306
container:
298307
image: ghcr.io/zephyrproject-rtos/ci:v0.29.0
299-
options: --user root
308+
# Mount the runner's large ephemeral scratch disk (/mnt, ~70 GB). These jobs
309+
# run multiple west builds on the full `ci` image — more disk than a
310+
# single-build job — so the west workspace + build dirs go on /mnt to avoid
311+
# ENOSPC on the ~14 GB root fs (same fix as zephyr-tests.yml).
312+
options: --user root --volume /mnt:/mnt
300313
strategy:
301314
fail-fast: false
302315
matrix:
@@ -348,10 +361,10 @@ jobs:
348361
- name: Initialize west workspace
349362
run: |
350363
pip3 install west
351-
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement zephyr-workspace
352-
cd zephyr-workspace
364+
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement /mnt/zephyr-workspace
365+
cd /mnt/zephyr-workspace
353366
west update --narrow -o=--depth=1
354-
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi && break || sleep 5; done
367+
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi --personal-access-token "${GITHUB_TOKEN}" && break || sleep 5; done
355368
356369
- name: Setup SDK paths
357370
run: |
@@ -384,9 +397,9 @@ jobs:
384397
- name: Build and run test (LLVM+LTO)
385398
run: |
386399
. /root/.cargo/env
387-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
400+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
388401
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
389-
cd zephyr-workspace
402+
cd /mnt/zephyr-workspace
390403
west build -b qemu_cortex_m3 \
391404
-s "zephyr/${TEST_PATH}" \
392405
-- \

.github/workflows/zephyr-tests.yml

Lines changed: 71 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,29 @@ concurrency:
2424
env:
2525
HOME: /root
2626
DOCKER_CONFIG: /tmp/.docker
27+
# Token value for `west sdk install --personal-access-token` (see the install
28+
# steps). `west sdk install`'s "Fetching Zephyr SDK list" hits the GitHub API;
29+
# anonymous = 60 req/hr per runner IP, and with ~59 concurrent jobs that limit
30+
# is blown -> "403 API rate limit exceeded" -> SDK never installs -> CMake
31+
# FindZephyr-sdk error. west does NOT read GITHUB_TOKEN from the env (it told
32+
# us so: "Try ... --personal-access-token argument or a .netrc file"), so the
33+
# steps pass it explicitly; this env just carries the value. github.token is
34+
# used here; it is functionally equivalent to secrets.GITHUB_TOKEN.
35+
GITHUB_TOKEN: ${{ github.token }}
2736

2837
jobs:
2938
zephyr-test:
3039
name: "${{ matrix.primitive }} (qemu_cortex_m3)"
3140
runs-on: ubuntu-22.04
3241
container:
3342
image: ghcr.io/zephyrproject-rtos/ci-base:v0.29.0
34-
options: --user root
43+
# Mount the runner's large ephemeral scratch disk (/mnt, ~70 GB) into the
44+
# container. The ci-base image layers (full multi-arch Zephyr SDK) already
45+
# consume most of the host root fs (~14 GB free); putting the west
46+
# workspace + build artifacts on / tips unlucky jobs into ENOSPC (the
47+
# runner itself crashes writing its diag log). The west workspace below is
48+
# created under /mnt so the heavy writes land on the big disk instead.
49+
options: --user root --volume /mnt:/mnt
3550
strategy:
3651
fail-fast: false
3752
matrix:
@@ -152,19 +167,19 @@ jobs:
152167
- name: Initialize west workspace
153168
run: |
154169
pip3 install west
155-
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement zephyr-workspace
156-
cd zephyr-workspace
170+
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement /mnt/zephyr-workspace
171+
cd /mnt/zephyr-workspace
157172
west update --narrow -o=--depth=1
158-
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi && break || sleep 5; done
173+
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi --personal-access-token "${GITHUB_TOKEN}" && break || sleep 5; done
159174
160175
- name: Build and run test
161176
env:
162177
TEST_PATH: ${{ matrix.test_path }}
163178
run: |
164179
. /root/.cargo/env
165-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
180+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
166181
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
167-
cd zephyr-workspace
182+
cd /mnt/zephyr-workspace
168183
west build -b qemu_cortex_m3 \
169184
-s "zephyr/${TEST_PATH}" \
170185
-- \
@@ -188,7 +203,13 @@ jobs:
188203
runs-on: ubuntu-22.04
189204
container:
190205
image: ghcr.io/zephyrproject-rtos/ci-base:v0.29.0
191-
options: --user root
206+
# Mount the runner's large ephemeral scratch disk (/mnt, ~70 GB) into the
207+
# container. The ci-base image layers (full multi-arch Zephyr SDK) already
208+
# consume most of the host root fs (~14 GB free); putting the west
209+
# workspace + build artifacts on / tips unlucky jobs into ENOSPC (the
210+
# runner itself crashes writing its diag log). The west workspace below is
211+
# created under /mnt so the heavy writes land on the big disk instead.
212+
options: --user root --volume /mnt:/mnt
192213
strategy:
193214
fail-fast: false
194215
matrix:
@@ -226,19 +247,19 @@ jobs:
226247
- name: Initialize west workspace
227248
run: |
228249
pip3 install west
229-
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement zephyr-workspace
230-
cd zephyr-workspace
250+
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement /mnt/zephyr-workspace
251+
cd /mnt/zephyr-workspace
231252
west update --narrow -o=--depth=1
232-
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi && break || sleep 5; done
253+
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi --personal-access-token "${GITHUB_TOKEN}" && break || sleep 5; done
233254
234255
- name: Build and run test
235256
env:
236257
TEST_PATH: ${{ matrix.test_path }}
237258
run: |
238259
. /root/.cargo/env
239-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
260+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
240261
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
241-
cd zephyr-workspace
262+
cd /mnt/zephyr-workspace
242263
west build -b mps2/an385 \
243264
-s "zephyr/${TEST_PATH}" \
244265
-- \
@@ -277,7 +298,13 @@ jobs:
277298
runs-on: ubuntu-22.04
278299
container:
279300
image: ghcr.io/zephyrproject-rtos/ci-base:v0.29.0
280-
options: --user root
301+
# Mount the runner's large ephemeral scratch disk (/mnt, ~70 GB) into the
302+
# container. The ci-base image layers (full multi-arch Zephyr SDK) already
303+
# consume most of the host root fs (~14 GB free); putting the west
304+
# workspace + build artifacts on / tips unlucky jobs into ENOSPC (the
305+
# runner itself crashes writing its diag log). The west workspace below is
306+
# created under /mnt so the heavy writes land on the big disk instead.
307+
options: --user root --volume /mnt:/mnt
281308
strategy:
282309
fail-fast: false
283310
matrix:
@@ -320,19 +347,19 @@ jobs:
320347
- name: Initialize west workspace
321348
run: |
322349
pip3 install west
323-
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement zephyr-workspace
324-
cd zephyr-workspace
350+
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement /mnt/zephyr-workspace
351+
cd /mnt/zephyr-workspace
325352
west update --narrow -o=--depth=1
326-
for attempt in 1 2 3; do west sdk install -t x86_64-zephyr-elf && break || sleep 5; done
353+
for attempt in 1 2 3; do west sdk install -t x86_64-zephyr-elf --personal-access-token "${GITHUB_TOKEN}" && break || sleep 5; done
327354
328355
- name: Build and run test
329356
env:
330357
TEST_PATH: ${{ matrix.test_path }}
331358
run: |
332359
. /root/.cargo/env
333-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
360+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
334361
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
335-
cd zephyr-workspace
362+
cd /mnt/zephyr-workspace
336363
west build -b qemu_x86_64 \
337364
-s "zephyr/${TEST_PATH}" \
338365
-- \
@@ -354,7 +381,13 @@ jobs:
354381
runs-on: ubuntu-22.04
355382
container:
356383
image: ghcr.io/zephyrproject-rtos/ci-base:v0.29.0
357-
options: --user root
384+
# Mount the runner's large ephemeral scratch disk (/mnt, ~70 GB) into the
385+
# container. The ci-base image layers (full multi-arch Zephyr SDK) already
386+
# consume most of the host root fs (~14 GB free); putting the west
387+
# workspace + build artifacts on / tips unlucky jobs into ENOSPC (the
388+
# runner itself crashes writing its diag log). The west workspace below is
389+
# created under /mnt so the heavy writes land on the big disk instead.
390+
options: --user root --volume /mnt:/mnt
358391
env:
359392
HOME: /root
360393
DOCKER_CONFIG: /tmp/.docker
@@ -375,10 +408,10 @@ jobs:
375408
- name: Initialize west workspace
376409
run: |
377410
pip3 install west
378-
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement zephyr-workspace
379-
cd zephyr-workspace
411+
west init -m https://github.com/pulseengine/zephyr.git --mr gale/sem-replacement /mnt/zephyr-workspace
412+
cd /mnt/zephyr-workspace
380413
west update --narrow -o=--depth=1
381-
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi && break || sleep 5; done
414+
for attempt in 1 2 3; do west sdk install -t arm-zephyr-eabi --personal-access-token "${GITHUB_TOKEN}" && break || sleep 5; done
382415
383416
- name: Setup SDK paths
384417
run: |
@@ -395,8 +428,8 @@ jobs:
395428
- name: Build baseline (stock Zephyr)
396429
run: |
397430
. /root/.cargo/env
398-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
399-
cd zephyr-workspace
431+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
432+
cd /mnt/zephyr-workspace
400433
west build -b qemu_cortex_m3 \
401434
-s zephyr/tests/kernel/semaphore/semaphore \
402435
-d build-baseline
@@ -405,9 +438,9 @@ jobs:
405438
- name: Build with Gale
406439
run: |
407440
. /root/.cargo/env
408-
export ZEPHYR_BASE="${GITHUB_WORKSPACE}/zephyr-workspace/zephyr"
441+
export ZEPHYR_BASE="/mnt/zephyr-workspace/zephyr"
409442
export GALE_ROOT="${GITHUB_WORKSPACE}/gale"
410-
cd zephyr-workspace
443+
cd /mnt/zephyr-workspace
411444
west build -b qemu_cortex_m3 \
412445
-s zephyr/tests/kernel/semaphore/semaphore \
413446
-d build-gale \
@@ -421,21 +454,21 @@ jobs:
421454
echo "=== Binary Size Comparison ==="
422455
echo ""
423456
echo "--- Baseline (stock Zephyr) ---"
424-
cat zephyr-workspace/baseline-size.txt
457+
cat /mnt/zephyr-workspace/baseline-size.txt
425458
echo ""
426459
echo "--- With Gale ---"
427-
cat zephyr-workspace/gale-size.txt
460+
cat /mnt/zephyr-workspace/gale-size.txt
428461
echo ""
429462
430-
BASELINE_TEXT=$(tail -1 zephyr-workspace/baseline-size.txt | awk '{print $1}')
431-
BASELINE_DATA=$(tail -1 zephyr-workspace/baseline-size.txt | awk '{print $2}')
432-
BASELINE_BSS=$(tail -1 zephyr-workspace/baseline-size.txt | awk '{print $3}')
433-
BASELINE_TOTAL=$(tail -1 zephyr-workspace/baseline-size.txt | awk '{print $4}')
463+
BASELINE_TEXT=$(tail -1 /mnt/zephyr-workspace/baseline-size.txt | awk '{print $1}')
464+
BASELINE_DATA=$(tail -1 /mnt/zephyr-workspace/baseline-size.txt | awk '{print $2}')
465+
BASELINE_BSS=$(tail -1 /mnt/zephyr-workspace/baseline-size.txt | awk '{print $3}')
466+
BASELINE_TOTAL=$(tail -1 /mnt/zephyr-workspace/baseline-size.txt | awk '{print $4}')
434467
435-
GALE_TEXT=$(tail -1 zephyr-workspace/gale-size.txt | awk '{print $1}')
436-
GALE_DATA=$(tail -1 zephyr-workspace/gale-size.txt | awk '{print $2}')
437-
GALE_BSS=$(tail -1 zephyr-workspace/gale-size.txt | awk '{print $3}')
438-
GALE_TOTAL=$(tail -1 zephyr-workspace/gale-size.txt | awk '{print $4}')
468+
GALE_TEXT=$(tail -1 /mnt/zephyr-workspace/gale-size.txt | awk '{print $1}')
469+
GALE_DATA=$(tail -1 /mnt/zephyr-workspace/gale-size.txt | awk '{print $2}')
470+
GALE_BSS=$(tail -1 /mnt/zephyr-workspace/gale-size.txt | awk '{print $3}')
471+
GALE_TOTAL=$(tail -1 /mnt/zephyr-workspace/gale-size.txt | awk '{print $4}')
439472
440473
DELTA_TEXT=$((GALE_TEXT - BASELINE_TEXT))
441474
DELTA_DATA=$((GALE_DATA - BASELINE_DATA))
@@ -467,7 +500,7 @@ jobs:
467500
printf "gale_overhead_total %d ns/iter (+/- 0)\n" "$DELTA_TOTAL"
468501
printf "gale_flash_bytes %d ns/iter (+/- 0)\n" "$GALE_TOTAL"
469502
printf "baseline_flash_bytes %d ns/iter (+/- 0)\n" "$BASELINE_TOTAL"
470-
} > zephyr-workspace/benchmark-output.txt
503+
} > /mnt/zephyr-workspace/benchmark-output.txt
471504
472505
# Fail if total overhead exceeds 10% (SYSREQ-PERF-001)
473506
PERCENT=$((DELTA_TOTAL * 100 / BASELINE_TOTAL))
@@ -484,7 +517,7 @@ jobs:
484517
with:
485518
name: Binary Size (semaphore test, qemu_cortex_m3)
486519
tool: cargo
487-
output-file-path: zephyr-workspace/benchmark-output.txt
520+
output-file-path: /mnt/zephyr-workspace/benchmark-output.txt
488521
github-token: ${{ secrets.GITHUB_TOKEN }}
489522
auto-push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
490523
comment-on-alert: true

0 commit comments

Comments
 (0)