Skip to content

Commit 321fb09

Browse files
chore: more fixes for serverless-init builds (#1057)
fixing qemu flakiness.
1 parent 3d0df01 commit 321fb09

File tree

2 files changed

+120
-12
lines changed

2 files changed

+120
-12
lines changed

.github/workflows/release-serverless-init.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,17 @@ jobs:
5353
ref: ${{ github.event.inputs.agentBranch }}
5454
path: datadog-agent
5555

56+
# Pin QEMU to a known-good version. The default (binfmt:latest) has broken
57+
# arm64 emulation multiple times due to QEMU segfaults while processing libc-bin triggers:
58+
# - Feb 2025: qemu-v9.2.0 — PR #571 pinned, PR #581 reverted to :latest
59+
# - Mar 2026: qemu-v10.2.1 — binfmt:latest updated Feb 18, broke builds
60+
# See .github/workflows/test-qemu-versions.yml to sweep versions before
61+
# bumping this pin. A sweep pass is necessary but not sufficient — always
62+
# verify with a real build, as some failures are load-dependent.
5663
- name: Set up QEMU
5764
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0
65+
with:
66+
image: tonistiigi/binfmt:qemu-v10.1.3
5867

5968
- name: Set up Docker Buildx
6069
uses: docker/setup-buildx-action@v3
Lines changed: 111 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,61 @@
1+
# Test QEMU/binfmt versions for arm64 cross-compilation on x86_64 runners.
2+
#
3+
# The serverless-init release build (release-serverless-init.yml) uses QEMU via
4+
# docker/setup-qemu-action to emulate arm64 during multi-platform Docker builds.
5+
# The QEMU version provided by tonistiigi/binfmt:latest has broken arm64 emulation
6+
# multiple times, causing segfaults during "Processing triggers for libc-bin" in
7+
# the ubuntu:22.04 compresser stage of our Dockerfiles (Dockerfile.build and
8+
# Dockerfile.alpine.build). Because of this, release-serverless-init.yml pins a
9+
# specific binfmt image version rather than using :latest.
10+
#
11+
# History of QEMU breakage in this repo:
12+
#
13+
# Feb 2025 — qemu-v9.2.0 caused segfaults in arm64 emulation.
14+
# - https://github.com/tonistiigi/binfmt/issues/240
15+
# - https://github.com/DataDog/datadog-agent/pull/33988 (pin in datadog-agent)
16+
# - https://github.com/DataDog/datadog-lambda-extension/pull/571 (pin here)
17+
# - https://github.com/DataDog/datadog-lambda-extension/pull/581 (reverted pin
18+
# after upstream claimed a fix — this left us on :latest for ~12 months)
19+
#
20+
# Mar 2026 — qemu-v10.2.1 (shipped in binfmt:latest on Feb 18 2026) caused the
21+
# same segfault pattern. The last successful build was Feb 17 2026, one day
22+
# before the new image was published. This time we pin permanently.
23+
#
24+
# Test results (Mar 2026, two runs to check for flakiness):
25+
#
26+
# Version | Run 1 | Run 2 | Notes
27+
# --------------|-------|-------|------
28+
# qemu-v10.2.1 | pass | pass | Passes this sweep but FAILS in real build
29+
# qemu-v10.1.3 | pass | pass | Current pin target
30+
# qemu-v10.0.4 | pass | pass |
31+
# qemu-v9.2.2 | pass | pass | Has QEMU-side fix (actions/runner-images#11662)
32+
# qemu-v9.2.0 | FAIL | FAIL | Consistently broken
33+
# qemu-v8.1.5 | FAIL | FAIL | Consistently broken
34+
# qemu-v8.1.4 | FAIL | pass | Flaky
35+
# qemu-v8.0.4 | pass | pass |
36+
# qemu-v7.0.0-28| pass | pass | Long-standing known-good baseline
37+
#
38+
# Important caveats:
39+
# - Some versions are non-deterministically broken (v8.1.4 flipped between runs).
40+
# - v10.2.1 passes all tests here but segfaults in the real build, likely because
41+
# the real build runs Go compilation and the compresser stage concurrently under
42+
# QEMU, creating enough load to trigger the flaky segfault.
43+
# - Therefore: a version passing this sweep is necessary but NOT sufficient.
44+
# Always verify with an actual release build before merging a pin change.
45+
#
46+
# Relevant upstream issues:
47+
# - https://github.com/tonistiigi/binfmt/issues/215
48+
# - https://github.com/tonistiigi/binfmt/issues/240
49+
# - https://github.com/tonistiigi/binfmt/issues/245
50+
# - https://github.com/actions/runner-images/issues/11662
51+
#
52+
# Usage:
53+
# 1. Dispatch this workflow from the Actions tab to sweep all versions.
54+
# 2. Run it twice to check for flakiness.
55+
# 3. Pick the newest consistently passing version.
56+
# 4. Update the pin in release-serverless-init.yml.
57+
# 5. Do a real serverless-init release build to confirm before merging.
58+
159
name: Test QEMU versions for arm64 emulation
260

361
on:
@@ -10,21 +68,23 @@ jobs:
1068
fail-fast: false
1169
matrix:
1270
qemu_image:
13-
# v10.x - current era
14-
- "tonistiigi/binfmt:qemu-v10.2.1" # current latest, known broken
15-
- "tonistiigi/binfmt:qemu-v10.1.3" # released Feb 17 2026, day of last good build
16-
- "tonistiigi/binfmt:qemu-v10.0.4" # Jan 2026
71+
# v10.x
72+
- "tonistiigi/binfmt:qemu-v10.2.1"
73+
- "tonistiigi/binfmt:qemu-v10.1.3"
74+
- "tonistiigi/binfmt:qemu-v10.0.4"
1775
# v9.x
18-
- "tonistiigi/binfmt:qemu-v9.2.2" # reportedly has fix for segfault issue
19-
- "tonistiigi/binfmt:qemu-v9.2.0" # known broken (Feb 2025 incident)
76+
- "tonistiigi/binfmt:qemu-v9.2.2"
77+
- "tonistiigi/binfmt:qemu-v9.2.0"
2078
# v8.x
21-
- "tonistiigi/binfmt:qemu-v8.1.5" # reported working in issue #245
79+
- "tonistiigi/binfmt:qemu-v8.1.5"
2280
- "tonistiigi/binfmt:qemu-v8.1.4"
2381
- "tonistiigi/binfmt:qemu-v8.0.4"
24-
# v7.x - known good baseline
82+
# v7.x
2583
- "tonistiigi/binfmt:qemu-v7.0.0-28"
2684
name: "QEMU ${{ matrix.qemu_image }}"
2785
steps:
86+
- uses: actions/checkout@v6.0.2
87+
2888
- name: Set up QEMU
2989
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0
3090
with:
@@ -36,12 +96,51 @@ jobs:
3696
- name: Set up Docker Buildx
3797
uses: docker/setup-buildx-action@v3
3898

39-
- name: "Test: ubuntu:22.04 apt-get install (matching compresser stage)"
99+
# Simple docker run tests — catch deterministically broken versions
100+
# (v9.2.0 and v8.1.5 consistently fail here; v8.1.4 is flaky).
101+
- name: "Test: docker run - ubuntu:22.04 apt-get (simple)"
40102
run: |
41103
docker run --rm --platform linux/arm64 ubuntu:22.04 \
42-
bash -c "apt-get update && apt-get install -y zip binutils && echo 'SUCCESS: apt-get completed'"
104+
bash -c "apt-get update && apt-get install -y zip binutils && echo 'SUCCESS: docker run apt-get completed'"
43105
44-
- name: "Test: alpine:3.16 apk add (matching builder stage)"
106+
- name: "Test: docker run - alpine:3.16 apk add (simple)"
45107
run: |
46108
docker run --rm --platform linux/arm64 alpine:3.16 \
47-
sh -c "apk add --no-cache git make musl-dev gcc && echo 'SUCCESS: apk completed'"
109+
sh -c "apk add --no-cache git make musl-dev gcc && echo 'SUCCESS: docker run apk completed'"
110+
111+
# Buildx tests — closer to the real build, using docker buildx build with
112+
# Dockerfiles that match the stages in Dockerfile.build and Dockerfile.alpine.build.
113+
# Note: even these may not catch load-dependent failures; see header comment.
114+
- name: "Test: buildx - compresser stage (matches real Dockerfile.build)"
115+
run: |
116+
cat > /tmp/Dockerfile.test-compresser <<'DOCKERFILE'
117+
FROM ubuntu:22.04 as compresser
118+
RUN apt-get update && apt-get install -y zip binutils
119+
RUN mkdir /extensions
120+
WORKDIR /extensions
121+
RUN echo "SUCCESS: buildx compresser stage completed"
122+
DOCKERFILE
123+
docker buildx build --platform linux/arm64 -f /tmp/Dockerfile.test-compresser /tmp
124+
125+
- name: "Test: buildx - alpine builder stage (matches real Dockerfile.alpine.build)"
126+
run: |
127+
cat > /tmp/Dockerfile.test-alpine <<'DOCKERFILE'
128+
FROM alpine:3.16 as builder
129+
RUN apk add --no-cache git make musl-dev gcc
130+
COPY --from=golang:1.24.6-alpine /usr/local/go/ /usr/lib/go
131+
ENV GOROOT /usr/lib/go
132+
ENV GOPATH /go
133+
ENV PATH /go/bin:$PATH
134+
RUN mkdir -p ${GOPATH}/src ${GOPATH}/bin
135+
RUN echo "SUCCESS: buildx alpine builder stage completed"
136+
DOCKERFILE
137+
docker buildx build --platform linux/arm64 -f /tmp/Dockerfile.test-alpine /tmp
138+
139+
- name: "Test: buildx - multi-platform (both arches, matches real build)"
140+
run: |
141+
cat > /tmp/Dockerfile.test-multi <<'DOCKERFILE'
142+
FROM ubuntu:22.04
143+
RUN apt-get update && apt-get install -y zip binutils
144+
RUN echo "SUCCESS: buildx multi-platform completed"
145+
DOCKERFILE
146+
docker buildx build --platform linux/amd64,linux/arm64 -f /tmp/Dockerfile.test-multi /tmp

0 commit comments

Comments
 (0)