Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/release-serverless-init.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,17 @@ jobs:
ref: ${{ github.event.inputs.agentBranch }}
path: datadog-agent

# Pin QEMU to a known-good version. The default (binfmt:latest) has broken
# arm64 emulation multiple times: QEMU segfaults while dpkg is processing the
# libc-bin triggers during apt-get install. Known incidents:
# - Feb 2025: qemu-v9.2.0 — PR #571 pinned, PR #581 reverted to :latest
# - Mar 2026: qemu-v10.2.1 — binfmt:latest updated Feb 18, broke builds
# See .github/workflows/test-qemu-versions.yml to sweep versions before
# bumping this pin. A sweep pass is necessary but not sufficient — always
# verify with a real build, as some failures are load-dependent.
- name: Set up QEMU
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0
with:
image: tonistiigi/binfmt:qemu-v10.1.3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
Expand Down
123 changes: 111 additions & 12 deletions .github/workflows/test-qemu-versions.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,61 @@
# Test QEMU/binfmt versions for arm64 cross-compilation on x86_64 runners.
#
# The serverless-init release build (release-serverless-init.yml) uses QEMU via
# docker/setup-qemu-action to emulate arm64 during multi-platform Docker builds.
# The QEMU version provided by tonistiigi/binfmt:latest has broken arm64 emulation
# multiple times, causing segfaults during "Processing triggers for libc-bin" in
# the ubuntu:22.04 compresser stage of our Dockerfiles (Dockerfile.build and
# Dockerfile.alpine.build). Because of this, release-serverless-init.yml pins a
# specific binfmt image version rather than using :latest.
#
# History of QEMU breakage in this repo:
#
# Feb 2025 — qemu-v9.2.0 caused segfaults in arm64 emulation.
# - https://github.com/tonistiigi/binfmt/issues/240
# - https://github.com/DataDog/datadog-agent/pull/33988 (pin in datadog-agent)
# - https://github.com/DataDog/datadog-lambda-extension/pull/571 (pin here)
# - https://github.com/DataDog/datadog-lambda-extension/pull/581 (reverted pin
# after upstream claimed fix — this left us on :latest for ~12 months)
#
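# Mar 2026 — qemu-v10.2.1 (shipped in binfmt:latest on Feb 18 2026) caused the
# same segfault pattern. The last successful build was Feb 17 2026, one day
# before the new image was published. This time the pin stays for good: we do
# not go back to :latest, and the pinned version is only bumped after it has
# been verified (see "Usage" below).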
#
# Test results (Mar 2026, two runs to check for flakiness):
#
# Version         | Run 1 | Run 2 | Notes
# ----------------|-------|-------|------
# qemu-v10.2.1    | pass  | pass  | Passes simple tests but FAILS in real build
# qemu-v10.1.3    | pass  | pass  | Current pin target
# qemu-v10.0.4    | pass  | pass  |
# qemu-v9.2.2     | pass  | pass  | Has QEMU-side fix (actions/runner-images#11662)
# qemu-v9.2.0     | FAIL  | FAIL  | Consistently broken
# qemu-v8.1.5     | FAIL  | FAIL  | Consistently broken
# qemu-v8.1.4     | FAIL  | pass  | Flaky
# qemu-v8.0.4     | pass  | pass  |
# qemu-v7.0.0-28  | pass  | pass  | Long-standing known-good baseline
#
# Important caveats:
# - Some versions are non-deterministically broken (v8.1.4 flipped between runs).
# - v10.2.1 passes all tests here but segfaults in the real build, likely because
# the real build runs Go compilation and the compresser stage concurrently under
# QEMU, creating enough load to trigger the flaky segfault.
# - Therefore: a version passing this sweep is necessary but NOT sufficient.
# Always verify with an actual release build before merging a pin change.
#
# Relevant upstream issues:
# - https://github.com/tonistiigi/binfmt/issues/215
# - https://github.com/tonistiigi/binfmt/issues/240
# - https://github.com/tonistiigi/binfmt/issues/245
# - https://github.com/actions/runner-images/issues/11662
#
# Usage:
# 1. Dispatch this workflow from the Actions tab to sweep all versions.
# 2. Run it twice to check for flakiness.
# 3. Pick the newest version that passes in both runs and is not already known
#    to fail in a real build (see the Notes column in the table above).
# 4. Update the pin in release-serverless-init.yml.
# 5. Do a real serverless-init release build to confirm before merging.

name: Test QEMU versions for arm64 emulation

on:
Expand All @@ -10,21 +68,23 @@ jobs:
fail-fast: false
matrix:
qemu_image:
# v10.x - current era
- "tonistiigi/binfmt:qemu-v10.2.1" # current latest, known broken
- "tonistiigi/binfmt:qemu-v10.1.3" # released Feb 17 2026, day of last good build
- "tonistiigi/binfmt:qemu-v10.0.4" # Jan 2026
# v10.x
- "tonistiigi/binfmt:qemu-v10.2.1"
- "tonistiigi/binfmt:qemu-v10.1.3"
- "tonistiigi/binfmt:qemu-v10.0.4"
# v9.x
- "tonistiigi/binfmt:qemu-v9.2.2" # reportedly has fix for segfault issue
- "tonistiigi/binfmt:qemu-v9.2.0" # known broken (Feb 2025 incident)
- "tonistiigi/binfmt:qemu-v9.2.2"
- "tonistiigi/binfmt:qemu-v9.2.0"
# v8.x
- "tonistiigi/binfmt:qemu-v8.1.5" # reported working in issue #245
- "tonistiigi/binfmt:qemu-v8.1.5"
- "tonistiigi/binfmt:qemu-v8.1.4"
- "tonistiigi/binfmt:qemu-v8.0.4"
# v7.x - known good baseline
# v7.x
- "tonistiigi/binfmt:qemu-v7.0.0-28"
name: "QEMU ${{ matrix.qemu_image }}"
steps:
- uses: actions/checkout@v6.0.2

- name: Set up QEMU
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0
with:
Expand All @@ -36,12 +96,51 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: "Test: ubuntu:22.04 apt-get install (matching compresser stage)"
# Simple docker run tests — catches deterministically broken versions
# (v9.2.0 and v8.1.5 consistently fail here; v8.1.4 is flaky).
- name: "Test: docker run - ubuntu:22.04 apt-get (simple)"
run: |
docker run --rm --platform linux/arm64 ubuntu:22.04 \
bash -c "apt-get update && apt-get install -y zip binutils && echo 'SUCCESS: apt-get completed'"
bash -c "apt-get update && apt-get install -y zip binutils && echo 'SUCCESS: docker run apt-get completed'"

- name: "Test: alpine:3.16 apk add (matching builder stage)"
- name: "Test: docker run - alpine:3.16 apk add (simple)"
run: |
docker run --rm --platform linux/arm64 alpine:3.16 \
sh -c "apk add --no-cache git make musl-dev gcc && echo 'SUCCESS: apk completed'"
sh -c "apk add --no-cache git make musl-dev gcc && echo 'SUCCESS: docker run apk completed'"

# Buildx tests — closer to the real build, using docker buildx build with
# Dockerfiles that match the stages in Dockerfile.build and Dockerfile.alpine.build.
# Note: even these may not catch load-dependent failures; see header comment.
# Writes a throwaway Dockerfile to /tmp via a heredoc and builds it with
# docker buildx for linux/arm64 only. The quoted 'DOCKERFILE' delimiter stops
# the runner's shell from expanding anything inside the heredoc body.
# NOTE(review): no --load/--push is passed, so presumably only the build's
# exit status matters here and no image is exported — confirm.
- name: "Test: buildx - compresser stage (matches real Dockerfile.build)"
run: |
cat > /tmp/Dockerfile.test-compresser <<'DOCKERFILE'
FROM ubuntu:22.04 as compresser
RUN apt-get update && apt-get install -y zip binutils
RUN mkdir /extensions
WORKDIR /extensions
RUN echo "SUCCESS: buildx compresser stage completed"
DOCKERFILE
docker buildx build --platform linux/arm64 -f /tmp/Dockerfile.test-compresser /tmp

# Writes a throwaway Alpine Dockerfile to /tmp and builds it with docker
# buildx for linux/arm64 only. It installs the build packages with apk, copies
# the Go toolchain out of the golang:1.24.6-alpine image, and sets up
# GOROOT/GOPATH/PATH.
# The quoted 'DOCKERFILE' delimiter keeps $PATH and ${GOPATH} literal, so the
# runner's shell does not expand them before they reach the Dockerfile.
# NOTE(review): no Go code is compiled in this test; it only checks that the
# stage setup commands run under emulation.
- name: "Test: buildx - alpine builder stage (matches real Dockerfile.alpine.build)"
run: |
cat > /tmp/Dockerfile.test-alpine <<'DOCKERFILE'
FROM alpine:3.16 as builder
RUN apk add --no-cache git make musl-dev gcc
COPY --from=golang:1.24.6-alpine /usr/local/go/ /usr/lib/go
ENV GOROOT /usr/lib/go
ENV GOPATH /go
ENV PATH /go/bin:$PATH
RUN mkdir -p ${GOPATH}/src ${GOPATH}/bin
RUN echo "SUCCESS: buildx alpine builder stage completed"
DOCKERFILE
docker buildx build --platform linux/arm64 -f /tmp/Dockerfile.test-alpine /tmp

# Builds the same ubuntu:22.04 apt-get layer for linux/amd64 and linux/arm64
# in a single docker buildx invocation, using a throwaway Dockerfile written
# to /tmp via a quoted heredoc.
# NOTE(review): presumably only the arm64 half runs under QEMU on an x86_64
# runner — confirm the runner architecture for this job.
- name: "Test: buildx - multi-platform (both arches, matches real build)"
run: |
cat > /tmp/Dockerfile.test-multi <<'DOCKERFILE'
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y zip binutils
RUN echo "SUCCESS: buildx multi-platform completed"
DOCKERFILE
docker buildx build --platform linux/amd64,linux/arm64 -f /tmp/Dockerfile.test-multi /tmp
Loading