From f6c92578ef40330cf3f681a30e6854bada3e1f30 Mon Sep 17 00:00:00 2001 From: Aleksandr Pasechnik Date: Tue, 3 Mar 2026 16:01:30 -0500 Subject: [PATCH] chore: more fixes for serverless-init builds --- .github/workflows/release-serverless-init.yml | 9 ++ .github/workflows/test-qemu-versions.yml | 123 ++++++++++++++++-- 2 files changed, 120 insertions(+), 12 deletions(-) diff --git a/.github/workflows/release-serverless-init.yml b/.github/workflows/release-serverless-init.yml index 1e98fbce4..7aa9e2628 100644 --- a/.github/workflows/release-serverless-init.yml +++ b/.github/workflows/release-serverless-init.yml @@ -53,8 +53,17 @@ jobs: ref: ${{ github.event.inputs.agentBranch }} path: datadog-agent + # Pin QEMU to a known-good version. The default (binfmt:latest) has broken + # arm64 emulation multiple times due to QEMU segfaults in libc-bin triggers: + # - Feb 2025: qemu-v9.2.0 — PR #571 pinned, PR #581 reverted to :latest + # - Mar 2026: qemu-v10.2.1 — binfmt:latest updated Feb 18, broke builds + # See .github/workflows/test-qemu-versions.yml to sweep versions before + # bumping this pin. A sweep pass is necessary but not sufficient — always + # verify with a real build, as some failures are load-dependent. - name: Set up QEMU uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0 + with: + image: tonistiigi/binfmt:qemu-v10.1.3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 diff --git a/.github/workflows/test-qemu-versions.yml b/.github/workflows/test-qemu-versions.yml index 51843fc79..98b5c6c37 100644 --- a/.github/workflows/test-qemu-versions.yml +++ b/.github/workflows/test-qemu-versions.yml @@ -1,3 +1,61 @@ +# Test QEMU/binfmt versions for arm64 cross-compilation on x86_64 runners. +# +# The serverless-init release build (release-serverless-init.yml) uses QEMU via +# docker/setup-qemu-action to emulate arm64 during multi-platform Docker builds. +# The QEMU version provided by tonistiigi/binfmt:latest has broken arm64 emulation +# multiple times, causing segfaults during "Processing triggers for libc-bin" in +# the ubuntu:22.04 compresser stage of our Dockerfiles (Dockerfile.build and +# Dockerfile.alpine.build). Because of this, release-serverless-init.yml pins a +# specific binfmt image version rather than using :latest. +# +# History of QEMU breakage in this repo: +# +# Feb 2025 — qemu-v9.2.0 caused segfaults in arm64 emulation. +# - https://github.com/tonistiigi/binfmt/issues/240 +# - https://github.com/DataDog/datadog-agent/pull/33988 (pin in datadog-agent) +# - https://github.com/DataDog/datadog-lambda-extension/pull/571 (pin here) +# - https://github.com/DataDog/datadog-lambda-extension/pull/581 (reverted pin +# after upstream claimed fix — this left us on :latest for ~12 months) +# +# Mar 2026 — qemu-v10.2.1 (shipped in binfmt:latest on Feb 18 2026) caused the +# same segfault pattern. The last successful build was Feb 17 2026, one day +# before the new image was published. This time we pin permanently. +# +# Test results (Mar 2026, two runs to check for flakiness): +# +# Version | Run 1 | Run 2 | Notes +# --------------|-------|-------|------ +# qemu-v10.2.1 | pass | pass | Passes simple tests but FAILS in real build +# qemu-v10.1.3 | pass | pass | Current pin target +# qemu-v10.0.4 | pass | pass | +# qemu-v9.2.2 | pass | pass | Has QEMU-side fix (actions/runner-images#11662) +# qemu-v9.2.0 | FAIL | FAIL | Consistently broken +# qemu-v8.1.5 | FAIL | FAIL | Consistently broken +# qemu-v8.1.4 | FAIL | pass | Flaky +# qemu-v8.0.4 | pass | pass | +# qemu-v7.0.0-28| pass | pass | Long-standing known-good baseline +# +# Important caveats: +# - Some versions are non-deterministically broken (v8.1.4 flipped between runs). +# - v10.2.1 passes all tests here but segfaults in the real build, likely because +# the real build runs Go compilation and the compresser stage concurrently under +# QEMU, creating enough load to trigger the flaky segfault. +# - Therefore: a version passing this sweep is necessary but NOT sufficient. +# Always verify with an actual release build before merging a pin change. +# +# Relevant upstream issues: +# - https://github.com/tonistiigi/binfmt/issues/215 +# - https://github.com/tonistiigi/binfmt/issues/240 +# - https://github.com/tonistiigi/binfmt/issues/245 +# - https://github.com/actions/runner-images/issues/11662 +# +# Usage: +# 1. Dispatch this workflow from the Actions tab to sweep all versions. +# 2. Run it twice to check for flakiness. +# 3. Pick the newest consistently passing version. +# 4. Update the pin in release-serverless-init.yml. +# 5. Do a real serverless-init release build to confirm before merging. + name: Test QEMU versions for arm64 emulation on: @@ -10,21 +68,23 @@ jobs: fail-fast: false matrix: qemu_image: - # v10.x - current era - - "tonistiigi/binfmt:qemu-v10.2.1" # current latest, known broken - - "tonistiigi/binfmt:qemu-v10.1.3" # released Feb 17 2026, day of last good build - - "tonistiigi/binfmt:qemu-v10.0.4" # Jan 2026 + # v10.x + - "tonistiigi/binfmt:qemu-v10.2.1" + - "tonistiigi/binfmt:qemu-v10.1.3" + - "tonistiigi/binfmt:qemu-v10.0.4" # v9.x - - "tonistiigi/binfmt:qemu-v9.2.2" # reportedly has fix for segfault issue - - "tonistiigi/binfmt:qemu-v9.2.0" # known broken (Feb 2025 incident) + - "tonistiigi/binfmt:qemu-v9.2.2" + - "tonistiigi/binfmt:qemu-v9.2.0" # v8.x - - "tonistiigi/binfmt:qemu-v8.1.5" # reported working in issue #245 + - "tonistiigi/binfmt:qemu-v8.1.5" - "tonistiigi/binfmt:qemu-v8.1.4" - "tonistiigi/binfmt:qemu-v8.0.4" - # v7.x - known good baseline + # v7.x - "tonistiigi/binfmt:qemu-v7.0.0-28" name: "QEMU ${{ matrix.qemu_image }}" steps: + - uses: actions/checkout@v6.0.2 + - name: Set up QEMU uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0 with: @@ -36,12 +96,51 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: "Test: ubuntu:22.04 apt-get install (matching compresser stage)" + # Simple docker run tests — catches deterministically broken versions + # (v9.2.0 and v8.1.5 consistently fail here; v8.1.4 is flaky). + - name: "Test: docker run - ubuntu:22.04 apt-get (simple)" run: | docker run --rm --platform linux/arm64 ubuntu:22.04 \ - bash -c "apt-get update && apt-get install -y zip binutils && echo 'SUCCESS: apt-get completed'" + bash -c "apt-get update && apt-get install -y zip binutils && echo 'SUCCESS: docker run apt-get completed'" - - name: "Test: alpine:3.16 apk add (matching builder stage)" + - name: "Test: docker run - alpine:3.16 apk add (simple)" run: | docker run --rm --platform linux/arm64 alpine:3.16 \ - sh -c "apk add --no-cache git make musl-dev gcc && echo 'SUCCESS: apk completed'" + sh -c "apk add --no-cache git make musl-dev gcc && echo 'SUCCESS: docker run apk completed'" + + # Buildx tests — closer to the real build, using docker buildx build with + # Dockerfiles that match the stages in Dockerfile.build and Dockerfile.alpine.build. + # Note: even these may not catch load-dependent failures; see header comment. + - name: "Test: buildx - compresser stage (matches real Dockerfile.build)" + run: | + cat > /tmp/Dockerfile.test-compresser <<'DOCKERFILE' + FROM ubuntu:22.04 as compresser + RUN apt-get update && apt-get install -y zip binutils + RUN mkdir /extensions + WORKDIR /extensions + RUN echo "SUCCESS: buildx compresser stage completed" + DOCKERFILE + docker buildx build --platform linux/arm64 -f /tmp/Dockerfile.test-compresser /tmp + + - name: "Test: buildx - alpine builder stage (matches real Dockerfile.alpine.build)" + run: | + cat > /tmp/Dockerfile.test-alpine <<'DOCKERFILE' + FROM alpine:3.16 as builder + RUN apk add --no-cache git make musl-dev gcc + COPY --from=golang:1.24.6-alpine /usr/local/go/ /usr/lib/go + ENV GOROOT /usr/lib/go + ENV GOPATH /go + ENV PATH /go/bin:$PATH + RUN mkdir -p ${GOPATH}/src ${GOPATH}/bin + RUN echo "SUCCESS: buildx alpine builder stage completed" + DOCKERFILE + docker buildx build --platform linux/arm64 -f /tmp/Dockerfile.test-alpine /tmp + + - name: "Test: buildx - multi-platform (both arches, matches real build)" + run: | + cat > /tmp/Dockerfile.test-multi <<'DOCKERFILE' + FROM ubuntu:22.04 + RUN apt-get update && apt-get install -y zip binutils + RUN echo "SUCCESS: buildx multi-platform completed" + DOCKERFILE + docker buildx build --platform linux/amd64,linux/arm64 -f /tmp/Dockerfile.test-multi /tmp