Skip to content

Commit edc7715

Browse files
tomassrnka, e2b, and claude
authored
ci: add ARM64 cross-compilation and unit test workflow (#2255)
* ci: add ARM64 cross-compilation and unit test workflow - Add pr-tests-arm64.yml with cross-compile job (ubuntu-24.04) and native ARM64 unit test matrix (ubuntu-24.04-arm) - Wire arm64-tests into pull-request.yml - Add setup-arm64-runner.sh for self-hosted runner provisioning Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: update checkout to v5 and add timeout to cross-compile job - Align with repo convention of actions/checkout@v5 - Add timeout-minutes: 15 to cross-compile job to prevent hangs Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: data races in NBD path_direct and PostProcessor test on ARM64 - path_direct.go: capture deviceIndex before goroutine closure. The outer for-loop is not a range loop, so Go 1.22+ loop variable fix doesn't apply — deviceIndex is reassigned on each retry iteration. - postprocessor_test.go: use sync.Mutex-wrapped buffer instead of bare bytes.Buffer. The PostProcessor goroutine and test goroutine write to the buffer concurrently. Both races were exposed by running tests with -race on ARM64 (weaker memory model makes races more likely to manifest). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: add fetch-busybox to ARM64 cross-compile and test jobs Busybox binary is no longer committed to git (downloaded at build time from fc-busybox release). Add fetch-busybox to: - cross-compile job (with BUILD_ARCH=arm64 for correct binary) - orchestrator ARM64 test setup Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: e2b <e2b@Onsites-MacBook-Pro.local> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8cc230b commit edc7715

5 files changed

Lines changed: 261 additions & 5 deletions

File tree

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Setup script for an ARM64 self-hosted GitHub Actions runner.
4+
# Run this on a fresh ARM64 Ubuntu 22.04/24.04 machine with KVM support.
5+
#
6+
# Prerequisites:
7+
# - ARM64 Linux host (Graviton, Ampere, etc.)
8+
# - KVM enabled (/dev/kvm accessible)
9+
# - At least 8GB RAM (for hugepage allocation)
10+
# - Root access
11+
#
12+
# Usage:
13+
# sudo ./setup-arm64-runner.sh
14+
#
15+
# After running this script, register the machine as a GitHub Actions
16+
# self-hosted runner with the label: infra-tests-arm64
17+
# https://github.com/e2b-dev/infra/settings/actions/runners/new
18+
19+
set -euo pipefail
20+
21+
PS4='[\D{%Y-%m-%d %H:%M:%S}] '
22+
set -x
23+
24+
if [ "$(id -u)" -ne 0 ]; then
25+
echo "ERROR: This script must be run as root" >&2
26+
exit 1
27+
fi
28+
29+
ARCH=$(dpkg --print-architecture)
30+
if [ "$ARCH" != "arm64" ]; then
31+
echo "ERROR: This script is for ARM64 hosts (detected: $ARCH)" >&2
32+
exit 1
33+
fi
34+
35+
echo "=== Setting up ARM64 GitHub Actions runner ==="
36+
37+
# KVM check
38+
if [ ! -e /dev/kvm ]; then
39+
echo "ERROR: /dev/kvm not found. KVM support is required." >&2
40+
exit 1
41+
fi
42+
43+
# Install base dependencies
44+
apt-get update
45+
apt-get install -y --no-install-recommends \
46+
build-essential \
47+
curl \
48+
git \
49+
jq \
50+
nbd-client \
51+
nbd-server
52+
53+
# Enable unprivileged userfaultfd
54+
echo 1 > /proc/sys/vm/unprivileged_userfaultfd
55+
56+
# Hugepages
57+
mkdir -p /mnt/hugepages
58+
mount -t hugetlbfs none /mnt/hugepages 2>/dev/null || true
59+
echo 2000 > /proc/sys/vm/nr_hugepages
60+
61+
grep -qF 'hugetlbfs /mnt/hugepages' /etc/fstab || \
62+
echo "hugetlbfs /mnt/hugepages hugetlbfs defaults 0 0" >> /etc/fstab
63+
64+
# Sysctl — overwrite the drop-in file on every run, so re-running is idempotent
65+
cat <<'EOF' > /etc/sysctl.d/99-e2b.conf
66+
vm.unprivileged_userfaultfd=1
67+
vm.nr_hugepages=2000
68+
net.core.somaxconn=65535
69+
net.core.netdev_max_backlog=65535
70+
net.ipv4.tcp_max_syn_backlog=65535
71+
vm.max_map_count=1048576
72+
EOF
73+
sysctl --system
74+
75+
# NBD
76+
modprobe nbd nbds_max=256
77+
echo "nbd" > /etc/modules-load.d/e2b.conf
78+
echo "options nbd nbds_max=256" > /etc/modprobe.d/e2b-nbd.conf
79+
80+
# Disable inotify watching of change events for NBD devices
81+
cat <<'EOF' > /etc/udev/rules.d/97-nbd-device.rules
82+
ACTION=="add|change", KERNEL=="nbd*", OPTIONS:="nowatch"
83+
EOF
84+
udevadm control --reload-rules
85+
udevadm trigger
86+
87+
# File descriptor limits
88+
cat <<'EOF' > /etc/security/limits.d/99-e2b.conf
89+
* soft nofile 1048576
90+
* hard nofile 1048576
91+
EOF
92+
93+
echo ""
94+
echo "=== ARM64 runner setup complete ==="
95+
echo ""
96+
echo "Verify:"
97+
echo " uname -m → aarch64"
98+
echo " ls /dev/kvm → exists"
99+
echo " cat /proc/meminfo | grep HugePages_Total"
100+
echo " lsmod | grep nbd"
101+
echo ""
102+
echo "Next: register this machine as a GitHub Actions self-hosted runner"
103+
echo " Label: infra-tests-arm64"
104+
echo " https://github.com/e2b-dev/infra/settings/actions/runners/new"
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
name: ARM64 tests on PRs
2+
3+
on: [workflow_call]
4+
5+
permissions:
6+
contents: read
7+
8+
jobs:
9+
cross-compile:
10+
name: Cross-compile all packages for ARM64
11+
runs-on: ubuntu-24.04
12+
timeout-minutes: 15
13+
steps:
14+
- name: Checkout repository
15+
uses: actions/checkout@v5
16+
17+
- name: Setup Go
18+
uses: ./.github/actions/go-setup-cache
19+
20+
- name: Install ARM64 cross-compiler
21+
run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu
22+
23+
- name: Build and vet packages (pure Go)
24+
run: |
25+
for pkg in api client-proxy envd shared db docker-reverse-proxy; do
26+
echo "::group::packages/$pkg"
27+
pushd "packages/$pkg" > /dev/null
28+
GOARCH=arm64 go build ./...
29+
GOARCH=arm64 go vet ./...
30+
popd > /dev/null
31+
echo "::endgroup::"
32+
done
33+
34+
- name: Fetch busybox for orchestrator embed
35+
run: make -C packages/orchestrator fetch-busybox BUILD_ARCH=arm64
36+
37+
- name: Build and vet orchestrator (CGO)
38+
run: |
39+
CGO_ENABLED=1 CC=aarch64-linux-gnu-gcc GOARCH=arm64 go build ./...
40+
CGO_ENABLED=1 CC=aarch64-linux-gnu-gcc GOARCH=arm64 go vet ./...
41+
working-directory: packages/orchestrator
42+
43+
arm64-unit-tests:
44+
name: ARM64 tests for ${{ matrix.package }}
45+
runs-on: ubuntu-24.04-arm
46+
timeout-minutes: 30
47+
strategy:
48+
matrix:
49+
include:
50+
- package: packages/api
51+
test_path: ./...
52+
sudo: false
53+
- package: packages/client-proxy
54+
test_path: ./...
55+
sudo: false
56+
- package: packages/db
57+
test_path: ./...
58+
sudo: false
59+
- package: packages/docker-reverse-proxy
60+
test_path: ./...
61+
sudo: false
62+
- package: packages/envd
63+
test_path: ./...
64+
sudo: true
65+
- package: packages/orchestrator
66+
test_path: ./...
67+
sudo: true
68+
- package: packages/shared
69+
test_path: ./pkg/...
70+
sudo: false
71+
fail-fast: false
72+
steps:
73+
- name: Checkout repository
74+
uses: actions/checkout@v5
75+
76+
- name: Setup Go
77+
uses: ./.github/actions/go-setup-cache
78+
with:
79+
cache-dependency-paths: |
80+
go.work
81+
${{ matrix.package }}/go.mod
82+
${{ matrix.package }}/go.sum
83+
84+
- name: Setup envd tests
85+
run: |
86+
sudo apt-get update && sudo apt-get install -y bindfs
87+
if: matrix.package == 'packages/envd'
88+
89+
- name: Setup orchestrator tests
90+
run: |
91+
# Download busybox for go:embed
92+
make -C packages/orchestrator fetch-busybox
93+
94+
# Enable unprivileged uffd (Ubuntu defaults to 0)
95+
echo 1 | sudo tee /proc/sys/vm/unprivileged_userfaultfd
96+
97+
# Enable hugepages (256 × 2MB = 512MB).
98+
# Tests that need more hugepages than available will skip gracefully.
99+
sudo mkdir -p /mnt/hugepages
100+
sudo mount -t hugetlbfs none /mnt/hugepages
101+
echo 256 | sudo tee /proc/sys/vm/nr_hugepages
102+
103+
# Install extra kernel modules (nbd is not in base modules on GitHub-hosted runners)
104+
sudo apt-get update
105+
sudo apt-get install -y linux-modules-extra-$(uname -r)
106+
sudo modprobe nbd nbds_max=256
107+
108+
# Disable inotify watching of change events for NBD devices
109+
echo 'ACTION=="add|change", KERNEL=="nbd*", OPTIONS:="nowatch"' | sudo tee /etc/udev/rules.d/97-nbd-device.rules
110+
sudo udevadm control --reload-rules
111+
sudo udevadm trigger
112+
if: matrix.package == 'packages/orchestrator'
113+
114+
- name: Run tests that require sudo
115+
working-directory: ${{ matrix.package }}
116+
run: sudo -E `which go` test -race -v ${{ matrix.test_path }}
117+
if: matrix.sudo == true
118+
119+
- name: Run tests
120+
working-directory: ${{ matrix.package }}
121+
run: go test -race -v ${{ matrix.test_path }}
122+
if: matrix.sudo == false

.github/workflows/pull-request.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ jobs:
2828
uses: ./.github/workflows/out-of-order-migrations.yml
2929
unit-tests:
3030
uses: ./.github/workflows/pr-tests.yml
31+
arm64-tests:
32+
uses: ./.github/workflows/pr-tests-arm64.yml
3133
integration-tests:
3234
needs: [out-of-order-migrations]
3335
uses: ./.github/workflows/integration_tests.yml

packages/orchestrator/pkg/sandbox/nbd/path_direct.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ func (d *DirectPathMount) Open(ctx context.Context) (retDeviceIndex uint32, err
7878

7979
telemetry.ReportEvent(ctx, "got backend size")
8080

81-
deviceIndex := uint32(math.MaxUint32)
81+
var deviceIndex uint32
8282

8383
for {
8484
deviceIndex, err = d.devicePool.GetDevice(ctx)
@@ -119,13 +119,17 @@ func (d *DirectPathMount) Open(ctx context.Context) (retDeviceIndex uint32, err
119119
server.Close()
120120

121121
dispatch := NewDispatch(serverc, d.Backend)
122+
// Capture deviceIndex for the goroutine closure — it's reassigned on
123+
// each retry iteration of the outer for-loop (it is declared outside the
124+
// loop, so Go 1.22+ per-iteration loop variables don't apply).
125+
devIdx := deviceIndex
122126
// Start reading commands on the socket and dispatching them to our provider
123127
d.handlersWg.Go(func() {
124128
handleErr := dispatch.Handle(ctx)
125129
// The error is expected to happen if the nbd (socket connection) is closed
126130
logger.L().Info(ctx, "closing handler for NBD commands",
127131
zap.Error(handleErr),
128-
zap.Uint32("device_index", deviceIndex),
132+
zap.Uint32("device_index", devIdx),
129133
zap.Int("socket_index", i),
130134
)
131135
})

packages/orchestrator/pkg/template/build/writer/postprocessor_test.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package writer
22

33
import (
44
"bytes"
5+
"sync"
56
"testing"
67
"time"
78

@@ -12,14 +13,37 @@ import (
1213
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
1314
)
1415

15-
func newTestCore(buf *bytes.Buffer) zapcore.Core {
16+
// syncBuffer wraps bytes.Buffer with a mutex for concurrent writes from
17+
// the PostProcessor goroutine and the test goroutine.
18+
type syncBuffer struct {
19+
mu sync.Mutex // guards buf
20+
buf bytes.Buffer // accumulated output; Write and String access it only with mu held
21+
}
22+
23+
// Write appends p to the wrapped buffer while holding the mutex and returns
// whatever the underlying bytes.Buffer.Write returns.
func (b *syncBuffer) Write(p []byte) (int, error) {
24+
b.mu.Lock()
25+
defer b.mu.Unlock()
26+
27+
return b.buf.Write(p)
28+
}
29+
30+
// Sync is a no-op that always returns nil. Together with Write it lets a
// *syncBuffer be passed straight to zapcore.NewCore in newTestCore without
// the zapcore.AddSync wrapper (presumably by satisfying zapcore.WriteSyncer).
func (b *syncBuffer) Sync() error { return nil }
31+
32+
func (b *syncBuffer) String() string {
33+
b.mu.Lock()
34+
defer b.mu.Unlock()
35+
36+
return b.buf.String()
37+
}
38+
39+
func newTestCore(buf *syncBuffer) zapcore.Core {
1640
encoderCfg := zap.NewDevelopmentEncoderConfig()
1741
encoderCfg.TimeKey = ""
1842
encoder := zapcore.NewConsoleEncoder(encoderCfg)
1943

2044
core := zapcore.NewCore(
2145
encoder,
22-
zapcore.AddSync(buf),
46+
buf,
2347
zapcore.DebugLevel,
2448
)
2549

@@ -29,7 +53,7 @@ func newTestCore(buf *bytes.Buffer) zapcore.Core {
2953
func TestPostProcessor_Start(t *testing.T) {
3054
t.Parallel()
3155
ctx := t.Context()
32-
var buf bytes.Buffer
56+
var buf syncBuffer
3357
core := newTestCore(&buf)
3458

3559
interval := time.Millisecond * 100

0 commit comments

Comments
 (0)