Skip to content

Commit 2406c64

Browse files
sbryngelson and claude committed
Simplify build cache to per-runner directories
Replace the shared cache (with flock, sed path fixups, and workspace tracking) with per-runner caches keyed by RUNNER_NAME. Each runner always uses the same workspace path, so CMake's absolute paths are always correct — no cross-runner path issues, no locking needed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 233aa11 commit 2406c64

2 files changed

Lines changed: 8 additions & 74 deletions

File tree

Lines changed: 8 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,35 @@
11
#!/bin/bash
22
# Sets up a persistent build cache for self-hosted CI runners.
3-
# Creates a symlink: ./build -> <resolved scratch path>/.mfc-ci-cache/<key>/build
3+
# Creates a symlink: ./build -> <scratch>/.mfc-ci-cache/<key>/build
44
#
5-
# This ensures that every run of the same config (cluster/device/interface) finds
6-
# cached build artifacts regardless of which runner instance picks up the job.
7-
#
8-
# Concurrent safety: uses flock to serialize access per cache directory. If
9-
# multiple PRs trigger the same config simultaneously, the second job waits
10-
# for the first to finish (up to 1 hour), then gets a warm cache. If the lock
11-
# times out, falls back to a local build (same as no caching).
5+
# Each runner gets its own cache keyed by (cluster, device, interface, runner).
6+
# This avoids cross-runner path issues entirely — CMake's absolute paths are
7+
# always correct because the same runner always uses the same workspace path.
128
#
139
# Usage: source .github/scripts/setup-build-cache.sh <cluster> <device> <interface>
1410

1511
_cache_cluster="${1:?Usage: setup-build-cache.sh <cluster> <device> <interface>}"
1612
_cache_device="${2:?}"
1713
_cache_interface="${3:-none}"
14+
_cache_runner="${RUNNER_NAME:?RUNNER_NAME not set}"
1815

19-
_cache_key="${_cache_cluster}-${_cache_device}-${_cache_interface}"
16+
_cache_key="${_cache_cluster}-${_cache_device}-${_cache_interface}-${_cache_runner}"
2017
_cache_base="$HOME/scratch/.mfc-ci-cache/${_cache_key}/build"
2118

22-
# Create the cache dir, then resolve to a physical path (no symlinks).
23-
# $HOME/scratch is typically a symlink to a scratch filesystem — resolving
24-
# it ensures the build symlink target remains valid even if intermediate
25-
# symlinks change.
2619
mkdir -p "$_cache_base"
2720
_cache_dir="$(cd "$_cache_base" && pwd -P)"
2821

2922
echo "=== Build Cache Setup ==="
3023
echo " Cache key: $_cache_key"
3124
echo " Cache dir: $_cache_dir"
3225

33-
# Acquire an exclusive lock on the cache directory to prevent concurrent
34-
# builds from corrupting it. The lock is fd-based (flock on fd 9), so it
35-
# auto-releases when the calling process exits — no stale locks.
36-
#
37-
# Timeout: 1 hour. If another build holds the lock, we wait. This is fine
38-
# because the waiting job will get a warm cache when it finally acquires.
39-
# If the lock can't be acquired after 1 hour, something is wrong — fall
40-
# back to a local build in the workspace.
41-
_cache_locked=false
42-
_lock_file="$_cache_dir/.cache.lock"
43-
exec 9>"$_lock_file"
44-
echo " Acquiring cache lock..."
45-
if flock --timeout 3600 9; then
46-
_cache_locked=true
47-
echo " Cache lock acquired"
48-
else
49-
echo " WARNING: Cache lock timeout (1h), building locally without cache"
50-
exec 9>&-
51-
# Remove any existing symlink to the shared cache so we don't write
52-
# into it without the lock. Then create a real local directory.
53-
if [ -L "build" ]; then
54-
rm -f "build"
55-
fi
56-
mkdir -p "build"
57-
echo "========================="
58-
return 0 2>/dev/null || true
59-
fi
60-
61-
# If build/ exists (real dir or stale symlink), remove it.
62-
# rm -rf on a symlink removes the symlink, not the target — cache is safe.
26+
# Replace any existing build/ (real dir or stale symlink) with a symlink
27+
# to our runner-specific cache directory.
6328
if [ -e "build" ] || [ -L "build" ]; then
6429
rm -rf "build"
6530
fi
6631

6732
ln -s "$_cache_dir" "build"
6833

69-
# Handle cross-runner workspace path changes.
70-
# CMakeCache.txt stores absolute paths from whichever runner instance
71-
# originally configured the build. If we're on a different runner, sed-replace
72-
# the old workspace path with the current one so CMake can do incremental builds.
73-
_workspace_marker="$_cache_dir/.workspace_path"
74-
if [ -f "$_workspace_marker" ]; then
75-
_old_workspace=$(cat "$_workspace_marker")
76-
if [ "$_old_workspace" != "$(pwd)" ]; then
77-
echo " Workspace path changed: $_old_workspace -> $(pwd)"
78-
echo " Updating cached paths..."
79-
# Update CMake build files in staging/
80-
find "$_cache_dir/staging" -type f \
81-
\( -name "CMakeCache.txt" -o -name "*.cmake" \
82-
-o -name "*.make" -o -name "Makefile" \
83-
-o -name "build.ninja" \) \
84-
-exec sed -i "s|${_old_workspace}|$(pwd)|g" {} + 2>/dev/null || true
85-
# Compiled binaries have stale paths baked in — delete install/
86-
# so CMake rebuilds and re-installs them with correct paths.
87-
echo " Clearing install/ to force rebuild of binaries..."
88-
rm -rf "$_cache_dir/install"
89-
fi
90-
fi
91-
echo "$(pwd)" > "$_workspace_marker"
92-
9334
echo " Symlink: build -> $_cache_dir"
9435
echo "========================="

.github/workflows/phoenix/test.sh

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,6 @@ while [ $attempt -le $max_attempts ]; do
3333
attempt=$((attempt + 1))
3434
done
3535

36-
# Release the cache lock before running tests. Tests only read installed
37-
# binaries and can take hours — no need to block other builds.
38-
if [ "${_cache_locked:-false}" = true ]; then
39-
exec 9>&-
40-
echo "Released build cache lock before tests"
41-
fi
42-
4336
n_test_threads=8
4437

4538
if [ "$job_device" = "gpu" ]; then

0 commit comments

Comments
 (0)