Skip to content

Commit 423fe06

Browse files
midlemanclaude
andauthored
ci: fix stable cache misses (#13669)
### Summary Fixes the stable extensions cache, which has been missing on every PR and merge run since the 1.111 upstream merge. - Path bug: `extensions/out` exists at save time but not restore time -> excluded from the path list - Key was non-deterministic across CI runs -> enumerate via `git ls-tree` / `git ls-files` instead of filesystem `find` - Key bumped v4 -> v7 to drop orphaned caches - PRs restore from main; only test-merge.yml saves - Cache hit/miss surfaced in the test/unit job summary ### QA Notes First main run after merge populates the v7 cache. We will now see a GH summary with cache hit/miss - instead of having to dig through the job steps. --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f20fa09 commit 423fe06

7 files changed

Lines changed: 149 additions & 23 deletions

File tree

.github/actions/restore-build-caches/action.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ inputs:
3737
description: "Whether to restore Playwright browsers cache (default: true)"
3838
required: false
3939
default: 'true'
40+
emit-summary:
41+
description: "Emit cache hit/miss summary to the job summary (default: false). Cache state is identical across jobs in a workflow, so opt in from one job to avoid duplication."
42+
required: false
43+
default: 'false'
4044

4145
outputs:
4246
cache-npm-core-hit:
@@ -193,7 +197,7 @@ runs:
193197
uses: actions/cache/restore@v5
194198
with:
195199
path: ${{ steps.cache-paths.outputs.npm-extensions-stable-paths }}
196-
key: npm-extensions-stable-v4-${{ runner.os }}-${{ steps.distro.outputs.distro }}-${{ steps.extensions-stable-hash.outputs.hash }}
200+
key: npm-extensions-stable-v7-${{ runner.os }}-${{ steps.distro.outputs.distro }}-${{ steps.extensions-stable-hash.outputs.hash }}
197201

198202
- name: Restore built-in extensions cache
199203
if: ${{ inputs.restore-builtins == 'true' }}
@@ -271,4 +275,7 @@ runs:
271275
CACHE_NPM_EXTENSIONS_STABLE_PARTIAL: ${{ steps.detect-partials.outputs.npm-extensions-stable-partial }}
272276
CACHE_BUILTINS_PARTIAL: ${{ steps.detect-partials.outputs.builtins-partial }}
273277
CACHE_PLAYWRIGHT_PARTIAL: ${{ steps.detect-partials.outputs.playwright-partial }}
278+
279+
# whether to write a summary block to $GITHUB_STEP_SUMMARY
280+
EMIT_CACHE_SUMMARY: ${{ inputs.emit-summary }}
274281
run: .github/cache-scripts/log-cache-status.sh restore

.github/actions/save-build-caches/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ runs:
129129
uses: actions/cache/save@v5
130130
with:
131131
path: ${{ steps.cache-paths.outputs.npm-extensions-stable-paths }}
132-
key: npm-extensions-stable-v4-${{ runner.os }}-${{ steps.distro.outputs.distro }}-${{ inputs.extensions-stable-hash }}
132+
key: npm-extensions-stable-v7-${{ runner.os }}-${{ steps.distro.outputs.distro }}-${{ inputs.extensions-stable-hash }}
133133

134134
- name: Save built-in extensions cache
135135
if: ${{ inputs.cache-builtins-hit == 'false' }}

.github/cache-scripts/cache-paths.sh

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,14 +155,18 @@ generate_npm_extensions_stable_paths() {
155155
fi
156156
done <<< "$volatile_exts"
157157

158-
# Find all extensions except volatile ones and node_modules
158+
# Find all extensions except volatile ones, node_modules, and build output.
159+
# `out` is a build artifact directory (produced by esbuild-extension-common.mts).
160+
# It only exists after compile, so including it would make the path list
161+
# non-deterministic between cache restore (no out/) and cache save (out/ exists),
162+
# producing different cache versions and constant misses.
159163
local paths=""
160164
for ext_dir in extensions/*/; do
161165
ext_dir="${ext_dir%/}"
162166
local ext_name="${ext_dir##*/}"
163167

164-
# Skip node_modules and volatile extensions
165-
if [ "$ext_name" != "node_modules" ] && ! echo "$ext_name" | grep -qE "^($volatile_pattern)$"; then
168+
# Skip node_modules, build output, and volatile extensions
169+
if [ "$ext_name" != "node_modules" ] && [ "$ext_name" != "out" ] && ! echo "$ext_name" | grep -qE "^($volatile_pattern)$"; then
166170
paths="${paths}${ext_dir}"$'\n'
167171
fi
168172
done

.github/cache-scripts/generate-extensions-hash.sh

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,12 @@ if [ "$FILTER" == "volatile" ]; then
119119
FILES_HASH=$(eval "find extensions .vscode -maxdepth 3 -name \"package.json\" -type f -not -path \"*/node_modules/*\" $FIND_EXCLUDES 2>/dev/null" | grep -E "($FILTER_PATTERN)" | sort | xargs cat 2>/dev/null | sha256sum | cut -d' ' -f1)
120120
elif [ "$FILTER" == "stable" ]; then
121121
echo " → Filtering: stable extensions only"
122+
# Enumerate via git ls-files (deterministic) instead of find (filesystem-
123+
# dependent). The find variant produced different hashes across runs of
124+
# identical code, so the stable cache key never matched and the cache
125+
# missed every run.
122126
FILTER_PATTERN=$(IFS='|'; echo "${VOLATILE_EXTENSIONS[*]}")
123-
FILES_HASH=$(eval "find extensions .vscode -maxdepth 3 -name \"package.json\" -type f -not -path \"*/node_modules/*\" $FIND_EXCLUDES 2>/dev/null" | grep -v -E "($FILTER_PATTERN)" | sort | xargs cat 2>/dev/null | sha256sum | cut -d' ' -f1)
127+
FILES_HASH=$(git ls-files extensions .vscode | grep -E '(^|/)package\.json$' | grep -v '/node_modules/' | grep -v -E "($FILTER_PATTERN)" | sort | xargs cat 2>/dev/null | sha256sum | cut -d' ' -f1)
124128
else
125129
echo " → No filter: all extensions"
126130
FILES_HASH=$(eval "find extensions .vscode -maxdepth 3 -name \"package.json\" -type f -not -path \"*/node_modules/*\" $FIND_EXCLUDES 2>/dev/null" | sort | xargs cat 2>/dev/null | sha256sum | cut -d' ' -f1)
@@ -151,9 +155,13 @@ if [ "$FILTER" == "volatile" ]; then
151155
GIT_TREE_HASH=$(echo "$GIT_TREE_HASH" | sha256sum | cut -d' ' -f1)
152156
elif [ "$FILTER" == "stable" ]; then
153157
echo " → Filtering: stable extensions only"
154-
ALL_EXT_DIRS=$(find extensions -maxdepth 1 -type d -not -name "node_modules" | tail -n +2 | sort)
158+
# Enumerate via git ls-tree (deterministic) instead of find / bash glob
159+
# (filesystem-dependent). `tree` covers regular subdirs; `commit` covers
160+
# submodules (e.g. extensions/positron-copilot-chat).
161+
ALL_EXT_DIRS=$(git ls-tree HEAD extensions/ | awk '$2=="tree" || $2=="commit" {print $4}' | sort)
155162
GIT_TREE_HASH=""
156-
for ext_dir in $ALL_EXT_DIRS; do
163+
while IFS= read -r ext_dir; do
164+
[ -z "$ext_dir" ] && continue
157165
# Check if this is a volatile extension (skip if it is)
158166
is_volatile=false
159167
for volatile_ext in "${VOLATILE_EXTENSIONS[@]}"; do
@@ -167,16 +175,15 @@ elif [ "$FILTER" == "stable" ]; then
167175
TREE_HASH=$(git rev-parse "HEAD:$ext_dir" 2>/dev/null || echo "no-tree")
168176
GIT_TREE_HASH="${GIT_TREE_HASH}${TREE_HASH}"
169177
fi
170-
done
178+
done <<< "$ALL_EXT_DIRS"
171179

172-
# Also hash .vscode/extensions/* (they're in the stable cache too!)
173-
for vscode_ext_dir in .vscode/extensions/*/; do
174-
if [ -d "$vscode_ext_dir" ]; then
175-
vscode_ext_dir="${vscode_ext_dir%/}"
176-
TREE_HASH=$(git rev-parse "HEAD:$vscode_ext_dir" 2>/dev/null || echo "no-tree")
177-
GIT_TREE_HASH="${GIT_TREE_HASH}${TREE_HASH}"
178-
fi
179-
done
180+
# Also hash .vscode/extensions/* (they're in the stable cache too).
181+
VSCODE_EXT_DIRS=$(git ls-tree HEAD .vscode/extensions/ | awk '$2=="tree" || $2=="commit" {print $4}' | sort)
182+
while IFS= read -r vscode_ext_dir; do
183+
[ -z "$vscode_ext_dir" ] && continue
184+
TREE_HASH=$(git rev-parse "HEAD:$vscode_ext_dir" 2>/dev/null || echo "no-tree")
185+
GIT_TREE_HASH="${GIT_TREE_HASH}${TREE_HASH}"
186+
done <<< "$VSCODE_EXT_DIRS"
180187

181188
GIT_TREE_HASH=$(echo "$GIT_TREE_HASH" | sha256sum | cut -d' ' -f1)
182189
else

.github/cache-scripts/log-cache-status.sh

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,106 @@ elif [[ "$OPERATION" == "save" ]]; then
145145
log_save_status "builtins" "CACHE_BUILTINS_HIT"
146146
log_save_status "playwright" "CACHE_PLAYWRIGHT_HIT"
147147
fi
148+
149+
# ============================================================================
150+
# SECTION 4: Job Summary (loud on miss, quiet on hit)
151+
# ============================================================================
152+
# Surface cache state in the GitHub Actions job summary so regressions are
153+
# visible without digging through raw logs. The stable cache miss bug went
154+
# undetected for 20+ days; this is the safety net.
155+
#
156+
# Hit case: one terse line ("✅ All caches restored").
157+
# Miss case: "## Cache" section with one bullet per cache showing state.
158+
# Skipped case: one terse line ("⏭️ All caches skipped") if no caches were enabled.
159+
#
160+
# Skipped if $GITHUB_STEP_SUMMARY is unset (local runs).
161+
162+
# Only emit a summary for restore operations, and only when the caller opts in
163+
# via EMIT_CACHE_SUMMARY=true. Cache state is identical across jobs in a
164+
# workflow, so we opt in from one job (test/unit) to avoid duplication.
165+
# Save events are redundant with restore misses (a save only happens because
166+
# the matching restore missed), so we never emit a save summary.
167+
if [[ -z "${GITHUB_STEP_SUMMARY:-}" \
168+
|| "$OPERATION" != "restore" \
169+
|| "${EMIT_CACHE_SUMMARY:-false}" != "true" ]]; then
170+
exit 0
171+
fi
172+
173+
# Map a single cache to "hit" | "partial" | "miss" | "skipped".
174+
cache_state() {
175+
local enabled_var="$1"
176+
local hit_var="$2"
177+
local partial_var="$3"
178+
179+
if [[ "${!enabled_var:-false}" != "true" ]]; then
180+
echo "skipped"
181+
elif [[ "${!hit_var:-false}" == "true" ]]; then
182+
echo "hit"
183+
elif [[ "${!partial_var:-false}" == "true" ]]; then
184+
echo "partial"
185+
else
186+
echo "miss"
187+
fi
188+
}
189+
190+
# Render a cache state as a summary fragment.
191+
render_state() {
192+
case "$1" in
193+
hit) echo "✅ hit" ;;
194+
partial) echo "⚠️ partial" ;;
195+
miss) echo "" ;;
196+
skipped) echo "⏭️ skipped" ;;
197+
*) echo "unknown" ;;
198+
esac
199+
}
200+
201+
# Cache definitions: name | enabled_var | hit_var | partial_var
202+
CACHES=(
203+
"npm-core|RESTORE_NPM_CORE|CACHE_NPM_CORE_HIT|CACHE_NPM_CORE_PARTIAL"
204+
"npm-ext-volatile|RESTORE_NPM_EXTENSIONS|CACHE_NPM_EXTENSIONS_VOLATILE_HIT|CACHE_NPM_EXTENSIONS_VOLATILE_PARTIAL"
205+
"npm-ext-stable|RESTORE_NPM_EXTENSIONS|CACHE_NPM_EXTENSIONS_STABLE_HIT|CACHE_NPM_EXTENSIONS_STABLE_PARTIAL"
206+
"builtins|RESTORE_BUILTINS|CACHE_BUILTINS_HIT|CACHE_BUILTINS_PARTIAL"
207+
"playwright|RESTORE_PLAYWRIGHT|CACHE_PLAYWRIGHT_HIT|CACHE_PLAYWRIGHT_PARTIAL"
208+
)
209+
210+
# Compute state per cache and count categories.
211+
states=()
212+
total=0
213+
exact_hits=0
214+
partial_hits=0
215+
misses=0
216+
for spec in "${CACHES[@]}"; do
217+
IFS='|' read -r name enabled_var hit_var partial_var <<< "$spec"
218+
state="$(cache_state "$enabled_var" "$hit_var" "$partial_var")"
219+
states+=("$name|$state")
220+
221+
if [[ "$state" != "skipped" ]]; then
222+
total=$((total + 1))
223+
fi
224+
case "$state" in
225+
hit) exact_hits=$((exact_hits + 1)) ;;
226+
partial) partial_hits=$((partial_hits + 1)) ;;
227+
miss) misses=$((misses + 1)) ;;
228+
esac
229+
done
230+
231+
# Emit summary.
232+
{
233+
if [[ "$total" -eq 0 ]]; then
234+
echo "⏭️ All caches skipped"
235+
elif [[ "$misses" -eq 0 ]]; then
236+
if [[ "$partial_hits" -gt 0 ]]; then
237+
echo "✅ All caches restored (${exact_hits} exact, ${partial_hits} partial)"
238+
else
239+
echo "✅ All caches restored (${exact_hits}/${total})"
240+
fi
241+
else
242+
echo "## Cache"
243+
echo ""
244+
for entry in "${states[@]}"; do
245+
IFS='|' read -r name state <<< "$entry"
246+
echo "- ${name} $(render_state "$state")"
247+
done
248+
fi
249+
echo ""
250+
} >> "$GITHUB_STEP_SUMMARY"

.github/workflows/test-pull-request.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ jobs:
286286
uses: ./.github/workflows/test-unit.yml
287287
secrets: inherit
288288
with:
289-
save_cache: true # Only this job saves cache to avoid race conditions
289+
save_cache: false # PRs restore from main only; test-merge.yml saves
290290

291291
ext-host-tests:
292292
name: test

.github/workflows/test-unit.yml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
name: "Test: Unit"
22

33
# Cache Save Strategy:
4-
# This workflow has a save_cache parameter to prevent race conditions when multiple jobs
5-
# try to save the same cache key simultaneously. Only ONE job per calling workflow should
6-
# set save_cache: true. Examples:
7-
# - test-pull-request.yml: unit-tests saves, ext-host-tests and e2e do not
8-
# - test-merge.yml: unit-tests saves, setup/ext-host/e2e do not
4+
# Only main (test-merge.yml) saves caches; PRs restore from main only. This
5+
# avoids the 10 GB GitHub Actions cache ceiling getting thrashed by many
6+
# concurrent PRs each writing duplicate copies of identical content, which
7+
# previously caused mid-day evictions and effectively broke caching on main.
8+
# The save_cache parameter lets us scope the save step to the merge workflow.
9+
# Examples:
10+
# - test-pull-request.yml: save_cache=false (restore only)
11+
# - test-merge.yml: unit-tests sets save_cache=true (one saver per workflow run)
912
# - test-full-suite.yml: only restores
1013

1114
on:
@@ -57,6 +60,8 @@ jobs:
5760
- name: 📦 Restore caches
5861
id: restore-caches
5962
uses: ./.github/actions/restore-build-caches
63+
with:
64+
emit-summary: 'true'
6065

6166
- name: Install system dependencies
6267
uses: ./.github/actions/install-system-deps

0 commit comments

Comments
 (0)