Skip to content

Commit c5e487e

Browse files
authored
Merge pull request #106 from julia-vscode/sp/cache-infra
feat: new cache hosting infra
2 parents ccdda2f + 0ebf17c commit c5e487e

19 files changed

Lines changed: 1434 additions & 51 deletions
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: Cache-infra integration (rclone)
2+
3+
# The cache-infra scripts (regen/reconcile/seed) are Unix shell + rclone, so
4+
# their integration tests only run on Linux with rclone installed. The main
5+
# Julia CI matrix has no rclone, so those items skip there; this workflow gives
6+
# them real coverage. JW_TEST_FILTER restricts the run to just those items.
7+
8+
on:
9+
push:
10+
branches: [main, master]
11+
pull_request:
12+
types: [opened, synchronize, reopened]
13+
14+
jobs:
15+
rclone-tests:
16+
name: cache-infra integration (ubuntu, rclone)
17+
runs-on: ubuntu-latest
18+
steps:
19+
- uses: actions/checkout@v4
20+
- uses: julia-actions/setup-julia@v2
21+
with:
22+
version: '1'
23+
- uses: julia-actions/cache@v2
24+
- name: Install rclone
25+
run: |
26+
sudo apt-get update
27+
sudo apt-get install -y rclone
28+
rclone version
29+
- uses: julia-actions/julia-buildpkg@v1
30+
- uses: julia-actions/julia-runtest@v1
31+
env:
32+
JW_TEST_FILTER: cache-infra

scripts/package_symbolcache.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env bash
2+
# Package a jwcloudindex store into the hosting layout: one tar.gz per .jstore
3+
# under OUT/${STORE_PREFIX}/packages/. Packaging only — index generation is done by the
4+
# caller via `jwcloudindex --emit-index` (regen builds a union index; the seed
5+
# builds a full one), so this script does not build or upload an index.
6+
# Usage: package_symbolcache.sh STORE OUT
7+
set -euo pipefail
8+
source "$(dirname "${BASH_SOURCE[0]}")/symbolcache_common.sh"
9+
STORE=${1:?store dir}; STORE=${STORE%/}; OUT=${2:?out dir}
10+
JOBS=$(nproc)
11+
PKGS_OUT="$OUT/${STORE_PREFIX}/packages"; mkdir -p "$PKGS_OUT"
12+
13+
# One tar.gz per .jstore (tarball contains just the .jstore, gzip).
14+
# STORE and PKGS_OUT are exported because every worker below is a separate
# bash process that reads them from its environment.
export STORE PKGS_OUT
15+
# xargs -r (--no-run-if-empty): without it GNU xargs runs the worker once with
# no argument when find matches nothing, and the worker's `set -u` then aborts
# on the unset $1, failing the whole script for an empty store.
find "$STORE" -name '*.jstore' -print0 | xargs -0 -r -P"$JOBS" -n1 bash -c '
16+
set -euo pipefail
17+
f=$1; rel=${f#"$STORE"/}; dir=$(dirname "$rel"); base=$(basename "$f")
18+
dest="$PKGS_OUT/$dir"; mkdir -p "$dest"
19+
tar -czf "$dest/${base%.jstore}.tar.gz" -C "$STORE/$dir" "$base"
20+
' _
21+
22+
echo "packaged $(find "$PKGS_OUT" -name '*.tar.gz' | wc -l) artifacts to $OUT/${STORE_PREFIX}/packages"

scripts/reconcile_symbolcache.sh

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Periodic full reconcile: treat the bucket's artifacts as source of truth —
4+
# rebuild the index from the artifacts present, drop tombstones that now have an
5+
# artifact. Never mutates artifacts. No lock (single-flight is the scheduler's job).
6+
#
7+
# Usage:
8+
# reconcile_symbolcache.sh --remote REMOTE [--work DIR]
9+
#
10+
# --remote REMOTE (required) rclone remote + bucket prefix, e.g.
11+
# "r2:symbolcache" or ":local:/path/to/dir" for local testing.
12+
# --work DIR scratch dir (default: fresh mktemp)
13+
#
14+
# Requires: rclone, gzip, tar, sort, awk, comm.
15+
# Single-flight is the scheduler's responsibility (Actions concurrency: / flock).
16+
# No lock object is stored or checked.
17+
#
18+
set -euo pipefail
19+
source "$(dirname "${BASH_SOURCE[0]}")/symbolcache_common.sh"
20+
21+
usage() { cat <<'EOF'
22+
Usage: reconcile_symbolcache.sh --remote REMOTE [--work DIR]
23+
--remote REMOTE (required) rclone remote + bucket prefix (e.g. r2:symbolcache)
24+
--work DIR scratch dir (default: fresh mktemp)
25+
EOF
26+
}
27+
28+
# ---------------------------------------------------------------------------
29+
# Arguments
30+
# ---------------------------------------------------------------------------
31+
# Parse the command line into REMOTE and WORK.
# Value-taking flags check that a value is actually present: under `set -u` a
# trailing "--remote" / "--work" would otherwise die on the unset $2 with an
# opaque "unbound variable" message instead of the usage text and exit code 2
# used for every other argument error.
REMOTE=""; WORK=""
32+
while [[ $# -gt 0 ]]; do
33+
case "$1" in
34+
--remote) [[ $# -ge 2 ]] || { echo "[reconcile] ERROR: --remote requires a value" >&2; usage >&2; exit 2; }; REMOTE="$2"; shift 2 ;;
35+
--work) [[ $# -ge 2 ]] || { echo "[reconcile] ERROR: --work requires a value" >&2; usage >&2; exit 2; }; WORK="$2"; shift 2 ;;
36+
-h|--help) usage; exit 0 ;;
37+
*) echo "[reconcile] ERROR: unknown argument: $1" >&2; usage >&2; exit 2 ;;
38+
esac
39+
done
40+
[[ -n "$REMOTE" ]] || { echo "[reconcile] ERROR: --remote is required" >&2; usage >&2; exit 2; }
41+
# No --work given: fall back to a fresh scratch directory.
WORK="${WORK:-$(mktemp -d /tmp/reconcile_symbolcache.XXXXXX)}"
42+
43+
PFX="${STORE_PREFIX}"
44+
STATE="$PFX/_state"
45+
46+
mkdir -p "$WORK"
47+
48+
echo "[reconcile] REMOTE=$REMOTE WORK=$WORK"
49+
50+
# ---------------------------------------------------------------------------
51+
# Step 1: List authoritative artifacts
52+
# ---------------------------------------------------------------------------
53+
# Layer 1 safety: separate rclone exit status from grep's.
54+
# rclone lsf writes to raw_listing.txt with stderr captured separately.
55+
# A genuinely absent packages/ prefix (first run, S3/R2 returns exit 0 with
56+
# empty output; local backend returns exit 3 "directory not found") is treated
57+
# as empty — that is an expected condition. Any other rclone error (auth,
58+
# network, wrong remote config) is a hard failure that aborts under
59+
# set -euo pipefail to prevent rebuilding with a bogus empty list.
60+
# grep on a valid-but-empty listing exits 1 — tolerated with || true on the
61+
# filter-only step.
62+
echo "[reconcile] listing artifacts under $REMOTE/$PFX/packages ..."
63+
set +e
64+
rclone lsf -R --files-only "${REMOTE}/${PFX}/packages" \
65+
> "$WORK/raw_listing.txt" \
66+
2> "$WORK/rclone_lsf_err.txt"
67+
rclone_rc=$?
68+
set -e
69+
if [[ $rclone_rc -ne 0 ]]; then
70+
err_text=$(cat "$WORK/rclone_lsf_err.txt")
71+
# Tolerate "directory not found" / "object not found" listing errors — these
72+
# occur on the local backend when packages/ does not yet exist (first run).
73+
# Real object stores (S3/R2) return exit 0 with empty output for absent
74+
# prefixes, so this branch is mainly a local-backend / CI safety valve.
75+
# Do NOT match generic "not found" which also appears in config errors
76+
# ("didn't find section in config file").
77+
if echo "$err_text" | grep -qE "error listing:.*not found|error in ListJSON:.*not found|NoSuchKey|NoSuchBucket"; then
78+
echo "[reconcile] packages prefix absent (directory not found) — treating as empty"
79+
: > "$WORK/raw_listing.txt"
80+
else
81+
echo "[reconcile] ERROR: rclone lsf failed (exit $rclone_rc):" >&2
82+
echo "$err_text" >&2
83+
exit $rclone_rc
84+
fi
85+
fi
86+
grep '\.tar\.gz$' "$WORK/raw_listing.txt" \
87+
| awk -F/ '{s=$NF; sub(/\.tar\.gz$/, "", s); print $(NF-1) "/" s}' \
88+
| sort -u > "$WORK/artifacts.txt" || true
89+
90+
artifact_count=$(wc -l < "$WORK/artifacts.txt")
91+
echo "[reconcile] found $artifact_count artifact(s)"
92+
93+
# Layer 2 safety: if derived artifact count is 0 but an existing index already
94+
# has entries, abort rather than wipe. Preserves the genuine first-run /
95+
# truly-empty case: zero artifacts AND no/empty existing index → proceed.
96+
# index.tar.gz is a gzipped tar (the rebuild step writes it with `tar -czf`),
# so entries are counted from the extracted member contents. Counting lines of
# the raw tar stream (plain `gzip -dc`) would also count tar header/padding
# bytes, making even an empty index look non-empty and reporting a wrong count.
# A missing or unreadable index makes the pipeline fail; `|| true` keeps the
# "0" that grep -c printed.
if [[ "$artifact_count" -eq 0 ]]; then
97+
existing=$(rclone cat "${REMOTE}/${PFX}/index.tar.gz" 2>/dev/null \
98+
| tar -xzOf - 2>/dev/null | grep -c . || true)
99+
if [[ "${existing:-0}" -gt 0 ]]; then
100+
echo "[reconcile] ERROR: artifact list empty but existing index has $existing entries — aborting to avoid wiping the index" >&2
101+
exit 1
102+
fi
103+
fi
104+
105+
# ---------------------------------------------------------------------------
106+
# Step 2: Rebuild and publish the index from artifacts.txt
107+
# ---------------------------------------------------------------------------
108+
# The availability index is authoritative: it is exactly the set of keys
109+
# for which an artifact exists in the bucket right now.
110+
# Stage artifacts.txt under the fixed member name index.txt so the tarball
# contains a single entry with that name.
idxdir="$WORK/idx_staging"
111+
mkdir -p "$idxdir"
112+
cp "$WORK/artifacts.txt" "$idxdir/index.txt"
113+
114+
# -C makes the archive member path a bare "index.txt" (no staging-dir prefix).
tar -czf "$WORK/index.tar.gz" -C "$idxdir" index.txt
115+
116+
echo "[reconcile] uploading rebuilt index.tar.gz ($artifact_count entries) ..."
117+
# CC_INDEX is not defined in this script; presumably it is set by the sourced
# symbolcache_common.sh and holds an upload header value — TODO confirm.
rclone copyto "$WORK/index.tar.gz" "${REMOTE}/${PFX}/index.tar.gz" --header-upload "$CC_INDEX"
118+
echo "[reconcile] index.tar.gz uploaded"
119+
120+
# ---------------------------------------------------------------------------
121+
# Step 3: Reconcile tombstones
122+
# ---------------------------------------------------------------------------
123+
# Download current tombstones (tolerate absence).
124+
# Start from an empty file, not `touch`: with a reused --work directory a stale
# tombstones.txt from an earlier run would otherwise survive a failed download
# and be re-uploaded as if it were current.
: > "$WORK/tombstones.txt"
125+
# NOTE(review): any rclone failure here (including auth/network errors) is
# treated as "no tombstones yet" — confirm that is acceptable, since the
# reconciled (then empty) list is uploaded over the remote copy afterwards.
if rclone copyto "${REMOTE}/${STATE}/tombstones.txt.gz" "$WORK/tombstones_dl.txt.gz" 2>/dev/null; then
126+
gzip -dc "$WORK/tombstones_dl.txt.gz" > "$WORK/tombstones.txt" \
127+
|| { echo "[reconcile] WARNING: tombstones decompress failed; treating as empty" >&2; : > "$WORK/tombstones.txt"; }
128+
else
129+
echo "[reconcile] no existing tombstones.txt.gz (first run or empty remote)"
130+
fi
131+
132+
tombstone_count=$(wc -l < "$WORK/tombstones.txt")
133+
echo "[reconcile] downloaded $tombstone_count tombstone(s)"
134+
135+
# Drop any tombstone key that now has an artifact (comm requires sorted input, so both lists are sorted on the fly).
136+
# comm -23: lines only in file1 (tombstones) that are NOT in file2 (artifacts).
137+
comm -23 \
138+
<(sort "$WORK/tombstones.txt") \
139+
<(sort "$WORK/artifacts.txt") \
140+
> "$WORK/tombstones_new.txt"
141+
142+
new_tombstone_count=$(wc -l < "$WORK/tombstones_new.txt")
143+
dropped=$(( tombstone_count - new_tombstone_count ))
144+
echo "[reconcile] reconciled tombstones: $new_tombstone_count kept, $dropped dropped (had artifact)"
145+
146+
# Upload reconciled tombstones.
147+
gzip -c "$WORK/tombstones_new.txt" | rclone rcat "${REMOTE}/${STATE}/tombstones.txt.gz" --header-upload "$CC_PRIVATE"
148+
echo "[reconcile] tombstones.txt.gz uploaded"
149+
150+
echo "[reconcile] done"

0 commit comments

Comments
 (0)