#!/usr/bin/env bash
#
# Periodic full reconcile: treat the bucket's artifacts as source of truth —
# rebuild the index from the artifacts present, drop tombstones that now have an
# artifact. Never mutates artifacts. No lock (single-flight is the scheduler's job).
#
# Usage:
#   reconcile_symbolcache.sh --remote REMOTE [--work DIR]
#
#   --remote REMOTE   (required) rclone remote + bucket prefix, e.g.
#                     "r2:symbolcache" or ":local:/path/to/dir" for local testing.
#   --work DIR        scratch dir (default: fresh mktemp)
#
# Requires: rclone, gzip, tar, grep, sort, awk, comm.
# Single-flight is the scheduler's responsibility (Actions concurrency: / flock).
# No lock object is stored or checked.
#
set -euo pipefail
# Shared definitions live next to this script. STORE_PREFIX, CC_INDEX and
# CC_PRIVATE are read below but never assigned here, so they presumably come
# from this file — confirm against symbolcache_common.sh.
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/symbolcache_common.sh"

#######################################
# Print the command-line synopsis.
# Arguments: none
# Outputs:   usage text on stdout (error paths redirect it to stderr)
# Returns:   exit status of cat
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: reconcile_symbolcache.sh --remote REMOTE [--work DIR]
  --remote REMOTE   (required) rclone remote + bucket prefix (e.g. r2:symbolcache)
  --work DIR        scratch dir (default: fresh mktemp)
EOF
}

# ---------------------------------------------------------------------------
# Arguments
# ---------------------------------------------------------------------------
REMOTE=""
WORK=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --remote | --work)
      # Both options take a value. Check for it explicitly: under `set -u` a
      # trailing bare option would otherwise die with "unbound variable".
      if [[ $# -lt 2 ]]; then
        echo "[reconcile] ERROR: $1 requires a value" >&2
        usage >&2
        exit 2
      fi
      if [[ "$1" == "--remote" ]]; then
        REMOTE="$2"
      else
        WORK="$2"
      fi
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "[reconcile] ERROR: unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
if [[ -z "$REMOTE" ]]; then
  echo "[reconcile] ERROR: --remote is required" >&2
  usage >&2
  exit 2
fi
# No --work given: create a private scratch directory (honours TMPDIR).
if [[ -z "$WORK" ]]; then
  WORK=$(mktemp -d "${TMPDIR:-/tmp}/reconcile_symbolcache.XXXXXX")
fi

# Object-key prefix inside the remote, and the state area beneath it.
# STORE_PREFIX is not assigned in this script; presumably it is provided by
# symbolcache_common.sh — confirm.
PFX="${STORE_PREFIX}"
STATE="$PFX/_state"

# A caller-supplied --work directory may not exist yet.
mkdir -p "$WORK"

echo "[reconcile] REMOTE=$REMOTE WORK=$WORK"

# ---------------------------------------------------------------------------
# Step 1: List authoritative artifacts
# ---------------------------------------------------------------------------
# Layer 1 safety: separate rclone's exit status from grep's.
# rclone lsf writes to raw_listing.txt with stderr captured separately.
# A genuinely absent packages/ prefix (first run; S3/R2 returns exit 0 with
# empty output, the local backend returns exit 3 "directory not found") is
# treated as empty — that is an expected condition. Any other rclone error
# (auth, network, wrong remote config) is a hard failure, so the index is
# never rebuilt from a bogus empty list.
echo "[reconcile] listing artifacts under $REMOTE/$PFX/packages ..."
rclone_rc=0
rclone lsf -R --files-only "${REMOTE}/${PFX}/packages" \
  > "$WORK/raw_listing.txt" \
  2> "$WORK/rclone_lsf_err.txt" || rclone_rc=$?
if [[ $rclone_rc -ne 0 ]]; then
  err_text=$(< "$WORK/rclone_lsf_err.txt")
  # Tolerate "directory not found" / "object not found" listing errors — these
  # occur on the local backend when packages/ does not yet exist (first run).
  # Real object stores (S3/R2) return exit 0 with empty output for absent
  # prefixes, so this branch is mainly a local-backend / CI safety valve.
  # Do NOT match generic "not found" which also appears in config errors
  # ("didn't find section in config file").
  # A here-string is used instead of `echo | grep -q`: under pipefail an early
  # grep -q exit can SIGPIPE the writer and turn a match into a failure.
  if grep -qE "error listing:.*not found|error in ListJSON:.*not found|NoSuchKey|NoSuchBucket" <<<"$err_text"; then
    echo "[reconcile] packages prefix absent (directory not found) — treating as empty"
    : > "$WORK/raw_listing.txt"
  else
    echo "[reconcile] ERROR: rclone lsf failed (exit $rclone_rc):" >&2
    printf '%s\n' "$err_text" >&2
    exit "$rclone_rc"
  fi
fi

# Derive one key per artifact from the last two path components:
# "<parent dir>/<file name without .tar.gz>".
# grep exits 1 when nothing matches (valid-but-empty listing); only that status
# is tolerated. Any other failure in grep, awk or sort aborts via pipefail
# instead of leaving a truncated artifacts.txt to be published as the index.
{ grep '\.tar\.gz$' "$WORK/raw_listing.txt" || [[ $? -eq 1 ]]; } \
  | awk -F/ '{s=$NF; sub(/\.tar\.gz$/, "", s); print $(NF-1) "/" s}' \
  | sort -u > "$WORK/artifacts.txt"

artifact_count=$(wc -l < "$WORK/artifacts.txt")
echo "[reconcile] found $artifact_count artifact(s)"

# Layer 2 safety: if the derived artifact count is 0 but an existing index
# already has entries, abort rather than wipe. Preserves the genuine first-run /
# truly-empty case: zero artifacts AND no/empty existing index → proceed.

#######################################
# Count the entries held in an index archive.
# Inputs:    gzip-compressed tar stream on stdin
# Outputs:   number of non-empty lines across the archived file content, on
#            stdout; "0" when the stream is empty or not a readable archive
# Returns:   always 0
#######################################
count_index_entries() {
  # -O writes member *content* to stdout. Counting lines of the raw tar stream
  # instead would also count header bytes, so even an archive holding an empty
  # index.txt would look non-empty and trip the guard below.
  tar -xzOf - 2>/dev/null | grep -c . || true
}

if [[ "$artifact_count" -eq 0 ]]; then
  # A missing index.tar.gz makes rclone cat fail; that is the expected first-run
  # case and yields a count of 0.
  # NOTE(review): other rclone errors are indistinguishable from absence here.
  existing=$(rclone cat "${REMOTE}/${PFX}/index.tar.gz" 2>/dev/null \
    | count_index_entries || true)
  if [[ "${existing:-0}" -gt 0 ]]; then
    echo "[reconcile] ERROR: artifact list empty but existing index has $existing entries — aborting to avoid wiping the index" >&2
    exit 1
  fi
fi

# ---------------------------------------------------------------------------
# Step 2: Rebuild and publish the index from artifacts.txt
# ---------------------------------------------------------------------------
# The availability index is authoritative: it is exactly the set of keys
# for which an artifact exists in the bucket right now.
idxdir="$WORK/idx_staging"
mkdir -p "$idxdir"
# Stage under a fixed name so the archive member is always "index.txt".
cp "$WORK/artifacts.txt" "$idxdir/index.txt"

# -C keeps the staging path out of the archive; only the bare member is stored.
tar -czf "$WORK/index.tar.gz" -C "$idxdir" index.txt

echo "[reconcile] uploading rebuilt index.tar.gz ($artifact_count entries) ..."
# CC_INDEX is passed verbatim as an upload header; it is not assigned in this
# script (presumably a Cache-Control value from symbolcache_common.sh — confirm).
rclone copyto "$WORK/index.tar.gz" "${REMOTE}/${PFX}/index.tar.gz" --header-upload "$CC_INDEX"
echo "[reconcile] index.tar.gz uploaded"

# ---------------------------------------------------------------------------
# Step 3: Reconcile tombstones
# ---------------------------------------------------------------------------
# Download current tombstones. Only a missing object counts as "none yet"; any
# other rclone failure aborts, so a transient error cannot cause the remote
# tombstone list to be overwritten with an empty one further down.
# Start from a clean slate: a reused --work directory may hold files from an
# earlier run.
: > "$WORK/tombstones.txt"
rm -f -- "$WORK/tombstones_dl.txt.gz"
dl_rc=0
rclone copyto "${REMOTE}/${STATE}/tombstones.txt.gz" "$WORK/tombstones_dl.txt.gz" \
  2> "$WORK/rclone_tomb_err.txt" || dl_rc=$?

if [[ $dl_rc -eq 0 && -f "$WORK/tombstones_dl.txt.gz" ]]; then
  if ! gzip -dc "$WORK/tombstones_dl.txt.gz" > "$WORK/tombstones.txt"; then
    echo "[reconcile] WARNING: tombstones decompress failed; treating as empty" >&2
    # Discard any partial output so "empty" is actually true.
    : > "$WORK/tombstones.txt"
  fi
elif [[ $dl_rc -eq 0 || $dl_rc -eq 3 || $dl_rc -eq 4 ]] \
  || grep -qE '(object|directory|file) not found|NoSuchKey' "$WORK/rclone_tomb_err.txt"; then
  # rclone exit 3 = directory not found, 4 = file not found. Exit 0 with no
  # file written is also treated as absence (nothing was there to copy).
  echo "[reconcile] no existing tombstones.txt.gz (first run or empty remote)"
else
  echo "[reconcile] ERROR: tombstones download failed (exit $dl_rc):" >&2
  cat "$WORK/rclone_tomb_err.txt" >&2
  exit "$dl_rc"
fi

tombstone_count=$(wc -l < "$WORK/tombstones.txt")
echo "[reconcile] downloaded $tombstone_count tombstone(s)"

# Drop any tombstone key that now has an artifact.
# comm needs both inputs sorted the same way, and compares line by line: a key
# listed twice in tombstones but once in artifacts would leave one copy behind.
# Sorting with -u into files removes duplicates and, unlike process
# substitution, lets a failing sort abort the script.
sort -u "$WORK/tombstones.txt" > "$WORK/tombstones_sorted.txt"
sort -u "$WORK/artifacts.txt" > "$WORK/artifacts_sorted.txt"
# comm -23: lines only in file1 (tombstones) that are NOT in file2 (artifacts).
comm -23 "$WORK/tombstones_sorted.txt" "$WORK/artifacts_sorted.txt" \
  > "$WORK/tombstones_new.txt"

unique_tombstone_count=$(wc -l < "$WORK/tombstones_sorted.txt")
new_tombstone_count=$(wc -l < "$WORK/tombstones_new.txt")
# Measured against the de-duplicated list so collapsed duplicates are not
# reported as keys that gained an artifact.
dropped=$(( unique_tombstone_count - new_tombstone_count ))
echo "[reconcile] reconciled tombstones: $new_tombstone_count kept, $dropped dropped (had artifact)"

# Upload reconciled tombstones. CC_PRIVATE is passed verbatim as an upload
# header; it is not assigned in this script (presumably from
# symbolcache_common.sh — confirm).
gzip -c "$WORK/tombstones_new.txt" | rclone rcat "${REMOTE}/${STATE}/tombstones.txt.gz" --header-upload "$CC_PRIVATE"
echo "[reconcile] tombstones.txt.gz uploaded"

echo "[reconcile] done"