Skip to content

Commit 232564b

Browse files
worstellampagent
andauthored
fix(snapshot): make snapshot serving range-aware (#356)
The git snapshot endpoint ignored client `Range` headers, so it always served the full object. It now forwards `Range`/`If-Range` to the cache on the cache-hit path, returns `206`/`Content-Range` (`416` when not satisfiable), and advertises `Accept-Ranges: bytes`, so clients can fetch with bounded parallel range requests (`client.ParallelGet`) instead of one full GET. Snapshot freshen metadata (`X-Cachew-Snapshot-Commit` / `X-Cachew-Bundle-Url`) is emitted on `206` responses too, via a shared `setSnapshotMetadataHeaders` helper used by both the full and ranged paths, so a ranged client still learns whether to apply a delta bundle after a parallel download. Test: `TestSnapshotGetHonorsRange` covers 206/416/Content-Range and matching commit metadata on partial responses, using a non-`*os.File` memory cache reader so range support comes from forwarding `Range` to `Open` rather than `http.ServeContent`. Co-authored-by: Amp <amp@ampcode.com>
1 parent 2c806db commit 232564b

3 files changed

Lines changed: 202 additions & 25 deletions

File tree

internal/httputil/conditional.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,23 @@ func ConditionalOptions(r *http.Request) []client.RequestOption {
3232
return opts
3333
}
3434

35+
// RangeOptions extracts only the Range/If-Range options from r, for callers
36+
// that evaluate If-Match/If-None-Match separately (e.g. via CheckConditionals)
37+
// and so must not double-handle them by forwarding the full set to Open.
38+
// If-Range is only meaningful alongside Range, so it is dropped when Range is
39+
// absent.
40+
func RangeOptions(r *http.Request) []client.RequestOption {
41+
v := r.Header.Get("Range")
42+
if v == "" {
43+
return nil
44+
}
45+
opts := []client.RequestOption{func(o *client.RequestOptions) { o.Range = v }}
46+
if ir := r.Header.Get("If-Range"); ir != "" {
47+
opts = append(opts, client.IfRange(ir))
48+
}
49+
return opts
50+
}
51+
3552
// CheckConditionals evaluates RFC 7232 If-Match and If-None-Match precondition
3653
// headers on r against etag. It returns 0 when all preconditions pass,
3754
// otherwise the HTTP status the caller should send: 412 Precondition Failed for

internal/strategy/git/snapshot.go

Lines changed: 79 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,24 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request,
320320
}
321321
s.maybeBackgroundFetch(repo)
322322

323-
reader, headers, err := s.cache.Open(ctx, cacheKey)
323+
// Forward only Range/If-Range here; If-Match/If-None-Match are evaluated
324+
// against the served body below via CheckConditionals.
325+
reader, headers, err := s.cache.Open(ctx, cacheKey, httputil.RangeOptions(r)...)
326+
if errors.Is(err, cache.ErrRangeNotSatisfiable) {
327+
// A failed If-Match (412) or satisfied If-None-Match (304) takes
328+
// precedence over an unsatisfiable range (RFC 7232 §3, RFC 7233 §3.1).
329+
if status := httputil.CheckConditionals(r, headers.Get(cache.ETagKey)); status != 0 {
330+
w.WriteHeader(status)
331+
return
332+
}
333+
w.Header().Set("Content-Type", "application/zstd")
334+
w.Header().Set("Accept-Ranges", "bytes")
335+
if cr := headers.Get("Content-Range"); cr != "" {
336+
w.Header().Set("Content-Range", cr)
337+
}
338+
w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
339+
return
340+
}
324341
if err != nil && !errors.Is(err, os.ErrNotExist) {
325342
logger.ErrorContext(ctx, "Failed to open snapshot from cache", "upstream", upstreamURL, "error", err)
326343
http.Error(w, "Internal server error", http.StatusInternalServerError)
@@ -517,23 +534,42 @@ func (s *Strategy) handleBundleRequest(w http.ResponseWriter, r *http.Request, h
517534
}
518535

519536
func (s *Strategy) serveSnapshotWithBundle(ctx context.Context, w http.ResponseWriter, r *http.Request, reader io.ReadCloser, headers http.Header, repo *gitclone.Repository, upstreamURL, repoName string, start time.Time) error {
520-
snapshotCommit := headers.Get("X-Cachew-Snapshot-Commit")
521-
mirrorHead := s.getMirrorHead(ctx, repo)
522-
523-
// Forward the snapshot commit to the client so it knows whether the
524-
// snapshot is fresh (no bundle URL = already at HEAD, skip freshen).
525-
if snapshotCommit != "" {
526-
w.Header().Set("X-Cachew-Snapshot-Commit", snapshotCommit)
537+
// If-Match/If-None-Match are evaluated before serving any body: a satisfied
538+
// If-None-Match (304) or a failed If-Match (412) takes precedence over a
539+
// range response, so revalidating clients are not handed a 206 they would
540+
// have to discard (RFC 7232 §3, RFC 7233 §3.1).
541+
if status := httputil.CheckConditionals(r, headers.Get(cache.ETagKey)); status != 0 {
542+
w.WriteHeader(status)
543+
s.metrics.recordSnapshotServe(ctx, "cache", repoName, 0, time.Since(start))
544+
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
545+
span.SetAttributes(attribute.String("cachew.source", "cache"), attribute.Int64("cachew.bytes", 0))
546+
}
547+
return nil
527548
}
528549

529-
if snapshotCommit != "" && mirrorHead != "" && snapshotCommit != mirrorHead {
530-
repoPath, err := gitclone.RepoPathFromURL(upstreamURL)
531-
if err == nil {
532-
bundleURL := fmt.Sprintf("/git/%s/snapshot.bundle?base=%s", repoPath, snapshotCommit)
533-
w.Header().Set("X-Cachew-Bundle-Url", bundleURL)
550+
// A satisfied byte range is served as-is. Bundle negotiation applies only to
551+
// whole-snapshot downloads, so a partial read (e.g. a client.ParallelGet
552+
// chunk) skips it and returns 206 directly.
553+
if cr := headers.Get("Content-Range"); cr != "" {
554+
applySnapshotCacheHeaders(w, headers)
555+
w.Header().Set("Accept-Ranges", "bytes")
556+
w.Header().Set("Content-Range", cr)
557+
// Ranged clients (client.ParallelGet) read the freshen metadata from the
558+
// discovery chunk, so it must be present on partial responses too.
559+
s.setSnapshotMetadataHeaders(ctx, w, headers, repo, upstreamURL)
560+
w.WriteHeader(http.StatusPartialContent)
561+
n, err := io.Copy(w, reader)
562+
s.metrics.recordSnapshotServe(ctx, "cache_range", repoName, n, time.Since(start))
563+
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
564+
span.SetAttributes(attribute.String("cachew.source", "cache_range"), attribute.Int64("cachew.bytes", n))
534565
}
566+
return errors.Wrap(err, "stream snapshot range")
567+
}
568+
569+
snapshotCommit, bundleURL := s.setSnapshotMetadataHeaders(ctx, w, headers, repo, upstreamURL)
535570

536-
// Proactively generate and cache the bundle so any pod can serve it.
571+
// Proactively generate and cache the advertised bundle so any pod can serve it.
572+
if bundleURL != "" {
537573
go func() {
538574
bgCtx := context.WithoutCancel(ctx)
539575
logger := logging.FromContext(bgCtx)
@@ -550,25 +586,43 @@ func (s *Strategy) serveSnapshotWithBundle(ctx context.Context, w http.ResponseW
550586
}
551587

552588
applySnapshotCacheHeaders(w, headers)
589+
w.Header().Set("Accept-Ranges", "bytes")
553590

554-
// Honour conditional GETs against the advertised ETag. ServeContent does this
555-
// natively for *os.File readers, but cache backends returning non-file readers
556-
// (S3, memory, remote) fall through to io.Copy, so revalidate explicitly to
557-
// avoid streaming the full snapshot when the client already has it.
558-
var n int64
559-
var err error
560-
if status := httputil.CheckConditionals(r, headers.Get(cache.ETagKey)); status != 0 {
561-
w.WriteHeader(status)
562-
} else {
563-
n, err = serveReaderFast(w, r, reader)
564-
}
591+
n, err := serveReaderFast(w, r, reader)
565592
s.metrics.recordSnapshotServe(ctx, "cache", repoName, n, time.Since(start))
566593
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
567594
span.SetAttributes(attribute.String("cachew.source", "cache"), attribute.Int64("cachew.bytes", n))
568595
}
569596
return errors.Wrap(err, "stream snapshot")
570597
}
571598

599+
// setSnapshotMetadataHeaders advertises the snapshot's commit and, when the
600+
// snapshot trails the mirror's HEAD, the delta-bundle URL clients use to
601+
// fast-forward. Shared by the full and ranged serve paths so ranged clients
602+
// (client.ParallelGet) receive the same freshen metadata on the discovery
603+
// chunk. It returns the snapshot commit and the bundle URL it set (empty when
604+
// the snapshot is already at HEAD), so callers can decide whether to
605+
// pre-generate the bundle.
606+
func (s *Strategy) setSnapshotMetadataHeaders(ctx context.Context, w http.ResponseWriter, headers http.Header, repo *gitclone.Repository, upstreamURL string) (snapshotCommit, bundleURL string) {
607+
snapshotCommit = headers.Get("X-Cachew-Snapshot-Commit")
608+
if snapshotCommit == "" {
609+
return "", ""
610+
}
611+
w.Header().Set("X-Cachew-Snapshot-Commit", snapshotCommit)
612+
613+
mirrorHead := s.getMirrorHead(ctx, repo)
614+
if mirrorHead == "" || snapshotCommit == mirrorHead {
615+
return snapshotCommit, ""
616+
}
617+
repoPath, err := gitclone.RepoPathFromURL(upstreamURL)
618+
if err != nil {
619+
return snapshotCommit, ""
620+
}
621+
bundleURL = fmt.Sprintf("/git/%s/snapshot.bundle?base=%s", repoPath, snapshotCommit)
622+
w.Header().Set("X-Cachew-Bundle-Url", bundleURL)
623+
return snapshotCommit, bundleURL
624+
}
625+
572626
// applySnapshotCacheHeaders forwards the cached snapshot's validators so clients
573627
// can revalidate (ETag) and size the transfer (Content-Length). Content-Type is
574628
// fixed for snapshots regardless of what the cache backend recorded.

internal/strategy/git/snapshot_test.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"os/exec"
1111
"path/filepath"
12+
"strconv"
1213
"strings"
1314
"testing"
1415
"time"
@@ -930,3 +931,108 @@ func TestSnapshotRemoteURLUsesUpstreamURL(t *testing.T) {
930931
assert.NoError(t, err, string(output))
931932
assert.Equal(t, upstreamURL+"\n", string(output))
932933
}
934+
935+
func TestSnapshotGetHonorsRange(t *testing.T) {
936+
if _, err := exec.LookPath("git"); err != nil {
937+
t.Skip("git not found in PATH")
938+
}
939+
940+
_, ctx := logging.Configure(context.Background(), logging.Config{})
941+
tmpDir := t.TempDir()
942+
mirrorRoot := filepath.Join(tmpDir, "mirrors")
943+
upstreamURL := "https://github.com/org/repo"
944+
mirrorPath := filepath.Join(mirrorRoot, "github.com", "org", "repo")
945+
createTestMirrorRepo(t, mirrorPath)
946+
947+
// The memory cache returns a non-*os.File reader, so range handling must come
948+
// from the strategy forwarding Range to Open rather than http.ServeContent.
949+
memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{MaxTTL: time.Hour})
950+
assert.NoError(t, err)
951+
mux := newTestMux()
952+
953+
cm := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil)
954+
s, err := git.New(ctx, git.Config{}, newTestScheduler(ctx, t), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
955+
assert.NoError(t, err)
956+
957+
manager, err := cm()
958+
assert.NoError(t, err)
959+
repo, err := manager.GetOrCreate(ctx, upstreamURL)
960+
assert.NoError(t, err)
961+
962+
waitForReady(t, s)
963+
err = s.GenerateAndUploadSnapshot(ctx, repo)
964+
assert.NoError(t, err)
965+
966+
handler := mux.handlers["GET /git/{host}/{path...}"]
967+
assert.NotZero(t, handler)
968+
969+
get := func(rangeHeader string) *httptest.ResponseRecorder {
970+
req := httptest.NewRequest(http.MethodGet, "/git/github.com/org/repo/snapshot.tar.zst", nil)
971+
req = req.WithContext(ctx)
972+
req.SetPathValue("host", "github.com")
973+
req.SetPathValue("path", "org/repo/snapshot.tar.zst")
974+
if rangeHeader != "" {
975+
req.Header.Set("Range", rangeHeader)
976+
}
977+
w := httptest.NewRecorder()
978+
handler.ServeHTTP(w, req)
979+
return w
980+
}
981+
982+
// Full GET advertises range support and yields the whole body.
983+
full := get("")
984+
assert.Equal(t, 200, full.Code)
985+
assert.Equal(t, "bytes", full.Header().Get("Accept-Ranges"))
986+
body := full.Body.Bytes()
987+
assert.True(t, len(body) > 4, "snapshot body should be larger than the test range")
988+
commit := full.Header().Get("X-Cachew-Snapshot-Commit")
989+
assert.NotZero(t, commit, "full GET should advertise the snapshot commit")
990+
991+
// A satisfiable range returns 206 with the matching bytes and Content-Range.
992+
partial := get("bytes=0-3")
993+
assert.Equal(t, http.StatusPartialContent, partial.Code)
994+
assert.Equal(t, "bytes", partial.Header().Get("Accept-Ranges"))
995+
assert.Equal(t, "bytes 0-3/"+strconv.Itoa(len(body)), partial.Header().Get("Content-Range"))
996+
assert.Equal(t, body[:4], partial.Body.Bytes())
997+
// Ranged clients must receive the same freshen metadata as a full GET so
998+
// they can apply a delta bundle after a parallel download.
999+
assert.Equal(t, commit, partial.Header().Get("X-Cachew-Snapshot-Commit"))
1000+
1001+
// A range beyond the object is not satisfiable.
1002+
tooBig := get("bytes=" + strconv.Itoa(len(body)+10) + "-" + strconv.Itoa(len(body)+20))
1003+
assert.Equal(t, http.StatusRequestedRangeNotSatisfiable, tooBig.Code)
1004+
assert.Equal(t, "bytes */"+strconv.Itoa(len(body)), tooBig.Header().Get("Content-Range"))
1005+
1006+
etag := full.Header().Get("ETag")
1007+
assert.NotZero(t, etag, "snapshot should advertise an ETag")
1008+
1009+
getCond := func(rangeHeader string, condHeaders map[string]string) *httptest.ResponseRecorder {
1010+
req := httptest.NewRequest(http.MethodGet, "/git/github.com/org/repo/snapshot.tar.zst", nil)
1011+
req = req.WithContext(ctx)
1012+
req.SetPathValue("host", "github.com")
1013+
req.SetPathValue("path", "org/repo/snapshot.tar.zst")
1014+
req.Header.Set("Range", rangeHeader)
1015+
for k, v := range condHeaders {
1016+
req.Header.Set(k, v)
1017+
}
1018+
w := httptest.NewRecorder()
1019+
handler.ServeHTTP(w, req)
1020+
return w
1021+
}
1022+
1023+
// Conditional validators take precedence over the range: a matching
1024+
// If-None-Match revalidates to 304 and a stale If-Match fails with 412,
1025+
// rather than returning a 206 body the client would discard.
1026+
notModified := getCond("bytes=0-3", map[string]string{"If-None-Match": etag})
1027+
assert.Equal(t, http.StatusNotModified, notModified.Code)
1028+
assert.Equal(t, 0, notModified.Body.Len())
1029+
1030+
preconditionFailed := getCond("bytes=0-3", map[string]string{"If-Match": `"stale-etag"`})
1031+
assert.Equal(t, http.StatusPreconditionFailed, preconditionFailed.Code)
1032+
1033+
// An unsatisfiable range with a stale If-Match is a precondition failure
1034+
// (412), not a 416.
1035+
beyond := strconv.Itoa(len(body)+10) + "-" + strconv.Itoa(len(body)+20)
1036+
rangeWithStaleIfMatch := getCond("bytes="+beyond, map[string]string{"If-Match": `"stale-etag"`})
1037+
assert.Equal(t, http.StatusPreconditionFailed, rangeWithStaleIfMatch.Code)
1038+
}

0 commit comments

Comments
 (0)