Skip to content

Commit c3d1e6f

Browse files
committed
backup: address review on KEYMAP/MANIFEST (PR #712)
- Adapters fields are now *Adapter pointers (Gemini #98). Nil -> excluded, non-nil empty -> included-with-no-scopes, non-nil populated -> normal. Previous non-pointer struct collapsed both "excluded" and "included-empty" into the same on-disk shape; gemini's reference (don't silently drop entries during serialization) was the right call. Test TestAdaptersStruct_NilVsEmptyDistinguishedOnDisk covers it. - KeymapReader.Next now validates the base64-encoded `original` field at parse time (Codex P1 #179). Previously the JSON parsed fine and the bad base64 surfaced lazily on Original() — that defers corruption detection past initial ingest. Test TestKeymapReader_RejectsMalformedBase64AtParseTime locks in the parse-time validation. - ReadManifest now rejects trailing bytes via dec.More() (Codex P2 #194). Two manifests concatenated, garbage tails, partial-write artifacts: all surface as ErrInvalidManifest. Tests TestReadManifest_RejectsTrailingBytes and TestReadManifest_RejectsTrailingNonWhitespace. - Test name TestKeymapReader_AcceptsBlankLinesByPolicy renamed to TestKeymapReader_RejectsBlankLines (Gemini #160). The test was already asserting rejection; only the name was misleading.
1 parent 3022b92 commit c3d1e6f

4 files changed

Lines changed: 117 additions & 15 deletions

File tree

internal/backup/keymap.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ func NewKeymapReader(r io.Reader) *KeymapReader {
155155
// (zero, false, nil) at end of stream, and (zero, false, err) on parse
156156
// failure or I/O error. Once an error is returned the reader is sticky:
157157
// subsequent calls return the same error.
158+
//
159+
// The base64-encoded `original` field is validated at parse time rather
160+
// than lazily: a malformed dump must surface on the first read of the
161+
// affected line, not propagate silently until a much later
162+
// rec.Original() call. Same error class either way.
158163
func (r *KeymapReader) Next() (KeymapRecord, bool, error) {
159164
if r.err != nil {
160165
return KeymapRecord{}, false, r.err
@@ -176,6 +181,10 @@ func (r *KeymapReader) Next() (KeymapRecord, bool, error) {
176181
r.err = errors.Wrap(ErrInvalidKeymapRecord, "missing encoded or kind")
177182
return KeymapRecord{}, false, r.err
178183
}
184+
if _, err := base64.RawURLEncoding.DecodeString(rec.OriginalB64); err != nil {
185+
r.err = errors.Wrap(ErrInvalidKeymapRecord, err.Error())
186+
return KeymapRecord{}, false, r.err
187+
}
179188
return rec, true, nil
180189
}
181190

internal/backup/keymap_test.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,13 @@ func TestKeymapReader_RejectsRecordWithoutEncodedOrKind(t *testing.T) {
157157
}
158158
}
159159

160-
func TestKeymapReader_AcceptsBlankLinesByPolicy(t *testing.T) {
160+
func TestKeymapReader_RejectsBlankLines(t *testing.T) {
161161
t.Parallel()
162162
// bufio.Scanner skips trailing newline but emits an empty line when one
163163
// is in the middle of the stream. We require strict JSONL — every
164-
// non-empty line must be a record. An empty line in the middle should
165-
// surface as ErrInvalidKeymapRecord rather than silently skipped, so
166-
// truncated dumps are recognised.
164+
// non-empty line must be a record. An empty line in the middle must
165+
// surface as ErrInvalidKeymapRecord rather than be silently skipped,
166+
// so truncated dumps are recognised.
167167
input := `{"encoded":"x","original":"AA","kind":"sha-fallback"}` + "\n\n" +
168168
`{"encoded":"y","original":"AA","kind":"sha-fallback"}` + "\n"
169169
r := NewKeymapReader(strings.NewReader(input))
@@ -203,3 +203,17 @@ func TestKeymapRecord_OriginalRejectsBadBase64(t *testing.T) {
203203
t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err)
204204
}
205205
}
206+
207+
func TestKeymapReader_RejectsMalformedBase64AtParseTime(t *testing.T) {
208+
t.Parallel()
209+
// JSON parses fine; the structural fields are present; only the
210+
// `original` base64 is malformed. The reader must catch this on
211+
// the first Next() rather than defer it to a later Original()
212+
// call — Codex P1 #179.
213+
input := `{"encoded":"x","original":"!!!","kind":"sha-fallback"}` + "\n"
214+
r := NewKeymapReader(strings.NewReader(input))
215+
_, _, err := r.Next()
216+
if !errors.Is(err, ErrInvalidKeymapRecord) {
217+
t.Fatalf("err=%v want ErrInvalidKeymapRecord on parse-time base64 validation", err)
218+
}
219+
}

internal/backup/manifest.go

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,27 @@ type Live struct {
8787
PinTokenSHA256 string `json:"pin_token_sha256,omitempty"`
8888
}
8989

90-
// Adapters lists which scopes were dumped per adapter. An empty slice
91-
// means "no scopes for this adapter were dumped"; a nil slice means
92-
// "this adapter was not in the dump's scope filter."
90+
// Adapters lists which scopes were dumped per adapter. The pointer
91+
// values express two distinguishable on-disk states:
92+
//
93+
// - nil -> the adapter was excluded from this dump (e.g.
94+
// `--adapter dynamodb,s3` filtered it out). The corresponding
95+
// JSON key is absent.
96+
// - non-nil pointer to Adapter{} -> the adapter was in scope but
97+
// no scopes for it were emitted (no tables, no buckets, etc.).
98+
// The JSON key is present with an empty object.
99+
// - non-nil pointer to a populated Adapter -> the listed scopes
100+
// were emitted.
101+
//
102+
// Storing pointers (rather than zero-value Adapter structs) is what
103+
// keeps "excluded by filter" distinguishable from "included but
104+
// empty" through json.Marshal — non-pointer fields would collapse
105+
// both states into the same on-disk shape.
93106
type Adapters struct {
94-
DynamoDB Adapter `json:"dynamodb"`
95-
S3 Adapter `json:"s3"`
96-
Redis Adapter `json:"redis"`
97-
SQS Adapter `json:"sqs"`
107+
DynamoDB *Adapter `json:"dynamodb,omitempty"`
108+
S3 *Adapter `json:"s3,omitempty"`
109+
Redis *Adapter `json:"redis,omitempty"`
110+
SQS *Adapter `json:"sqs,omitempty"`
98111
}
99112

100113
// Adapter holds the scope identifiers for one adapter. Field names are
@@ -193,6 +206,17 @@ func ReadManifest(r io.Reader) (Manifest, error) {
193206
if err := dec.Decode(&m); err != nil {
194207
return Manifest{}, errors.Wrap(ErrInvalidManifest, err.Error())
195208
}
209+
// MANIFEST.json is exactly one JSON object. Trailing bytes
210+
// (a second object, junk, even whitespace-only padding) point at
211+
// concatenation bugs or partial-write corruption — both of which
212+
// must surface here rather than be silently discarded. We use
213+
// io.Discard rather than parsing because we only care that
214+
// nothing-decodable is present; structural validation lives in
215+
// validate().
216+
if dec.More() {
217+
return Manifest{}, errors.Wrap(ErrInvalidManifest,
218+
"trailing bytes after manifest JSON object")
219+
}
196220
if m.FormatVersion == 0 {
197221
return Manifest{}, errors.Wrapf(ErrUnsupportedFormatVersion,
198222
"format_version is zero")

internal/backup/manifest_test.go

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ func TestManifest_Phase0RoundTrip(t *testing.T) {
2020
m.LastCommitTS = 4517352099840000
2121
m.Source = &Source{FSMPath: "/data/fsm-snap/0000000000000064.fsm", FSMCRC32C: "deadbeef"}
2222
m.Adapters = Adapters{
23-
DynamoDB: Adapter{Tables: []string{"orders", "users"}},
24-
S3: Adapter{Buckets: []string{"photos"}},
25-
Redis: Adapter{Databases: []uint32{0}},
26-
SQS: Adapter{Queues: []string{"orders-fifo.fifo"}},
23+
DynamoDB: &Adapter{Tables: []string{"orders", "users"}},
24+
S3: &Adapter{Buckets: []string{"photos"}},
25+
Redis: &Adapter{Databases: []uint32{0}},
26+
SQS: &Adapter{Queues: []string{"orders-fifo.fifo"}},
2727
}
2828
m.Exclusions = Exclusions{} // all defaults
2929

@@ -205,6 +205,61 @@ func TestNewPhase0SnapshotManifest_DefaultsArePopulated(t *testing.T) {
205205
}
206206
}
207207

208+
func TestReadManifest_RejectsTrailingBytes(t *testing.T) {
209+
t.Parallel()
210+
// Two manifests concatenated; the second must surface as a
211+
// trailing-bytes error rather than be silently discarded — Codex
212+
// P2 #194.
213+
m := NewPhase0SnapshotManifest(time.Now())
214+
body, err := json.Marshal(m)
215+
if err != nil {
216+
t.Fatalf("marshal: %v", err)
217+
}
218+
bad := append([]byte{}, body...)
219+
bad = append(bad, body...)
220+
_, err = ReadManifest(bytes.NewReader(bad))
221+
if !errors.Is(err, ErrInvalidManifest) {
222+
t.Fatalf("err=%v want ErrInvalidManifest on trailing bytes", err)
223+
}
224+
}
225+
226+
func TestReadManifest_RejectsTrailingNonWhitespace(t *testing.T) {
227+
t.Parallel()
228+
m := NewPhase0SnapshotManifest(time.Now())
229+
body, err := json.Marshal(m)
230+
if err != nil {
231+
t.Fatalf("marshal: %v", err)
232+
}
233+
bad := append([]byte{}, body...)
234+
bad = append(bad, []byte("garbage")...)
235+
_, err = ReadManifest(bytes.NewReader(bad))
236+
if !errors.Is(err, ErrInvalidManifest) {
237+
t.Fatalf("err=%v want ErrInvalidManifest on trailing garbage", err)
238+
}
239+
}
240+
241+
func TestAdaptersStruct_NilVsEmptyDistinguishedOnDisk(t *testing.T) {
242+
t.Parallel()
243+
// Gemini #98: an excluded adapter (nil pointer) must serialize
244+
// differently from an included-but-empty adapter (non-nil pointer
245+
// to Adapter{}).
246+
excluded := Adapters{
247+
DynamoDB: &Adapter{}, // present, no scopes
248+
// S3 / Redis / SQS left nil — out of scope
249+
}
250+
body, err := json.Marshal(excluded)
251+
if err != nil {
252+
t.Fatal(err)
253+
}
254+
out := string(body)
255+
if !strings.Contains(out, `"dynamodb":{}`) {
256+
t.Fatalf("included-empty must serialise as `dynamodb:{}`, got %s", out)
257+
}
258+
if strings.Contains(out, `"s3"`) || strings.Contains(out, `"redis"`) || strings.Contains(out, `"sqs"`) {
259+
t.Fatalf("excluded adapters must be omitted, got %s", out)
260+
}
261+
}
262+
208263
func TestWriteManifest_ProducesPrettyJSON(t *testing.T) {
209264
t.Parallel()
210265
m := NewPhase0SnapshotManifest(time.Now())

0 commit comments

Comments
 (0)