Skip to content

Commit 24e75d3

Browse files
committed
backup: M4-2b reject walker-skipped names + backslash in encoded keymap path (codex P2 #928 round 4)
Two related findings on encode_s3_collision.go's validateKeymapEncodedPath: 1. Encoded backslash slips past slash-only segment scan. On Windows filepath.Join treats '\' as a path separator, but strings.Split(encoded, '/') doesn't see it. A corrupt KEYMAP entry like 'dir\\x.elastickv-leaf-data' passes target verification (Lstat against the joined Windows path) and the walk later looks up the slash-form, missing the backslash-keyed record - emits the renamed on-disk key instead of failing closed. 2. Encoded targets that the walker skips. walkObjectEntry returns nil for top-level '_bucket.json', top-level 'KEYMAP.jsonl' (when a tracker is loaded), and anything ending in S3MetaSuffixReserved (sidecars). A KEYMAP entry naming one of those paths passes the IsRegular check but the walk drops it - the rename is silently ignored. Fix: validateKeymapEncodedPath now also: - Rejects any backslash anywhere in rec.Encoded (mirrors safeJoinUnderRoot's backslash guard). - Rejects encoded paths ending in S3MetaSuffixReserved (sidecar suffix at any depth). - Rejects bare '_bucket.json' and 'KEYMAP.jsonl' (top-level control files). Nested 'prefix/_bucket.json' and 'prefix/KEYMAP.jsonl' remain valid user object keys (the walk processes them normally). Caller audit: validateKeymapEncodedPath is invoked once from validateKeymapRecord (single chain). No call-site changes. Regression tests: - TestValidateKeymapEncodedPath_Backslash: 4 cases covering middle / leading / trailing / multi-segment backslash. - TestValidateKeymapEncodedPath_WalkerSkipped: 6-case table covering top-level reserved + sidecar suffix (top-level + nested) + accepted nested user-key forms.
1 parent 3c6fb92 commit 24e75d3

2 files changed

Lines changed: 104 additions & 5 deletions

File tree

internal/backup/encode_s3_collision.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,17 +193,46 @@ func validateKeymapOriginalPath(rec KeymapRecord, original string) error {
193193
}
194194

195195
// validateKeymapEncodedPath rejects rec.Encoded values that would
196-
// escape the bucket sub-root or carry empty segments. The lookup
197-
// site (resolveObjectKeyFromRel) joins this value under bucketDir
198-
// when calling root.Lstat, and ".." would silently break out of the
199-
// bucket. Gemini medium PR #928.
196+
// escape the bucket sub-root, carry empty segments, contain
197+
// backslashes filepath.Join splits on Windows, or name a
198+
// walker-skipped control/sidecar file that encodeObject never
199+
// reaches.
200+
//
201+
// Path segments: gemini medium PR #928.
202+
// Backslash + walker-skipped reserved names: codex P2 #928 round 4.
203+
// - The walker (walkObjectEntry) returns nil for top-level
204+
// "_bucket.json" and top-level "KEYMAP.jsonl" (when a tracker
205+
// is loaded) and for any file ending in S3MetaSuffixReserved.
206+
// A KEYMAP entry whose Encoded names one of those passes the
207+
// IsRegular check in verifyKeymapTargetsExist but is silently
208+
// ignored by the walk - the corruption surfaces only as a
209+
// missing-key during restore.
200210
func validateKeymapEncodedPath(encoded string) error {
201-
for _, seg := range strings.Split(encoded, "/") {
211+
if strings.ContainsRune(encoded, '\\') {
212+
return errors.Wrapf(ErrInvalidKeymapRecord,
213+
"encoded path %q contains backslash (treated as a separator on Windows; rename in S3 first)", encoded)
214+
}
215+
segs := strings.Split(encoded, "/")
216+
for _, seg := range segs {
202217
if seg == "" || seg == "." || seg == ".." {
203218
return errors.Wrapf(ErrInvalidKeymapRecord,
204219
"encoded path %q contains invalid segment %q", encoded, seg)
205220
}
206221
}
222+
last := segs[len(segs)-1]
223+
if strings.HasSuffix(last, S3MetaSuffixReserved) {
224+
return errors.Wrapf(ErrInvalidKeymapRecord,
225+
"encoded path %q ends in reserved sidecar suffix %q (walker would skip)",
226+
encoded, S3MetaSuffixReserved)
227+
}
228+
// _bucket.json and KEYMAP.jsonl are walker-skipped only at the
229+
// top level (rel == ""). Nested user keys like
230+
// "prefix/_bucket.json" are real object bodies that the walk
231+
// processes normally, so accept those.
232+
if len(segs) == 1 && (segs[0] == "_bucket.json" || segs[0] == "KEYMAP.jsonl") {
233+
return errors.Wrapf(ErrInvalidKeymapRecord,
234+
"encoded path %q names a top-level control file the walker skips", encoded)
235+
}
207236
return nil
208237
}
209238

internal/backup/encode_s3_collision_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,76 @@ func TestValidateKeymapRecord_EmptyOriginal(t *testing.T) {
253253
}
254254
}
255255

256+
// TestValidateKeymapEncodedPath_Backslash pins codex P2 #928
257+
// round 4 (encoded-backslash): a corrupt KEYMAP Encoded value with
258+
// a backslash bypasses the slash-segment scan on Windows because
259+
// filepath.Join treats `\` as a separator. The validator must
260+
// reject backslashes anywhere in the encoded path.
261+
func TestValidateKeymapEncodedPath_Backslash(t *testing.T) {
262+
t.Parallel()
263+
cases := []string{
264+
`dir\x` + S3LeafDataSuffix,
265+
`a\b\c` + S3LeafDataSuffix,
266+
`\leading` + S3LeafDataSuffix,
267+
`trailing\` + S3LeafDataSuffix,
268+
}
269+
for _, enc := range cases {
270+
t.Run(enc, func(t *testing.T) {
271+
t.Parallel()
272+
rec := leafRecord(enc, "ok/key")
273+
err := validateKeymapRecord(rec)
274+
if !errors.Is(err, ErrInvalidKeymapRecord) {
275+
t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err)
276+
}
277+
if !strings.Contains(err.Error(), "backslash") {
278+
t.Fatalf("err = %v, want backslash message", err)
279+
}
280+
})
281+
}
282+
}
283+
284+
// TestValidateKeymapEncodedPath_WalkerSkipped pins codex P2 #928
285+
// round 4 (walker-skipped reserved files): a corrupt KEYMAP entry
286+
// whose Encoded names a top-level "_bucket.json" / "KEYMAP.jsonl"
287+
// or any path ending in S3MetaSuffixReserved passes the IsRegular
288+
// check but the walk silently skips it — the rename is never
289+
// reversed and the operator sees no diagnostic. The validator must
290+
// reject these encoded values at load time.
291+
func TestValidateKeymapEncodedPath_WalkerSkipped(t *testing.T) {
292+
t.Parallel()
293+
cases := []struct {
294+
name string
295+
encoded string
296+
wantErr bool
297+
}{
298+
{"top-level _bucket.json", "_bucket.json", true},
299+
{"top-level KEYMAP.jsonl", "KEYMAP.jsonl", true},
300+
{"sidecar suffix (top-level)", "obj" + S3MetaSuffixReserved, true},
301+
{"sidecar suffix (nested)", "prefix/obj" + S3MetaSuffixReserved, true},
302+
// Nested _bucket.json / KEYMAP.jsonl ARE real user objects
303+
// the walker processes - accept them.
304+
{"nested _bucket.json (user key)", "prefix/_bucket.json", false},
305+
{"nested KEYMAP.jsonl (user key)", "prefix/KEYMAP.jsonl", false},
306+
}
307+
for _, c := range cases {
308+
t.Run(c.name, func(t *testing.T) {
309+
t.Parallel()
310+
rec := KeymapRecord{
311+
Encoded: c.encoded,
312+
OriginalB64: base64.RawURLEncoding.EncodeToString([]byte("original/key")),
313+
Kind: KindSHAFallback,
314+
}
315+
err := validateKeymapRecord(rec)
316+
if c.wantErr && !errors.Is(err, ErrInvalidKeymapRecord) {
317+
t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err)
318+
}
319+
if !c.wantErr && err != nil {
320+
t.Fatalf("unexpected err = %v", err)
321+
}
322+
})
323+
}
324+
}
325+
256326
// TestValidateKeymapOriginalPath pins codex P2 #928 round 3: the
257327
// decoded original key must not contain any form the decoder's
258328
// safeJoinUnderRoot would refuse — NUL byte, backslash, dot

0 commit comments

Comments
 (0)