Skip to content

Commit b05ec87

Browse files
committed
backup: PR810 r3 — codex r3 P1 (Windows reserved names via filepath.IsLocal)
Codex r3 P1: validateChecksumRelPath rejected `..`-traversal and absolute paths but accepted Windows reserved device names like CON / NUL / COM1 / CONOUT$. On Windows, os.Open of those names yields the host's console / device rather than a regular file under the dump root, so verification can succeed/fail based on host device state instead of dump contents. Fix: replace the explicit absolute/traversal checks with filepath.IsLocal (Go 1.20+), which delegates path-locality to the stdlib. On Windows IsLocal additionally rejects the reserved device names; on Unix the names look like ordinary files (and would be hashed if they exist under root, the honest dump shape). Empty / `.` are still explicitly rejected because IsLocal considers `.` local (it stays within the subtree, but it names the root directory, which is not a hashable regular file). The post-Clean recheck below IsLocal is belt-and-braces against a future stdlib change. Tests: - TestValidateChecksumRelPath_RejectsWindowsDeviceNames pins the rejection. The reserved-name check is Windows-only by filepath.IsLocal's contract, so the test skips on non-Windows; the lexical fix itself runs on every platform. - TestValidateChecksumRelPath_AcceptsHonestPaths cross-checks the IsLocal switchover does not regress the five legitimate Phase 0a dump-tree shapes (Redis blob, S3 nested object, DynamoDB schema, SQS messages.jsonl). Caller audit (CLAUDE.md "semantic-change → grep all callers"): - validateChecksumRelPath only called from verifyOneChecksumLine → VerifyChecksums; the latter has no production callers (test-only). The semantic change is purely rejection-side tightening on Windows; honest CHECKSUMS unaffected. Self-review: 1. Data loss — none (read-side hardening). 2. Concurrency — none. 3. Performance — IsLocal is O(path-len); a single pass. 4. Data consistency — Windows device-name escape closed. 5. Test coverage — two new tests; existing five-shape traversal-path table test still passes.
1 parent 2bb8cd7 commit b05ec87

2 files changed

Lines changed: 94 additions & 23 deletions

File tree

internal/backup/checksums.go

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -259,39 +259,46 @@ func refuseSymlinkComponents(root, rel string) error {
259259
var ErrChecksumsSymlinkEscape = errors.New("backup: CHECKSUMS path traverses symlink")
260260

261261
// validateChecksumRelPath rejects CHECKSUMS rows whose path field
262-
// could escape the dump root. The acceptance rules:
262+
// could escape the dump root or otherwise resolve outside the
263+
// regular-file subtree the verifier expects. The acceptance rule
264+
// is `filepath.IsLocal` — Go-stdlib's lexical "stays within the
265+
// subtree rooted at the evaluation directory" check, which
266+
// covers:
263267
//
264-
// - absolute paths are rejected;
265-
// - paths containing a `..` segment (before or after Clean) are
266-
// rejected;
267-
// - the cleaned form must not start with `../` (covers
268-
// filepath.Clean turning `a/../../b` into `../b`);
269-
// - empty / `.` paths are rejected so a malformed entry that
270-
// would otherwise read the root directory itself surfaces.
268+
// - absolute paths (any platform);
269+
// - traversal via `..` segments (before or after Clean);
270+
// - on Windows, reserved device names like `CON`, `NUL`,
271+
// `COM1`-`COM9`, `LPT1`-`LPT9`, `PRN`, `AUX`, `CONIN$`,
272+
// `CONOUT$`, etc. — opened via `os.Open` these would yield
273+
// the host's console / device, not a dump file, and produce
274+
// hash mismatches keyed off host state. Codex r3 P1 on
275+
// PR #810.
271276
//
272-
// Returns the cleaned form so callers join it without redoing the
273-
// canonicalisation. Mirrors the same guard the S3 encoder applies
274-
// to bucket/object path segments before scratch-dir layout.
277+
// Empty and "." are rejected explicitly because IsLocal returns
278+
// true for "." (it does stay within the subtree, but it names
279+
// the root directory itself, which is not a file we can hash);
280+
// honest CHECKSUMS lines name regular files, not the root dir.
281+
//
282+
// Returns the Cleaned form so callers join it without redoing
283+
// the canonicalisation. Mirrors the same guard the S3 encoder
284+
// applies to bucket/object path segments before scratch-dir
285+
// layout.
275286
func validateChecksumRelPath(rel string) (string, error) {
276287
if rel == "" || rel == "." {
277288
return "", errors.Wrapf(ErrChecksumsMalformedLine, "empty path field")
278289
}
279-
if filepath.IsAbs(rel) {
280-
return "", errors.Wrapf(ErrChecksumsPathTraversal, "absolute path: %q", rel)
290+
if !filepath.IsLocal(rel) {
291+
return "", errors.Wrapf(ErrChecksumsPathTraversal,
292+
"not a local path (absolute / traversal / reserved name): %q", rel)
281293
}
282294
cleaned := filepath.Clean(rel)
283-
// Defence in depth: Clean turns `a/b/../../../etc/passwd` into
284-
// `../etc/passwd`. A leading `..` segment in the cleaned form
285-
// means the relative path escaped its parent, regardless of
286-
// what tricks were used to write it.
295+
// IsLocal already rejects absolute paths and `..` traversal
296+
// before/after Clean; the post-Clean recheck below is
297+
// belt-and-braces against a future stdlib change that would
298+
// loosen IsLocal but keep the cleaned-shape invariants.
287299
if cleaned == ".." || strings.HasPrefix(cleaned, ".."+string(filepath.Separator)) {
288-
return "", errors.Wrapf(ErrChecksumsPathTraversal, "escapes root: %q", rel)
300+
return "", errors.Wrapf(ErrChecksumsPathTraversal, "escapes root after Clean: %q", rel)
289301
}
290-
// `Clean` strips trailing slashes but leaves the leading
291-
// separator on absolute paths intact; we already filtered
292-
// those, so a remaining absolute-shaped Clean result is from
293-
// inputs like `/foo` on platforms where filepath.IsAbs
294-
// disagrees with filepath.Clean. Belt-and-braces.
295302
if filepath.IsAbs(cleaned) {
296303
return "", errors.Wrapf(ErrChecksumsPathTraversal, "absolute after Clean: %q", rel)
297304
}

internal/backup/checksums_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,70 @@ func TestVerifyChecksums_RejectsIntermediateSymlink(t *testing.T) {
244244
}
245245
}
246246

247+
// TestValidateChecksumRelPath_RejectsWindowsDeviceNames is the
248+
// regression for codex r3 P1 on PR #810: even after the textual
249+
// `..`-traversal guard, names like CON / NUL / COM1 / CONOUT$
250+
// would open the host's console / device on Windows via
251+
// `os.Open(filepath.Join(root, "CON"))`. The verifier delegates
252+
// locality to filepath.IsLocal which refuses these names
253+
// lexically — but only on Windows (filepath.IsLocal is
254+
// platform-aware; on Unix these names are valid filenames).
255+
//
256+
// The test runs the unit-level validateChecksumRelPath check
257+
// rather than the full VerifyChecksums flow because the lstat
258+
// step downstream of the IsLocal guard would fail with a
259+
// generic ENOENT on Unix where the names are not pre-created,
260+
// masking the rejection class we want to pin.
261+
func TestValidateChecksumRelPath_RejectsWindowsDeviceNames(t *testing.T) {
262+
if runtime.GOOS != "windows" {
263+
// filepath.IsLocal only rejects reserved device names on
264+
// Windows; on Unix these strings are ordinary filenames.
265+
// The lexical fix lands on every platform, but the
266+
// rejection is observable only where it matters.
267+
t.Skip("Windows-only: filepath.IsLocal reserved-name check is platform-specific")
268+
}
269+
t.Parallel()
270+
names := []string{"CON", "NUL", "PRN", "AUX", "COM1", "LPT1", "CONOUT$"}
271+
for _, name := range names {
272+
t.Run(name, func(t *testing.T) {
273+
t.Parallel()
274+
_, err := validateChecksumRelPath(name)
275+
if err == nil {
276+
t.Fatalf("expected ErrChecksumsPathTraversal for reserved name %q, got nil", name)
277+
}
278+
if !errors.Is(err, ErrChecksumsPathTraversal) {
279+
t.Fatalf("err = %v, want ErrChecksumsPathTraversal", err)
280+
}
281+
})
282+
}
283+
}
284+
285+
// TestValidateChecksumRelPath_AcceptsHonestPaths cross-checks the
286+
// IsLocal switchover did not regress legitimate relative paths
287+
// (the Phase 0a dump tree).
288+
func TestValidateChecksumRelPath_AcceptsHonestPaths(t *testing.T) {
289+
t.Parallel()
290+
cases := []string{
291+
"a.txt",
292+
"redis/db_0/strings/foo.bin",
293+
"s3/photos/2026/04/29/img.jpg",
294+
"dynamodb/orders/_schema.json",
295+
"sqs/queue-1/messages.jsonl",
296+
}
297+
for _, p := range cases {
298+
t.Run(p, func(t *testing.T) {
299+
t.Parallel()
300+
got, err := validateChecksumRelPath(p)
301+
if err != nil {
302+
t.Fatalf("validateChecksumRelPath(%q) unexpected err: %v", p, err)
303+
}
304+
if got != filepath.Clean(p) {
305+
t.Fatalf("validateChecksumRelPath(%q) = %q, want %q", p, got, filepath.Clean(p))
306+
}
307+
})
308+
}
309+
}
310+
247311
// TestVerifyChecksums_StreamsLargeFile pins the bufio.Scanner path
248312
// — the previous implementation slurped the entire CHECKSUMS file
249313
// via os.ReadFile, which OOMs on large dump trees (gemini

0 commit comments

Comments
 (0)