Skip to content

Commit b2d403d

Browse files
committed
conformance: graduate t5308-pack-detect-duplicates
Extends the conformance gate to cover pack-file processing. - gogit cat-file: -e <oid> existence check (exit 0/1, no output) and --batch-check (stdin OIDs -> "<oid> <type> <size>" or "<oid> missing"). - gogit index-pack --stdin [--strict]: reads a pack from stdin and writes pack-<sha>.{pack,idx} into .git/objects/pack/ via go-git's WritePackfileToObjectStorage. --strict scans the pack with a WithScannerObservers-attached observer and rejects packs containing duplicate object IDs before any state is written. - Harness test-tool stub gains sha1/sha256 [-b] cases (used by t/lib-pack.sh to compute pack trailers) and is now rewritten on every run so script changes take effect without manual cache invalidation. - conformance/tests.txt: append t5308-pack-detect-duplicates.sh. Assisted-by: Claude Opus 4.7 <noreply@anthropic.com> Signed-off-by: Paulo Gomes <paulo@entire.io>
1 parent 860b070 commit b2d403d

7 files changed

Lines changed: 415 additions & 6 deletions

File tree

cmd/gogit/cat-file.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package main
2+
3+
import (
4+
"bufio"
5+
"errors"
6+
"fmt"
7+
"io"
8+
"os"
9+
"strings"
10+
11+
"github.com/go-git/go-git/v6"
12+
"github.com/go-git/go-git/v6/plumbing"
13+
"github.com/spf13/cobra"
14+
)
15+
16+
var (
17+
catFileExists bool
18+
catFileBatchCheck bool
19+
)
20+
21+
func init() {
22+
catFileCmd.Flags().BoolVarP(&catFileExists, "exists", "e", false,
23+
"Check whether object exists; exit 0 if so, 1 otherwise")
24+
catFileCmd.Flags().BoolVar(&catFileBatchCheck, "batch-check", false,
25+
"Read object IDs from stdin and print <oid> <type> <size> per line (or '<oid> missing')")
26+
rootCmd.AddCommand(catFileCmd)
27+
}
28+
29+
var catFileCmd = &cobra.Command{
30+
Use: "cat-file (-e <oid> | --batch-check)",
31+
Short: "Provide content or check existence of repository objects",
32+
RunE: func(cmd *cobra.Command, args []string) error {
33+
if catFileExists && catFileBatchCheck {
34+
return errors.New("-e and --batch-check are mutually exclusive")
35+
}
36+
37+
if !catFileExists && !catFileBatchCheck {
38+
return errors.New("one of -e or --batch-check is required")
39+
}
40+
41+
r, err := git.PlainOpenWithOptions(".", &git.PlainOpenOptions{DetectDotGit: true})
42+
if err != nil {
43+
return fmt.Errorf("failed to open repository: %w", err)
44+
}
45+
46+
if catFileExists {
47+
if len(args) != 1 {
48+
return errors.New("-e requires exactly one <oid> argument")
49+
}
50+
51+
return catFileExistsCheck(r, args[0])
52+
}
53+
54+
return catFileBatchCheckRun(cmd, r, os.Stdin)
55+
},
56+
DisableFlagsInUseLine: true,
57+
SilenceUsage: true,
58+
SilenceErrors: true,
59+
}
60+
61+
func catFileExistsCheck(r *git.Repository, oid string) error {
62+
h := plumbing.NewHash(oid)
63+
if _, err := r.Storer.EncodedObject(plumbing.AnyObject, h); err != nil {
64+
os.Exit(1)
65+
}
66+
67+
return nil
68+
}
69+
70+
func catFileBatchCheckRun(cmd *cobra.Command, r *git.Repository, stdin io.Reader) error {
71+
w := bufio.NewWriter(cmd.OutOrStdout())
72+
defer w.Flush()
73+
74+
scanner := bufio.NewScanner(stdin)
75+
for scanner.Scan() {
76+
line := strings.TrimSpace(scanner.Text())
77+
if line == "" {
78+
continue
79+
}
80+
81+
h := plumbing.NewHash(line)
82+
83+
obj, err := r.Storer.EncodedObject(plumbing.AnyObject, h)
84+
if err != nil {
85+
fmt.Fprintf(w, "%s missing\n", line)
86+
87+
continue
88+
}
89+
90+
fmt.Fprintf(w, "%s %s %d\n", line, obj.Type(), obj.Size())
91+
}
92+
93+
return scanner.Err()
94+
}

cmd/gogit/cat-file_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package main
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
)
8+
9+
const baseBlobOID = "df967b96a579e45a18b8251732d16804b2e56a55" // sha1 of "blob 5\0base\n"
10+
11+
func setupRepoWithBaseBlob(t *testing.T) string {
12+
t.Helper()
13+
14+
repo := t.TempDir()
15+
16+
if _, _, err := runGogit(t, repo, "init"); err != nil {
17+
t.Fatalf("init: %v", err)
18+
}
19+
20+
if err := os.WriteFile(filepath.Join(repo, "file0"), []byte("base\n"), 0o644); err != nil {
21+
t.Fatal(err)
22+
}
23+
24+
if _, _, err := runGogit(t, repo, "add", "file0"); err != nil {
25+
t.Fatalf("add: %v", err)
26+
}
27+
28+
if _, _, err := runGogitEnv(t, repo, gitIdentityEnv(repo), "commit", "-m", "x"); err != nil {
29+
t.Fatalf("commit: %v", err)
30+
}
31+
32+
return repo
33+
}
34+
35+
func TestCatFileExistsExitsZero(t *testing.T) {
36+
t.Parallel()
37+
38+
repo := setupRepoWithBaseBlob(t)
39+
40+
if _, _, err := runGogit(t, repo, "cat-file", "-e", baseBlobOID); err != nil {
41+
t.Fatalf("cat-file -e <existing>: expected exit 0, got %v", err)
42+
}
43+
}
44+
45+
func TestCatFileMissingExitsOne(t *testing.T) {
46+
t.Parallel()
47+
48+
repo := t.TempDir()
49+
50+
if _, _, err := runGogit(t, repo, "init"); err != nil {
51+
t.Fatalf("init: %v", err)
52+
}
53+
54+
stdout, _, err := runGogit(t, repo, "cat-file", "-e", "0000000000000000000000000000000000000000")
55+
if err == nil {
56+
t.Fatalf("expected non-zero exit, got success")
57+
}
58+
59+
if stdout != "" {
60+
t.Fatalf("expected no stdout, got %q", stdout)
61+
}
62+
}
63+
64+
func TestCatFileBatchCheck(t *testing.T) {
65+
t.Parallel()
66+
67+
repo := setupRepoWithBaseBlob(t)
68+
69+
const missingOID = "0000000000000000000000000000000000000000"
70+
71+
input := baseBlobOID + "\n" + missingOID + "\n"
72+
want := baseBlobOID + " blob 5\n" + missingOID + " missing\n"
73+
74+
stdout, stderr, err := runGogitStdin(t, repo, input, "cat-file", "--batch-check")
75+
if err != nil {
76+
t.Fatalf("cat-file --batch-check failed: %v\nstderr: %s", err, stderr)
77+
}
78+
79+
if stdout != want {
80+
t.Fatalf("batch-check output mismatch:\n got: %q\nwant: %q", stdout, want)
81+
}
82+
}

cmd/gogit/index-pack.go

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"errors"
6+
"fmt"
7+
"io"
8+
9+
"github.com/go-git/go-git/v6"
10+
"github.com/go-git/go-git/v6/plumbing"
11+
"github.com/go-git/go-git/v6/plumbing/format/packfile"
12+
"github.com/go-git/go-git/v6/plumbing/storer"
13+
"github.com/spf13/cobra"
14+
)
15+
16+
var (
17+
indexPackStdin bool
18+
indexPackStrict bool
19+
)
20+
21+
func init() {
22+
indexPackCmd.Flags().BoolVar(&indexPackStdin, "stdin", false, "Read the pack from standard input")
23+
indexPackCmd.Flags().BoolVar(&indexPackStrict, "strict", false, "Reject packs containing duplicate object IDs")
24+
rootCmd.AddCommand(indexPackCmd)
25+
}
26+
27+
var indexPackCmd = &cobra.Command{
28+
Use: "index-pack --stdin [--strict]",
29+
Short: "Build a pack index for an existing packed archive",
30+
RunE: func(cmd *cobra.Command, _ []string) error {
31+
if !indexPackStdin {
32+
return errors.New("--stdin is required")
33+
}
34+
35+
r, err := git.PlainOpenWithOptions(".", &git.PlainOpenOptions{DetectDotGit: true})
36+
if err != nil {
37+
return fmt.Errorf("failed to open repository: %w", err)
38+
}
39+
40+
return indexPackRun(r, cmd.InOrStdin(), indexPackStrict)
41+
},
42+
DisableFlagsInUseLine: true,
43+
SilenceUsage: true,
44+
SilenceErrors: true,
45+
}
46+
47+
func indexPackRun(repo *git.Repository, in io.Reader, strict bool) error {
48+
pw, ok := repo.Storer.(storer.PackfileWriter)
49+
if !ok {
50+
return errors.New("repository storer does not support packfile writes")
51+
}
52+
53+
if !strict {
54+
return packfile.WritePackfileToObjectStorage(pw, in)
55+
}
56+
57+
buf, err := io.ReadAll(in)
58+
if err != nil {
59+
return fmt.Errorf("read pack: %w", err)
60+
}
61+
62+
if err := checkPackForDuplicates(buf); err != nil {
63+
return err
64+
}
65+
66+
return packfile.WritePackfileToObjectStorage(pw, bytes.NewReader(buf))
67+
}
68+
69+
func checkPackForDuplicates(pack []byte) error {
70+
obs := &dupObserver{seen: make(map[plumbing.Hash]struct{})}
71+
parser := packfile.NewParser(bytes.NewReader(pack), packfile.WithScannerObservers(obs))
72+
73+
if _, err := parser.Parse(); err != nil {
74+
return fmt.Errorf("parse pack: %w", err)
75+
}
76+
77+
if obs.dup != plumbing.ZeroHash {
78+
return fmt.Errorf("duplicate object %s in pack (--strict)", obs.dup)
79+
}
80+
81+
return nil
82+
}
83+
84+
type dupObserver struct {
85+
seen map[plumbing.Hash]struct{}
86+
dup plumbing.Hash
87+
}
88+
89+
func (o *dupObserver) OnHeader(_ uint32) error { return nil }
90+
func (o *dupObserver) OnFooter(_ plumbing.Hash) error { return nil }
91+
92+
func (o *dupObserver) OnInflatedObjectHeader(_ plumbing.ObjectType, _, _ int64) error {
93+
return nil
94+
}
95+
96+
func (o *dupObserver) OnInflatedObjectContent(h plumbing.Hash, _ int64, _ uint32, _ []byte) error {
97+
if _, ok := o.seen[h]; ok && o.dup == plumbing.ZeroHash {
98+
o.dup = h
99+
}
100+
101+
o.seen[h] = struct{}{}
102+
103+
return nil
104+
}

cmd/gogit/index-pack_test.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"crypto/sha1"
6+
"encoding/binary"
7+
"os"
8+
"path/filepath"
9+
"testing"
10+
)
11+
12+
// emptyBlobPackEntry is the on-the-wire (zlib-compressed) packfile entry for the
13+
// canonical empty blob. Lifted verbatim from upstream's t/lib-pack.sh, which is
14+
// the same trick t5308 uses to construct test packs.
15+
var emptyBlobPackEntry = []byte{0x30, 0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}
16+
17+
// makePack returns a valid v2 packfile containing `count` copies of the empty
18+
// blob entry, with a correct SHA1 trailer.
19+
func makePack(t *testing.T, count int) []byte {
20+
t.Helper()
21+
22+
var buf bytes.Buffer
23+
24+
buf.WriteString("PACK")
25+
26+
if err := binary.Write(&buf, binary.BigEndian, uint32(2)); err != nil {
27+
t.Fatal(err)
28+
}
29+
30+
if err := binary.Write(&buf, binary.BigEndian, uint32(count)); err != nil {
31+
t.Fatal(err)
32+
}
33+
34+
for range count {
35+
buf.Write(emptyBlobPackEntry)
36+
}
37+
38+
h := sha1.Sum(buf.Bytes())
39+
buf.Write(h[:])
40+
41+
return buf.Bytes()
42+
}
43+
44+
func TestIndexPackStdinAcceptsCleanPack(t *testing.T) {
45+
t.Parallel()
46+
47+
repo := t.TempDir()
48+
49+
if _, _, err := runGogit(t, repo, "init"); err != nil {
50+
t.Fatalf("init: %v", err)
51+
}
52+
53+
pack := makePack(t, 1)
54+
if _, stderr, err := runGogitStdin(t, repo, string(pack), "index-pack", "--stdin"); err != nil {
55+
t.Fatalf("index-pack --stdin failed: %v\nstderr: %s", err, stderr)
56+
}
57+
58+
matches, err := filepath.Glob(filepath.Join(repo, ".git", "objects", "pack", "pack-*.pack"))
59+
if err != nil {
60+
t.Fatal(err)
61+
}
62+
63+
if len(matches) != 1 {
64+
t.Fatalf("expected exactly one pack file, got %d: %v", len(matches), matches)
65+
}
66+
67+
idxPath := matches[0][:len(matches[0])-5] + ".idx"
68+
if _, err := os.Stat(idxPath); err != nil {
69+
t.Fatalf("expected idx alongside pack: %v", err)
70+
}
71+
}
72+
73+
func TestIndexPackStrictRejectsDuplicates(t *testing.T) {
74+
t.Parallel()
75+
76+
repo := t.TempDir()
77+
78+
if _, _, err := runGogit(t, repo, "init"); err != nil {
79+
t.Fatalf("init: %v", err)
80+
}
81+
82+
pack := makePack(t, 2)
83+
if _, _, err := runGogitStdin(t, repo, string(pack), "index-pack", "--strict", "--stdin"); err == nil {
84+
t.Fatal("expected non-zero exit for duplicate-object pack under --strict")
85+
}
86+
87+
matches, err := filepath.Glob(filepath.Join(repo, ".git", "objects", "pack", "pack-*.pack"))
88+
if err != nil {
89+
t.Fatal(err)
90+
}
91+
92+
if len(matches) != 0 {
93+
t.Fatalf("expected no pack file left behind, got %d: %v", len(matches), matches)
94+
}
95+
}

0 commit comments

Comments
 (0)