Skip to content

Commit 5ceb988

Browse files
committed
Merge branch 'en/ort-harden-against-corrupt-trees' into seen
"ort" merge backend handles merging corrupt trees better by aborting when it should. * en/ort-harden-against-corrupt-trees: cache-tree: fix verify_cache() to catch non-adjacent D/F conflicts merge-ort: abort merge when trees have duplicate entries merge-ort: free diff pairs queue in clear_or_reinit_internal_opts() merge-ort: drop unnecessary show_all_errors from collect_merge_info() merge-ort: propagate callback errors from traverse_trees_wrapper()
2 parents fc0a027 + 60826fd commit 5ceb988

6 files changed

Lines changed: 239 additions & 37 deletions

cache-tree.c

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,22 +193,62 @@ static int verify_cache(struct index_state *istate, int flags)
193193
for (i = 0; i + 1 < istate->cache_nr; i++) {
194194
/* path/file always comes after path because of the way
195195
* the cache is sorted. Also path can appear only once,
196-
* which means conflicting one would immediately follow.
196+
* so path/file is likely the immediately following path
197+
* but might be separated if there is e.g. a
198+
* path-internal/... file.
197199
*/
198200
const struct cache_entry *this_ce = istate->cache[i];
199201
const struct cache_entry *next_ce = istate->cache[i + 1];
200202
const char *this_name = this_ce->name;
201203
const char *next_name = next_ce->name;
202204
int this_len = ce_namelen(this_ce);
205+
const char *conflict_name = NULL;
206+
203207
if (this_len < ce_namelen(next_ce) &&
204-
next_name[this_len] == '/' &&
208+
next_name[this_len] <= '/' &&
205209
strncmp(this_name, next_name, this_len) == 0) {
210+
if (next_name[this_len] == '/') {
211+
conflict_name = next_name;
212+
} else if (next_name[this_len] < '/') {
213+
/*
214+
* The immediately next entry shares our
215+
* prefix but sorts before "path/" (e.g.,
216+
* "path-internal" between "path" and
217+
* "path/file", since '-' (0x2D) < '/'
218+
* (0x2F)). Binary search to find where
219+
* "path/" would be and check for a D/F
220+
* conflict there.
221+
*/
222+
struct cache_entry *other;
223+
struct strbuf probe = STRBUF_INIT;
224+
int pos;
225+
226+
strbuf_add(&probe, this_name, this_len);
227+
strbuf_addch(&probe, '/');
228+
pos = index_name_pos_sparse(istate,
229+
probe.buf,
230+
probe.len);
231+
strbuf_release(&probe);
232+
233+
if (pos < 0)
234+
pos = -pos - 1;
235+
if (pos >= (int)istate->cache_nr)
236+
continue;
237+
other = istate->cache[pos];
238+
if (ce_namelen(other) > this_len &&
239+
other->name[this_len] == '/' &&
240+
!strncmp(this_name, other->name, this_len))
241+
conflict_name = other->name;
242+
}
243+
}
244+
245+
if (conflict_name) {
206246
if (10 < ++funny) {
207247
fprintf(stderr, "...\n");
208248
break;
209249
}
210250
fprintf(stderr, "You have both %s and %s\n",
211-
this_name, next_name);
251+
this_name, conflict_name);
212252
}
213253
}
214254
if (funny)

merge-ort.c

Lines changed: 44 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
728728
strintmap_clear_func(&renames->deferred[i].possible_trivial_merges);
729729
strset_clear_func(&renames->deferred[i].target_dirs);
730730
renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
731+
free(renames->pairs[i].queue);
732+
diff_queue_init(&renames->pairs[i]);
731733
}
732734
renames->cached_pairs_valid_side = 0;
733735
renames->dir_rename_mask = 0;
@@ -1008,32 +1010,34 @@ static int traverse_trees_wrapper(struct index_state *istate,
10081010
info->traverse_path = renames->callback_data_traverse_path;
10091011
info->fn = old_fn;
10101012
for (i = old_offset; i < renames->callback_data_nr; ++i) {
1011-
info->fn(n,
1012-
renames->callback_data[i].mask,
1013-
renames->callback_data[i].dirmask,
1014-
renames->callback_data[i].names,
1015-
info);
1013+
ret = info->fn(n,
1014+
renames->callback_data[i].mask,
1015+
renames->callback_data[i].dirmask,
1016+
renames->callback_data[i].names,
1017+
info);
1018+
if (ret < 0)
1019+
break;
10161020
}
10171021

10181022
renames->callback_data_nr = old_offset;
10191023
free(renames->callback_data_traverse_path);
10201024
renames->callback_data_traverse_path = old_callback_data_traverse_path;
10211025
info->traverse_path = NULL;
1022-
return 0;
1026+
return ret < 0 ? ret : 0;
10231027
}
10241028

1025-
static void setup_path_info(struct merge_options *opt,
1026-
struct string_list_item *result,
1027-
const char *current_dir_name,
1028-
int current_dir_name_len,
1029-
char *fullpath, /* we'll take over ownership */
1030-
struct name_entry *names,
1031-
struct name_entry *merged_version,
1032-
unsigned is_null, /* boolean */
1033-
unsigned df_conflict, /* boolean */
1034-
unsigned filemask,
1035-
unsigned dirmask,
1036-
int resolved /* boolean */)
1029+
static int setup_path_info(struct merge_options *opt,
1030+
struct string_list_item *result,
1031+
const char *current_dir_name,
1032+
int current_dir_name_len,
1033+
char *fullpath, /* we'll take over ownership */
1034+
struct name_entry *names,
1035+
struct name_entry *merged_version,
1036+
unsigned is_null, /* boolean */
1037+
unsigned df_conflict, /* boolean */
1038+
unsigned filemask,
1039+
unsigned dirmask,
1040+
int resolved /* boolean */)
10371041
{
10381042
/* result->util is void*, so mi is a convenience typed variable */
10391043
struct merged_info *mi;
@@ -1077,9 +1081,11 @@ static void setup_path_info(struct merge_options *opt,
10771081
*/
10781082
mi->is_null = 1;
10791083
}
1080-
strmap_put(&opt->priv->paths, fullpath, mi);
1084+
if (strmap_put(&opt->priv->paths, fullpath, mi))
1085+
return error(_("tree has duplicate entries for '%s'"), fullpath);
10811086
result->string = fullpath;
10821087
result->util = mi;
1088+
return 0;
10831089
}
10841090

10851091
static void add_pair(struct merge_options *opt,
@@ -1346,9 +1352,10 @@ static int collect_merge_info_callback(int n,
13461352
*/
13471353
if (side1_matches_mbase && side2_matches_mbase) {
13481354
/* mbase, side1, & side2 all match; use mbase as resolution */
1349-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1350-
names, names+0, mbase_null, 0 /* df_conflict */,
1351-
filemask, dirmask, 1 /* resolved */);
1355+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1356+
names, names+0, mbase_null, 0 /* df_conflict */,
1357+
filemask, dirmask, 1 /* resolved */))
1358+
return -1; /* Quit traversing */
13521359
return mask;
13531360
}
13541361

@@ -1360,9 +1367,10 @@ static int collect_merge_info_callback(int n,
13601367
*/
13611368
if (sides_match && filemask == 0x07) {
13621369
/* use side1 (== side2) version as resolution */
1363-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1364-
names, names+1, side1_null, 0,
1365-
filemask, dirmask, 1);
1370+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1371+
names, names+1, side1_null, 0,
1372+
filemask, dirmask, 1))
1373+
return -1; /* Quit traversing */
13661374
return mask;
13671375
}
13681376

@@ -1374,18 +1382,20 @@ static int collect_merge_info_callback(int n,
13741382
*/
13751383
if (side1_matches_mbase && filemask == 0x07) {
13761384
/* use side2 version as resolution */
1377-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1378-
names, names+2, side2_null, 0,
1379-
filemask, dirmask, 1);
1385+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1386+
names, names+2, side2_null, 0,
1387+
filemask, dirmask, 1))
1388+
return -1; /* Quit traversing */
13801389
return mask;
13811390
}
13821391

13831392
/* Similar to above but swapping sides 1 and 2 */
13841393
if (side2_matches_mbase && filemask == 0x07) {
13851394
/* use side1 version as resolution */
1386-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1387-
names, names+1, side1_null, 0,
1388-
filemask, dirmask, 1);
1395+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1396+
names, names+1, side1_null, 0,
1397+
filemask, dirmask, 1))
1398+
return -1; /* Quit traversing */
13891399
return mask;
13901400
}
13911401

@@ -1409,8 +1419,9 @@ static int collect_merge_info_callback(int n,
14091419
* unconflict some more cases, but that comes later so all we can
14101420
* do now is record the different non-null file hashes.)
14111421
*/
1412-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1413-
names, NULL, 0, df_conflict, filemask, dirmask, 0);
1422+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1423+
names, NULL, 0, df_conflict, filemask, dirmask, 0))
1424+
return -1; /* Quit traversing */
14141425

14151426
ci = pi.util;
14161427
VERIFY_CI(ci);
@@ -1738,7 +1749,6 @@ static int collect_merge_info(struct merge_options *opt,
17381749
setup_traverse_info(&info, opt->priv->toplevel_dir);
17391750
info.fn = collect_merge_info_callback;
17401751
info.data = opt;
1741-
info.show_all_errors = 1;
17421752

17431753
if (repo_parse_tree(opt->repo, merge_base) < 0 ||
17441754
repo_parse_tree(opt->repo, side1) < 0 ||

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ integration_tests = [
125125
't0090-cache-tree.sh',
126126
't0091-bugreport.sh',
127127
't0092-diagnose.sh',
128+
't0093-verify-cache-df-gap.sh',
128129
't0095-bloom.sh',
129130
't0100-previous.sh',
130131
't0101-at-syntax.sh',

t/t0093-direct-index-write.pl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/perl
2+
#
3+
# Build a v2 index file from entries listed on stdin.
4+
# Each line: "octalmode hex-oid name"
5+
# Output: binary index written to stdout.
6+
#
7+
# This bypasses all D/F safety checks in add_index_entry(), simulating
8+
# what happens when code uses ADD_CACHE_JUST_APPEND to bulk-load entries.
9+
use strict;
10+
use warnings;
11+
use Digest::SHA qw(sha1 sha256);
12+
13+
my $hash_algo = $ENV{'GIT_DEFAULT_HASH'} || 'sha1';
14+
my $hash_func = $hash_algo eq 'sha256' ? \&sha256 : \&sha1;
15+
16+
my @entries;
17+
while (my $line = <STDIN>) {
18+
chomp $line;
19+
my ($mode, $oid_hex, $name) = split(/ /, $line, 3);
20+
push @entries, [$mode, $oid_hex, $name];
21+
}
22+
23+
my $body = "DIRC" . pack("NN", 2, scalar @entries);
24+
25+
for my $ent (@entries) {
26+
my ($mode, $oid_hex, $name) = @{$ent};
27+
# 10 x 32-bit stat fields (zeroed), with mode in position 7
28+
my $stat = pack("N10", 0, 0, 0, 0, 0, 0, oct($mode), 0, 0, 0);
29+
my $oid = pack("H*", $oid_hex);
30+
my $flags = pack("n", length($name) & 0xFFF);
31+
my $entry = $stat . $oid . $flags . $name . "\0";
32+
# Pad to 8-byte boundary
33+
while (length($entry) % 8) { $entry .= "\0"; }
34+
$body .= $entry;
35+
}
36+
37+
binmode STDOUT;
38+
print $body . $hash_func->($body);

t/t0093-verify-cache-df-gap.sh

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/sh
2+
3+
test_description='verify_cache() must catch non-adjacent D/F conflicts
4+
5+
Ensure that verify_cache() can complain about bad entries like:
6+
7+
docs <-- submodule
8+
docs-internal/... <-- sorts here because "-" < "/"
9+
docs/... <-- D/F conflict with "docs" above, not adjacent
10+
11+
In order to test verify_cache, we directly construct a corrupt index
12+
(bypassing the D/F safety checks in add_index_entry) and verify that
13+
write-tree rejects it.
14+
'
15+
16+
. ./test-lib.sh
17+
18+
if ! test_have_prereq PERL
19+
then
20+
skip_all='skipping verify_cache D/F tests; Perl not available'
21+
test_done
22+
fi
23+
24+
# Build a v2 index from entries on stdin, bypassing D/F checks.
25+
# Each line: "octalmode hex-oid name" (entries must be pre-sorted).
26+
build_corrupt_index () {
27+
perl "$TEST_DIRECTORY/t0093-direct-index-write.pl" >"$1"
28+
}
29+
30+
test_expect_success 'setup objects' '
31+
test_commit base &&
32+
BLOB=$(git rev-parse HEAD:base.t) &&
33+
SUB_COMMIT=$(git rev-parse HEAD)
34+
'
35+
36+
test_expect_success 'adjacent D/F conflict is caught by verify_cache' '
37+
cat >index-entries <<-EOF &&
38+
0160000 $SUB_COMMIT docs
39+
0100644 $BLOB docs/requirements.txt
40+
EOF
41+
build_corrupt_index .git/index <index-entries &&
42+
43+
test_must_fail git write-tree 2>err &&
44+
test_grep "You have both docs and docs/requirements.txt" err
45+
'
46+
47+
test_expect_success 'non-adjacent D/F conflict is caught by verify_cache' '
48+
cat >index-entries <<-EOF &&
49+
0160000 $SUB_COMMIT docs
50+
0100644 $BLOB docs-internal/README.md
51+
0100644 $BLOB docs/requirements.txt
52+
EOF
53+
build_corrupt_index .git/index <index-entries &&
54+
55+
test_must_fail git write-tree 2>err &&
56+
test_grep "You have both docs and docs/requirements.txt" err
57+
'
58+
59+
test_done

t/t6422-merge-rename-corner-cases.sh

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,4 +1525,58 @@ test_expect_success 'submodule/directory preliminary conflict' '
15251525
)
15261526
'
15271527

1528+
# Testcase: submodule/directory conflict with duplicate tree entries
1529+
# One side has a path as a gitlink (submodule). The other side replaces
1530+
# the gitlink with a directory. A third-party tool creates a tree on the
1531+
# submodule side that has *both* a gitlink and a tree entry for the same
1532+
# path (adding a file inside the submodule path ignoring that there's a
1533+
# gitlink there). collect_merge_info_callback() should detect the
1534+
# duplicate and abort rather than silently corrupting its bookkeeping.
1535+
1536+
test_expect_success 'duplicate tree entries trigger an error' '
1537+
test_when_finished "rm -rf duplicate-entry" &&
1538+
git init duplicate-entry &&
1539+
(
1540+
cd duplicate-entry &&
1541+
1542+
# Base commit: "docs" is a gitlink (submodule)
1543+
empty_tree=$(git mktree </dev/null) &&
1544+
fake_commit=$(git commit-tree $empty_tree </dev/null) &&
1545+
git update-index --add --cacheinfo 160000,$fake_commit,docs &&
1546+
echo base >file.txt &&
1547+
git add file.txt &&
1548+
git commit -m base &&
1549+
1550+
# side1: remove the gitlink, replace with a directory
1551+
git checkout -b side1 &&
1552+
git rm --cached docs &&
1553+
mkdir -p docs &&
1554+
echo hello >docs/requirements.txt &&
1555+
git add docs/requirements.txt &&
1556+
git commit -m "side1: submodule to directory" &&
1557+
1558+
# side2: keep the gitlink but craft a tree that also
1559+
# contains a tree entry for "docs" (simulating a tool
1560+
# that adds files inside a submodule path without
1561+
# removing the gitlink first).
1562+
git checkout main &&
1563+
git checkout -b side2 &&
1564+
blob_oid=$(echo world | git hash-object -w --stdin) &&
1565+
docs_tree=$(printf "100644 blob %s\trequirements.txt\n" \
1566+
"$blob_oid" | git mktree) &&
1567+
cur_tree=$(git rev-parse HEAD^{tree}) &&
1568+
git cat-file -p $cur_tree >tree-listing &&
1569+
printf "040000 tree %s\tdocs\n" "$docs_tree" >>tree-listing &&
1570+
new_tree=$(git mktree <tree-listing) &&
1571+
side2_commit=$(git commit-tree $new_tree -p HEAD \
1572+
-m "side2: add file alongside submodule") &&
1573+
git update-ref refs/heads/side2 $side2_commit &&
1574+
1575+
# Merging must detect the duplicate and abort
1576+
git checkout side1 &&
1577+
test_must_fail git merge side2 2>err &&
1578+
test_grep "duplicate entries" err
1579+
)
1580+
'
1581+
15281582
test_done

0 commit comments

Comments
 (0)