Skip to content

Commit 0d81c02

Browse files
committed
merge-ort: abort merge when trees have duplicate entries
Trees with duplicate entries are malformed; fsck reports "contains duplicate file entries" for them. merge-ort has from the beginning assumed that we would never hit such trees. It was written with the assumption that traverse_trees() calls collect_merge_info_callback() at most once per path. The "sanity checks" in that callback (added in d2bc199 (merge-ort: implement a very basic collect_merge_info(), 2020-12-13)) verify properties of each individual call but not that invariant. The strmap_put() in setup_path_info() silently overwrites the entry from any prior call for the same path, because it assumed there would be no other path. Unfortunately, supplemental data structures for various optimizations could still be tweaked before the extra paths were overwritten, and those data structures not matching expected state could trip various assertions. Change the return type of setup_path_info() from void to int to allow us to detect this case, and abort the merge with a clear error message when it occurs. Signed-off-by: Elijah Newren <newren@gmail.com>
1 parent d422f73 commit 0d81c02

2 files changed

Lines changed: 88 additions & 27 deletions

File tree

merge-ort.c

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,18 +1026,18 @@ static int traverse_trees_wrapper(struct index_state *istate,
10261026
return ret < 0 ? ret : 0;
10271027
}
10281028

1029-
static void setup_path_info(struct merge_options *opt,
1030-
struct string_list_item *result,
1031-
const char *current_dir_name,
1032-
int current_dir_name_len,
1033-
char *fullpath, /* we'll take over ownership */
1034-
struct name_entry *names,
1035-
struct name_entry *merged_version,
1036-
unsigned is_null, /* boolean */
1037-
unsigned df_conflict, /* boolean */
1038-
unsigned filemask,
1039-
unsigned dirmask,
1040-
int resolved /* boolean */)
1029+
static int setup_path_info(struct merge_options *opt,
1030+
struct string_list_item *result,
1031+
const char *current_dir_name,
1032+
int current_dir_name_len,
1033+
char *fullpath, /* we'll take over ownership */
1034+
struct name_entry *names,
1035+
struct name_entry *merged_version,
1036+
unsigned is_null, /* boolean */
1037+
unsigned df_conflict, /* boolean */
1038+
unsigned filemask,
1039+
unsigned dirmask,
1040+
int resolved /* boolean */)
10411041
{
10421042
/* result->util is void*, so mi is a convenience typed variable */
10431043
struct merged_info *mi;
@@ -1081,9 +1081,11 @@ static void setup_path_info(struct merge_options *opt,
10811081
*/
10821082
mi->is_null = 1;
10831083
}
1084-
strmap_put(&opt->priv->paths, fullpath, mi);
1084+
if (strmap_put(&opt->priv->paths, fullpath, mi))
1085+
return error(_("tree has duplicate entries for '%s'"), fullpath);
10851086
result->string = fullpath;
10861087
result->util = mi;
1088+
return 0;
10871089
}
10881090

10891091
static void add_pair(struct merge_options *opt,
@@ -1350,9 +1352,10 @@ static int collect_merge_info_callback(int n,
13501352
*/
13511353
if (side1_matches_mbase && side2_matches_mbase) {
13521354
/* mbase, side1, & side2 all match; use mbase as resolution */
1353-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1354-
names, names+0, mbase_null, 0 /* df_conflict */,
1355-
filemask, dirmask, 1 /* resolved */);
1355+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1356+
names, names+0, mbase_null, 0 /* df_conflict */,
1357+
filemask, dirmask, 1 /* resolved */))
1358+
return -1; /* Quit traversing */
13561359
return mask;
13571360
}
13581361

@@ -1364,9 +1367,10 @@ static int collect_merge_info_callback(int n,
13641367
*/
13651368
if (sides_match && filemask == 0x07) {
13661369
/* use side1 (== side2) version as resolution */
1367-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1368-
names, names+1, side1_null, 0,
1369-
filemask, dirmask, 1);
1370+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1371+
names, names+1, side1_null, 0,
1372+
filemask, dirmask, 1))
1373+
return -1; /* Quit traversing */
13701374
return mask;
13711375
}
13721376

@@ -1378,18 +1382,20 @@ static int collect_merge_info_callback(int n,
13781382
*/
13791383
if (side1_matches_mbase && filemask == 0x07) {
13801384
/* use side2 version as resolution */
1381-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1382-
names, names+2, side2_null, 0,
1383-
filemask, dirmask, 1);
1385+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1386+
names, names+2, side2_null, 0,
1387+
filemask, dirmask, 1))
1388+
return -1; /* Quit traversing */
13841389
return mask;
13851390
}
13861391

13871392
/* Similar to above but swapping sides 1 and 2 */
13881393
if (side2_matches_mbase && filemask == 0x07) {
13891394
/* use side1 version as resolution */
1390-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1391-
names, names+1, side1_null, 0,
1392-
filemask, dirmask, 1);
1395+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1396+
names, names+1, side1_null, 0,
1397+
filemask, dirmask, 1))
1398+
return -1; /* Quit traversing */
13931399
return mask;
13941400
}
13951401

@@ -1413,8 +1419,9 @@ static int collect_merge_info_callback(int n,
14131419
* unconflict some more cases, but that comes later so all we can
14141420
* do now is record the different non-null file hashes.)
14151421
*/
1416-
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1417-
names, NULL, 0, df_conflict, filemask, dirmask, 0);
1422+
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1423+
names, NULL, 0, df_conflict, filemask, dirmask, 0))
1424+
return -1; /* Quit traversing */
14181425

14191426
ci = pi.util;
14201427
VERIFY_CI(ci);

t/t6422-merge-rename-corner-cases.sh

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,4 +1525,58 @@ test_expect_success 'submodule/directory preliminary conflict' '
15251525
)
15261526
'
15271527

1528+
# Testcase: submodule/directory conflict with duplicate tree entries
1529+
# One side has a path as a gitlink (submodule). The other side replaces
1530+
# the gitlink with a directory. A third-party tool creates a tree on the
1531+
# submodule side that has *both* a gitlink and a tree entry for the same
1532+
# path (adding a file inside the submodule path ignoring that there's a
1533+
# gitlink there). collect_merge_info_callback() should detect the
1534+
# duplicate and abort rather than silently corrupting its bookkeeping.
1535+
1536+
test_expect_success 'duplicate tree entries trigger an error' '
1537+
test_when_finished "rm -rf duplicate-entry" &&
1538+
git init duplicate-entry &&
1539+
(
1540+
cd duplicate-entry &&
1541+
1542+
# Base commit: "docs" is a gitlink (submodule)
1543+
empty_tree=$(git mktree </dev/null) &&
1544+
fake_commit=$(git commit-tree $empty_tree </dev/null) &&
1545+
git update-index --add --cacheinfo 160000,$fake_commit,docs &&
1546+
echo base >file.txt &&
1547+
git add file.txt &&
1548+
git commit -m base &&
1549+
1550+
# side1: remove the gitlink, replace with a directory
1551+
git checkout -b side1 &&
1552+
git rm --cached docs &&
1553+
mkdir -p docs &&
1554+
echo hello >docs/requirements.txt &&
1555+
git add docs/requirements.txt &&
1556+
git commit -m "side1: submodule to directory" &&
1557+
1558+
# side2: keep the gitlink but craft a tree that also
1559+
# contains a tree entry for "docs" (simulating a tool
1560+
# that adds files inside a submodule path without
1561+
# removing the gitlink first).
1562+
git checkout main &&
1563+
git checkout -b side2 &&
1564+
blob_oid=$(echo world | git hash-object -w --stdin) &&
1565+
docs_tree=$(printf "100644 blob %s\trequirements.txt\n" \
1566+
"$blob_oid" | git mktree) &&
1567+
cur_tree=$(git rev-parse HEAD^{tree}) &&
1568+
git cat-file -p $cur_tree >tree-listing &&
1569+
printf "040000 tree %s\tdocs\n" "$docs_tree" >>tree-listing &&
1570+
new_tree=$(git mktree <tree-listing) &&
1571+
side2_commit=$(git commit-tree $new_tree -p HEAD \
1572+
-m "side2: add file alongside submodule") &&
1573+
git update-ref refs/heads/side2 $side2_commit &&
1574+
1575+
# Merging must detect the duplicate and abort
1576+
git checkout side1 &&
1577+
test_must_fail git merge side2 2>err &&
1578+
test_grep "duplicate entries" err
1579+
)
1580+
'
1581+
15281582
test_done

0 commit comments

Comments
 (0)