Skip to content

Commit dd2a4c0

Browse files
phillipwood authored and gitster committed
diff --anchored: avoid checking unmatched lines
For a line to be an anchor, it has to appear in each of the files being diffed exactly once. With that in mind, let's delay checking whether a line is an anchor until we know there is exactly one instance of the line in each file. As each line is checked at most once, there is no need to cache the result of is_anchor() and we can drop that field from the hashmap entries.

When diffing 5000 recent commits in git.git, this gives a modest speedup of ~2%. In the (rather extreme) example below, which consists largely of deletions, the speedup is ~16%.

    seq 0 10000000 >old
    printf '%s\n' 300000 100000 200000 >new
    git diff --no-index --anchored=300000 old new

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 67ad421 commit dd2a4c0

1 file changed

Lines changed: 6 additions & 12 deletions

File tree

xdiff/xpatience.c

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,6 @@ struct hashmap {
6161
* initially, "next" reflects only the order in file1.
6262
*/
6363
struct entry *next, *previous;
64-
65-
/*
66-
* If 1, this entry can serve as an anchor. See
67-
* Documentation/diff-options.adoc for more information.
68-
*/
69-
unsigned anchor : 1;
7064
} *entries, *first, *last;
7165
/* were common records found? */
7266
unsigned long has_matches;
@@ -85,8 +79,7 @@ static int is_anchor(xpparam_t const *xpp, const char *line)
8579
}
8680

8781
/* The argument "pass" is 1 for the first file, 2 for the second. */
88-
static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
89-
int pass)
82+
static void insert_record(int line, struct hashmap *map, int pass)
9083
{
9184
xrecord_t *records = pass == 1 ?
9285
map->env->xdf1.recs : map->env->xdf2.recs;
@@ -121,7 +114,6 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
121114
return;
122115
map->entries[index].line1 = line;
123116
map->entries[index].minimal_perfect_hash = record->minimal_perfect_hash;
124-
map->entries[index].anchor = is_anchor(xpp, (const char *)map->env->xdf1.recs[line - 1].ptr);
125117
if (!map->first)
126118
map->first = map->entries + index;
127119
if (map->last) {
@@ -153,11 +145,11 @@ static int fill_hashmap(xpparam_t const *xpp, xdfenv_t *env,
153145

154146
/* First, fill with entries from the first file */
155147
while (count1--)
156-
insert_record(xpp, line1++, result, 1);
148+
insert_record(line1++, result, 1);
157149

158150
/* Then search for matches in the second file */
159151
while (count2--)
160-
insert_record(xpp, line2++, result, 2);
152+
insert_record(line2++, result, 2);
161153

162154
return 0;
163155
}
@@ -194,6 +186,8 @@ static int binary_search(struct entry **sequence, int longest,
194186
*/
195187
static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
196188
{
189+
xpparam_t const *xpp = map->xpp;
190+
xrecord_t const *recs = map->env->xdf2.recs;
197191
struct entry **sequence;
198192
int longest = 0, i;
199193
struct entry *entry;
@@ -220,7 +214,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
220214
if (i <= anchor_i)
221215
continue;
222216
sequence[i] = entry;
223-
if (entry->anchor) {
217+
if (is_anchor(xpp, (const char*)recs[entry->line2 - 1].ptr)) {
224218
anchor_i = i;
225219
longest = anchor_i + 1;
226220
} else if (i == longest) {

0 commit comments

Comments
 (0)