Skip to content

Commit ef6fa38

Browse files
committed
Merge branch 'ly/changed-paths-traversal' into seen
Lift the limitation to use changed-path filter in "git log" so that it can be used for a pathspec with multiple literal paths. * ly/changed-paths-traversal: bloom: optimize multiple pathspec items in revision traversal bloom: replace struct bloom_key * with struct bloom_keyvec
2 parents af12714 + ede969e commit ef6fa38

5 files changed

Lines changed: 139 additions & 67 deletions

File tree

bloom.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,25 @@ void deinit_bloom_filters(void)
280280
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
281281
}
282282

283+
struct bloom_keyvec *create_bloom_keyvec(size_t count)
284+
{
285+
struct bloom_keyvec *vec;
286+
size_t sz = sizeof(struct bloom_keyvec);
287+
sz += count * sizeof(struct bloom_key);
288+
vec = (struct bloom_keyvec *)xcalloc(1, sz);
289+
vec->count = count;
290+
return vec;
291+
}
292+
293+
void destroy_bloom_keyvec(struct bloom_keyvec *vec)
294+
{
295+
if (!vec)
296+
return;
297+
for (size_t nr = 0; nr < vec->count; nr++)
298+
clear_bloom_key(&vec->key[nr]);
299+
free(vec);
300+
}
301+
283302
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
284303
const struct hashmap_entry *eptr,
285304
const struct hashmap_entry *entry_or_key,
@@ -540,3 +559,15 @@ int bloom_filter_contains(const struct bloom_filter *filter,
540559

541560
return 1;
542561
}
562+
563+
int bloom_filter_contains_vec(const struct bloom_filter *filter,
564+
const struct bloom_keyvec *vec,
565+
const struct bloom_filter_settings *settings)
566+
{
567+
int ret = 1;
568+
569+
for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
570+
ret = bloom_filter_contains(filter, &vec->key[nr], settings);
571+
572+
return ret;
573+
}

bloom.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ struct bloom_key {
7474
uint32_t *hashes;
7575
};
7676

77+
struct bloom_keyvec {
78+
size_t count;
79+
struct bloom_key key[FLEX_ARRAY];
80+
};
81+
7782
int load_bloom_filter_from_graph(struct commit_graph *g,
7883
struct bloom_filter *filter,
7984
uint32_t graph_pos);
@@ -100,6 +105,17 @@ void add_key_to_filter(const struct bloom_key *key,
100105
void init_bloom_filters(void);
101106
void deinit_bloom_filters(void);
102107

108+
struct bloom_keyvec *create_bloom_keyvec(size_t count);
109+
void destroy_bloom_keyvec(struct bloom_keyvec *vec);
110+
111+
static inline void fill_bloom_keyvec_key(const char *data, size_t len,
112+
struct bloom_keyvec *vec, size_t nr,
113+
const struct bloom_filter_settings *settings)
114+
{
115+
assert(nr < vec->count);
116+
fill_bloom_key(data, len, &vec->key[nr], settings);
117+
}
118+
103119
enum bloom_filter_computed {
104120
BLOOM_NOT_COMPUTED = (1 << 0),
105121
BLOOM_COMPUTED = (1 << 1),
@@ -137,4 +153,8 @@ int bloom_filter_contains(const struct bloom_filter *filter,
137153
const struct bloom_key *key,
138154
const struct bloom_filter_settings *settings);
139155

156+
int bloom_filter_contains_vec(const struct bloom_filter *filter,
157+
const struct bloom_keyvec *v,
158+
const struct bloom_filter_settings *settings);
159+
140160
#endif

revision.c

Lines changed: 71 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -675,23 +675,25 @@ static int forbid_bloom_filters(struct pathspec *spec)
675675
{
676676
if (spec->has_wildcard)
677677
return 1;
678-
if (spec->nr > 1)
679-
return 1;
680678
if (spec->magic & ~PATHSPEC_LITERAL)
681679
return 1;
682-
if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
683-
return 1;
680+
for (size_t nr = 0; nr < spec->nr; nr++)
681+
if (spec->items[nr].magic & ~PATHSPEC_LITERAL)
682+
return 1;
684683

685684
return 0;
686685
}
687686

687+
static void release_revisions_bloom_keyvecs(struct rev_info *revs);
688+
688689
static void prepare_to_use_bloom_filter(struct rev_info *revs)
689690
{
690691
struct pathspec_item *pi;
692+
struct bloom_keyvec *bloom_keyvec;
691693
char *path_alloc = NULL;
692694
const char *path, *p;
693695
size_t len;
694-
int path_component_nr = 1;
696+
int path_component_nr;
695697

696698
if (!revs->commits)
697699
return;
@@ -708,63 +710,73 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
708710
if (!revs->pruning.pathspec.nr)
709711
return;
710712

711-
pi = &revs->pruning.pathspec.items[0];
712-
713-
/* remove single trailing slash from path, if needed */
714-
if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
715-
path_alloc = xmemdupz(pi->match, pi->len - 1);
716-
path = path_alloc;
717-
} else
718-
path = pi->match;
719-
720-
len = strlen(path);
721-
if (!len) {
722-
revs->bloom_filter_settings = NULL;
723-
free(path_alloc);
724-
return;
725-
}
713+
revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr;
714+
CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr);
715+
for (int i = 0; i < revs->pruning.pathspec.nr; i++) {
716+
pi = &revs->pruning.pathspec.items[i];
717+
path_component_nr = 1;
718+
719+
/* remove single trailing slash from path, if needed */
720+
if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
721+
path_alloc = xmemdupz(pi->match, pi->len - 1);
722+
path = path_alloc;
723+
} else
724+
path = pi->match;
725+
726+
len = strlen(path);
727+
if (!len)
728+
goto fail;
729+
730+
p = path;
731+
while (*p) {
732+
/*
733+
* At this point, the path is normalized to use
734+
* Unix-style path separators. This is required due to
735+
* how the changed-path Bloom filters store the paths.
736+
*/
737+
if (*p == '/')
738+
path_component_nr++;
739+
p++;
740+
}
726741

727-
p = path;
728-
while (*p) {
729-
/*
730-
* At this point, the path is normalized to use Unix-style
731-
* path separators. This is required due to how the
732-
* changed-path Bloom filters store the paths.
733-
*/
734-
if (*p == '/')
735-
path_component_nr++;
736-
p++;
737-
}
742+
bloom_keyvec = create_bloom_keyvec(path_component_nr);
743+
revs->bloom_keyvecs[i] = bloom_keyvec;
738744

739-
revs->bloom_keys_nr = path_component_nr;
740-
ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
745+
fill_bloom_keyvec_key(path, len, bloom_keyvec, 0,
746+
revs->bloom_filter_settings);
747+
path_component_nr = 1;
741748

742-
fill_bloom_key(path, len, &revs->bloom_keys[0],
743-
revs->bloom_filter_settings);
744-
path_component_nr = 1;
749+
p = path + len - 1;
750+
while (p > path) {
751+
if (*p == '/')
752+
fill_bloom_keyvec_key(path, p - path,
753+
bloom_keyvec,
754+
path_component_nr++,
755+
revs->bloom_filter_settings);
756+
p--;
757+
}
745758

746-
p = path + len - 1;
747-
while (p > path) {
748-
if (*p == '/')
749-
fill_bloom_key(path, p - path,
750-
&revs->bloom_keys[path_component_nr++],
751-
revs->bloom_filter_settings);
752-
p--;
759+
FREE_AND_NULL(path_alloc);
753760
}
754761

755762
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
756763
atexit(trace2_bloom_filter_statistics_atexit);
757764
bloom_filter_atexit_registered = 1;
758765
}
759766

767+
return;
768+
769+
fail:
770+
revs->bloom_filter_settings = NULL;
760771
free(path_alloc);
772+
release_revisions_bloom_keyvecs(revs);
761773
}
762774

763775
static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
764776
struct commit *commit)
765777
{
766778
struct bloom_filter *filter;
767-
int result = 1, j;
779+
int result = 0;
768780

769781
if (!revs->repo->objects->commit_graph)
770782
return -1;
@@ -779,10 +791,10 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
779791
return -1;
780792
}
781793

782-
for (j = 0; result && j < revs->bloom_keys_nr; j++) {
783-
result = bloom_filter_contains(filter,
784-
&revs->bloom_keys[j],
785-
revs->bloom_filter_settings);
794+
for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) {
795+
result = bloom_filter_contains_vec(filter,
796+
revs->bloom_keyvecs[nr],
797+
revs->bloom_filter_settings);
786798
}
787799

788800
if (result)
@@ -823,7 +835,7 @@ static int rev_compare_tree(struct rev_info *revs,
823835
return REV_TREE_SAME;
824836
}
825837

826-
if (revs->bloom_keys_nr && !nth_parent) {
838+
if (revs->bloom_keyvecs_nr && !nth_parent) {
827839
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
828840

829841
if (bloom_ret == 0)
@@ -850,7 +862,7 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
850862
if (!t1)
851863
return 0;
852864

853-
if (!nth_parent && revs->bloom_keys_nr) {
865+
if (!nth_parent && revs->bloom_keyvecs_nr) {
854866
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
855867
if (!bloom_ret)
856868
return 1;
@@ -3202,6 +3214,14 @@ static void release_revisions_mailmap(struct string_list *mailmap)
32023214

32033215
static void release_revisions_topo_walk_info(struct topo_walk_info *info);
32043216

3217+
static void release_revisions_bloom_keyvecs(struct rev_info *revs)
3218+
{
3219+
for (size_t nr = 0; nr < revs->bloom_keyvecs_nr; nr++)
3220+
destroy_bloom_keyvec(revs->bloom_keyvecs[nr]);
3221+
FREE_AND_NULL(revs->bloom_keyvecs);
3222+
revs->bloom_keyvecs_nr = 0;
3223+
}
3224+
32053225
static void free_void_commit_list(void *list)
32063226
{
32073227
free_commit_list(list);
@@ -3230,11 +3250,7 @@ void release_revisions(struct rev_info *revs)
32303250
clear_decoration(&revs->treesame, free);
32313251
line_log_free(revs);
32323252
oidset_clear(&revs->missing_commits);
3233-
3234-
for (int i = 0; i < revs->bloom_keys_nr; i++)
3235-
clear_bloom_key(&revs->bloom_keys[i]);
3236-
FREE_AND_NULL(revs->bloom_keys);
3237-
revs->bloom_keys_nr = 0;
3253+
release_revisions_bloom_keyvecs(revs);
32383254
}
32393255

32403256
static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)

revision.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ struct repository;
6262
struct rev_info;
6363
struct string_list;
6464
struct saved_parents;
65-
struct bloom_key;
65+
struct bloom_keyvec;
6666
struct bloom_filter_settings;
6767
struct option;
6868
struct parse_opt_ctx_t;
@@ -360,8 +360,8 @@ struct rev_info {
360360

361361
/* Commit graph bloom filter fields */
362362
/* The bloom filter key(s) for the pathspec */
363-
struct bloom_key *bloom_keys;
364-
int bloom_keys_nr;
363+
struct bloom_keyvec **bloom_keyvecs;
364+
int bloom_keyvecs_nr;
365365

366366
/*
367367
* The bloom filter settings used to generate the key.

t/t4216-log-bloom.sh

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ sane_unset GIT_TRACE2_CONFIG_PARAMS
6666

6767
setup () {
6868
rm -f "$TRASH_DIRECTORY/trace.perf" &&
69-
git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom &&
70-
GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom
69+
eval git -c core.commitGraph=false log --pretty="format:%s" "$1" >log_wo_bloom &&
70+
eval "GIT_TRACE2_PERF=\"$TRASH_DIRECTORY/trace.perf\"" \
71+
git -c core.commitGraph=true log --pretty="format:%s" "$1" >log_w_bloom
7172
}
7273

7374
test_bloom_filters_used () {
@@ -138,10 +139,6 @@ test_expect_success 'git log with --walk-reflogs does not use Bloom filters' '
138139
test_bloom_filters_not_used "--walk-reflogs -- A"
139140
'
140141

141-
test_expect_success 'git log -- multiple path specs does not use Bloom filters' '
142-
test_bloom_filters_not_used "-- file4 A/file1"
143-
'
144-
145142
test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' '
146143
test_bloom_filters_not_used "-- ."
147144
'
@@ -151,9 +148,17 @@ test_expect_success 'git log with wildcard that resolves to a single path uses B
151148
test_bloom_filters_used "-- *renamed"
152149
'
153150

154-
test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' '
155-
test_bloom_filters_not_used "-- *" &&
156-
test_bloom_filters_not_used "-- file*"
151+
test_expect_success 'git log with multiple literal paths uses Bloom filter' '
152+
test_bloom_filters_used "-- file4 A/file1" &&
153+
test_bloom_filters_used "-- *" &&
154+
test_bloom_filters_used "-- file*"
155+
'
156+
157+
test_expect_success 'git log with path contains a wildcard does not use Bloom filter' '
158+
test_bloom_filters_not_used "-- file\*" &&
159+
test_bloom_filters_not_used "-- A/\* file4" &&
160+
test_bloom_filters_not_used "-- file4 A/\*" &&
161+
test_bloom_filters_not_used "-- * A/\*"
157162
'
158163

159164
test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '

0 commit comments

Comments
 (0)