Skip to content

Commit 2ef22aa

Browse files
committed
backfill: work with prefix pathspecs
The previous change allowed specifying revision arguments over the 'git backfill' command-line. This created the opportunity for pathspecs that specify a smaller set of starting commits, but otherwise did not restrict the blob paths that were downloaded. Update the path-walk API to accept certain kinds of pathspecs and to reject anything too complex. The current behavior focuses on pathspecs that match paths exactly. This includes exact filenames, including directory names as prefixes. The reason for this restriction is to allow for a faster execution by pruning the path walk to only trees that could contribute towards one of those paths as a parent directory. The test directory 'd/f/' (next to 'd/file*.txt') was prepared in a previous commit to exercise the subtlety in prefix matching. Signed-off-by: Derrick Stolee <stolee@gmail.com>
1 parent 1bd7364 commit 2ef22aa

2 files changed

Lines changed: 75 additions & 6 deletions

File tree

path-walk.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,49 @@ static int add_tree_entries(struct path_walk_context *ctx,
206206
match != MATCHED)
207207
continue;
208208
}
209+
if (ctx->revs->prune_data.nr) {
210+
struct pathspec *pd = &ctx->revs->prune_data;
211+
bool found = false;
212+
213+
for (int i = 0; i < pd->nr; i++) {
214+
struct pathspec_item *item = &pd->items[i];
215+
216+
/*
217+
* Is this path a parent directory of
218+
* the pathspec item?
219+
*/
220+
if (path.len < (size_t)item->len &&
221+
!strncmp(path.buf, item->match, path.len) &&
222+
item->match[path.len - 1] == '/') {
223+
found = true;
224+
break;
225+
}
226+
227+
/*
228+
* Or, is the pathspec an exact match?
229+
*/
230+
if (path.len == (size_t)item->len &&
231+
!strcmp(path.buf, item->match)) {
232+
found = true;
233+
break;
234+
}
235+
236+
/*
237+
* Or, is the pathspec a directory prefix
238+
* match?
239+
*/
240+
if (path.len > (size_t)item->len &&
241+
!strncmp(path.buf, item->match, item->len) &&
242+
path.buf[item->len] == '/') {
243+
found = true;
244+
break;
245+
}
246+
}
247+
248+
/* Skip paths that do not match the prefix. */
249+
if (!found)
250+
continue;
251+
}
209252

210253
add_path_to_list(ctx, path.buf, type, &entry.oid,
211254
!(o->flags & UNINTERESTING));
@@ -481,6 +524,17 @@ int walk_objects_by_path(struct path_walk_info *info)
481524
if (info->tags)
482525
info->revs->tag_objects = 1;
483526

527+
if (ctx.revs->prune_data.nr) {
528+
/*
529+
* Only literal prefix pathspecs are supported. Reject
530+
* wildcards and pathspec magic with an error.
531+
*/
532+
struct pathspec *pd = &ctx.revs->prune_data;
533+
534+
if (pd->has_wildcard || pd->magic)
535+
return error(_("provided pathspec is too generic"));
536+
}
537+
484538
/* Insert a single list for the root tree into the paths. */
485539
CALLOC_ARRAY(root_tree_list, 1);
486540
root_tree_list->type = OBJ_TREE;

t/t5620-backfill.sh

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ test_expect_success 'backfill with revisions over stdin' '
263263
test_line_count = 18 missing
264264
'
265265

266-
test_expect_success 'backfill with pathspec' '
266+
test_expect_success 'backfill with prefix pathspec' '
267267
test_when_finished rm -rf backfill-path &&
268268
git clone --bare --filter=blob:none \
269269
--single-branch --branch=main \
@@ -273,13 +273,28 @@ test_expect_success 'backfill with pathspec' '
273273
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
274274
test_line_count = 48 missing &&
275275
276-
# TODO: We want this pathspec to be respected in limiting
277-
# the downloaded files.
278-
git -C backfill-path backfill HEAD -- d/f &&
276+
git -C backfill-path backfill HEAD -- d/f 2>err &&
277+
test_must_be_empty err &&
279278
280-
# TODO: We only want specific objects downloaded.
281279
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
282-
test_line_count = 0 missing
280+
test_line_count = 40 missing
281+
'
282+
283+
test_expect_success 'backfill with multiple pathspecs' '
284+
test_when_finished rm -rf backfill-path &&
285+
git clone --bare --filter=blob:none \
286+
--single-branch --branch=main \
287+
"file://$(pwd)/srv.bare" backfill-path &&
288+
289+
# No blobs yet
290+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
291+
test_line_count = 48 missing &&
292+
293+
git -C backfill-path backfill HEAD -- d/f a 2>err &&
294+
test_must_be_empty err &&
295+
296+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
297+
test_line_count = 16 missing
283298
'
284299

285300
test_expect_success 'backfill with --all' '

0 commit comments

Comments
 (0)