Skip to content

Commit 1bd7364

Browse files
committed
backfill: accept revision arguments
The existing implementation of 'git backfill' only includes downloading missing blobs reachable from HEAD. Advanced uses may desire more general commit limiting options, such as '--all' for all references, specifying a commit range via negative references, or specifying a recency of use such as with '--since=<date>'.

All of these options are available if we use setup_revisions() to parse the unknown arguments with the revision machinery. This opens up a large number of possibilities, only a small set of which are tested here.

For documentation, we avoid duplicating the option documentation and instead link to the documentation of 'git rev-list'.

Note that these arguments currently allow specifying a pathspec, which modifies the commit history checks but does not limit the paths used in the backfill logic. This will be updated in a future change.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
1 parent 55a45b2 commit 1bd7364

3 files changed

Lines changed: 135 additions & 6 deletions

File tree

Documentation/git-backfill.adoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,12 @@ OPTIONS
6363
current sparse-checkout. If the sparse-checkout feature is enabled,
6464
then `--sparse` is assumed and can be disabled with `--no-sparse`.
6565

66+
You may also specify the commit limiting options from linkgit:git-rev-list[1].
67+
6668
SEE ALSO
6769
--------
6870
linkgit:git-clone[1].
71+
linkgit:git-rev-list[1].
6972

7073
GIT
7174
---

builtin/backfill.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ struct backfill_context {
3535
struct oid_array current_batch;
3636
size_t min_batch_size;
3737
int sparse;
38+
struct rev_info revs;
3839
};
3940

4041
static void backfill_context_clear(struct backfill_context *ctx)
@@ -80,7 +81,6 @@ static int fill_missing_blobs(const char *path UNUSED,
8081

8182
static int do_backfill(struct backfill_context *ctx)
8283
{
83-
struct rev_info revs;
8484
struct path_walk_info info = PATH_WALK_INFO_INIT;
8585
int ret;
8686

@@ -92,13 +92,14 @@ static int do_backfill(struct backfill_context *ctx)
9292
}
9393
}
9494

95-
repo_init_revisions(ctx->repo, &revs, "");
96-
handle_revision_arg("HEAD", &revs, 0, 0);
95+
/* Walk from HEAD if otherwise unspecified. */
96+
if (!ctx->revs.pending.nr)
97+
handle_revision_arg("HEAD", &ctx->revs, 0, 0);
9798

9899
info.blobs = 1;
99100
info.tags = info.commits = info.trees = 0;
100101

101-
info.revs = &revs;
102+
info.revs = &ctx->revs;
102103
info.path_fn = fill_missing_blobs;
103104
info.path_fn_data = ctx;
104105

@@ -109,7 +110,6 @@ static int do_backfill(struct backfill_context *ctx)
109110
download_batch(ctx);
110111

111112
path_walk_info_clear(&info);
112-
release_revisions(&revs);
113113
return ret;
114114
}
115115

@@ -121,6 +121,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
121121
.current_batch = OID_ARRAY_INIT,
122122
.min_batch_size = 50000,
123123
.sparse = 0,
124+
.revs = REV_INFO_INIT,
124125
};
125126
struct option options[] = {
126127
OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size,
@@ -134,7 +135,12 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
134135
builtin_backfill_usage, options);
135136

136137
argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
137-
0);
138+
PARSE_OPT_KEEP_UNKNOWN_OPT |
139+
PARSE_OPT_KEEP_ARGV0 |
140+
PARSE_OPT_KEEP_DASHDASH);
141+
142+
repo_init_revisions(repo, &ctx.revs, prefix);
143+
argc = setup_revisions(argc, argv, &ctx.revs, NULL);
138144

139145
repo_config(repo, git_default_config, NULL);
140146

@@ -143,5 +149,6 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
143149

144150
result = do_backfill(&ctx);
145151
backfill_context_clear(&ctx);
152+
release_revisions(&ctx.revs);
146153
return result;
147154
}

t/t5620-backfill.sh

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,125 @@ test_expect_success 'backfill --sparse without cone mode (negative)' '
224224
test_line_count = 12 missing
225225
'
226226

227+
test_expect_success 'backfill with revision range' '
228+
test_when_finished rm -rf backfill-revs &&
229+
git clone --no-checkout --filter=blob:none \
230+
--single-branch --branch=main \
231+
"file://$(pwd)/srv.bare" backfill-revs &&
232+
233+
# No blobs yet
234+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
235+
test_line_count = 48 missing &&
236+
237+
git -C backfill-revs backfill HEAD~2..HEAD &&
238+
239+
# 30 objects downloaded.
240+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
241+
test_line_count = 18 missing
242+
'
243+
244+
test_expect_success 'backfill with revisions over stdin' '
245+
test_when_finished rm -rf backfill-revs &&
246+
git clone --no-checkout --filter=blob:none \
247+
--single-branch --branch=main \
248+
"file://$(pwd)/srv.bare" backfill-revs &&
249+
250+
# No blobs yet
251+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
252+
test_line_count = 48 missing &&
253+
254+
cat >in <<-EOF &&
255+
HEAD
256+
^HEAD~2
257+
EOF
258+
259+
git -C backfill-revs backfill --stdin <in &&
260+
261+
# 30 objects downloaded.
262+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
263+
test_line_count = 18 missing
264+
'
265+
266+
test_expect_success 'backfill with pathspec' '
267+
test_when_finished rm -rf backfill-path &&
268+
git clone --bare --filter=blob:none \
269+
--single-branch --branch=main \
270+
"file://$(pwd)/srv.bare" backfill-path &&
271+
272+
# No blobs yet
273+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
274+
test_line_count = 48 missing &&
275+
276+
# TODO: We want this pathspec to be respected in limiting
277+
# the downloaded files.
278+
git -C backfill-path backfill HEAD -- d/f &&
279+
280+
# TODO: We only want specific objects downloaded.
281+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
282+
test_line_count = 0 missing
283+
'
284+
285+
test_expect_success 'backfill with --all' '
286+
test_when_finished rm -rf backfill-all &&
287+
git clone --no-checkout --filter=blob:none \
288+
"file://$(pwd)/srv-revs.bare" backfill-all &&
289+
290+
# All blobs from all refs are missing
291+
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
292+
test_line_count = 54 missing &&
293+
294+
# Backfill from HEAD gets main blobs only
295+
git -C backfill-all backfill HEAD &&
296+
297+
# Other branch blobs still missing
298+
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
299+
test_line_count = 2 missing &&
300+
301+
# Backfill with --all gets everything
302+
git -C backfill-all backfill --all &&
303+
304+
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
305+
test_line_count = 0 missing
306+
'
307+
308+
test_expect_success 'backfill with --first-parent' '
309+
test_when_finished rm -rf backfill-fp &&
310+
git clone --no-checkout --filter=blob:none \
311+
--single-branch --branch=main \
312+
"file://$(pwd)/srv-revs.bare" backfill-fp &&
313+
314+
git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
315+
test_line_count = 52 missing &&
316+
317+
# --first-parent skips the side branch commits, so
318+
# s/file.{1,2}.txt v1 blobs (only in side commit 1) are missed.
319+
git -C backfill-fp backfill --first-parent HEAD &&
320+
321+
git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
322+
test_line_count = 2 missing
323+
'
324+
325+
test_expect_success 'backfill with --since' '
326+
test_when_finished rm -rf backfill-since &&
327+
git clone --no-checkout --filter=blob:none \
328+
--single-branch --branch=main \
329+
"file://$(pwd)/srv-revs.bare" backfill-since &&
330+
331+
git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
332+
test_line_count = 52 missing &&
333+
334+
# Use a cutoff between commits 4 and 5 (between v1 and v2
335+
# iterations). Commits 5-8 still carry v1 of files 2-4 in
336+
# their trees, but v1 of file.1.txt is only in commits 1-4.
337+
SINCE=$(git -C backfill-since log --first-parent --reverse \
338+
--format=%ct HEAD~1 | sed -n 5p) &&
339+
git -C backfill-since backfill --since="@$((SINCE - 1))" HEAD &&
340+
341+
# 6 missing: v1 of file.1.txt in all 6 directories
342+
git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
343+
test_line_count = 6 missing
344+
'
345+
227346
. "$TEST_DIRECTORY"/lib-httpd.sh
228347
start_httpd
229348

0 commit comments

Comments
 (0)