Skip to content

Commit 8d656a9

Browse files
committed
fetch, clone: add fetch.blobSizeLimit config
External tools like git-lfs and git-fat use the filter clean/smudge mechanism to manage large binary objects, but this requires pointer files, a separate storage backend, and careful coordination. Git's partial clone infrastructure provides a more native approach: large blobs can be excluded at the protocol level during fetch and lazily retrieved on demand. However, enabling this requires passing `--filter=blob:limit=<size>` on every clone, which is not discoverable and cannot be set as a global default.

Add a new `fetch.blobSizeLimit` configuration option that enables size-based partial clone behavior globally. When set, both `git clone` and `git fetch` automatically apply a `blob:limit=<size>` filter. Blobs whose size is at least the threshold and that are not needed for the current worktree are excluded from the transfer and lazily fetched on demand when needed (e.g., during checkout, diff, or merge). This makes it easy to work with repositories that have accumulated large binary files in their history, without downloading all of them upfront.

The precedence order is:

1. Explicit `--filter=` on the command line (highest)
2. Existing `remote.<name>.partialclonefilter`
3. `fetch.blobSizeLimit` (new, lowest)

Once a clone or fetch applies this setting, the remote is registered as a promisor remote with the corresponding filter spec, so subsequent fetches inherit it automatically. If the server does not support object filtering, the filter is ignored and Git warns that filtering is not recognized by the server.

Signed-off-by: Alan Braithwaite <alan@braithwaite.dev>
1 parent 7b2bccb commit 8d656a9

File tree

4 files changed

+135
-10
lines changed

4 files changed

+135
-10
lines changed

Documentation/config/fetch.adoc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,25 @@ config setting.
103103
file helps performance of many Git commands, including `git merge-base`,
104104
`git push -f`, and `git log --graph`. Defaults to `false`.
105105

106+
`fetch.blobSizeLimit`::
107+
When set to a size value (e.g., `1m`, `100k`, `1g`), both
108+
linkgit:git-clone[1] and linkgit:git-fetch[1] will automatically
109+
use `--filter=blob:limit=<value>` to enable partial clone
110+
behavior. Blobs of at least this size are excluded from the
111+
initial transfer and lazily fetched on demand when needed (e.g.,
112+
during checkout).
113+
+
114+
This provides a convenient way to enable size-based partial clones
115+
globally without passing `--filter` on every command. Once a clone or
116+
fetch applies this setting, the remote is registered as a promisor
117+
remote with the corresponding filter, so subsequent fetches inherit
118+
the filter automatically.
119+
+
120+
An explicit `--filter` option on the command line takes precedence over
121+
this config. An existing `remote.<name>.partialclonefilter` also takes
122+
precedence. If the server does not support object filtering, the
123+
setting is ignored and a warning is printed.
124+
106125
`fetch.bundleURI`::
107126
This value stores a URI for downloading Git object data from a bundle
108127
URI before performing an incremental fetch from the origin Git server.

builtin/clone.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP;
7878
static int max_jobs = -1;
7979
static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP;
8080
static int config_filter_submodules = -1; /* unspecified */
81+
static char *config_blob_size_limit;
8182
static int option_remote_submodules;
8283

8384
static int recurse_submodules_cb(const struct option *opt,
@@ -753,6 +754,10 @@ static int git_clone_config(const char *k, const char *v,
753754
config_reject_shallow = git_config_bool(k, v);
754755
if (!strcmp(k, "clone.filtersubmodules"))
755756
config_filter_submodules = git_config_bool(k, v);
757+
if (!strcmp(k, "fetch.blobsizelimit")) {
758+
free(config_blob_size_limit);
759+
git_config_string(&config_blob_size_limit, k, v);
760+
}
756761

757762
return git_default_config(k, v, ctx, cb);
758763
}
@@ -1010,6 +1015,13 @@ int cmd_clone(int argc,
10101015
argc = parse_options(argc, argv, prefix, builtin_clone_options,
10111016
builtin_clone_usage, 0);
10121017

1018+
if (!filter_options.choice && config_blob_size_limit) {
1019+
struct strbuf buf = STRBUF_INIT;
1020+
strbuf_addf(&buf, "blob:limit=%s", config_blob_size_limit);
1021+
parse_list_objects_filter(&filter_options, buf.buf);
1022+
strbuf_release(&buf);
1023+
}
1024+
10131025
if (argc > 2)
10141026
usage_msg_opt(_("Too many arguments."),
10151027
builtin_clone_usage, builtin_clone_options);
@@ -1634,6 +1646,7 @@ int cmd_clone(int argc,
16341646
ref_storage_format);
16351647

16361648
list_objects_filter_release(&filter_options);
1649+
free(config_blob_size_limit);
16371650

16381651
string_list_clear(&option_not, 0);
16391652
string_list_clear(&option_config, 0);

builtin/fetch.c

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ struct fetch_config {
109109
int recurse_submodules;
110110
int parallel;
111111
int submodule_fetch_jobs;
112+
char *blob_size_limit;
112113
};
113114

114115
static int git_fetch_config(const char *k, const char *v,
@@ -160,6 +161,9 @@ static int git_fetch_config(const char *k, const char *v,
160161
return 0;
161162
}
162163

164+
if (!strcmp(k, "fetch.blobsizelimit"))
165+
return git_config_string(&fetch_config->blob_size_limit, k, v);
166+
163167
if (!strcmp(k, "fetch.output")) {
164168
if (!v)
165169
return config_error_nonbool(k);
@@ -2342,7 +2346,8 @@ static int fetch_multiple(struct string_list *list, int max_children,
23422346
* or inherit the default filter-spec from the config.
23432347
*/
23442348
static inline void fetch_one_setup_partial(struct remote *remote,
2345-
struct list_objects_filter_options *filter_options)
2349+
struct list_objects_filter_options *filter_options,
2350+
const struct fetch_config *config)
23462351
{
23472352
/*
23482353
* Explicit --no-filter argument overrides everything, regardless
@@ -2352,10 +2357,12 @@ static inline void fetch_one_setup_partial(struct remote *remote,
23522357
return;
23532358

23542359
/*
2355-
* If no prior partial clone/fetch and the current fetch DID NOT
2356-
* request a partial-fetch, do a normal fetch.
2360+
* If no prior partial clone/fetch, the current fetch did not
2361+
* request a partial-fetch, and no global blob size limit is
2362+
* configured, do a normal fetch.
23572363
*/
2358-
if (!repo_has_promisor_remote(the_repository) && !filter_options->choice)
2364+
if (!repo_has_promisor_remote(the_repository) &&
2365+
!filter_options->choice && !config->blob_size_limit)
23592366
return;
23602367

23612368
/*
@@ -2372,11 +2379,27 @@ static inline void fetch_one_setup_partial(struct remote *remote,
23722379
/*
23732380
* Do a partial-fetch from the promisor remote using either the
23742381
* explicitly given filter-spec or inherit the filter-spec from
2375-
* the config.
2382+
* the per-remote config.
2383+
*/
2384+
if (repo_has_promisor_remote(the_repository)) {
2385+
partial_clone_get_default_filter_spec(filter_options,
2386+
remote->name);
2387+
if (filter_options->choice)
2388+
return;
2389+
}
2390+
2391+
/*
2392+
* Fall back to the global fetch.blobSizeLimit config. This
2393+
* enables partial clone behavior without requiring --filter
2394+
* on the command line or a pre-existing promisor remote.
23762395
*/
2377-
if (!filter_options->choice)
2378-
partial_clone_get_default_filter_spec(filter_options, remote->name);
2379-
return;
2396+
if (!filter_options->choice && config->blob_size_limit) {
2397+
struct strbuf buf = STRBUF_INIT;
2398+
strbuf_addf(&buf, "blob:limit=%s", config->blob_size_limit);
2399+
parse_list_objects_filter(filter_options, buf.buf);
2400+
strbuf_release(&buf);
2401+
partial_clone_register(remote->name, filter_options);
2402+
}
23802403
}
23812404

23822405
static int fetch_one(struct remote *remote, int argc, const char **argv,
@@ -2762,9 +2785,10 @@ int cmd_fetch(int argc,
27622785
oidset_clear(&acked_commits);
27632786
trace2_region_leave("fetch", "negotiate-only", the_repository);
27642787
} else if (remote) {
2765-
if (filter_options.choice || repo_has_promisor_remote(the_repository)) {
2788+
if (filter_options.choice || repo_has_promisor_remote(the_repository) ||
2789+
config.blob_size_limit) {
27662790
trace2_region_enter("fetch", "setup-partial", the_repository);
2767-
fetch_one_setup_partial(remote, &filter_options);
2791+
fetch_one_setup_partial(remote, &filter_options, &config);
27682792
trace2_region_leave("fetch", "setup-partial", the_repository);
27692793
}
27702794
trace2_region_enter("fetch", "fetch-one", the_repository);
@@ -2876,5 +2900,6 @@ int cmd_fetch(int argc,
28762900
cleanup:
28772901
string_list_clear(&list, 0);
28782902
list_objects_filter_release(&filter_options);
2903+
free(config.blob_size_limit);
28792904
return result;
28802905
}

t/t5616-partial-clone.sh

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,74 @@ test_expect_success 'after fetching descendants of non-promisor commits, gc work
722722
git -C partial gc --prune=now
723723
'
724724

725+
# Test fetch.blobSizeLimit config
726+
727+
test_expect_success 'setup for fetch.blobSizeLimit tests' '
728+
git init blob-limit-src &&
729+
echo "small" >blob-limit-src/small.txt &&
730+
dd if=/dev/zero of=blob-limit-src/large.bin bs=1024 count=100 2>/dev/null &&
731+
git -C blob-limit-src add . &&
732+
git -C blob-limit-src commit -m "initial" &&
733+
734+
git clone --bare "file://$(pwd)/blob-limit-src" blob-limit-srv.bare &&
735+
git -C blob-limit-srv.bare config --local uploadpack.allowfilter 1 &&
736+
git -C blob-limit-srv.bare config --local uploadpack.allowanysha1inwant 1
737+
'
738+
739+
test_expect_success 'clone with blob:limit filter directly' '
740+
git clone --filter=blob:limit=1k \
741+
"file://$(pwd)/blob-limit-srv.bare" blob-limit-direct &&
742+
743+
test "$(git -C blob-limit-direct config --local remote.origin.promisor)" = "true" &&
744+
test "$(git -C blob-limit-direct config --local remote.origin.partialclonefilter)" = "blob:limit=1024"
745+
'
746+
747+
test_expect_success 'clone with fetch.blobSizeLimit config applies filter' '
748+
git -c fetch.blobSizeLimit=1k clone \
749+
"file://$(pwd)/blob-limit-srv.bare" blob-limit-clone &&
750+
751+
test "$(git -C blob-limit-clone config --local remote.origin.promisor)" = "true" &&
752+
test "$(git -C blob-limit-clone config --local remote.origin.partialclonefilter)" = "blob:limit=1024"
753+
'
754+
755+
test_expect_success 'clone with --filter overrides fetch.blobSizeLimit' '
756+
git -c fetch.blobSizeLimit=1k clone --filter=blob:none \
757+
"file://$(pwd)/blob-limit-srv.bare" blob-limit-override &&
758+
759+
test "$(git -C blob-limit-override config --local remote.origin.partialclonefilter)" = "blob:none"
760+
'
761+
762+
test_expect_success 'fetch with fetch.blobSizeLimit registers promisor remote' '
763+
git clone --no-checkout "file://$(pwd)/blob-limit-srv.bare" blob-limit-fetch &&
764+
765+
# Sanity: not yet a partial clone
766+
test_must_fail git -C blob-limit-fetch config --local remote.origin.promisor &&
767+
768+
# Add a new commit to the server
769+
echo "new-small" >blob-limit-src/new-small.txt &&
770+
dd if=/dev/zero of=blob-limit-src/new-large.bin bs=1024 count=100 2>/dev/null &&
771+
git -C blob-limit-src add . &&
772+
git -C blob-limit-src commit -m "second" &&
773+
git -C blob-limit-src push "file://$(pwd)/blob-limit-srv.bare" main &&
774+
775+
# Fetch with the config set
776+
git -C blob-limit-fetch -c fetch.blobSizeLimit=1k fetch origin &&
777+
778+
test "$(git -C blob-limit-fetch config --local remote.origin.promisor)" = "true" &&
779+
test "$(git -C blob-limit-fetch config --local remote.origin.partialclonefilter)" = "blob:limit=1024"
780+
'
781+
782+
test_expect_success 'fetch.blobSizeLimit does not override existing partialclonefilter' '
783+
git clone --filter=blob:none \
784+
"file://$(pwd)/blob-limit-srv.bare" blob-limit-existing &&
785+
786+
test "$(git -C blob-limit-existing config --local remote.origin.partialclonefilter)" = "blob:none" &&
787+
788+
# Fetch with a different blobSizeLimit; existing filter should win
789+
git -C blob-limit-existing -c fetch.blobSizeLimit=1k fetch origin &&
790+
791+
test "$(git -C blob-limit-existing config --local remote.origin.partialclonefilter)" = "blob:none"
792+
'
725793

726794
. "$TEST_DIRECTORY"/lib-httpd.sh
727795
start_httpd

0 commit comments

Comments
 (0)