Skip to content

Commit d5ef4d1

Browse files
authored
feat: use treeless sparse checkout for remote template cloning (#899)
Use a treeless partial clone (--filter=tree:0) combined with sparse checkout when fetching remote templates with a known template path. This dramatically reduces clone time and disk usage for large monorepos like adk-samples, because git objects are downloaded only for the specific subdirectory that is needed. When no template path is specified (e.g. discovery/listing), the existing full shallow clone is preserved, since the entire repo tree is needed.
1 parent 2ca338b commit d5ef4d1

2 files changed

Lines changed: 133 additions & 19 deletions

File tree

agent_starter_pack/cli/utils/remote_template.py

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -270,19 +270,27 @@ def fetch_remote_template(
270270
# Attempt Git Clone
271271
try:
272272
clone_url = spec.repo_url
273-
274-
# Build clone command with --single-branch (optimized for branches)
275-
clone_cmd = [
276-
"git",
277-
"clone",
278-
"--depth",
279-
"1",
280-
"--single-branch",
281-
"--branch",
282-
spec.git_ref,
283-
clone_url,
284-
str(repo_path),
285-
]
273+
git_env = {**os.environ, "GIT_TERMINAL_PROMPT": "0"}
274+
275+
# Use sparse checkout when a specific template path is known — significantly
276+
# faster for large monorepos since only the needed subdirectory is downloaded.
277+
use_sparse_checkout = bool(spec.template_path)
278+
279+
clone_cmd = ["git", "clone"]
280+
if use_sparse_checkout:
281+
clone_cmd.append("--no-checkout")
282+
clone_cmd.extend(
283+
[
284+
"--depth",
285+
"1",
286+
"--single-branch",
287+
"--branch",
288+
spec.git_ref,
289+
]
290+
)
291+
if use_sparse_checkout:
292+
clone_cmd.append("--filter=tree:0")
293+
clone_cmd.extend([clone_url, str(repo_path)])
286294

287295
logging.debug(
288296
f"Attempting to clone remote template with Git: {' '.join(clone_cmd)}"
@@ -293,7 +301,7 @@ def fetch_remote_template(
293301
capture_output=True,
294302
text=True,
295303
encoding="utf-8",
296-
env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
304+
env=git_env,
297305
)
298306

299307
# If clone with --single-branch fails, retry without it (for tags)
@@ -303,23 +311,25 @@ def fetch_remote_template(
303311
logging.debug(
304312
f"Clone with --single-branch failed, retrying without it (git_ref '{spec.git_ref}' is likely a tag)"
305313
)
306-
clone_cmd_without_single_branch = [
314+
clone_cmd_retry = [
307315
"git",
308316
"clone",
309317
"--depth",
310318
"1",
311319
"--branch",
312320
spec.git_ref,
313-
clone_url,
314-
str(repo_path),
315321
]
322+
if use_sparse_checkout:
323+
clone_cmd_retry.extend(["--no-checkout", "--filter=tree:0"])
324+
clone_cmd_retry.extend([clone_url, str(repo_path)])
325+
316326
subprocess.run(
317-
clone_cmd_without_single_branch,
327+
clone_cmd_retry,
318328
capture_output=True,
319329
text=True,
320330
check=True,
321331
encoding="utf-8",
322-
env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
332+
env=git_env,
323333
)
324334
logging.debug("Git clone successful (without --single-branch).")
325335
else:
@@ -329,6 +339,24 @@ def fetch_remote_template(
329339
)
330340
else:
331341
logging.debug("Git clone successful.")
342+
343+
# Set up sparse checkout to materialize only the needed template path
344+
if use_sparse_checkout:
345+
sparse_cmds = [
346+
["git", "sparse-checkout", "set", spec.template_path + "/"],
347+
["git", "checkout", spec.git_ref],
348+
]
349+
for cmd in sparse_cmds:
350+
subprocess.run(
351+
cmd,
352+
capture_output=True,
353+
text=True,
354+
check=True,
355+
encoding="utf-8",
356+
cwd=str(repo_path),
357+
env=git_env,
358+
)
359+
logging.debug(f"Sparse checkout configured for path: {spec.template_path}")
332360
except subprocess.CalledProcessError as e:
333361
shutil.rmtree(temp_path, ignore_errors=True)
334362
raise RuntimeError(f"Git clone failed: {e.stderr.strip()}") from e

tests/cli/utils/test_remote_template.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,92 @@ def test_fetch_remote_template_path_not_found(
184184

185185
mock_rmtree.assert_called_once()
186186

187+
@patch("subprocess.run")
188+
@patch("tempfile.mkdtemp")
189+
@patch("shutil.rmtree")
190+
def test_fetch_uses_sparse_checkout_when_template_path_set(
191+
self,
192+
mock_rmtree: MagicMock,
193+
mock_mkdtemp: MagicMock,
194+
mock_subprocess: MagicMock,
195+
) -> None:
196+
"""Test that fetch uses treeless clone + sparse checkout when template_path is set"""
197+
mock_mkdtemp.return_value = "/tmp/test_dir"
198+
mock_subprocess.return_value = MagicMock(returncode=0, stderr="")
199+
200+
spec = RemoteTemplateSpec(
201+
repo_url="https://github.com/google/adk-samples",
202+
template_path="python/agents/data-science",
203+
git_ref="main",
204+
is_adk_samples=True,
205+
)
206+
207+
# Make template_dir.exists() return True to avoid FileNotFoundError
208+
with (
209+
patch("pathlib.Path.exists", return_value=True),
210+
patch(
211+
"agent_starter_pack.cli.utils.remote_template.check_and_execute_with_version_lock",
212+
return_value=False,
213+
),
214+
):
215+
fetch_remote_template(spec)
216+
217+
# Verify the clone command uses --filter=tree:0 and --no-checkout
218+
clone_call = mock_subprocess.call_args_list[0]
219+
clone_cmd = clone_call[0][0]
220+
assert "--filter=tree:0" in clone_cmd
221+
assert "--no-checkout" in clone_cmd
222+
223+
# Verify sparse checkout commands were called
224+
sparse_set_call = mock_subprocess.call_args_list[1]
225+
sparse_cmd = sparse_set_call[0][0]
226+
assert sparse_cmd == [
227+
"git",
228+
"sparse-checkout",
229+
"set",
230+
"python/agents/data-science/",
231+
]
232+
233+
checkout_call = mock_subprocess.call_args_list[2]
234+
checkout_cmd = checkout_call[0][0]
235+
assert checkout_cmd == ["git", "checkout", "main"]
236+
237+
@patch("subprocess.run")
238+
@patch("tempfile.mkdtemp")
239+
@patch("shutil.rmtree")
240+
def test_fetch_uses_full_clone_when_no_template_path(
241+
self,
242+
mock_rmtree: MagicMock,
243+
mock_mkdtemp: MagicMock,
244+
mock_subprocess: MagicMock,
245+
) -> None:
246+
"""Test that fetch uses full shallow clone when template_path is empty"""
247+
mock_mkdtemp.return_value = "/tmp/test_dir"
248+
mock_subprocess.return_value = MagicMock(returncode=0, stderr="")
249+
250+
spec = RemoteTemplateSpec(
251+
repo_url="https://github.com/google/adk-samples",
252+
template_path="",
253+
git_ref="main",
254+
)
255+
256+
with (
257+
patch("pathlib.Path.exists", return_value=True),
258+
patch(
259+
"agent_starter_pack.cli.utils.remote_template.check_and_execute_with_version_lock",
260+
return_value=False,
261+
),
262+
):
263+
fetch_remote_template(spec)
264+
265+
# Verify the clone command does NOT use --filter=tree:0
266+
clone_call = mock_subprocess.call_args_list[0]
267+
clone_cmd = clone_call[0][0]
268+
assert "--filter=tree:0" not in clone_cmd
269+
assert "--no-checkout" not in clone_cmd
270+
# Should only have the clone call, no sparse checkout commands
271+
assert len(mock_subprocess.call_args_list) == 1
272+
187273

188274
class TestLoadRemoteTemplateConfig:
189275
def test_load_remote_template_config_primary_location(self) -> None:

0 commit comments

Comments
 (0)