Skip to content

Commit 0058173

Browse files
authored
fix: improve subprocess exception in git_utils and add tests (#5812)
* fix: improve subprocess exception in git_utils and add tests * fix: add generic exception handling for subprocess in git_utils * fix: handle non-standard exception constructors in catch-all
1 parent 0747156 commit 0058173

2 files changed

Lines changed: 139 additions & 1 deletion

File tree

src/sagemaker/git_utils.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,22 @@ def _clone_command_for_codecommit_https(git_config, dest_dir):
332332
_run_clone_command(updated_url, dest_dir)
333333

334334

335+
def _redact_credentials_from_url(url):
336+
"""Redact credentials embedded in an HTTPS Git URL.
337+
338+
Replaces any username, password, or token embedded before the '@' in an
339+
HTTPS URL with a placeholder so that credentials are never exposed in
340+
logs or exception messages.
341+
342+
Args:
343+
url (str): The Git repository URL that may contain embedded credentials.
344+
345+
Returns:
346+
str: The URL with credentials replaced by '<credentials-redacted>'.
347+
"""
348+
return re.sub(r"(https://)([^@]+)@", r"\1<credentials-redacted>@", url)
349+
350+
335351
def _run_clone_command(repo_url, dest_dir):
336352
"""Run the 'git clone' command with the repo url and the directory to clone the repo into.
337353
@@ -345,7 +361,24 @@ def _run_clone_command(repo_url, dest_dir):
345361
my_env = os.environ.copy()
346362
if repo_url.startswith("https://"):
347363
my_env["GIT_TERMINAL_PROMPT"] = "0"
348-
subprocess.check_call(["git", "clone", repo_url, dest_dir], env=my_env)
364+
try:
365+
subprocess.check_call(["git", "clone", repo_url, dest_dir], env=my_env)
366+
except subprocess.CalledProcessError as e:
367+
# Re-raise with credentials redacted from the command to prevent
368+
# plaintext tokens/passwords from leaking into logs or tracebacks.
369+
safe_url = _redact_credentials_from_url(repo_url)
370+
raise subprocess.CalledProcessError(
371+
e.returncode,
372+
["git", "clone", safe_url, dest_dir],
373+
output=e.output,
374+
stderr=e.stderr,
375+
) from None
376+
except Exception as e:
377+
safe_url = _redact_credentials_from_url(repo_url)
378+
try:
379+
raise type(e)(str(e).replace(repo_url, safe_url)) from None
380+
except TypeError:
381+
raise RuntimeError(str(e).replace(repo_url, safe_url)) from None
349382
elif repo_url.startswith("git@") or repo_url.startswith("ssh://"):
350383
try:
351384
with tempfile.TemporaryDirectory() as tmp_dir:

tests/unit/test_git_utils.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,3 +704,108 @@ def test_sanitize_git_url_comprehensive_attack_scenarios(self):
704704
"Invalid characters in hostname",
705705
]
706706
)
707+
708+
709+
class TestCredentialRedaction:
710+
"""Test cases for credential redaction in clone error handling."""
711+
712+
def test_redact_token_from_url(self):
713+
"""Test that a token embedded in an HTTPS URL is redacted."""
714+
url = "https://ghp_SuperSecretToken123@github.com/user/repo.git"
715+
result = git_utils._redact_credentials_from_url(url)
716+
assert "ghp_SuperSecretToken123" not in result
717+
assert result == "https://<credentials-redacted>@github.com/user/repo.git"
718+
719+
def test_redact_username_password_from_url(self):
720+
"""Test that username:password embedded in an HTTPS URL is redacted."""
721+
url = "https://myuser:mypassword@github.com/user/repo.git"
722+
result = git_utils._redact_credentials_from_url(url)
723+
assert "myuser" not in result
724+
assert "mypassword" not in result
725+
assert result == "https://<credentials-redacted>@github.com/user/repo.git"
726+
727+
def test_redact_url_encoded_password(self):
728+
"""Test that URL-encoded credentials are redacted."""
729+
url = "https://user:p%40ss%20word@git-codecommit.us-east-1.amazonaws.com/v1/repos/myrepo"
730+
result = git_utils._redact_credentials_from_url(url)
731+
assert "p%40ss%20word" not in result
732+
assert "<credentials-redacted>" in result
733+
734+
def test_no_redaction_without_credentials(self):
735+
"""Test that URLs without credentials are unchanged."""
736+
url = "https://github.com/user/repo.git"
737+
result = git_utils._redact_credentials_from_url(url)
738+
assert result == url
739+
740+
def test_no_redaction_for_ssh_url(self):
741+
"""Test that SSH URLs are not affected by redaction."""
742+
url = "git@github.com:user/repo.git"
743+
result = git_utils._redact_credentials_from_url(url)
744+
assert result == url
745+
746+
def test_clone_failure_redacts_token(self):
747+
"""Test that CalledProcessError from a failed clone does not contain the token."""
748+
token_url = "https://ghp_secret123@github.com/user/repo.git"
749+
with patch(
750+
"subprocess.check_call",
751+
side_effect=subprocess.CalledProcessError(
752+
128, ["git", "clone", token_url, "/tmp/dest"]
753+
),
754+
):
755+
with pytest.raises(subprocess.CalledProcessError) as exc_info:
756+
git_utils._run_clone_command(token_url, "/tmp/dest")
757+
758+
assert "ghp_secret123" not in str(exc_info.value)
759+
assert "ghp_secret123" not in str(exc_info.value.cmd)
760+
assert "<credentials-redacted>" in str(exc_info.value.cmd)
761+
762+
def test_clone_failure_redacts_username_password(self):
763+
"""Test that CalledProcessError from a failed clone does not contain username/password."""
764+
cred_url = "https://admin:hunter2@github.com/org/repo.git"
765+
with patch(
766+
"subprocess.check_call",
767+
side_effect=subprocess.CalledProcessError(
768+
128, ["git", "clone", cred_url, "/tmp/dest"]
769+
),
770+
):
771+
with pytest.raises(subprocess.CalledProcessError) as exc_info:
772+
git_utils._run_clone_command(cred_url, "/tmp/dest")
773+
774+
assert "admin" not in str(exc_info.value.cmd)
775+
assert "hunter2" not in str(exc_info.value.cmd)
776+
assert "<credentials-redacted>" in str(exc_info.value.cmd)
777+
778+
def test_clone_failure_redacts_codecommit_credentials(self):
779+
"""Test that CodeCommit HTTPS credentials are redacted on failure."""
780+
cc_url = "https://user:pass@git-codecommit.us-east-1.amazonaws.com/v1/repos/myrepo"
781+
with patch(
782+
"subprocess.check_call",
783+
side_effect=subprocess.CalledProcessError(
784+
128, ["git", "clone", cc_url, "/tmp/dest"]
785+
),
786+
):
787+
with pytest.raises(subprocess.CalledProcessError) as exc_info:
788+
git_utils._run_clone_command(cc_url, "/tmp/dest")
789+
790+
assert "user:pass" not in str(exc_info.value.cmd)
791+
assert "<credentials-redacted>" in str(exc_info.value.cmd)
792+
793+
def test_clone_failure_suppresses_exception_chain(self):
794+
"""Test that the original exception chain is suppressed (from None)."""
795+
token_url = "https://ghp_secret@github.com/user/repo.git"
796+
with patch(
797+
"subprocess.check_call",
798+
side_effect=subprocess.CalledProcessError(
799+
128, ["git", "clone", token_url, "/tmp/dest"]
800+
),
801+
):
802+
with pytest.raises(subprocess.CalledProcessError) as exc_info:
803+
git_utils._run_clone_command(token_url, "/tmp/dest")
804+
805+
assert exc_info.value.__cause__ is None
806+
807+
def test_clone_success_no_exception(self):
808+
"""Test that successful clone does not raise."""
809+
url = "https://ghp_token@github.com/user/repo.git"
810+
with patch("subprocess.check_call"):
811+
git_utils._run_clone_command(url, "/tmp/dest")

0 commit comments

Comments
 (0)