Skip to content

Commit 8ad491d

Browse files
authored
Harden GitHook SSH command building and URL construction (#64756)
- Quote user-controlled values in _build_ssh_command with shlex.quote - Validate strict_host_key_checking against allowlist of valid SSH values - URL-encode username and auth token when embedding in repo URLs - Fix logic bug: `not X or not Y` → `not X and not Y` for git@/https:// check - Limit str.replace to first occurrence
1 parent b5dae4e commit 8ad491d

2 files changed

Lines changed: 21 additions & 9 deletions

File tree

providers/git/src/airflow/providers/git/hooks/git.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import stat
2626
import tempfile
2727
from typing import Any
28+
from urllib.parse import quote as urlquote
2829

2930
from airflow.providers.common.compat.sdk import AirflowException, BaseHook
3031

@@ -109,25 +110,32 @@ def __init__(
109110
raise AirflowException("Both 'key_file' and 'private_key' cannot be provided at the same time")
110111
self._process_git_auth_url()
111112

113+
_VALID_STRICT_HOST_KEY_CHECKING = frozenset({"yes", "no", "accept-new", "off", "ask"})
114+
112115
def _build_ssh_command(self, key_path: str | None = None) -> str:
113116
parts = ["ssh"]
114117

115118
if key_path:
116-
parts.append(f"-i {key_path}")
119+
parts.append(f"-i {shlex.quote(key_path)}")
117120
parts.append("-o IdentitiesOnly=yes")
118121

122+
if self.strict_host_key_checking not in self._VALID_STRICT_HOST_KEY_CHECKING:
123+
raise ValueError(
124+
f"Invalid strict_host_key_checking value: {self.strict_host_key_checking!r}. "
125+
f"Must be one of {sorted(self._VALID_STRICT_HOST_KEY_CHECKING)}"
126+
)
119127
parts.append(f"-o StrictHostKeyChecking={self.strict_host_key_checking}")
120128

121129
if self.known_hosts_file:
122-
parts.append(f"-o UserKnownHostsFile={self.known_hosts_file}")
130+
parts.append(f"-o UserKnownHostsFile={shlex.quote(self.known_hosts_file)}")
123131
elif self.strict_host_key_checking == "no":
124132
parts.append("-o UserKnownHostsFile=/dev/null")
125133

126134
if self.ssh_config_file:
127-
parts.append(f"-F {self.ssh_config_file}")
135+
parts.append(f"-F {shlex.quote(self.ssh_config_file)}")
128136

129137
if self.host_proxy_cmd:
130-
parts.append(f'-o ProxyCommand="{self.host_proxy_cmd}"')
138+
parts.append(f"-o ProxyCommand={shlex.quote(self.host_proxy_cmd)}")
131139

132140
if self.ssh_port:
133141
parts.append(f"-p {self.ssh_port}")
@@ -138,13 +146,17 @@ def _process_git_auth_url(self):
138146
if not isinstance(self.repo_url, str):
139147
return
140148
if self.auth_token and self.repo_url.startswith("https://"):
141-
self.repo_url = self.repo_url.replace("https://", f"https://{self.user_name}:{self.auth_token}@")
149+
encoded_user = urlquote(self.user_name, safe="")
150+
encoded_token = urlquote(self.auth_token, safe="")
151+
self.repo_url = self.repo_url.replace("https://", f"https://{encoded_user}:{encoded_token}@", 1)
142152
elif self.auth_token and self.repo_url.startswith("http://"):
143-
self.repo_url = self.repo_url.replace("http://", f"http://{self.user_name}:{self.auth_token}@")
153+
encoded_user = urlquote(self.user_name, safe="")
154+
encoded_token = urlquote(self.auth_token, safe="")
155+
self.repo_url = self.repo_url.replace("http://", f"http://{encoded_user}:{encoded_token}@", 1)
144156
elif self.repo_url.startswith("http://"):
145157
# if no auth token, use the repo url as is
146-
self.repo_url = self.repo_url
147-
elif not self.repo_url.startswith("git@") or not self.repo_url.startswith("https://"):
158+
pass
159+
elif not self.repo_url.startswith("git@") and not self.repo_url.startswith("https://"):
148160
self.repo_url = os.path.expanduser(self.repo_url)
149161

150162
def set_git_env(self, key: str | None = None) -> None:

providers/git/tests/unit/git/hooks/test_git.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def test_proxy_command(self, create_connection_without_db):
249249
hook = GitHook(git_conn_id="git_with_proxy")
250250
with hook.configure_hook_env():
251251
cmd = hook.env["GIT_SSH_COMMAND"]
252-
assert 'ProxyCommand="ssh -W %h:%p bastion.example.com"' in cmd
252+
assert "ProxyCommand='ssh -W %h:%p bastion.example.com'" in cmd
253253

254254
def test_known_hosts_file(self, create_connection_without_db):
255255
create_connection_without_db(

0 commit comments

Comments
 (0)