Skip to content

Commit 3aae687

Browse files
fix: encode or validate all user provided data passed to urls (#739)
Fix a path traversal attack with request handling where untrusted values from users were passed directly into urls.
1 parent 4ca20cc commit 3aae687

4 files changed

Lines changed: 59 additions & 19 deletions

File tree

monty/exts/info/codesnippets.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from monty.bot import Monty
1717
from monty.log import get_logger
1818
from monty.utils import scheduling
19-
from monty.utils.helpers import EXPAND_BUTTON_PREFIX, decode_github_link
19+
from monty.utils.helpers import EXPAND_BUTTON_PREFIX, block_url_traversal, decode_github_link
2020
from monty.utils.markdown import remove_codeblocks
2121
from monty.utils.messages import DeleteButton, suppress_embeds
2222

@@ -30,7 +30,8 @@
3030

3131
# start_char, line_delimiter, and end_char are currently unused.
3232
GITHUB_RE = re.compile(
33-
r"https?:\/\/github\.(?:com|dev)\/(?P<user>[a-zA-Z0-9-]+)\/(?P<repo>[a-zA-Z0-9-]+)\/(?:blob|tree)\/(?P<path>[^#>]+)(\?[^#>]+)?"
33+
r"https?:\/\/github\.(?:com|dev)\/(?P<user>[a-zA-Z0-9-]+)\/(?P<repo>[a-zA-Z0-9-]+)\/(?:blob|tree)\/"
34+
r"(?P<path>[^#>]+/[^#>]+)(\?[^#>]+)?"
3435
r"(?:(#L(?P<L>L)?(?P<start_line>\d+)(?(L)C(?P<start_char>\d+))(?:(?P<line_delimiter>[-~\:]"
3536
r"|(\.\.))L(?P<end_line>\d+)(?(L)C(?P<end_char>\d+)))?))"
3637
)
@@ -119,17 +120,22 @@ async def _fetch_github_snippet(
119120
repo: str,
120121
path: str,
121122
start_line: str,
122-
end_line: str,
123+
end_line: str | None,
123124
**kwargs: "NoReturn",
124125
) -> str:
125126
"""Fetches a snippet from a GitHub repo."""
127+
user, repo, start_line = block_url_traversal(user, repo, start_line)
128+
if end_line:
129+
end_line = block_url_traversal(end_line)
130+
126131
# Search the GitHub API for the specified branch
127132
r = await self.bot.github.rest.repos.async_list_branches(user, repo, per_page=100)
128133
branches = r.json()
129134
r = await self.bot.github.rest.repos.async_list_tags(user, repo, per_page=100)
130135
tags = r.json()
131136
refs = branches + tags
132137
ref, encoded_file_path = self._find_ref(path, refs)
138+
ref, encoded_file_path = block_url_traversal(ref, encoded_file_path)
133139

134140
r = await self.bot.github.rest.repos.async_get_content(
135141
user,
@@ -156,6 +162,9 @@ async def _fetch_github_gist_snippet(
156162
**kwargs: "NoReturn",
157163
) -> str:
158164
"""Fetches a snippet from a GitHub gist."""
165+
gist_id, revision, file_path, start_line, end_line = block_url_traversal(
166+
gist_id, revision, file_path, start_line, end_line
167+
)
159168
if revision:
160169
endpoint = self.bot.github.rest.gists.async_get_revision(gist_id, revision)
161170
else:
@@ -186,20 +195,17 @@ async def _fetch_gitlab_snippet(
186195
**kwargs: "NoReturn",
187196
) -> str:
188197
"""Fetches a snippet from a GitLab repo."""
189-
enc_repo = quote_plus(repo)
190-
198+
repo, start_line, end_line = block_url_traversal(repo, start_line, end_line)
191199
# Searches the GitLab API for the specified branch
192-
branches = await self._fetch_response(
193-
f"https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches", "json"
194-
)
195-
tags = await self._fetch_response(f"https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags", "json")
200+
branches = await self._fetch_response(f"https://gitlab.com/api/v4/projects/{repo}/repository/branches", "json")
201+
tags = await self._fetch_response(f"https://gitlab.com/api/v4/projects/{repo}/repository/tags", "json")
196202
refs = branches + tags
197203
ref, file_path = self._find_ref(path, refs)
198-
enc_ref = quote_plus(ref)
199-
enc_file_path = quote_plus(file_path)
204+
ref = quote_plus(ref)
205+
path = quote_plus(file_path)
200206

201207
file_contents = await self._fetch_response(
202-
f"https://gitlab.com/api/v4/projects/{enc_repo}/repository/files/{enc_file_path}/raw?ref={enc_ref}",
208+
f"https://gitlab.com/api/v4/projects/{repo}/repository/files/{path}/raw?ref={ref}",
203209
"text",
204210
)
205211
return self._snippet_to_codeblock(file_contents, file_path, start_line, end_line)

monty/exts/info/github_info.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,20 @@
2727
from monty.log import get_logger
2828
from monty.utils import responses, scheduling
2929
from monty.utils.extensions import invoke_help_command
30-
from monty.utils.helpers import fromisoformat, get_num_suffix
30+
from monty.utils.helpers import block_url_traversal, fromisoformat, get_num_suffix
3131
from monty.utils.markdown import DiscordRenderer, remove_codeblocks
3232
from monty.utils.messages import DeleteButton, extract_urls, suppress_embeds
3333

3434

3535
# Maximum number of issues in one message
3636
MAXIMUM_ISSUES = 6
3737

38+
USERNAME_RE = re.compile(r"[a-zA-Z0-9][a-zA-Z0-9\-]{0,38}")
39+
REPO_RE = re.compile(
40+
rf"(?:(?P<owner>{USERNAME_RE.pattern})\/)?"
41+
r"(?P<repo>[\w\-\.]{1,100})"
42+
)
43+
3844
# Regex used when looking for automatic linking in messages
3945
# regex101 of current regex https://regex101.com/r/V2ji8M/6
4046
AUTOMATIC_REGEX = re.compile(
@@ -285,6 +291,11 @@ async def github_group(self, ctx: commands.Context, user_or_repo: str = "") -> N
285291
@github_group.command(name="user", aliases=("userinfo",))
286292
async def github_user_info(self, ctx: commands.Context, username: str) -> None:
287293
"""Fetches a user's GitHub information."""
294+
if not (match := USERNAME_RE.fullmatch(username)):
295+
msg = f"`{username}` is not a valid GitHub username."
296+
raise commands.BadArgument(msg)
297+
username = match.group(0)
298+
288299
async with ctx.typing():
289300
try:
290301
resp = await self.bot.github.rest.users.async_get_by_username(username)
@@ -375,18 +386,24 @@ async def github_repo_info(self, ctx: commands.Context, *repository: str) -> Non
375386
else:
376387
repo_name = ""
377388

389+
if not (match := REPO_RE.fullmatch(repo_name)):
390+
msg = f"`{repo_name}` is not a valid GitHub repository name."
391+
raise commands.BadArgument(msg)
392+
repo_name = match.group(0)
393+
378394
if not repo_name or repo_name.count("/") > 1:
379395
args = " ".join(repository[:2])
380396

381397
raise commands.BadArgument(
382398
"The repository should look like `user/reponame` or `user reponame`"
383399
+ (f", not `{args}`." if "`" not in args and len(args) < 20 else ".")
384400
)
385-
return
401+
402+
owner, repo = block_url_traversal(*repo_name.split("/", 1))
386403

387404
async with ctx.typing():
388405
try:
389-
resp = await self.bot.github.rest.repos.async_get(*repo_name.split("/", 1))
406+
resp = await self.bot.github.rest.repos.async_get(owner, repo)
390407
repo_data = resp.json()
391408
except githubkit.exception.RequestFailed:
392409
# There won't be a message key if this repo exists
@@ -461,6 +478,7 @@ async def fetch_issues(
461478
Returns IssueState on success, FetchError on failure.
462479
"""
463480
json_data = None
481+
user, repository = block_url_traversal(user, repository)
464482
if not is_discussion: # not a discussion, or uncertain
465483
try:
466484
r = await self.bot.github.rest.issues.async_get(user, repository, number)

monty/exts/python/pypi.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ async def fetch_package_list(self, *, use_cache: bool = True) -> None:
151151

152152
async def fetch_package(self, package: str) -> dict[str, Any] | None:
153153
"""Fetch a package from PyPI."""
154+
if characters := self.check_characters(package):
155+
msg = f"Illegal character(s) passed into command: '{disnake.utils.escape_markdown(characters.group(0))}'"
156+
raise MontyCommandError(msg)
157+
154158
async with self.bot.http_session.get(JSON_URL.format(package=package), headers=PYPI_API_HEADERS) as response:
155159
if response.status == 200 and response.content_type == "application/json":
156160
return await response.json()
@@ -265,9 +269,6 @@ async def pypi_package(
265269
embed.set_thumbnail(url=PYPI_ICON)
266270

267271
defer_task = None
268-
if characters := self.check_characters(package):
269-
msg = f"Illegal character(s) passed into command: '{disnake.utils.escape_markdown(characters.group(0))}'"
270-
raise MontyCommandError(msg)
271272

272273
response_json = await self.fetch_package(package)
273274
if not response_json:

monty/utils/helpers.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import datetime
55
import ssl
66
from typing import TYPE_CHECKING, Any, TypeVar, overload
7-
from urllib.parse import urlsplit, urlunsplit
7+
from urllib.parse import quote_plus, urlsplit, urlunsplit
88

99
import base65536
1010
import dateutil.parser
@@ -76,6 +76,21 @@ def pad_base64(data: str) -> str:
7676
return data + "=" * (-len(data) % 4)
7777

7878

79+
@overload
80+
def block_url_traversal(arg: str) -> str: ...
81+
82+
83+
@overload
84+
def block_url_traversal(*args: str) -> tuple[str, ...]: ...
85+
86+
87+
def block_url_traversal(arg: str, *args: str) -> str | tuple[str, ...]:
88+
"""Normalise a string for use in a URL by fixing slashes and escaping `/`."""
89+
if not args:
90+
return quote_plus(arg)
91+
return tuple(quote_plus(a) for a in (arg, *args))
92+
93+
7994
EXPAND_BUTTON_PREFIX = "ghexp-v1:"
8095

8196

0 commit comments

Comments
 (0)