Skip to content

Commit 7f05bdf

Browse files
committed
fix: Sanitize URLs to private repositories to not contain credentials
URLs to private repositories usually contain credentials. Signed-off-by: Nicolas Nobelis <nicolas.nobelis@bosch.com>
1 parent a5366c3 commit 7f05bdf

1 file changed

Lines changed: 18 additions & 2 deletions

File tree

src/python_inspector/package_data.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,22 @@ async def get_pypi_data_from_purl(
8686
valid_distribution_urls.insert(0, wheel_url)
8787

8888
urls = {url.get("url"): url for url in response.get("urls") or []}
89+
90+
def remove_credentials_from_url(url: str) -> str:
    """
    Return ``url`` with any ``user[:password]@`` userinfo removed.

    Only the userinfo prefix of the network location is dropped; the host
    (including the square brackets of an IPv6 literal and its original
    letter case), the port and every other URL component are kept as they
    are. A URL that carries no credentials, or an empty value, is returned
    unchanged.
    """
    if not url:
        return url

    parsed = urlparse(url)

    # Everything before the last "@" in the netloc is userinfo. Splitting the
    # netloc directly (rather than rebuilding it from ``hostname``/``port``)
    # avoids losing IPv6 brackets, lower-casing the host, concatenating onto a
    # ``None`` hostname, and the ValueError raised by ``port`` on a bad port.
    _userinfo, separator, host_and_port = parsed.netloc.rpartition("@")
    if not separator:
        # No credentials present: nothing to sanitize.
        return url

    return urlunparse(parsed._replace(netloc=host_and_port))
104+
89105
# iterate over the valid distribution urls and return the first
90106
# one that is matching.
91107
for dist_url in valid_distribution_urls:
@@ -99,12 +115,12 @@ async def get_pypi_data_from_purl(
99115
primary_language="Python",
100116
description=get_description(info),
101117
homepage_url=homepage_url,
102-
api_data_url=api_url,
118+
api_data_url=remove_credentials_from_url(api_url),
103119
bug_tracking_url=bug_tracking_url,
104120
code_view_url=code_view_url,
105121
license_expression=info.get("license_expression"),
106122
declared_license=get_declared_license(info),
107-
download_url=dist_url,
123+
download_url=remove_credentials_from_url(dist_url),
108124
size=url_data.get("size"),
109125
md5=digests.get("md5") or url_data.get("md5_digest"),
110126
sha256=digests.get("sha256"),

0 commit comments

Comments
 (0)