99# See https://aboutcode.org for more information about nexB OSS projects.
1010#
1111
12+ import os
13+ from urllib .parse import urlparse , urlunparse
14+
1215from typing import Dict
1316from typing import List
1417from typing import Optional
@@ -64,10 +67,27 @@ async def get_pypi_data_from_purl(
6467 sdist_url = await get_sdist_download_url (
6568 purl = parsed_purl , repos = repos , python_version = python_version
6669 )
70+
def canonicalize_url(url: str) -> str:
    """
    Return ``url`` with the "." and ".." segments of its path resolved.

    Only the path component is rewritten; scheme, netloc (including any
    credentials), params, query and fragment are passed through as parsed.
    Redundant slashes are collapsed and a trailing slash on a non-root path
    is dropped, as done by ``posixpath.normpath``. A URL without a path is
    returned without a path.
    """
    # Imported locally so this helper does not depend on a module-level
    # import. URL paths always use "/" separators, so the POSIX flavour is
    # used explicitly: os.path.normpath would emit backslashes on Windows.
    import posixpath

    parsed = urlparse(url)

    # normpath("") returns ".", which would turn "https://host" into
    # "https://host/."; leave an empty path untouched.
    if parsed.path:
        parsed = parsed._replace(path=posixpath.normpath(parsed.path))

    return urlunparse(parsed)
85+
6786 if sdist_url :
6887 valid_distribution_urls .append (sdist_url )
6988
7089 valid_distribution_urls = [url for url in valid_distribution_urls if url ]
90+ valid_distribution_urls = list (map (canonicalize_url , valid_distribution_urls ))
7191
7292 # if prefer_source is True then only source distribution is used
7393 # in case of no source distribution available then wheel is used
@@ -83,10 +103,23 @@ async def get_pypi_data_from_purl(
83103 ]
84104 wheel_url = choose_single_wheel (wheel_urls )
85105 if wheel_url :
86- valid_distribution_urls .insert (0 , wheel_url )
106+ valid_distribution_urls .insert (0 , canonicalize_url ( wheel_url ) )
87107
88108 urls = {url .get ("url" ): url for url in response .get ("urls" ) or []}
89109
110+ # Sanitize all URLs that are relative and canonicalize them
111+ urls_sanitized = {}
112+ for url in urls :
113+ value = urls .get (url )
114+
115+ if url .startswith ("https" ):
116+ url_sanitized = canonicalize_url (url )
117+ else :
118+ url_sanitized = canonicalize_url (base_path + url )
119+
120+ urls_sanitized [url_sanitized ] = value
121+
122+
90123 def remove_credentials_from_url (url : str ):
91124 # Parse the URL into its components
92125 parsed = urlparse (url )
@@ -105,10 +138,10 @@ def remove_credentials_from_url(url: str):
105138 # iterate over the valid distribution urls and return the first
106139 # one that is matching.
107140 for dist_url in valid_distribution_urls :
108- if dist_url not in urls :
141+ if dist_url not in urls_sanitized :
109142 continue
110143
111- url_data = urls .get (dist_url )
144+ url_data = urls_sanitized .get (dist_url )
112145 digests = url_data .get ("digests" ) or {}
113146
114147 return PackageData (
0 commit comments