Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions python/private/pypi/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -418,13 +418,19 @@ bzl_library(
srcs = ["simpleapi_download.bzl"],
deps = [
":parse_simpleapi_html_bzl",
":urllib_bzl",
"//python/private:auth_bzl",
"//python/private:normalize_name_bzl",
"//python/private:text_util_bzl",
"@bazel_features//:features",
],
)

# Starlark library for urllib.bzl, which provides the `urllib` helpers used by
# simpleapi_download.bzl (e.g. urllib.strip_empty_path_segments); listed as a
# dep of the simpleapi_download and whl-related bzl_library targets above/below.
bzl_library(
    name = "urllib_bzl",
    srcs = ["urllib.bzl"],
)

bzl_library(
name = "version_from_filename_bzl",
srcs = ["version_from_filename.bzl"],
Expand Down Expand Up @@ -474,6 +480,7 @@ bzl_library(
":patch_whl_bzl",
":pep508_requirement_bzl",
":pypi_repo_utils_bzl",
":urllib_bzl",
":whl_extract_bzl",
":whl_metadata_bzl",
":whl_target_platforms_bzl",
Expand Down
4 changes: 4 additions & 0 deletions python/private/pypi/hub_builder.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,7 @@ def _create_whl_repos(
for src in whl.srcs:
repo = _whl_repo(
src = src,
index_url = whl.index_url,
whl_library_args = whl_library_args,
download_only = pip_attr.download_only,
netrc = self._config.netrc or pip_attr.netrc,
Expand Down Expand Up @@ -678,6 +679,7 @@ def _whl_repo(
*,
src,
whl_library_args,
index_url,
is_multiple_versions,
download_only,
netrc,
Expand Down Expand Up @@ -731,6 +733,8 @@ def _whl_repo(
args["netrc"] = netrc
if auth_patterns:
args["auth_patterns"] = auth_patterns
if index_url:
args["index_url"] = index_url

args["urls"] = [src.url]
args["sha256"] = src.sha256
Expand Down
8 changes: 5 additions & 3 deletions python/private/pypi/parse_requirements.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,11 @@ def parse_requirements(
for p in r.target_platforms:
requirement_target_platforms[p] = None

pkg_sources = index_urls.get(name)
package_srcs = _package_srcs(
name = name,
reqs = reqs,
index_urls = index_urls,
pkg_sources = pkg_sources,
platforms = platforms,
extract_url_srcs = extract_url_srcs,
logger = logger,
Expand All @@ -216,6 +217,7 @@ def parse_requirements(
name = normalize_name(name),
is_exposed = len(requirement_target_platforms) == len(requirements),
is_multiple_versions = len(reqs.values()) > 1,
index_url = pkg_sources.index_url if pkg_sources else "",
srcs = package_srcs,
)
ret.append(item)
Expand All @@ -234,7 +236,7 @@ def _package_srcs(
*,
name,
reqs,
index_urls,
pkg_sources,
platforms,
logger,
extract_url_srcs):
Expand All @@ -253,7 +255,7 @@ def _package_srcs(
dist, can_fallback = _add_dists(
requirement = r,
target_platform = platforms.get(target_platform),
index_urls = index_urls.get(name),
index_urls = pkg_sources,
logger = logger,
)
logger.debug(lambda: "The whl dist is: {}".format(dist.filename if dist else dist))
Expand Down
50 changes: 2 additions & 48 deletions python/private/pypi/parse_simpleapi_html.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@ Parse SimpleAPI HTML in Starlark.

load(":version_from_filename.bzl", "version_from_filename")

def parse_simpleapi_html(*, url, content):
def parse_simpleapi_html(*, content):
"""Get the package URLs for given shas by parsing the Simple API HTML.

Args:
url(str): The URL that the HTML content can be downloaded from.
content(str): The Simple API HTML content.

Returns:
Expand Down Expand Up @@ -57,7 +56,6 @@ def parse_simpleapi_html(*, url, content):
sha256s_by_version = {}
for line in lines[1:]:
dist_url, _, tail = line.partition("#sha256=")
dist_url = _absolute_url(url, dist_url)

sha256, _, tail = tail.partition("\"")

Expand Down Expand Up @@ -87,7 +85,7 @@ def parse_simpleapi_html(*, url, content):
url = dist_url,
sha256 = sha256,
metadata_sha256 = metadata_sha256,
metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
metadata_url = metadata_url,
yanked = yanked,
)
else:
Expand All @@ -106,47 +104,3 @@ def parse_simpleapi_html(*, url, content):
whls = whls,
sha256s_by_version = sha256s_by_version,
)

def _get_root_directory(url):
    """Return the scheme-and-host root of *url* (e.g. "https://host").

    Fails when *url* does not contain a "://" scheme separator.
    """
    scheme, sep, remainder = url.partition("://")
    if not sep:
        fail("Invalid URL format")

    # Everything up to the first "/" after the scheme is the host part;
    # if there is no "/", the whole remainder is the host.
    host = remainder.partition("/")[0]
    return "{}://{}".format(scheme, host)

def _is_downloadable(url):
    """Report whether the Bazel downloader would accept *url*.

    Mirrors Bazel's HttpUtils::isUrlSupportedByDownloader.
    """
    for prefix in ("http://", "https://", "file://"):
        if url.startswith(prefix):
            return True
    return False

def _absolute_url(index_url, candidate):
    """Resolve *candidate* (an href from a Simple API page) against *index_url*.

    Args:
        index_url: The URL the HTML page was downloaded from.
        candidate: The href value: empty, already absolute, host-absolute
            ("/..."), relative with up-references ("../..."), or plain relative.

    Returns:
        A URL the Bazel downloader can fetch, or "" when candidate is empty.
    """
    if candidate == "":
        return candidate

    # Already a URL Bazel's downloader accepts
    # (Bazel's HttpUtils::isUrlSupportedByDownloader).
    for prefix in ("http://", "https://", "file://"):
        if candidate.startswith(prefix):
            return candidate

    if candidate.startswith("/"):
        # Host-absolute path: join with the scheme://host root of index_url.
        scheme, sep, remainder = index_url.partition("://")
        if not sep:
            fail("Invalid URL format")
        host = remainder.partition("/")[0]
        return "{}://{}{}".format(scheme, host, candidate)

    if candidate.startswith(".."):
        # Relative path with up-references: drop one trailing path segment
        # from index_url per "..".
        parts = candidate.split("..")
        base = index_url
        for _ in range(len(parts) - 1):
            base = base.rstrip("/").rpartition("/")[0]
        return "{}/{}".format(base, parts[-1].strip("/"))

    # Plain relative path without up-references.
    return "{}/{}".format(index_url.rstrip("/"), candidate)
10 changes: 9 additions & 1 deletion python/private/pypi/pypi_cache.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ In the future the same will be used to:
"""

def pypi_cache(store = None):
"""The cache for PyPI index queries."""
"""The cache for PyPI index queries.

Currently the key is of the following structure:
(url, real_url)
"""

# buildifier: disable=uninitialized
self = struct(
Expand All @@ -29,6 +33,10 @@ def _pypi_cache_setdefault(self, key, parsed_result):
key: {type}`str` The cache key, can be any string.
parsed_result: {type}`struct` The result of `parse_simpleapi_html` function.

index_url and distribution are used to write to the MODULE.bazel.lock file as facts
real_index_url and distribution are used to write to the in-memory cache to ensure that there are
no duplicate calls to the PyPI indexes

Returns:
The `parse_result`.
"""
Expand Down
83 changes: 44 additions & 39 deletions python/private/pypi/simpleapi_download.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ load("//python/private:envsubst.bzl", "envsubst")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:text_util.bzl", "render")
load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")
load(":urllib.bzl", "urllib")

def simpleapi_download(
ctx,
Expand Down Expand Up @@ -92,13 +93,14 @@ def simpleapi_download(
sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
for pkg in sources:
pkg_normalized = normalize_name(pkg)
url = urllib.strip_empty_path_segments("{index_url}/{distribution}/".format(
index_url = index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
distribution = pkg,
))
result = read_simpleapi(
ctx = ctx,
url = "{}/{}/".format(
index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
pkg,
),
attr = attr,
url = url,
cache = cache,
get_auth = get_auth,
**download_kwargs
Expand All @@ -108,9 +110,10 @@ def simpleapi_download(
async_downloads[pkg] = struct(
pkg_normalized = pkg_normalized,
wait = result.wait,
url = url,
)
elif result.success:
contents[pkg_normalized] = result.output
contents[pkg_normalized] = _with_index_url(url, result.output)
found_on_index[pkg] = index_url

if not async_downloads:
Expand All @@ -122,7 +125,7 @@ def simpleapi_download(
result = download.wait()

if result.success:
contents[download.pkg_normalized] = result.output
contents[download.pkg_normalized] = _with_index_url(download.url, result.output)
found_on_index[pkg] = index_url

failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
Expand Down Expand Up @@ -168,14 +171,14 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):

Args:
ctx: The module_ctx or repository_ctx.
url: str, the url parameter that can be passed to ctx.download.
url: {type}`str`, the url parameter that can be passed to ctx.download.
attr: The attribute that contains necessary info for downloading. The
following attributes must be present:
* envsubst: The envsubst values for performing substitutions in the URL.
* netrc: The netrc parameter for ctx.download, see http_file for docs.
* envsubst: {type}`dict[str, str]` for performing substitutions in the URL.
* netrc: The netrc parameter for ctx.download, see {obj}`http_file` for docs.
* auth_patterns: The auth_patterns parameter for ctx.download, see
http_file for docs.
cache: A dict for storing the results.
{obj}`http_file` for docs.
cache: {type}`struct` the `pypi_cache` instance.
get_auth: A function to get auth information. Used in tests.
**download_kwargs: Any extra params to ctx.download.
Note that output and auth will be passed for you.
Expand All @@ -189,9 +192,9 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
# them to ctx.download if we want to correctly handle the relative URLs.
# TODO: Add a test that env subbed index urls do not leak into the lock file.

real_url = strip_empty_path_segments(envsubst(url, attr.envsubst, ctx.getenv))
real_url = urllib.strip_empty_path_segments(envsubst(url, attr.envsubst, ctx.getenv))

cache_key = real_url
cache_key = (url, real_url)
cached_result = cache.get(cache_key)
if cached_result:
return struct(success = True, output = cached_result)
Expand Down Expand Up @@ -225,41 +228,43 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
if download_kwargs.get("block") == False:
# Simulate the same API as ctx.download has
return struct(
wait = lambda: _read_index_result(ctx, download.wait(), output, real_url, cache, cache_key),
wait = lambda: _read_index_result(
ctx,
result = download.wait(),
output = output,
cache = cache,
cache_key = cache_key,
),
)

return _read_index_result(ctx, download, output, real_url, cache, cache_key)

def strip_empty_path_segments(url):
    """Collapse empty path segments (runs of "/") in a URL.

    Public only for testing.

    Args:
        url: The url to normalize.

    Returns:
        The url with empty path segments dropped; a single trailing slash is
        preserved. A url without a scheme is returned unchanged.
    """
    scheme, _, remainder = url.partition("://")
    if not remainder:
        # No scheme separator (or nothing after it): leave the value as-is.
        return url

    segments = [segment for segment in remainder.split("/") if segment]
    trailing = "/" if url.endswith("/") else ""
    return "{}://{}{}".format(scheme, "/".join(segments), trailing)
return _read_index_result(
ctx,
result = download,
output = output,
cache = cache,
cache_key = cache_key,
)

def _read_index_result(ctx, *, result, output, cache, cache_key):
    """Parse a downloaded Simple API page and memoize the parsed result.

    NOTE(review): the captured span interleaved the pre- and post-change
    variants of this function (two `def` lines, two parse_simpleapi_html
    calls, two success returns) from the rendered diff; this reconstructs the
    post-change variant, which matches the keyword-argument call sites above.

    Args:
        ctx: The module_ctx or repository_ctx; only ctx.read is used here.
        result: The ctx.download result (or the awaited async result).
        output: The path the HTML page was downloaded to.
        cache: The `pypi_cache` instance used to deduplicate index queries.
        cache_key: The key to store the parsed page under.

    Returns:
        struct(success = False) when the download or the parse failed,
        otherwise struct(success = True, output = <parse_simpleapi_html result>).
    """
    if not result.success:
        return struct(success = False)

    content = ctx.read(output)

    parsed = parse_simpleapi_html(content = content)
    if not parsed:
        return struct(success = False)

    # Memoize so that repeated queries for the same index page are served
    # from the in-memory cache.
    cache.setdefault(cache_key, parsed)
    return struct(success = True, output = parsed)

def _with_index_url(index_url, values):
    """Return a copy of *values* extended with the index url it came from.

    Args:
        index_url: The url of the Simple API index page that was fetched.
        values: The parsed index result struct, or a falsy value.

    Returns:
        A new struct carrying the same sdists/whls/sha256s_by_version fields
        plus index_url, or *values* unchanged when it is falsy.
    """
    if values:
        return struct(
            index_url = index_url,
            sdists = values.sdists,
            sha256s_by_version = values.sha256s_by_version,
            whls = values.whls,
        )
    return values
Loading