Skip to content

Commit 82418c9

Browse files
committed
feat: write facts about what we find on SimpleAPI to MODULE.bazel.lock
1 parent 353e706 commit 82418c9

8 files changed

Lines changed: 634 additions & 132 deletions

File tree

python/private/pypi/extension.bzl

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ load(":parse_whl_name.bzl", "parse_whl_name")
2727
load(":pep508_env.bzl", "env")
2828
load(":pip_repository_attrs.bzl", "ATTRS")
2929
load(":platform.bzl", _plat = "platform")
30-
load(":simpleapi_download.bzl", "simpleapi_download")
30+
load(":simpleapi_download.bzl", "simpleapi_download", _simpleapi_cache = "simpleapi_cache")
3131
load(":whl_library.bzl", "whl_library")
3232

3333
def _whl_mods_impl(whl_mods_dict):
@@ -224,7 +224,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
224224
# dict[str repo, HubBuilder]
225225
# See `hub_builder.bzl%hub_builder()` for `HubBuilder`
226226
pip_hub_map = {}
227-
simpleapi_cache = {}
227+
simpleapi_cache = _simpleapi_cache(module_ctx)
228228

229229
for mod in module_ctx.modules:
230230
for pip_attr in mod.tags.parse:
@@ -296,6 +296,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
296296
hub_whl_map = hub_whl_map,
297297
whl_libraries = whl_libraries,
298298
whl_mods = whl_mods,
299+
facts = simpleapi_cache.get_facts(),
299300
platform_config_settings = {
300301
hub_name: {
301302
platform_name: sorted([str(Label(cv)) for cv in p.config_settings])
@@ -393,9 +394,11 @@ def _pip_impl(module_ctx):
393394
groups = mods.hub_group_map.get(hub_name),
394395
)
395396

396-
return module_ctx.extension_metadata(
397-
reproducible = True,
398-
)
397+
kwargs = {"reproducible": True}
398+
if mods.facts:
399+
kwargs["facts"] = mods.facts
400+
401+
return module_ctx.extension_metadata(**kwargs)
399402

400403
_default_attrs = {
401404
"arch_name": attr.string(

python/private/pypi/hub_builder.bzl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -395,11 +395,11 @@ def _set_get_index_urls(self, pip_attr):
395395
index_url = pip_attr.experimental_index_url,
396396
extra_index_urls = pip_attr.experimental_extra_index_urls or [],
397397
index_url_overrides = pip_attr.experimental_index_url_overrides or {},
398-
sources = [
399-
d
400-
for d in distributions
398+
sources = {
399+
d: versions
400+
for d, versions in distributions.items()
401401
if _use_downloader(self, python_version, d)
402-
],
402+
},
403403
envsubst = pip_attr.envsubst,
404404
# Auth related info
405405
netrc = pip_attr.netrc,

python/private/pypi/parse_requirements.bzl

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,15 @@ def parse_requirements(
170170

171171
index_urls = {}
172172
if get_index_urls:
173-
index_urls = get_index_urls(
174-
ctx,
175-
# Use list({}) as a way to have a set
176-
list({
177-
req.distribution: None
178-
for reqs in requirements_by_platform.values()
179-
for req in reqs.values()
180-
if not req.srcs.url
181-
}),
182-
)
173+
distributions = {}
174+
for reqs in requirements_by_platform.values():
175+
for req in reqs.values():
176+
if req.srcs.url:
177+
continue
178+
179+
distributions.setdefault(req.distribution, []).append(req.srcs.version)
180+
181+
index_urls = get_index_urls(ctx, distributions)
183182

184183
ret = []
185184
for name, reqs in sorted(requirements_by_platform.items()):

python/private/pypi/parse_simpleapi_html.bzl

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
Parse SimpleAPI HTML in Starlark.
1717
"""
1818

19-
def parse_simpleapi_html(*, url, content):
19+
def parse_simpleapi_html(*, content, distribution):
2020
"""Get the package URLs for given shas by parsing the Simple API HTML.
2121
2222
Args:
23-
url(str): The URL that the HTML content can be downloaded from.
23+
distribution(str): Distribution name for which we are parsing the HTML.
2424
content(str): The Simple API HTML content.
2525
2626
Returns:
@@ -55,16 +55,14 @@ def parse_simpleapi_html(*, url, content):
5555
sha256s_by_version = {}
5656
for line in lines[1:]:
5757
dist_url, _, tail = line.partition("#sha256=")
58-
dist_url = _absolute_url(url, dist_url)
59-
6058
sha256, _, tail = tail.partition("\"")
6159

6260
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
6361
yanked = "data-yanked" in line
6462

6563
head, _, _ = tail.rpartition("</a>")
6664
maybe_metadata, _, filename = head.rpartition(">")
67-
version = _version(filename)
65+
version = pkg_version(filename, distribution)
6866
sha256s_by_version.setdefault(version, []).append(sha256)
6967

7068
metadata_sha256 = ""
@@ -79,13 +77,14 @@ def parse_simpleapi_html(*, url, content):
7977
break
8078

8179
if filename.endswith(".whl"):
80+
metadata_url = metadata_url or ""
8281
whls[sha256] = struct(
8382
filename = filename,
8483
version = version,
8584
url = dist_url,
8685
sha256 = sha256,
8786
metadata_sha256 = metadata_sha256,
88-
metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
87+
metadata_url = metadata_url,
8988
yanked = yanked,
9089
)
9190
else:
@@ -110,18 +109,36 @@ _SDIST_EXTS = [
110109
".zip",
111110
]
112111

113-
def _version(filename):
112+
def pkg_version(filename, distribution = None):
113+
"""pkg_version extracts the version from the filename.
114+
115+
TODO: move this to a different location
116+
117+
Args:
118+
filename: {type}`str` The wheel (`.whl`) or sdist filename to extract the version from.
119+
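distribution: {type}`str` The distribution name; mandatory for sdist filenames, where it is used to strip the leading `{name}-` prefix.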
120+
121+
Returns:
122+
version string
123+
"""
114124
# See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
115125

116-
_, _, tail = filename.partition("-")
117-
version, _, _ = tail.partition("-")
118-
if version != tail:
119-
# The format is {name}-{version}-{whl_specifiers}.whl
120-
return version
126+
if filename.endswith(".whl"):
127+
_, _, tail = filename.partition("-")
128+
version, _, _ = tail.partition("-")
129+
if version != tail:
130+
# The format is {name}-{version}-{whl_specifiers}.whl
131+
return version
132+
133+
if not distribution:
134+
fail("for parsing sdists passing 'distribution' is mandatory")
121135

122136
# NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path
123137

124138
# {name}-{version}.{ext}
139+
# TODO @aignas 2026-01-20: test for handling dashes in names, can't think of any other way to
140+
# get the version from the filename but to pass in the distribution name to this function.
141+
version = filename[len(distribution) + 1:]
125142
for ext in _SDIST_EXTS:
126143
version, _, _ = version.partition(ext) # build or name
127144

@@ -147,26 +164,35 @@ def _is_downloadable(url):
147164
"""
148165
return url.startswith("http://") or url.startswith("https://") or url.startswith("file://")
149166

150-
def _absolute_url(index_url, candidate):
151-
if candidate == "":
152-
return candidate
167+
def absolute_url(*, index_url, url):
168+
"""Return an absolute URL in case the url is not absolute.
169+
170+
Args:
171+
index_url: {type}`str` The index_url.
172+
url: {type}`str` The url of the artifact.
173+
174+
Returns:
175+
`url` if it is absolute, or absolute URL based on the `index_url`.
176+
"""
177+
if url == "":
178+
return url
153179

154-
if _is_downloadable(candidate):
155-
return candidate
180+
if _is_downloadable(url):
181+
return url
156182

157-
if candidate.startswith("/"):
183+
if url.startswith("/"):
158184
# absolute path
159185
root_directory = _get_root_directory(index_url)
160-
return "{}{}".format(root_directory, candidate)
186+
return "{}{}".format(root_directory, url)
161187

162-
if candidate.startswith(".."):
188+
if url.startswith(".."):
163189
# relative path with up references
164-
candidate_parts = candidate.split("..")
190+
candidate_parts = url.split("..")
165191
last = candidate_parts[-1]
166192
for _ in range(len(candidate_parts) - 1):
167193
index_url, _, _ = index_url.rstrip("/").rpartition("/")
168194

169195
return "{}/{}".format(index_url, last.strip("/"))
170196

171197
# relative path without up-references
172-
return "{}/{}".format(index_url.rstrip("/"), candidate)
198+
return "{}/{}".format(index_url.rstrip("/"), url)

0 commit comments

Comments
 (0)