Skip to content

Commit a0ff18d

Browse files
committed
exp(pypi): store necessary facts fetched from SimpleAPI
This allows us to cache the facts fetched from the SimpleAPI between runs.
1 parent 717b943 commit a0ff18d

5 files changed

Lines changed: 396 additions & 96 deletions

File tree

python/private/pypi/extension.bzl

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
223223
# versions.
224224
pip_hub_map = {}
225225
simpleapi_cache = {}
226+
facts = {}
226227

227228
for mod in module_ctx.modules:
228229
for pip_attr in mod.tags.parse:
@@ -240,6 +241,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
240241
evaluate_markers_fn = kwargs.get("evaluate_markers", None),
241242
available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),
242243
logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name),
244+
facts = facts,
243245
)
244246
pip_hub_map[pip_attr.hub_name] = builder
245247
elif pip_hub_map[hub_name].module_name != mod.name:
@@ -286,6 +288,25 @@ You cannot use both the additive_build_content and additive_build_content_file a
286288
hub_group_map[hub.name] = out.group_map
287289
hub_whl_map[hub.name] = out.whl_map
288290

291+
facts = {
292+
"fact_version": facts.get("fact_version"),
293+
} | {
294+
index_url: {
295+
k: _sorted_dict(f.get(k))
296+
for k in [
297+
"dist_filenames",
298+
"dist_hashes",
299+
"dist_yanked",
300+
]
301+
if f.get(k)
302+
}
303+
for index_url, f in facts.items()
304+
if index_url not in ["fact_version"]
305+
}
306+
if len(facts) == 1:
307+
# only version is present, skip writing
308+
facts = None
309+
289310
return struct(
290311
config = config,
291312
exposed_packages = exposed_packages,
@@ -294,6 +315,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
294315
hub_whl_map = hub_whl_map,
295316
whl_libraries = whl_libraries,
296317
whl_mods = whl_mods,
318+
facts = facts,
297319
platform_config_settings = {
298320
hub_name: {
299321
platform_name: sorted([str(Label(cv)) for cv in p.config_settings])
@@ -303,6 +325,12 @@ You cannot use both the additive_build_content and additive_build_content_file a
303325
},
304326
)
305327

328+
def _sorted_dict(d):
    """Return a copy of a dict with its entries ordered by key.

    Args:
        d: {type}`dict | None` the mapping to copy; may be empty or `None`.

    Returns:
        A new dict with the same items inserted in sorted order, or an empty
        dict when `d` is falsy.
    """
    if not d:
        return {}

    # Insertion order is what changes here; presumably this keeps the stored
    # facts stable between runs -- confirm against the consumer of `facts`.
    return dict(sorted(d.items()))
333+
306334
def _pip_impl(module_ctx):
307335
"""Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.
308336
@@ -391,9 +419,11 @@ def _pip_impl(module_ctx):
391419
groups = mods.hub_group_map.get(hub_name),
392420
)
393421

394-
return module_ctx.extension_metadata(
395-
reproducible = True,
396-
)
422+
kwargs = {"reproducible": True}
423+
if mods.facts:
424+
kwargs["facts"] = mods.facts
425+
426+
return module_ctx.extension_metadata(**kwargs)
397427

398428
_default_attrs = {
399429
"arch_name": attr.string(

python/private/pypi/hub_builder.bzl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def hub_builder(
3131
simpleapi_download_fn,
3232
evaluate_markers_fn,
3333
logger,
34+
facts = None,
3435
simpleapi_cache = {}):
3536
"""Return a hub builder instance
3637
@@ -47,6 +48,7 @@ def hub_builder(
4748
used during the `repository_rule` and must be always compatible with the host.
4849
simpleapi_download_fn: the function used to download from SimpleAPI.
4950
simpleapi_cache: the cache for the download results.
51+
facts: the facts if they are available.
5052
logger: the logger for this builder.
5153
"""
5254

@@ -69,6 +71,7 @@ def hub_builder(
6971
_platforms = {},
7072
_group_name_by_whl = {},
7173
_get_index_urls = {},
74+
_facts = facts,
7275
_use_downloader = {},
7376
_simpleapi_cache = simpleapi_cache,
7477
# instance constants
@@ -335,11 +338,16 @@ def _set_get_index_urls(self, pip_attr):
335338
d
336339
for d in distributions
337340
if _use_downloader(self, python_version, d)
338-
],
341+
] if type(distributions) == "list" else {
342+
d: versions
343+
for d, versions in distributions.items()
344+
if _use_downloader(self, python_version, d)
345+
},
339346
envsubst = pip_attr.envsubst,
340347
# Auth related info
341348
netrc = pip_attr.netrc,
342349
auth_patterns = pip_attr.auth_patterns,
350+
facts = self._facts,
343351
),
344352
cache = self._simpleapi_cache,
345353
parallel_download = pip_attr.parallel_download,

python/private/pypi/parse_requirements.bzl

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,15 @@ def parse_requirements(
170170

171171
index_urls = {}
172172
if get_index_urls:
173-
index_urls = get_index_urls(
174-
ctx,
175-
# Use list({}) as a way to have a set
176-
list({
177-
req.distribution: None
178-
for reqs in requirements_by_platform.values()
179-
for req in reqs.values()
180-
if not req.srcs.url
181-
}),
182-
)
173+
distributions = {}
174+
for reqs in requirements_by_platform.values():
175+
for req in reqs.values():
176+
if req.srcs.url:
177+
continue
178+
179+
distributions.setdefault(req.distribution, []).append(req.srcs.version)
180+
181+
index_urls = get_index_urls(ctx, distributions)
183182

184183
ret = []
185184
for name, reqs in sorted(requirements_by_platform.items()):

python/private/pypi/parse_simpleapi_html.bzl

Lines changed: 57 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
Parse SimpleAPI HTML in Starlark.
1717
"""
1818

19-
def parse_simpleapi_html(*, url, content):
19+
def parse_simpleapi_html(*, url, content, distribution = None, return_absolute = True):
2020
"""Get the package URLs for given shas by parsing the Simple API HTML.
2121
2222
Args:
2323
url(str): The URL that the HTML content can be downloaded from.
24+
distribution(str): TODO
2425
content(str): The Simple API HTML content.
26+
return_absolute: {type}`bool` TODO
2527
2628
Returns:
2729
A list of structs with:
@@ -33,6 +35,9 @@ def parse_simpleapi_html(*, url, content):
3335
present, then the 'metadata_url' is also present. Defaults to "".
3436
* metadata_url: The URL for the METADATA if we can download it. Defaults to "".
3537
"""
38+
if not distribution:
39+
_, _, distribution = url.strip("/").rpartition("/")
40+
3641
sdists = {}
3742
whls = {}
3843
lines = content.split("<a href=\"")
@@ -55,7 +60,8 @@ def parse_simpleapi_html(*, url, content):
5560
sha256s_by_version = {}
5661
for line in lines[1:]:
5762
dist_url, _, tail = line.partition("#sha256=")
58-
dist_url = _absolute_url(url, dist_url)
63+
if return_absolute:
64+
dist_url = absolute_url(index_url = url, url = dist_url)
5965

6066
sha256, _, tail = tail.partition("\"")
6167

@@ -64,7 +70,7 @@ def parse_simpleapi_html(*, url, content):
6470

6571
head, _, _ = tail.rpartition("</a>")
6672
maybe_metadata, _, filename = head.rpartition(">")
67-
version = _version(filename)
73+
version = pkg_version(filename, distribution)
6874
sha256s_by_version.setdefault(version, []).append(sha256)
6975

7076
metadata_sha256 = ""
@@ -79,13 +85,17 @@ def parse_simpleapi_html(*, url, content):
7985
break
8086

8187
if filename.endswith(".whl"):
88+
metadata_url = metadata_url or ""
89+
if return_absolute and metadata_url:
90+
metadata_url = absolute_url(index_url = url, url = metadata_url)
91+
8292
whls[sha256] = struct(
8393
filename = filename,
8494
version = version,
8595
url = dist_url,
8696
sha256 = sha256,
8797
metadata_sha256 = metadata_sha256,
88-
metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
98+
metadata_url = metadata_url,
8999
yanked = yanked,
90100
)
91101
else:
@@ -110,18 +120,36 @@ _SDIST_EXTS = [
110120
".zip",
111121
]
112122

113-
def _version(filename):
123+
def pkg_version(filename, distribution = None):
124+
"""pkg_version extracts the version from the filename.
125+
126+
TODO: move this to a different location
127+
128+
Args:
129+
filename: TODO
130+
distribution: TODO
131+
132+
Returns:
133+
version string
134+
"""
114135
# See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
115136

116-
_, _, tail = filename.partition("-")
117-
version, _, _ = tail.partition("-")
118-
if version != tail:
119-
# The format is {name}-{version}-{whl_specifiers}.whl
120-
return version
137+
if filename.endswith(".whl"):
138+
_, _, tail = filename.partition("-")
139+
version, _, _ = tail.partition("-")
140+
if version != tail:
141+
# The format is {name}-{version}-{whl_specifiers}.whl
142+
return version
143+
144+
if not distribution:
145+
fail("for parsing sdists passing 'distribution' is mandatory")
121146

122147
# NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path
123148

124149
# {name}-{version}.{ext}
150+
# TODO @aignas 2026-01-20: test for handling dashes in names, can't think of any other way to
151+
# get the version from the filename but to pass in the distribution name to this function.
152+
version = filename[len(distribution) + 1:]
125153
for ext in _SDIST_EXTS:
126154
version, _, _ = version.partition(ext) # build or name
127155

@@ -147,26 +175,35 @@ def _is_downloadable(url):
147175
"""
148176
return url.startswith("http://") or url.startswith("https://") or url.startswith("file://")
149177

150-
def _absolute_url(index_url, candidate):
151-
if candidate == "":
152-
return candidate
178+
def absolute_url(*, index_url, url):
    """Return an absolute URL in case the url is not absolute.

    Args:
        index_url: {type}`str` The index_url.
        url: {type}`str` The url of the artifact.

    Returns:
        `url` if it is empty or already absolute, or an absolute URL based on
        the `index_url` otherwise.
    """
    if url == "":
        return url

    if _is_downloadable(url):
        return url

    if url.startswith("/"):
        # absolute path
        # NOTE(review): relies on _get_root_directory returning the index root
        # without a trailing slash -- confirm against its definition.
        root_directory = _get_root_directory(index_url)
        return "{}{}".format(root_directory, url)

    # Count only the *leading* `..` path segments. Splitting the whole string
    # on ".." would also split on dots that appear later in the path or in the
    # file name (e.g. `../files/foo..bar.whl`), walking up too many directories
    # and truncating the file name.
    segments = url.split("/")
    up_levels = 0
    for segment in segments:
        if segment != "..":
            break
        up_levels += 1

    if up_levels:
        # relative path with up references
        for _ in range(up_levels):
            index_url, _, _ = index_url.rstrip("/").rpartition("/")

        remainder = "/".join(segments[up_levels:])
        return "{}/{}".format(index_url, remainder.strip("/"))

    # relative path without up-references
    return "{}/{}".format(index_url.rstrip("/"), url)

0 commit comments

Comments
 (0)