Skip to content

Commit 8c726cb

Browse files
aignasrickeylev
authored andcommitted
fix(pypi): skip index lookups when all package overrides are specified (bazel-contrib#3710)
When index_url_overrides is provided for all packages, we no longer need to call the index at all. This improves performance and aligns with the expected behavior where overrides should be sufficient. Just to note, currently Pytorch, PyPI, Artifactory have the root index pages, whilst GAR does not. Fixes bazel-contrib#3709 (cherry picked from commit 208ca88)
1 parent 8cc468b commit 8c726cb

3 files changed

Lines changed: 59 additions & 12 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ Other changes:
8181
we will from now on fetch the lists of available packages on each index. The
8282
used package mappings will be written as facts to the `MODULE.bazel.lock` file
8383
on supported bazel versions and it should be done at most once. As a result,
84-
per-package {obj}`experimental_index_url_overrides` is no longer needed . What
84+
per-package {obj}`experimental_index_url_overrides` is no longer needed, but
85+
if specified, it needs to be provided for all packages not on the default index. What
8586
is more, the flags for `--index_url` and `--extra-index-url` now behave in the
8687
same way as in `uv` or `pip`, i.e. we default to `--index-url` if the package
8788
is not found in `--extra-index-url`. Fixes

python/private/pypi/simpleapi_download.bzl

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ def simpleapi_download(
4343
attr: Contains the parameters for the download. They are grouped into a
4444
struct for better clarity. It must have attributes:
4545
* index_url: str, the index, or if `extra_index_urls` are passed, the default index.
46-
* index_url_overrides: dict[str, str], the index overrides for
47-
separate packages.
46+
* index_url_overrides: dict[str, str], the index overrides for separate packages.
4847
* extra_index_urls: Will be looked at in the order they are defined and the first match
4948
wins. This is similar to what uv does, see
5049
https://docs.astral.sh/uv/concepts/indexes/#searching-across-multiple-indexes.
@@ -132,16 +131,22 @@ def simpleapi_download(
132131
return contents
133132

134133
def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sources, read_simpleapi, attr, block, _fail = fail, **kwargs):
134+
if index_url_overrides:
135+
# Let's not call the index at all and just assume that all of the overrides have been
136+
# specified.
137+
return {
138+
pkg: _normalize_url("{}/{}/".format(
139+
index_url_overrides.get(pkg, default_index),
140+
pkg.replace("_", "-"), # Use the official normalization for URLs
141+
))
142+
for pkg in sources
143+
}
144+
135145
downloads = {}
136146
results = {}
137147

138148
# Ensure the value is not frozen
139149
index_urls = [] + (index_urls or [])
140-
for extra in index_url_overrides.values():
141-
if extra not in index_urls:
142-
index_urls.append(extra)
143-
144-
index_urls = index_urls or []
145150
if default_index not in index_urls:
146151
index_urls.append(default_index)
147152

@@ -174,10 +179,6 @@ def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sourc
174179
# and in the outer function process merging of the results.
175180
continue
176181

177-
if index_url_overrides.get(pkg, index_url) != index_url:
178-
# we should not use this index for the package
179-
continue
180-
181182
found = result.output.get(pkg)
182183
if not found:
183184
continue

tests/pypi/simpleapi_download/simpleapi_download_tests.bzl

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,51 @@ def _test_download_url_parallel(env):
273273

274274
_tests.append(_test_download_url_parallel)
275275

276+
def _test_download_url_parallel_with_overrides(env):
277+
downloads = {}
278+
reads = [
279+
"",
280+
"",
281+
"",
282+
]
283+
284+
def download(url, output, **kwargs):
285+
_ = kwargs # buildifier: disable=unused-variable
286+
downloads[url[0]] = output
287+
return struct(wait = lambda: struct(success = True))
288+
289+
simpleapi_download(
290+
ctx = struct(
291+
getenv = {}.get,
292+
download = download,
293+
report_progress = lambda _: None,
294+
# We will first add a download to the list, so this is a poor man's `next(foo)`
295+
# implementation. We use 2 because we will enqueue 2 downloads in parallel.
296+
read = lambda i: reads[len(downloads) - 2],
297+
path = lambda i: "path/for/" + i,
298+
),
299+
attr = struct(
300+
index_url_overrides = {
301+
"bar": "https://example.com/extra/simple/",
302+
},
303+
index_url = "https://example.com/default/simple/",
304+
extra_index_urls = [],
305+
sources = {"bar": None, "baz": None, "foo": None},
306+
envsubst = [],
307+
),
308+
cache = pypi_cache(),
309+
parallel_download = True,
310+
get_auth = lambda ctx, urls, ctx_attr: struct(),
311+
)
312+
313+
env.expect.that_dict(downloads).contains_exactly({
314+
"https://example.com/default/simple/baz/": "path/for/https___example_com_default_simple_baz.html",
315+
"https://example.com/default/simple/foo/": "path/for/https___example_com_default_simple_foo.html",
316+
"https://example.com/extra/simple/bar/": "path/for/https___example_com_extra_simple_bar.html",
317+
})
318+
319+
_tests.append(_test_download_url_parallel_with_overrides)
320+
276321
def _test_download_envsubst_url(env):
277322
downloads = {}
278323
reads = [

0 commit comments

Comments
 (0)