Skip to content

Commit 84ccbb8

Browse files
committed
basic impl to optimize namespace packages
1 parent 41f91e9 commit 84ccbb8

2 files changed

Lines changed: 91 additions & 0 deletions

File tree

python/private/venv_runfiles.bzl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ load(
1313
"VenvSymlinkEntry",
1414
"VenvSymlinkKind",
1515
)
16+
load(":py_internal.bzl", "py_internal")
1617

1718
def create_venv_app_files(ctx, deps, venv_dir_map):
1819
"""Creates the tree of app-specific files for a venv for a binary.
@@ -253,6 +254,9 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root):
253254
# List of (File, str venv_path) tuples
254255
files_left_to_link = []
255256

257+
# dict[str dirname, bool is_namespace_package]
258+
namespace_package_dirs = {}
259+
256260
# We want to minimize the number of files symlinked. Ideally, only the
257261
# top-level directories are symlinked. Unfortunately, shared libraries
258262
# complicate matters: if a shared library's directory is linked, then the
@@ -293,6 +297,29 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root):
293297
else:
294298
files_left_to_link.append((src, venv_path))
295299

300+
top_level_dirname, _, tail = venv_path.partition("/")
301+
if (
302+
# If it's already known not to be directly linkable, there's nothing to do
303+
not cannot_be_linked_directly.get(top_level_dirname, False) and
304+
# If it's already known not to be an implicit namespace package, then skip
305+
namespace_package_dirs.get(top_level_dirname, True) and
306+
# It must be an importable name to be an implicit namespace package
307+
py_internal.regex_match(top_level_dirname, "(?U)\\w+")
308+
):
309+
namespace_package_dirs.setdefault(top_level_dirname, True)
310+
311+
# Looking for `__init__.` isn't 100% correct, as it'll match e.g.
312+
# `__init__.pyi`, but it's close enough.
313+
if "/" not in tail and tail.startswith("__init__."):
314+
namespace_package_dirs[top_level_dirname] = False
315+
316+
# We treat namespace packages as a hint that other distributions may
317+
# install into the same directory. As such, we avoid linking them directly
318+
# so that conflicts don't have to be merged later.
319+
for dirname, is_namespace_package in namespace_package_dirs.items():
320+
if is_namespace_package:
321+
cannot_be_linked_directly[dirname] = True
322+
296323
# At this point, venv_symlinks has entries for the shared libraries
297324
# and cannot_be_linked_directly has the directories that cannot be
298325
# directly linked. Next, we loop over the remaining files and group

tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def _test_optimized_grouping_single_toplevel(name):
218218
empty_files(
219219
name = name + "_files",
220220
paths = [
221+
"site-packages/pkg2/__init__.py",
221222
"site-packages/pkg2/a.txt",
222223
"site-packages/pkg2/b_mod.so",
223224
],
@@ -247,6 +248,7 @@ def _test_optimized_grouping_single_toplevel_impl(env, target):
247248
"pkg2",
248249
link_to_path = rr + "pkg2",
249250
files = [
251+
"tests/venv_site_packages_libs/app_files_building/site-packages/pkg2/__init__.py",
250252
"tests/venv_site_packages_libs/app_files_building/site-packages/pkg2/a.txt",
251253
"tests/venv_site_packages_libs/app_files_building/site-packages/pkg2/b_mod.so",
252254
],
@@ -263,6 +265,68 @@ def _test_optimized_grouping_single_toplevel_impl(env, target):
263265
# The point of the optimization is to avoid having to merge conflicts.
264266
env.expect.that_collection(conflicts).contains_exactly([])
265267

268+
def _test_optimized_grouping_implicit_namespace_packages(name):
269+
empty_files(
270+
name = name + "_files",
271+
paths = [
272+
"site-packages/namespace/part1/foo.py",
273+
"site-packages/namespace/part2/bar.py",
274+
"site-packages/namespace-1.0.dist-info/METADATA",
275+
],
276+
)
277+
analysis_test(
278+
name = name,
279+
impl = _test_optimized_grouping_implicit_namespace_packages_impl,
280+
target = name + "_files",
281+
)
282+
283+
_tests.append(_test_optimized_grouping_implicit_namespace_packages)
284+
285+
def _test_optimized_grouping_implicit_namespace_packages_impl(env, target):
286+
test_ctx = _ctx(workspace_name = env.ctx.workspace_name)
287+
entries = get_venv_symlinks(
288+
test_ctx,
289+
target.files.to_list(),
290+
package = "pkg3",
291+
version_str = "1.0",
292+
site_packages_root = env.ctx.label.package + "/site-packages",
293+
)
294+
actual = _venv_symlinks_from_entries(entries)
295+
296+
rr = "{}/{}/site-packages/".format(test_ctx.workspace_name, env.ctx.label.package)
297+
expected = [
298+
_venv_symlink(
299+
"namespace/part1",
300+
link_to_path = rr + "namespace/part1",
301+
files = [
302+
"tests/venv_site_packages_libs/app_files_building/site-packages/namespace/part1/foo.py",
303+
],
304+
),
305+
_venv_symlink(
306+
"namespace/part2",
307+
link_to_path = rr + "namespace/part2",
308+
files = [
309+
"tests/venv_site_packages_libs/app_files_building/site-packages/namespace/part2/bar.py",
310+
],
311+
),
312+
_venv_symlink(
313+
"namespace-1.0.dist-info",
314+
link_to_path = rr + "namespace-1.0.dist-info",
315+
files = [
316+
"tests/venv_site_packages_libs/app_files_building/site-packages/namespace-1.0.dist-info/METADATA",
317+
],
318+
),
319+
]
320+
expected = sorted(expected, key = lambda e: (e.link_to_path, e.venv_path))
321+
env.expect.that_collection(
322+
actual,
323+
).contains_exactly(expected)
324+
325+
_, conflicts = build_link_map(test_ctx, entries, return_conflicts = True)
326+
327+
# The point of the optimization is to avoid having to merge conflicts.
328+
env.expect.that_collection(conflicts).contains_exactly([])
329+
266330
def _test_package_version_filtering(name):
267331
analysis_test(
268332
name = name,

0 commit comments

Comments
 (0)