-
-
Notifications
You must be signed in to change notification settings - Fork 690
Expand file tree
/
Copy pathwhl_extract.bzl
More file actions
130 lines (109 loc) · 4.82 KB
/
whl_extract.bzl
File metadata and controls
130 lines (109 loc) · 4.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""A simple whl extractor."""
load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config")
load("//python/private:repo_utils.bzl", "repo_utils")
load(":whl_metadata.bzl", "find_whl_metadata")
def whl_extract(rctx, *, whl_path, logger):
    """Unpack a wheel into the repository using only Starlark.

    The archive is extracted into `site-packages`, an `INSTALLER` marker is
    written into the wheel's `.dist-info` directory and, if the wheel ships a
    `<prefix>.data` directory, its known sub-directories are merge-moved to
    their install locations before the `.data` directory is deleted.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path to extract.
        logger: The logger to use
    """
    site_packages = rctx.path("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = site_packages,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )
    _maybe_fix_permissions(rctx, whl_path = whl_path, logger = logger)

    # The metadata file sits directly inside the <prefix>.dist-info dir.
    dist_info_dir = find_whl_metadata(
        install_dir = site_packages,
        logger = logger,
    ).dirname
    rctx.file(
        dist_info_dir.get_child("INSTALLER"),
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # Derive the sibling <prefix>.data dir from the <prefix>.dist-info dir name.
    wheel_prefix = dist_info_dir.basename[:-len(".dist-info")]
    data_dir = dist_info_dir.dirname.get_child(wheel_prefix + ".data")
    if not data_dir.exists:
        # Nothing to relocate for wheels without a .data dir.
        return

    # Maps each `.data` sub-directory to the repository directory that receives
    # its contents.
    data_layout = {
        # https://docs.python.org/3/library/sysconfig.html#posix-prefix
        # We are taking this from the legacy whl installer config
        "data": "data",
        "headers": "include",
        # `platlib` and `purelib` both land in `site-packages`, which has
        # already been populated by the extraction above, so directories may
        # collide and have to be merged, potentially several levels deep.
        # This has to stay reasonably efficient because some packages put
        # everything explicitly under `platlib` or `purelib` instead of the
        # top level (e.g. libclang wheel).
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for data_key, install_subdir in data_layout.items():
        data_subdir = data_dir.get_child(data_key)
        if not data_subdir.exists:
            # The prefix does not exist in the wheel, we can continue
            continue

        install_dir = rctx.path(install_subdir)
        repo_utils.mkdir(rctx, install_dir)
        for (move_from, move_to) in merge_trees(data_subdir, install_dir):
            logger.debug(lambda: "Renaming: {} -> {}".format(move_from, move_to))
            repo_utils.rename(rctx, move_from, move_to)

    # Ensure that there is no data dir left
    rctx.delete(data_dir)
# TODO: This can be removed when Bazel 8.6+ is the minimum supported version.
def _maybe_fix_permissions(rctx, *, whl_path, logger):
    """Best-effort repair of file permissions after extracting a wheel.

    Some wheels have files without read permissions set, which causes errors
    when trying to read them later. The fix is applied to `.` (the root
    directory) so that everything in bin/, site-packages/, etc. is readable
    and executable where appropriate. Nothing is done on Windows.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path that was extracted; only used to label the
            operation.
        logger: The logger to use
    """
    if repo_utils.get_platforms_os_name(rctx) == "windows":
        return

    # On Unix-like systems, recursively add read permissions to all files
    # and ensure directories are traversable (need execute permission).
    chmod_result = repo_utils.execute_unchecked(
        rctx,
        op = "Fixing wheel permissions {}".format(whl_path),
        arguments = ["chmod", "-R", "a+rX", "."],
        logger = logger,
    )
    if chmod_result.return_code == 0:
        return

    # It's possible chmod is not available or the filesystem doesn't support it.
    # This is fine, we just want to try to fix permissions if possible, so only
    # warn instead of failing.
    logger.warn(lambda: "Failed to fix file permissions: {}".format(chmod_result.stderr))
def merge_trees(src, dest):
    """Merge src into the destination path.

    This plans a merge-move of `src` into `dest`; it does not move anything
    itself, it only returns the renames to perform. The trees are walked one
    level at a time: an entry whose destination does not exist yet is scheduled
    to be moved as a whole, and directories that exist on both sides are
    descended into.

    Fails if there are collisions (the destination exists and either side is
    not a directory) or if the directory depth exceeds 10000.

    Args:
        src: {type}`path` a src path to rename.
        dest: {type}`path` a dest path to rename to.

    Returns:
        A list of tuples for src and destination paths.
    """
    ret = []
    remaining = [(src, dest)]
    collisions = []

    # Starlark has neither recursion nor `while` loops, so the walk is a
    # bounded `for` loop that processes one directory level per iteration.
    for _ in range(10000):
        if collisions or not remaining:
            break

        tmp = []
        for (s, d) in remaining:
            if not d.exists:
                # Destination is free: the whole entry can be moved as-is.
                ret.append((s, d))
                continue

            if not s.is_dir or not d.is_dir:
                # Destination exists and at least one side is a file.
                collisions.append(s)
                continue

            # Both sides are directories: merge their children on the next level.
            for file_or_dir in s.readdir():
                tmp.append((file_or_dir, d.get_child(file_or_dir.basename)))

        remaining = tmp

    # Report collisions first. The loop stops as soon as a level produced a
    # collision, and sibling directories on that level may already have queued
    # more work in `remaining`; checking `remaining` first would misreport the
    # collision as an exceeded depth limit.
    if collisions:
        fail("Detected collisions between {} and {}: {}".format(src, dest, collisions))

    if remaining:
        fail("Exceeded maximum directory depth of 10000 during tree merge.")

    return ret