Skip to content

Commit 2c5616b

Browse files
authored
feat(zipapp): add content hash support to __main__-based invocations (#3683)
This adds support for zipapps invoked through `__main__` to extract to a directory based on their content hash. This matches the behavior of the shell-based self-executable bootstrap.
1 parent 6d38770 commit 2c5616b

File tree

9 files changed

+321
-34
lines changed

9 files changed

+321
-34
lines changed

python/private/zipapp/py_zipapp_rule.bzl

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def _is_symlink(f):
1818
else:
1919
return "-1"
2020

21-
def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap):
21+
def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap, runfiles):
2222
venv_python_exe = py_executable.venv_python_exe
2323
if venv_python_exe:
2424
venv_python_exe_path = runfiles_root_path(ctx, venv_python_exe.short_path)
@@ -31,20 +31,40 @@ def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap):
3131
python_binary_actual_path = py_runtime.interpreter_path
3232

3333
zip_main_py = ctx.actions.declare_file(ctx.label.name + ".zip_main.py")
34-
ctx.actions.expand_template(
35-
template = py_runtime.zip_main_template,
36-
output = zip_main_py,
37-
substitutions = {
38-
"%EXTRACT_DIR%": paths.join(
39-
(ctx.label.repo_name or "_main"),
40-
ctx.label.package,
41-
ctx.label.name,
42-
),
43-
"%python_binary%": venv_python_exe_path,
44-
"%python_binary_actual%": python_binary_actual_path,
45-
"%stage2_bootstrap%": runfiles_root_path(ctx, stage2_bootstrap.short_path),
46-
"%workspace_name%": ctx.workspace_name,
47-
},
34+
35+
args = ctx.actions.args()
36+
args.add(py_runtime.zip_main_template, format = "--template=%s")
37+
args.add(zip_main_py, format = "--output=%s")
38+
39+
args.add(
40+
"%EXTRACT_DIR%=" + paths.join(
41+
(ctx.label.repo_name or "_main"),
42+
ctx.label.package,
43+
ctx.label.name,
44+
),
45+
format = "--substitution=%s",
46+
)
47+
args.add("%python_binary%=" + venv_python_exe_path, format = "--substitution=%s")
48+
args.add("%python_binary_actual%=" + python_binary_actual_path, format = "--substitution=%s")
49+
args.add("%stage2_bootstrap%=" + runfiles_root_path(ctx, stage2_bootstrap.short_path), format = "--substitution=%s")
50+
args.add("%workspace_name%=" + ctx.workspace_name, format = "--substitution=%s")
51+
52+
hash_files_manifest = ctx.actions.args()
53+
hash_files_manifest.use_param_file("--hash_files_manifest=%s", use_always = True)
54+
hash_files_manifest.set_param_file_format("multiline")
55+
56+
inputs = builders.DepsetBuilder()
57+
inputs.add(py_runtime.zip_main_template)
58+
_build_manifest(ctx, hash_files_manifest, runfiles, inputs)
59+
60+
actions_run(
61+
ctx,
62+
executable = ctx.attr._zip_main_maker,
63+
arguments = [args, hash_files_manifest],
64+
inputs = inputs.build(),
65+
outputs = [zip_main_py],
66+
mnemonic = "PyZipAppCreateMainPy",
67+
progress_message = "Generating zipapp __main__.py: %{label}",
4868
)
4969
return zip_main_py
5070

@@ -60,9 +80,7 @@ def _map_zip_symlinks(entry):
6080
def _map_zip_root_symlinks(entry):
6181
return "rf-root-symlink|" + _is_symlink(entry.target_file) + "|" + entry.path + "|" + entry.target_file.path
6282

63-
def _build_manifest(ctx, manifest, runfiles, zip_main):
64-
manifest.add("regular|0|__main__.py|{}".format(zip_main.path))
65-
83+
def _build_manifest(ctx, manifest, runfiles, inputs):
6684
manifest.add_all(
6785
# NOTE: Accessing runfiles.empty_filenames materializes them. A lambda
6886
# is used to defer that.
@@ -75,7 +93,10 @@ def _build_manifest(ctx, manifest, runfiles, zip_main):
7593
manifest.add_all(runfiles.symlinks, map_each = _map_zip_symlinks)
7694
manifest.add_all(runfiles.root_symlinks, map_each = _map_zip_root_symlinks)
7795

78-
inputs = [zip_main]
96+
inputs.add(runfiles.files)
97+
inputs.add([entry.target_file for entry in runfiles.symlinks.to_list()])
98+
inputs.add([entry.target_file for entry in runfiles.root_symlinks.to_list()])
99+
79100
zip_repo_mapping_manifest = maybe_create_repo_mapping(
80101
ctx = ctx,
81102
runfiles = runfiles,
@@ -87,8 +108,7 @@ def _build_manifest(ctx, manifest, runfiles, zip_main):
87108
zip_repo_mapping_manifest.path,
88109
format = "rf-root-symlink|0|_repo_mapping|%s",
89110
)
90-
inputs.append(zip_repo_mapping_manifest)
91-
return inputs
111+
inputs.add(zip_repo_mapping_manifest)
92112

93113
def _create_zip(ctx, py_runtime, py_executable, stage2_bootstrap):
94114
output = ctx.actions.declare_file(ctx.label.name + ".zip")
@@ -106,8 +126,17 @@ def _create_zip(ctx, py_runtime, py_executable, stage2_bootstrap):
106126

107127
runfiles = runfiles.build(ctx)
108128

109-
zip_main = _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap)
110-
inputs = _build_manifest(ctx, manifest, runfiles, zip_main)
129+
zip_main = _create_zipapp_main_py(
130+
ctx,
131+
py_runtime,
132+
py_executable,
133+
stage2_bootstrap,
134+
runfiles,
135+
)
136+
inputs = builders.DepsetBuilder()
137+
manifest.add("regular|0|__main__.py|{}".format(zip_main.path))
138+
inputs.add(zip_main)
139+
_build_manifest(ctx, manifest, runfiles, inputs)
111140

112141
zipper_args = ctx.actions.args()
113142
zipper_args.add(output)
@@ -124,7 +153,7 @@ def _create_zip(ctx, py_runtime, py_executable, stage2_bootstrap):
124153
ctx,
125154
executable = ctx.attr._zipper,
126155
arguments = [manifest, zipper_args],
127-
inputs = depset(inputs, transitive = [runfiles.files]),
156+
inputs = inputs.build(),
128157
outputs = [output],
129158
mnemonic = "PyZipAppCreateZip",
130159
progress_message = "Reticulating zipapp archive: %{label} into %{output}",
@@ -315,6 +344,10 @@ Whether the output should be an executable zip file.
315344
"@platforms//os:windows",
316345
],
317346
),
347+
"_zip_main_maker": attr.label(
348+
cfg = "exec",
349+
default = "//tools/private/zipapp:zip_main_maker",
350+
),
318351
"_zip_shell_template": attr.label(
319352
default = ":zip_shell_template",
320353
allow_single_file = True,

python/private/zipapp/zip_main_template.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import subprocess
2828
import tempfile
2929
import zipfile
30-
from os.path import dirname, join
30+
from os.path import dirname, join, basename
3131

3232
# runfiles-root-relative path
3333
_STAGE2_BOOTSTRAP = "%stage2_bootstrap%"
@@ -39,8 +39,10 @@
3939
_WORKSPACE_NAME = "%workspace_name%"
4040
# relative path under EXTRACT_ROOT to extract to.
4141
EXTRACT_DIR = "%EXTRACT_DIR%"
42+
APP_HASH = "%APP_HASH%"
4243

4344
EXTRACT_ROOT = os.environ.get("RULES_PYTHON_EXTRACT_ROOT")
45+
IS_WINDOWS = os.name == "nt"
4446

4547

4648
def print_verbose(*args, mapping=None, values=None):
@@ -67,10 +69,6 @@ def print_verbose(*args, mapping=None, values=None):
6769
print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True)
6870

6971

70-
# Return True if running on Windows
71-
def is_windows():
72-
return os.name == "nt"
73-
7472

7573
def get_windows_path_with_unc_prefix(path):
7674
"""Adds UNC prefix after getting a normalized absolute Windows path.
@@ -81,7 +79,7 @@ def get_windows_path_with_unc_prefix(path):
8179

8280
# No need to add prefix for non-Windows platforms.
8381
# And \\?\ doesn't work in python 2 or on mingw
84-
if not is_windows() or sys.version_info[0] < 3:
82+
if not IS_WINDOWS or sys.version_info[0] < 3:
8583
return path
8684

8785
# Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been
@@ -113,7 +111,7 @@ def has_windows_executable_extension(path):
113111

114112
if (
115113
_PYTHON_BINARY_VENV
116-
and is_windows()
114+
and IS_WINDOWS
117115
and not has_windows_executable_extension(_PYTHON_BINARY_VENV)
118116
):
119117
_PYTHON_BINARY_VENV = _PYTHON_BINARY_VENV + ".exe"
@@ -197,7 +195,14 @@ def extract_zip(zip_path, dest_dir):
197195
# Create the runfiles tree by extracting the zip file
198196
def create_runfiles_root():
199197
if EXTRACT_ROOT:
200-
extract_root = join(EXTRACT_ROOT, EXTRACT_DIR)
198+
# Shorten the path for Windows in case long path support is disabled
199+
if IS_WINDOWS:
200+
hash_dir = APP_HASH[0:32]
201+
extract_dir = basename(EXTRACT_DIR)
202+
extract_root = join(EXTRACT_ROOT, extract_dir, hash_dir)
203+
else:
204+
extract_root = join(EXTRACT_ROOT, EXTRACT_DIR, APP_HASH)
205+
extract_root = get_windows_path_with_unc_prefix(extract_root)
201206
else:
202207
extract_root = tempfile.mkdtemp("", "Bazel.runfiles_")
203208
extract_zip(dirname(__file__), extract_root)
@@ -245,9 +250,9 @@ def execute_file(
245250
subprocess_argv.append(f"-XRULES_PYTHON_ZIP_DIR={dirname(runfiles_root)}")
246251
subprocess_argv.append(main_filename)
247252
subprocess_argv += args
248-
print_verbose("subprocess argv:", values=subprocess_argv)
249253
print_verbose("subprocess env:", mapping=env)
250254
print_verbose("subprocess cwd:", workspace)
255+
print_verbose("subprocess argv:", values=subprocess_argv)
251256
ret_code = subprocess.call(subprocess_argv, env=env, cwd=workspace)
252257
sys.exit(ret_code)
253258
finally:
@@ -277,7 +282,7 @@ def main():
277282

278283
# The main Python source file.
279284
main_rel_path = _STAGE2_BOOTSTRAP
280-
if is_windows():
285+
if IS_WINDOWS:
281286
main_rel_path = main_rel_path.replace("/", os.sep)
282287

283288
runfiles_root = create_runfiles_root()

tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,17 @@ export RULES_PYTHON_BOOTSTRAP_VERBOSE=1
1616

1717
# We're testing the invocation of `__main__.py`, so we have to
1818
# manually pass the zipapp to python.
19+
echo "====================================================================="
1920
echo "Running zipapp using an automatic temp directory..."
21+
echo "====================================================================="
2022
"$PYTHON" "$ZIPAPP"
2123

24+
echo
25+
echo
26+
27+
echo "====================================================================="
2228
echo "Running zipapp with extract root set..."
29+
echo "====================================================================="
2330
export RULES_PYTHON_EXTRACT_ROOT="${TEST_TMPDIR:-/tmp}/extract_root_test"
2431
"$PYTHON" "$ZIPAPP"
2532

@@ -29,5 +36,19 @@ if [[ ! -d "$RULES_PYTHON_EXTRACT_ROOT" ]]; then
2936
exit 1
3037
fi
3138

39+
# On Windows, the path is shortened to just the basename to avoid long path errors.
40+
# Other platforms use the full path.
41+
# Note: [ -d ... ] expands globs, while [[ -d ... ]] does not.
42+
if [ -d "$RULES_PYTHON_EXTRACT_ROOT/_main/tests/py_zipapp/system_python_zipapp"/*/runfiles ]; then
43+
echo "Found runfiles at $RULES_PYTHON_EXTRACT_ROOT/_main/tests/py_zipapp/system_python_zipapp/*/runfiles"
44+
elif [ -d "$RULES_PYTHON_EXTRACT_ROOT/system_python_zipapp"/*/runfiles ]; then
45+
echo "Found runfiles at $RULES_PYTHON_EXTRACT_ROOT/system_python_zipapp/*/runfiles"
46+
else
47+
echo "Error: Could not find 'runfiles' directory"
48+
exit 1
49+
fi
50+
51+
echo "====================================================================="
3252
echo "Running zipapp with extract root set a second time..."
53+
echo "====================================================================="
3354
"$PYTHON" "$ZIPAPP"

tests/tools/zipapp/BUILD.bazel

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,9 @@ py_test(
1111
srcs = ["exe_zip_maker_test.py"],
1212
deps = ["//tools/private/zipapp:exe_zip_maker_lib"],
1313
)
14+
15+
py_test(
16+
name = "zip_main_maker_test",
17+
srcs = ["zip_main_maker_test.py"],
18+
deps = ["//tools/private/zipapp:zip_main_maker_lib"],
19+
)
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import hashlib
2+
import os
3+
import tempfile
4+
import unittest
5+
from unittest import mock
6+
7+
from tools.private.zipapp import zip_main_maker
8+
9+
10+
class ZipMainMakerTest(unittest.TestCase):
    """Runs ``zip_main_maker.main()`` against real files in a scratch directory."""

    def setUp(self):
        # Per-test scratch directory; cleaned up when the test finishes.
        self.temp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(self.temp_dir.cleanup)

    def _scratch_path(self, name):
        """Return the path of ``name`` inside the per-test scratch directory."""
        return os.path.join(self.temp_dir.name, name)

    @staticmethod
    def _expected_hash(manifest_lines):
        """Compute the SHA-256 hex digest this test expects for the manifest.

        Lines are processed in sorted order. For each line, everything after
        the first ``|`` (or the whole line when there is no ``|``) is hashed.
        For non-``rf-empty`` lines with at least four fields and a non-empty
        last field, the last field is treated as a path: the symlink target is
        hashed for symlinks, otherwise the file's bytes are hashed.
        """
        digest = hashlib.sha256()
        for entry in sorted(manifest_lines):
            fields = entry.split("|")
            kind, separator, remainder = entry.partition("|")
            hashed_text = remainder if separator else entry
            digest.update(hashed_text.encode("utf-8"))

            # Entries without a usable backing path contribute only their text.
            if kind == "rf-empty" or len(fields) < 4:
                continue
            backing_path = fields[-1]
            if not backing_path:
                continue

            symlink_flag = fields[1]
            if symlink_flag == "-1":
                # "-1" means unknown: a path that does not resolve is
                # treated as a symlink.
                treat_as_symlink = not os.path.exists(backing_path)
            else:
                treat_as_symlink = symlink_flag == "1"

            if treat_as_symlink:
                digest.update(os.readlink(backing_path).encode("utf-8"))
            else:
                with open(backing_path, "rb") as backing_file:
                    digest.update(backing_file.read())
        return digest.hexdigest()

    def test_creates_zip_main(self):
        """The output has %APP_HASH% and %FOO% replaced with the expected values."""
        template_path = self._scratch_path("template.py")
        with open(template_path, "w", encoding="utf-8") as template:
            template.write("hash=%APP_HASH%\nfoo=%FOO%\n")

        output_path = self._scratch_path("output.py")

        file1_path = self._scratch_path("file1.txt")
        with open(file1_path, "wb") as first_file:
            first_file.write(b"content1")

        file2_path = self._scratch_path("file2.txt")
        with open(file2_path, "wb") as second_file:
            second_file.write(b"content2")

        # Add a symlink to test symlink hashing
        symlink_path = self._scratch_path("symlink.txt")
        os.symlink(file1_path, symlink_path)

        manifest_lines = [
            f"rf-file|0|file1.txt|{file1_path}",
            f"rf-file|0|file2.txt|{file2_path}",
            f"rf-symlink|1|symlink.txt|{symlink_path}",
            "rf-empty|empty_file.txt",
        ]
        manifest_path = self._scratch_path("manifest.txt")
        with open(manifest_path, "w", encoding="utf-8") as manifest:
            manifest.writelines(line + "\n" for line in manifest_lines)

        argv = [
            "zip_main_maker.py",
            "--template",
            template_path,
            "--output",
            output_path,
            "--substitution",
            "%FOO%=bar",
            "--hash_files_manifest",
            manifest_path,
        ]

        with mock.patch("sys.argv", argv):
            zip_main_maker.main()

        expected_hash = self._expected_hash(manifest_lines)

        with open(output_path, "r", encoding="utf-8") as generated:
            content = generated.read()

        self.assertEqual(content, f"hash={expected_hash}\nfoo=bar\n")
100+
if __name__ == "__main__":
101+
unittest.main()

tools/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ filegroup(
3131
"wheelmaker.py",
3232
"//tools/launcher:distribution",
3333
"//tools/precompiler:distribution",
34+
"//tools/private:distribution",
3435
"//tools/publish:distribution",
3536
],
3637
visibility = ["//:__pkg__"],

tools/private/BUILD.bazel

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package(
2+
default_visibility = ["//:__subpackages__"],
3+
)
4+
5+
filegroup(
6+
name = "distribution",
7+
srcs = glob(["**"]) + [
8+
"//tools/private/zipapp:distribution",
9+
],
10+
)

0 commit comments

Comments
 (0)