Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ Other changes:
{obj}`experimental_index_url` which should speed up consecutive initializations and should no
longer require the network access if the cache is hydrated.
Implements [#2731](https://github.com/bazel-contrib/rules_python/issues/2731).
* (wheel) Specifying a path ending in `/` as a destination in `data_files`
will now install the file(s) into that folder, preserving their basenames.

{#v1-9-0}
## [1.9.0] - 2026-02-21
Expand Down
24 changes: 24 additions & 0 deletions examples/wheel/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,29 @@ py_wheel(
version = "0.0.1",
)

# Bundles two existing files into a single multi-file target so that the
# `data_files_installed_in_folder` wheel can map a filegroup to a folder.
filegroup(
name = "data_files_test_group",
# Re-using some files already checked into the repo.
srcs = [
"README.md",
"//examples/wheel:NOTICE",
],
)

# Wheel used by wheel_test.py to check folder-style `data_files` destinations:
# a value ending in `/` places every file of the target under that folder,
# keeping each file's basename.
py_wheel(
name = "data_files_installed_in_folder",
testonly = True, # Set this to verify the generated .dist target doesn't break things
# Re-using some files already checked into the repo.
data_files = {
# Single file: expected in the wheel at <dist>.data/scripts/NOTICE
"//examples/wheel:NOTICE": "scripts/",
# Filegroup: both of its files are expected under <dist>.data/data/
":data_files_test_group": "data/",
},
distribution = "data_files_installed_in_folder",
version = "0.0.1",
)

py_test(
name = "wheel_test",
srcs = ["wheel_test.py"],
Expand All @@ -409,6 +432,7 @@ py_test(
":custom_package_root_multi_prefix",
":custom_package_root_multi_prefix_reverse_order",
":customized",
":data_files_installed_in_folder",
":empty_requires_files",
":extra_requires",
":filename_escaping",
Expand Down
19 changes: 19 additions & 0 deletions examples/wheel/wheel_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,25 @@ def test_requires_dist_depends_on_extras_file(self):
requires,
)

def test_data_files_installed_in_folder(self):
    """Check that `data_files` destinations ending in `/` act as folders.

    The wheel under test maps one file to `scripts/` and a two-file
    filegroup to `data/`; every file must show up under its destination
    folder with its original basename, in the exact archive order below.
    """
    wheel_path = self._get_path(
        "data_files_installed_in_folder-0.0.1-py3-none-any.whl"
    )
    expected_entries = [
        "data_files_installed_in_folder-0.0.1.dist-info/WHEEL",
        "data_files_installed_in_folder-0.0.1.dist-info/METADATA",
        "data_files_installed_in_folder-0.0.1.data/data/NOTICE",
        "data_files_installed_in_folder-0.0.1.data/data/README.md",
        "data_files_installed_in_folder-0.0.1.data/scripts/NOTICE",
        "data_files_installed_in_folder-0.0.1.dist-info/RECORD",
    ]

    with zipfile.ZipFile(wheel_path) as wheel:
        self.assertAllEntriesHasReproducibleMetadata(wheel)
        self.assertEqual(wheel.namelist(), expected_entries)


if __name__ == "__main__":
unittest.main()
49 changes: 40 additions & 9 deletions python/private/py_wheel.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,35 @@ _other_attrs = {
doc = "A list of strings describing the categories for the package. For valid classifiers see https://pypi.org/classifiers",
),
"data_files": attr.label_keyed_string_dict(
doc = ("Any file that is not normally installed inside site-packages goes into the .data directory, named " +
"as the .dist-info directory but with the .data/ extension. Allowed paths: {prefixes}".format(prefixes = ALLOWED_DATA_FILE_PREFIX)),
doc = ("""
Mapping of data files to go into the wheel.

The keys are targets of files to include, and the values are the `.data`-relative
path to use.

Any file that is not normally installed inside site-packages goes into the .data
directory, named as the .dist-info directory but with the .data/ extension. If
the destination of a file or group of files ends in a `/`, the destination is a
folder and files are placed with their existing basenames under that folder.

For example:

```
":file1.txt": "data/file1.txt", # Destination: <wheelname>.data/data/file1.txt
":file1.txt": "data/", # Destination: <wheelname>.data/data/file1.txt
":file1.txt": "data/special.txt", # Destination: <wheelname>.data/data/special.txt

filegroup(name = "files", srcs = [":file1.txt", ":file2.txt"])
":files": "data/", # Destinations: <wheelname>.data/data/file1.txt, <wheelname>.data/data/file2.txt
```

Allowed paths: {prefixes}

:::{{versionchanged}} VERSION_NEXT_FEATURE
Values can end in slash (`/`) to indicate that all files of the target should
be moved under that directory.
:::
""".format(prefixes = ALLOWED_DATA_FILE_PREFIX)),
allow_files = True,
),
"description_content_type": attr.string(
Expand Down Expand Up @@ -506,9 +533,9 @@ def _py_wheel_impl(ctx):

for target, filename in ctx.attr.data_files.items():
target_files = target[DefaultInfo].files.to_list()
if len(target_files) != 1:
if len(target_files) != 1 and not filename.endswith("/"):
fail(
"Multi-file target listed in data_files %s",
"Multi-file target listed in data_files %s, this is only supported when specifying a folder path (i.e. a path ending in '/')",
filename,
)

Expand All @@ -520,11 +547,15 @@ def _py_wheel_impl(ctx):
filename,
),
)
other_inputs.extend(target_files)
args.add(
"--data_files",
filename + ";" + target_files[0].path,
)

for file in target_files:
final_filename = filename + file.basename if filename.endswith("/") else filename

other_inputs.extend(target_files)
args.add(
"--data_files",
final_filename + ";" + file.path,
)

ctx.actions.run(
mnemonic = "PyWheel",
Expand Down
82 changes: 41 additions & 41 deletions python/private/python_bootstrap_template.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ import sys
import os
import subprocess
import uuid
# runfiles-relative path
# NOTE: The sentinel strings are split (e.g., "%stage2" + "_bootstrap%") so that
# the substitution logic won't replace them. This allows runtime detection of
# unsubstituted placeholders, which occurs when native py_binary is used in
# external repositories. In that case, we fall back to %main% which Bazel's
# native rule does substitute.
_STAGE2_BOOTSTRAP_SENTINEL = "%stage2" + "_bootstrap%"
# runfiles-root-relative path
STAGE2_BOOTSTRAP="%stage2_bootstrap%"

# NOTE: The fallback logic from stage2_bootstrap to main is only present
Expand All @@ -35,13 +35,13 @@ if not STAGE2_BOOTSTRAP:
print("ERROR: %stage2_bootstrap% (or %main%) was not substituted.", file=sys.stderr)
sys.exit(1)

# runfiles-relative path to venv's python interpreter
# runfiles-root-relative path to venv's python interpreter
# Empty string if a venv is not setup.
PYTHON_BINARY = '%python_binary%'

# The path to the actual interpreter that is used.
# Typically PYTHON_BINARY is a symlink pointing to this.
# runfiles-relative path, absolute path, or single word.
# runfiles-root-relative path, absolute path, or single word.
# Used to create a venv at runtime, or when a venv isn't setup.
PYTHON_BINARY_ACTUAL = "%python_binary_actual%"

Expand Down Expand Up @@ -136,12 +136,12 @@ def SearchPath(name):
return path
return None

def FindPythonBinary(module_space):
def FindPythonBinary(runfiles_root):
"""Finds the real Python binary if it's not a normal absolute path."""
if PYTHON_BINARY:
return FindBinary(module_space, PYTHON_BINARY)
return FindBinary(runfiles_root, PYTHON_BINARY)
else:
return FindBinary(module_space, PYTHON_BINARY_ACTUAL)
return FindBinary(runfiles_root, PYTHON_BINARY_ACTUAL)


def print_verbose(*args, mapping=None, values=None):
Expand All @@ -165,7 +165,7 @@ def print_verbose(*args, mapping=None, values=None):
else:
print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True)

def FindBinary(module_space, bin_name):
def FindBinary(runfiles_root, bin_name):
"""Finds the real binary if it's not a normal absolute path."""
if not bin_name:
return None
Expand All @@ -180,12 +180,12 @@ def FindBinary(module_space, bin_name):
# Use normpath() to convert slashes to os.sep on Windows.
elif os.sep in os.path.normpath(bin_name):
# Case 3: Path is relative to the runfiles root.
return os.path.join(module_space, bin_name)
return os.path.join(runfiles_root, bin_name)
else:
# Case 4: Path has to be looked up in the search path.
return SearchPath(bin_name)

def FindModuleSpace(main_rel_path):
def find_runfiles_root(main_rel_path):
"""Finds the runfiles tree."""
# When the calling process used the runfiles manifest to resolve the
# location of this stub script, the path may be expanded. This means
Expand Down Expand Up @@ -214,9 +214,9 @@ def FindModuleSpace(main_rel_path):
stub_filename = os.path.join(os.getcwd(), stub_filename)

while True:
module_space = stub_filename + ('.exe' if IsWindows() else '') + '.runfiles'
if os.path.isdir(module_space):
return module_space
runfiles_root = stub_filename + ('.exe' if IsWindows() else '') + '.runfiles'
if os.path.isdir(runfiles_root):
return runfiles_root

runfiles_pattern = r'(.*\.runfiles)' + (r'\\' if IsWindows() else '/') + '.*'
matchobj = re.match(runfiles_pattern, stub_filename)
Expand Down Expand Up @@ -261,14 +261,14 @@ def ExtractZip(zip_path, dest_dir):
os.chmod(file_path, attrs & 0o7777)

# Create the runfiles tree by extracting the zip file
def CreateModuleSpace():
def create_runfiles_root():
temp_dir = tempfile.mkdtemp('', 'Bazel.runfiles_')
ExtractZip(os.path.dirname(__file__), temp_dir)
# IMPORTANT: Later code does `rm -fr` on dirname(module_space) -- it's
# IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's
# important that deletion code be in sync with this directory structure
return os.path.join(temp_dir, 'runfiles')

def RunfilesEnvvar(module_space):
def RunfilesEnvvar(runfiles_root):
"""Finds the runfiles manifest or the runfiles directory.

Returns:
Expand All @@ -288,30 +288,30 @@ def RunfilesEnvvar(module_space):

# If running from a zip, there's no manifest file.
if IsRunningFromZip():
return ('RUNFILES_DIR', module_space)
return ('RUNFILES_DIR', runfiles_root)

# Look for the runfiles "output" manifest, argv[0] + ".runfiles_manifest"
runfiles = module_space + '_manifest'
runfiles = runfiles_root + '_manifest'
if os.path.exists(runfiles):
return ('RUNFILES_MANIFEST_FILE', runfiles)

# Look for the runfiles "input" manifest, argv[0] + ".runfiles/MANIFEST"
# Normally .runfiles_manifest and MANIFEST are both present, but the
# former will be missing for zip-based builds or if someone copies the
# runfiles tree elsewhere.
runfiles = os.path.join(module_space, 'MANIFEST')
runfiles = os.path.join(runfiles_root, 'MANIFEST')
if os.path.exists(runfiles):
return ('RUNFILES_MANIFEST_FILE', runfiles)

# If running in a sandbox and no environment variables are set, then
# look for the runfiles next to the binary.
if module_space.endswith('.runfiles') and os.path.isdir(module_space):
return ('RUNFILES_DIR', module_space)
if runfiles_root.endswith('.runfiles') and os.path.isdir(runfiles_root):
return ('RUNFILES_DIR', runfiles_root)

return (None, None)

def ExecuteFile(python_program, main_filename, args, env, module_space,
workspace, delete_module_space):
def ExecuteFile(python_program, main_filename, args, env, runfiles_root,
workspace, delete_runfiles_root):
# type: (str, str, list[str], dict[str, str], str, str|None, bool) -> ...
"""Executes the given Python file using the various environment settings.

Expand All @@ -323,10 +323,10 @@ def ExecuteFile(python_program, main_filename, args, env, module_space,
main_filename: (str) The Python file to execute
args: (list[str]) Additional args to pass to the Python file
env: (dict[str, str]) A dict of environment variables to set for the execution
module_space: (str) Path to the module space/runfiles tree directory
runfiles_root: (str) Path to the runfiles root directory
workspace: (str|None) Name of the workspace to execute in. This is expected to be a
directory under the runfiles tree.
delete_module_space: (bool), True if the module space should be deleted
delete_runfiles_root: (bool), True if the runfiles root should be deleted
after a successful (exit code zero) program run, False if not.
"""
argv = [python_program]
Expand All @@ -351,7 +351,7 @@ def ExecuteFile(python_program, main_filename, args, env, module_space,
# can't execv because we need control to return here. This only
# happens for targets built in the host config.
#
if not (IsWindows() or workspace or delete_module_space):
if not (IsWindows() or workspace or delete_runfiles_root):
_RunExecv(python_program, argv, env)

ret_code = subprocess.call(
Expand All @@ -360,11 +360,11 @@ def ExecuteFile(python_program, main_filename, args, env, module_space,
cwd=workspace
)

if delete_module_space:
# NOTE: dirname() is called because CreateModuleSpace() creates a
if delete_runfiles_root:
# NOTE: dirname() is called because create_runfiles_root() creates a
# sub-directory within a temporary directory, and we want to remove the
# whole temporary directory.
shutil.rmtree(os.path.dirname(module_space), True)
shutil.rmtree(os.path.dirname(runfiles_root), True)
sys.exit(ret_code)

def _RunExecv(python_program, argv, env):
Expand Down Expand Up @@ -401,33 +401,33 @@ def Main():
print_verbose("main_rel_path:", main_rel_path)

if IsRunningFromZip():
module_space = CreateModuleSpace()
delete_module_space = True
runfiles_root = create_runfiles_root()
delete_runfiles_root = True
else:
module_space = FindModuleSpace(main_rel_path)
delete_module_space = False
runfiles_root = find_runfiles_root(main_rel_path)
delete_runfiles_root = False

print_verbose("runfiles root:", module_space)
print_verbose("runfiles root:", runfiles_root)

if os.environ.get("RULES_PYTHON_TESTING_TELL_MODULE_SPACE"):
new_env["RULES_PYTHON_TESTING_MODULE_SPACE"] = module_space
if os.environ.get("RULES_PYTHON_TESTING_TELL_RUNFILES_ROOT"):
new_env["RULES_PYTHON_TESTING_RUNFILES_ROOT"] = runfiles_root

runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(module_space)
runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(runfiles_root)
if runfiles_envkey:
new_env[runfiles_envkey] = runfiles_envvalue

# Don't prepend a potentially unsafe path to sys.path
# See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
new_env['PYTHONSAFEPATH'] = '1'

main_filename = os.path.join(module_space, main_rel_path)
main_filename = os.path.join(runfiles_root, main_rel_path)
main_filename = GetWindowsPathWithUNCPrefix(main_filename)
assert os.path.exists(main_filename), \
'Cannot exec() %r: file not found.' % main_filename
assert os.access(main_filename, os.R_OK), \
'Cannot exec() %r: file not readable.' % main_filename

program = python_program = FindPythonBinary(module_space)
program = python_program = FindPythonBinary(runfiles_root)
if python_program is None:
raise AssertionError("Could not find python binary: {} or {}".format(
repr(PYTHON_BINARY),
Expand All @@ -449,15 +449,15 @@ def Main():
# change directory to the right runfiles directory.
# (So that the data files are accessible)
if os.environ.get('RUN_UNDER_RUNFILES') == '1':
workspace = os.path.join(module_space, WORKSPACE_NAME)
workspace = os.path.join(runfiles_root, WORKSPACE_NAME)

try:
sys.stdout.flush()
# NOTE: ExecuteFile may call execve() and lines after this will never run.
ExecuteFile(
python_program, main_filename, args, new_env, module_space,
python_program, main_filename, args, new_env, runfiles_root,
workspace,
delete_module_space = delete_module_space,
delete_runfiles_root = delete_runfiles_root,
)

except EnvironmentError:
Expand Down
Loading