garymm · pull · Mar 15, 2026 · Mar 15, 2026 · Mar 15, 2026 · Mar 15, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -87,6 +87,8 @@ Other changes:
   {obj}`experimental_index_url` which should speed up consecutive initializations and should no
   longer require the network access if the cache is hydrated.
   Implements [#2731](https://github.com/bazel-contrib/rules_python/issues/2731).
+* (wheel) Specifying a path ending in `/` as a destination in `data_files`
+  will now install file(s) to a folder, preserving their basename.
 
 {#v1-9-0}
 ## [1.9.0] - 2026-02-21

diff --git a/examples/wheel/BUILD.bazel b/examples/wheel/BUILD.bazel
@@ -401,6 +401,29 @@ py_wheel(
     version = "0.0.1",
 )
 
+filegroup(
+    name = "data_files_test_group",
+    # Re-using some files already checked into the repo.
+    srcs = [
+        "README.md",
+        "//examples/wheel:NOTICE",
+    ],
+)
+
+py_wheel(
+    name = "data_files_installed_in_folder",
+    testonly = True,  # Set this to verify the generated .dist target doesn't break things
+    # Re-using some files already checked into the repo.
+    data_files = {
+        # Single file
+        "//examples/wheel:NOTICE": "scripts/",
+        # Filegroup
+        ":data_files_test_group": "data/",
+    },
+    distribution = "data_files_installed_in_folder",
+    version = "0.0.1",
+)
+
 py_test(
     name = "wheel_test",
     srcs = ["wheel_test.py"],
@@ -409,6 +432,7 @@ py_test(
         ":custom_package_root_multi_prefix",
         ":custom_package_root_multi_prefix_reverse_order",
         ":customized",
+        ":data_files_installed_in_folder",
         ":empty_requires_files",
         ":extra_requires",
         ":filename_escaping",

diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py
@@ -615,6 +615,25 @@ def test_requires_dist_depends_on_extras_file(self):
                 requires,
             )
 
+    def test_data_files_installed_in_folder(self):
+        filename = self._get_path(
+            "data_files_installed_in_folder-0.0.1-py3-none-any.whl"
+        )
+
+        with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
+            self.assertEqual(
+                zf.namelist(),
+                [
+                    "data_files_installed_in_folder-0.0.1.dist-info/WHEEL",
+                    "data_files_installed_in_folder-0.0.1.dist-info/METADATA",
+                    "data_files_installed_in_folder-0.0.1.data/data/NOTICE",
+                    "data_files_installed_in_folder-0.0.1.data/data/README.md",
+                    "data_files_installed_in_folder-0.0.1.data/scripts/NOTICE",
+                    "data_files_installed_in_folder-0.0.1.dist-info/RECORD",
+                ],
+            )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl
@@ -182,8 +182,35 @@ _other_attrs = {
         doc = "A list of strings describing the categories for the package. For valid classifiers see https://pypi.org/classifiers",
     ),
     "data_files": attr.label_keyed_string_dict(
-        doc = ("Any file that is not normally installed inside site-packages goes into the .data directory, named " +
-               "as the .dist-info directory but with the .data/ extension.  Allowed paths: {prefixes}".format(prefixes = ALLOWED_DATA_FILE_PREFIX)),
+        doc = ("""
+Mapping of data files to go into the wheel.
+
+The keys are targets of files to include, and the values are the `.data`-relative
+path to use.
+
+Any file that is not normally installed inside site-packages goes into the .data
+directory, named as the .dist-info directory but with the .data/ extension. If
+the destination of a file or group of files ends in a `/`, the destination is a
+folder and files are placed with their existing basenames under that folder.
+
+For example:
+
+```
+":file1.txt": "data/file1.txt",   # Destination: <wheelname>.data/data/file1.txt
+":file1.txt": "data/",            # Destination: <wheelname>.data/data/file1.txt
+":file1.txt": "data/special.txt", # Destination: <wheelname>.data/data/special.txt
+
+filegroup(name = "files", srcs = [":file1.txt", ":file2.txt"])
+":files": "data/",                # Destinations: <wheelname>.data/data/file1.txt, <wheelname>.data/data/file2.txt
+```
+
+Allowed paths: {prefixes}
+
+:::{{versionchanged}} VERSION_NEXT_FEATURE
+Values can end in slash (`/`) to indicate that all files of the target should
+be moved under that directory.
+:::
+""".format(prefixes = ALLOWED_DATA_FILE_PREFIX)),
         allow_files = True,
     ),
     "description_content_type": attr.string(
@@ -506,9 +533,9 @@ def _py_wheel_impl(ctx):
 
     for target, filename in ctx.attr.data_files.items():
         target_files = target[DefaultInfo].files.to_list()
-        if len(target_files) != 1:
+        if len(target_files) != 1 and not filename.endswith("/"):
             fail(
-                "Multi-file target listed in data_files %s",
+                "Multi-file target listed in data_files %s, this is only supported when specifying a folder path (i.e. a path ending in '/')" %
                 filename,
             )
 
@@ -520,11 +547,15 @@ def _py_wheel_impl(ctx):
                     filename,
                 ),
             )
-        other_inputs.extend(target_files)
-        args.add(
-            "--data_files",
-            filename + ";" + target_files[0].path,
-        )
+
+        for file in target_files:
+            final_filename = filename + file.basename if filename.endswith("/") else filename
+            # Register each file exactly once; extending with the whole list per iteration would duplicate inputs.
+            other_inputs.append(file)
+            args.add(
+                "--data_files",
+                final_filename + ";" + file.path,
+            )
 
     ctx.actions.run(
         mnemonic = "PyWheel",

diff --git a/python/private/python_bootstrap_template.txt b/python/private/python_bootstrap_template.txt
@@ -10,13 +10,13 @@ import sys
 import os
 import subprocess
 import uuid
-# runfiles-relative path
 # NOTE: The sentinel strings are split (e.g., "%stage2" + "_bootstrap%") so that
 # the substitution logic won't replace them. This allows runtime detection of
 # unsubstituted placeholders, which occurs when native py_binary is used in
 # external repositories. In that case, we fall back to %main% which Bazel's
 # native rule does substitute.
 _STAGE2_BOOTSTRAP_SENTINEL = "%stage2" + "_bootstrap%"
+# runfiles-root-relative path
 STAGE2_BOOTSTRAP="%stage2_bootstrap%"
 
 # NOTE: The fallback logic from stage2_bootstrap to main is only present
@@ -35,13 +35,13 @@ if not STAGE2_BOOTSTRAP:
   print("ERROR: %stage2_bootstrap% (or %main%) was not substituted.", file=sys.stderr)
   sys.exit(1)
 
-# runfiles-relative path to venv's python interpreter
+# runfiles-root-relative path to venv's python interpreter
 # Empty string if a venv is not setup.
 PYTHON_BINARY = '%python_binary%'
 
 # The path to the actual interpreter that is used.
 # Typically PYTHON_BINARY is a symlink pointing to this.
-# runfiles-relative path, absolute path, or single word.
+# runfiles-root-relative path, absolute path, or single word.
 # Used to create a venv at runtime, or when a venv isn't setup.
 PYTHON_BINARY_ACTUAL = "%python_binary_actual%"
 
@@ -136,12 +136,12 @@ def SearchPath(name):
         return path
   return None
 
-def FindPythonBinary(module_space):
+def FindPythonBinary(runfiles_root):
   """Finds the real Python binary if it's not a normal absolute path."""
   if PYTHON_BINARY:
-    return FindBinary(module_space, PYTHON_BINARY)
+    return FindBinary(runfiles_root, PYTHON_BINARY)
   else:
-    return FindBinary(module_space, PYTHON_BINARY_ACTUAL)
+    return FindBinary(runfiles_root, PYTHON_BINARY_ACTUAL)
 
 
 def print_verbose(*args, mapping=None, values=None):
@@ -165,7 +165,7 @@ def print_verbose(*args, mapping=None, values=None):
         else:
             print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True)
 
-def FindBinary(module_space, bin_name):
+def FindBinary(runfiles_root, bin_name):
   """Finds the real binary if it's not a normal absolute path."""
   if not bin_name:
     return None
@@ -180,12 +180,12 @@ def FindBinary(module_space, bin_name):
   # Use normpath() to convert slashes to os.sep on Windows.
   elif os.sep in os.path.normpath(bin_name):
     # Case 3: Path is relative to the repo root.
-    return os.path.join(module_space, bin_name)
+    return os.path.join(runfiles_root, bin_name)
   else:
     # Case 4: Path has to be looked up in the search path.
     return SearchPath(bin_name)
 
-def FindModuleSpace(main_rel_path):
+def find_runfiles_root(main_rel_path):
   """Finds the runfiles tree."""
   # When the calling process used the runfiles manifest to resolve the
   # location of this stub script, the path may be expanded. This means
@@ -214,9 +214,9 @@ def FindModuleSpace(main_rel_path):
     stub_filename = os.path.join(os.getcwd(), stub_filename)
 
   while True:
-    module_space = stub_filename + ('.exe' if IsWindows() else '') + '.runfiles'
-    if os.path.isdir(module_space):
-      return module_space
+    runfiles_root = stub_filename + ('.exe' if IsWindows() else '') + '.runfiles'
+    if os.path.isdir(runfiles_root):
+      return runfiles_root
 
     runfiles_pattern = r'(.*\.runfiles)' + (r'\\' if IsWindows() else '/') + '.*'
     matchobj = re.match(runfiles_pattern, stub_filename)
@@ -261,14 +261,14 @@ def ExtractZip(zip_path, dest_dir):
         os.chmod(file_path, attrs & 0o7777)
 
 # Create the runfiles tree by extracting the zip file
-def CreateModuleSpace():
+def create_runfiles_root():
   temp_dir = tempfile.mkdtemp('', 'Bazel.runfiles_')
   ExtractZip(os.path.dirname(__file__), temp_dir)
-  # IMPORTANT: Later code does `rm -fr` on dirname(module_space) -- it's
+  # IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's
   # important that deletion code be in sync with this directory structure
   return os.path.join(temp_dir, 'runfiles')
 
-def RunfilesEnvvar(module_space):
+def RunfilesEnvvar(runfiles_root):
   """Finds the runfiles manifest or the runfiles directory.
 
   Returns:
@@ -288,30 +288,30 @@ def RunfilesEnvvar(module_space):
 
   # If running from a zip, there's no manifest file.
   if IsRunningFromZip():
-    return ('RUNFILES_DIR', module_space)
+    return ('RUNFILES_DIR', runfiles_root)
 
   # Look for the runfiles "output" manifest, argv[0] + ".runfiles_manifest"
-  runfiles = module_space + '_manifest'
+  runfiles = runfiles_root + '_manifest'
   if os.path.exists(runfiles):
     return ('RUNFILES_MANIFEST_FILE', runfiles)
 
   # Look for the runfiles "input" manifest, argv[0] + ".runfiles/MANIFEST"
   # Normally .runfiles_manifest and MANIFEST are both present, but the
   # former will be missing for zip-based builds or if someone copies the
   # runfiles tree elsewhere.
-  runfiles = os.path.join(module_space, 'MANIFEST')
+  runfiles = os.path.join(runfiles_root, 'MANIFEST')
   if os.path.exists(runfiles):
     return ('RUNFILES_MANIFEST_FILE', runfiles)
 
   # If running in a sandbox and no environment variables are set, then
   # Look for the runfiles  next to the binary.
-  if module_space.endswith('.runfiles') and os.path.isdir(module_space):
-    return ('RUNFILES_DIR', module_space)
+  if runfiles_root.endswith('.runfiles') and os.path.isdir(runfiles_root):
+    return ('RUNFILES_DIR', runfiles_root)
 
   return (None, None)
 
-def ExecuteFile(python_program, main_filename, args, env, module_space,
-                workspace, delete_module_space):
+def ExecuteFile(python_program, main_filename, args, env, runfiles_root,
+                workspace, delete_runfiles_root):
   # type: (str, str, list[str], dict[str, str], str, str|None, str|None) -> ...
   """Executes the given Python file using the various environment settings.
 
@@ -323,10 +323,10 @@ def ExecuteFile(python_program, main_filename, args, env, module_space,
     main_filename: (str) The Python file to execute
     args: (list[str]) Additional args to pass to the Python file
     env: (dict[str, str]) A dict of environment variables to set for the execution
-    module_space: (str) Path to the module space/runfiles tree directory
+    runfiles_root: (str) Path to the runfiles root directory
     workspace: (str|None) Name of the workspace to execute in. This is expected to be a
         directory under the runfiles tree.
-    delete_module_space: (bool), True if the module space should be deleted
+    delete_runfiles_root: (bool), True if the runfiles root should be deleted
         after a successful (exit code zero) program run, False if not.
   """
   argv = [python_program]
@@ -351,7 +351,7 @@ def ExecuteFile(python_program, main_filename, args, env, module_space,
   #   can't execv because we need control to return here. This only
   #   happens for targets built in the host config.
   #
-  if not (IsWindows() or workspace or delete_module_space):
+  if not (IsWindows() or workspace or delete_runfiles_root):
     _RunExecv(python_program, argv, env)
 
   ret_code = subprocess.call(
@@ -360,11 +360,11 @@ def ExecuteFile(python_program, main_filename, args, env, module_space,
     cwd=workspace
   )
 
-  if delete_module_space:
-    # NOTE: dirname() is called because CreateModuleSpace() creates a
+  if delete_runfiles_root:
+    # NOTE: dirname() is called because create_runfiles_root() creates a
     # sub-directory within a temporary directory, and we want to remove the
     # whole temporary directory.
-    shutil.rmtree(os.path.dirname(module_space), True)
+    shutil.rmtree(os.path.dirname(runfiles_root), True)
   sys.exit(ret_code)
 
 def _RunExecv(python_program, argv, env):
@@ -401,33 +401,33 @@ def Main():
   print_verbose("main_rel_path:", main_rel_path)
 
   if IsRunningFromZip():
-    module_space = CreateModuleSpace()
-    delete_module_space = True
+    runfiles_root = create_runfiles_root()
+    delete_runfiles_root = True
   else:
-    module_space = FindModuleSpace(main_rel_path)
-    delete_module_space = False
+    runfiles_root = find_runfiles_root(main_rel_path)
+    delete_runfiles_root = False
 
-  print_verbose("runfiles root:", module_space)
+  print_verbose("runfiles root:", runfiles_root)
 
-  if os.environ.get("RULES_PYTHON_TESTING_TELL_MODULE_SPACE"):
-    new_env["RULES_PYTHON_TESTING_MODULE_SPACE"] = module_space
+  if os.environ.get("RULES_PYTHON_TESTING_TELL_RUNFILES_ROOT"):
+    new_env["RULES_PYTHON_TESTING_RUNFILES_ROOT"] = runfiles_root
 
-  runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(module_space)
+  runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(runfiles_root)
   if runfiles_envkey:
     new_env[runfiles_envkey] = runfiles_envvalue
 
   # Don't prepend a potentially unsafe path to sys.path
   # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
   new_env['PYTHONSAFEPATH'] = '1'
 
-  main_filename = os.path.join(module_space, main_rel_path)
+  main_filename = os.path.join(runfiles_root, main_rel_path)
   main_filename = GetWindowsPathWithUNCPrefix(main_filename)
   assert os.path.exists(main_filename), \
          'Cannot exec() %r: file not found.' % main_filename
   assert os.access(main_filename, os.R_OK), \
          'Cannot exec() %r: file not readable.' % main_filename
 
-  program = python_program = FindPythonBinary(module_space)
+  program = python_program = FindPythonBinary(runfiles_root)
   if python_program is None:
     raise AssertionError("Could not find python binary: {} or {}".format(
         repr(PYTHON_BINARY),
@@ -449,15 +449,15 @@ def Main():
     # change directory to the right runfiles directory.
     # (So that the data files are accessible)
     if os.environ.get('RUN_UNDER_RUNFILES') == '1':
-      workspace = os.path.join(module_space, WORKSPACE_NAME)
+      workspace = os.path.join(runfiles_root, WORKSPACE_NAME)
 
   try:
     sys.stdout.flush()
     # NOTE: ExecuteFile may call execve() and lines after this will never run.
     ExecuteFile(
-      python_program, main_filename, args, new_env, module_space,
+      python_program, main_filename, args, new_env, runfiles_root,
       workspace,
-      delete_module_space = delete_module_space,
+      delete_runfiles_root = delete_runfiles_root,
     )
 
   except EnvironmentError: