refactor(pypi): cleanup marker evaluation code in requirement parsing (#3765)

aignas · web-flow · commit bc8a62b3fb5b · 2026-05-23T05:49:30.000Z
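Another cleanup PR to make the code easier to work with and optimize: environment markers are now evaluated inline in `parse_requirements` against each platform's `env`, and the `evaluate_markers` callback plumbing (the `hub_builder` and `pip_repository` overrides and their test mocks) is removed.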
diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel
@@ -190,7 +190,6 @@ bzl_library(
     visibility = ["//:__subpackages__"],
     deps = [
         ":attrs_bzl",
-        ":evaluate_markers_bzl",
         ":parse_requirements_bzl",
         ":pep508_env_bzl",
         ":pep508_evaluate_bzl",
@@ -247,6 +246,8 @@ bzl_library(
         ":argparse_bzl",
         ":index_sources_bzl",
         ":parse_requirements_txt_bzl",
+        ":pep508_evaluate_bzl",
+        ":pep508_requirement_bzl",
         ":pypi_repo_utils_bzl",
         ":requirements_files_by_platform_bzl",
         ":select_whl_bzl",
@@ -344,7 +345,6 @@ bzl_library(
     srcs = ["pip_repository.bzl"],
     deps = [
         ":attrs_bzl",
-        ":evaluate_markers_bzl",
         ":parse_requirements_bzl",
         ":pep508_env_bzl",
         ":pip_repository_attrs_bzl",
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
@@ -356,7 +356,6 @@ You cannot use both the additive_build_content and additive_build_content_file a
                     simpleapi_cache = simpleapi_cache,
                     # TODO @aignas 2025-09-06: do not use kwargs
                     minor_mapping = kwargs.get("minor_mapping", MINOR_MAPPING),
-                    evaluate_markers_fn = kwargs.get("evaluate_markers", None),
                     available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),
                     logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name, mod = mod),
                 )
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
@@ -7,7 +7,6 @@ load("//python/private:text_util.bzl", "render")
 load("//python/private:version.bzl", "version")
 load("//python/private:version_label.bzl", "version_label")
 load(":attrs.bzl", "use_isolated")
-load(":evaluate_markers.bzl", "evaluate_markers")
 load(":parse_requirements.bzl", "parse_requirements")
 load(":pep508_env.bzl", "env")
 load(":pep508_evaluate.bzl", "evaluate")
@@ -29,7 +28,6 @@ def hub_builder(
         minor_mapping,
         available_interpreters,
         simpleapi_download_fn,
-        evaluate_markers_fn,
         logger,
         simpleapi_cache):
     """Return a hub builder instance
@@ -40,7 +38,6 @@ def hub_builder(
         config: The platform configuration.
         whl_overrides: {type}`dict[str, struct]` - per-wheel overrides.
         minor_mapping: {type}`dict[str, str]` the mapping between minor and full versions.
-        evaluate_markers_fn: the override function used to evaluate the markers.
         available_interpreters: {type}`dict[str, Label]` The dictionary of available
             interpreters that have been registered using the `python` bzlmod extension.
             The keys are in the form `python_{snake_case_version}_host`. This is to be
@@ -97,7 +94,6 @@ def hub_builder(
         # Instance constants passed in by callers
         _config = config,
         _whl_overrides = whl_overrides,
-        _evaluate_markers_fn = evaluate_markers_fn,
         _logger = logger,
         _minor_mapping = minor_mapping,
         _available_interpreters = available_interpreters,
@@ -465,15 +461,6 @@ def _platforms(module_ctx, *, python_version, config, target_platforms):
         )
     return platforms
 
-def _evaluate_markers(self, pip_attr):
-    if self._evaluate_markers_fn:
-        return self._evaluate_markers_fn
-
-    return lambda requirements: evaluate_markers(
-        requirements = requirements,
-        platforms = self._platforms[pip_attr.python_version],
-    )
-
 def _create_whl_repos(
         self,
         module_ctx,
@@ -509,7 +496,6 @@ def _create_whl_repos(
         platforms = platforms,
         extra_pip_args = pip_attr.extra_pip_args,
         get_index_urls = self._get_index_urls.get(pip_attr.python_version),
-        evaluate_markers = _evaluate_markers(self, pip_attr),
         logger = logger,
     )
 
diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
@@ -31,6 +31,7 @@ load("//python/private:repo_utils.bzl", "repo_utils")
 load(":argparse.bzl", "argparse")
 load(":index_sources.bzl", "index_sources")
 load(":parse_requirements_txt.bzl", "parse_requirements_txt")
+load(":pep508_evaluate.bzl", "evaluate")
 load(":pep508_requirement.bzl", "requirement")
 load(":select_whl.bzl", "select_whl")
 
@@ -41,7 +42,6 @@ def parse_requirements(
         extra_pip_args = [],
         platforms = {},
         get_index_urls = None,
-        evaluate_markers = None,
         extract_url_srcs = True,
         logger):
     """Get the requirements with platforms that the requirements apply to.
@@ -57,11 +57,6 @@ def parse_requirements(
         get_index_urls: Callable[[ctx, dict[str, list[str]]], dict], a callable to get all
             of the distribution URLs from a PyPI index. Accepts ctx and
             distribution names to query.
-        evaluate_markers: A function to use to evaluate the requirements.
-            Accepts a dict where keys are requirement lines to evaluate against
-            the platforms stored as values in the input dict. Returns the same
-            dict, but with values being platforms that are compatible with the
-            requirements line.
         extract_url_srcs: A boolean to enable extracting URLs from requirement
             lines to enable using bazel downloader.
         logger: repo_utils.logger, a simple struct to log diagnostic messages.
@@ -86,11 +81,9 @@ def parse_requirements(
 
         The second element is extra_pip_args should be passed to `whl_library`.
     """
-    evaluate_markers = evaluate_markers or (lambda _requirements: {})
     options = {}
     requirements = {}
     all_files_parsed = {}
-    reqs_with_env_markers = {}
     index_url = None
     extra_index_urls = []
     for file, plats in requirements_by_platform.items():
@@ -114,39 +107,31 @@ def parse_requirements(
                 tokenized_options.append(p)
 
         pip_args = tokenized_options + extra_pip_args
+
+        # Parse the index URL options from this file's pip args once, instead of once per platform
+        index_url = argparse.index_url(pip_args, index_url)
+        extra_index_urls = argparse.extra_index_url(pip_args, [])
+        if argparse.platform(pip_args, []):
+            # No use of downloader if the user specifies "--platform" pip arg. This means that
+            # they intend to use pip to download the wheels
+            #
+            # TODO @aignas 2026-04-11: consider removing this line in the next major release
+            # (3.0).
+            get_index_urls = None
+
+        # Pre-parse requirements once per file to avoid redundant parsing in loops
+        parsed_reqs = [(entry, requirement(entry[1])) for entry in parse_result.requirements]
+
         for plat in plats:
-            requirements[plat] = parse_result.requirements
-            for entry in parse_result.requirements:
-                requirement_line = entry[1]
+            plat_env = platforms.get(plat)
 
-                # output all of the requirement lines that have a marker
-                if ";" in requirement_line:
-                    reqs_with_env_markers.setdefault(requirement_line, []).append(plat)
-            options[plat] = pip_args
+            requirements[plat] = [
+                entry
+                for entry, req in parsed_reqs
+                if not req.marker or (plat_env and evaluate(req.marker, env = plat_env.env))
+            ]
 
-            # Parse the index URL from the requirement files
-            index_url = argparse.index_url(pip_args, index_url)
-            extra_index_urls = argparse.extra_index_url(pip_args, [])
-            platform = argparse.platform(pip_args, [])
-            if platform:
-                # No use of downloader if the user specifies "--platform" pip arg. This means that
-                # they intend to use pip to download the wheels
-                #
-                # TODO @aignas 2026-04-11: consider removing this line in the next major release
-                # (3.0).
-                get_index_urls = None
-
-    # This may call to Python, so execute it early (before calling to the
-    # internet below) and ensure that we call it only once.
-    #
-    # TODO @aignas 2026-05-10: remove this assumption in the code because we
-    # are always using pipstar, so we can do the marker evaluation when we are
-    # parsing the files.
-    resolved_marker_platforms = evaluate_markers(reqs_with_env_markers)
-    logger.trace(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
-        reqs_with_env_markers,
-        resolved_marker_platforms,
-    ))
+            options[plat] = pip_args
 
     requirements_by_platform = {}
     for plat, parse_results in requirements.items():
@@ -170,9 +155,6 @@ def parse_requirements(
             req_line = entry[1]
             req = requirement(req_line)
 
-            if req.marker and plat not in resolved_marker_platforms.get(req_line, []):
-                continue
-
             requirements_dict[req.name] = entry
 
         extra_pip_args = options[plat]
diff --git a/python/private/pypi/pip_repository.bzl b/python/private/pypi/pip_repository.bzl
@@ -18,7 +18,6 @@ load("@bazel_skylib//lib:sets.bzl", "sets")
 load("//python/private:normalize_name.bzl", "normalize_name")
 load("//python/private:repo_utils.bzl", "REPO_DEBUG_ENV_VAR", "repo_utils")
 load("//python/private:text_util.bzl", "render")
-load(":evaluate_markers.bzl", "evaluate_markers")
 load(":parse_requirements.bzl", "host_platform", "parse_requirements", "select_requirement")
 load(":pep508_env.bzl", "env")
 load(":pip_repository_attrs.bzl", "ATTRS")
@@ -123,15 +122,14 @@ def _pip_repository_impl(rctx):
             platforms = platforms,
         ),
         extra_pip_args = rctx.attr.extra_pip_args,
-        evaluate_markers = lambda requirements: evaluate_markers(
-            requirements = {
-                # NOTE @aignas 2025-07-07: because we don't distinguish between
-                # freethreaded and non-freethreaded, it is a 1:1 mapping.
-                req: {p: p for p in plats}
-                for req, plats in requirements.items()
-            },
-            platforms = {p: struct(env = marker_env) for p in platforms},
-        ),
+        platforms = {
+            p: struct(
+                env = marker_env,
+                whl_abi_tags = [],
+                whl_platform_tags = [],
+            )
+            for p in platforms
+        },
         extract_url_srcs = False,
         logger = logger,
     )
diff --git a/tests/pypi/extension/pip_parse.bzl b/tests/pypi/extension/pip_parse.bzl
@@ -65,6 +65,5 @@ def pip_parse(
         parallel_download = False,
         experimental_index_url_overrides = {},
         simpleapi_skip = simpleapi_skip,
-        _evaluate_markers_srcs = [],
         **kwargs
     )
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -47,7 +47,6 @@ def hub_builder(
         config = None,
         minor_mapping = {},
         whl_overrides = {},
-        evaluate_markers_fn = None,
         simpleapi_download_fn = None,
         log_printer = None,
         available_interpreters = {}):
@@ -87,7 +86,6 @@ def hub_builder(
             "python_3_15_host": "unit_test_interpreter_target",
         },
         simpleapi_download_fn = simpleapi_download_fn or (lambda *a, **k: {}),
-        evaluate_markers_fn = evaluate_markers_fn,
         logger = repo_utils.logger(
             struct(
                 getenv = {
@@ -441,17 +439,7 @@ def _test_simple_with_markers(env):
         ("linux", "x86_64"): "torch==2.4.1+cpu",
     }
     for (host_os, host_arch), want_requirement in sub_tests.items():
-        builder = hub_builder(
-            env,
-            evaluate_markers_fn = lambda requirements: {
-                key: [
-                    platform
-                    for platform in platforms
-                    if ("x86_64" in platform and "platform_machine ==" in key) or ("x86_64" not in platform and "platform_machine !=" in key)
-                ]
-                for key, platforms in requirements.items()
-            },
-        )
+        builder = hub_builder(env)
         builder.pip_parse(
             mocks.mctx(
                 mock_files = {
diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
@@ -16,7 +16,6 @@
 
 load("@rules_testing//lib:test_suite.bzl", "test_suite")
 load("//python/private:repo_utils.bzl", "REPO_DEBUG_ENV_VAR", "REPO_VERBOSITY_ENV_VAR", "repo_utils")  # buildifier: disable=bzl-visibility
-load("//python/private/pypi:evaluate_markers.bzl", "evaluate_markers")  # buildifier: disable=bzl-visibility
 load("//python/private/pypi:parse_requirements.bzl", "select_requirement", _parse_requirements = "parse_requirements")  # buildifier: disable=bzl-visibility
 load("//python/private/pypi:pep508_env.bzl", pep508_env = "env")  # buildifier: disable=bzl-visibility
 load("//tests/support/mocks:mocks.bzl", "mocks")
@@ -68,7 +67,7 @@ foo==0.0.1 --hash=sha256:deadbeef
 foo[extra]==0.0.1 --hash=sha256:deadbeef
 """,
         "requirements_marker": """\
-foo[extra]==0.0.1 ;marker --hash=sha256:deadbeef
+foo[extra]==0.0.1 ; os_name == 'nt' --hash=sha256:deadbeef
 bar==0.0.1 --hash=sha256:deadbeef
 """,
         "requirements_multi_version": """\
@@ -441,22 +440,24 @@ def _test_select_requirement_none_platform(env):
 _tests.append(_test_select_requirement_none_platform)
 
 def _test_env_marker_resolution(env):
-    """Test environment marker resolution with ``evaluate_markers``."""
-
-    def _mock_eval_markers(input):
-        ret = {
-            "foo[extra]==0.0.1 ;marker --hash=sha256:deadbeef": ["cp311_windows_x86_64"],
-        }
-
-        env.expect.that_collection(input.keys()).contains_exactly(ret.keys())
-        env.expect.that_collection(input.values()[0]).contains_exactly(["cp311_linux_super_exotic", "cp311_windows_x86_64"])
-        return ret
+    """Test environment marker resolution with platform env information."""
 
     got = parse_requirements(
         requirements_by_platform = {
             "requirements_marker": ["cp311_linux_super_exotic", "cp311_windows_x86_64"],
         },
-        evaluate_markers = _mock_eval_markers,
+        platforms = {
+            "cp311_linux_super_exotic": struct(
+                env = pep508_env(os = "linux", arch = "x86_64", python_version = "3.11.0"),
+                whl_abi_tags = [],
+                whl_platform_tags = [],
+            ),
+            "cp311_windows_x86_64": struct(
+                env = pep508_env(os = "windows", arch = "x86_64", python_version = "3.11.0"),
+                whl_abi_tags = [],
+                whl_platform_tags = [],
+            ),
+        },
     )
     env.expect.that_collection(got).contains_exactly([
         struct(
@@ -797,17 +798,6 @@ def _test_get_index_urls_different_versions(env):
                 },
             ),
         },
-        evaluate_markers = lambda requirements: evaluate_markers(
-            requirements = requirements,
-            platforms = {
-                "cp310_linux_x86_64": struct(
-                    env = {"python_full_version": "3.10.0"},
-                ),
-                "cp39_linux_x86_64": struct(
-                    env = {"python_full_version": "3.9.0"},
-                ),
-            },
-        ),
     )
 
     env.expect.that_collection(got).contains_exactly([
@@ -891,14 +881,6 @@ def _test_get_index_urls_cross_platform(env):
             ),
         },
         get_index_urls = _get_index_urls,
-        evaluate_markers = lambda requirements: evaluate_markers(
-            requirements = requirements,
-            platforms = {
-                "cp39_osx_x86_64": struct(
-                    env = {"python_full_version": "3.9.0"},
-                ),
-            },
-        ),
     )
 
     # distributions must include packages from ALL files, even those with
@@ -947,14 +929,6 @@ def _test_get_index_urls_single_py_version(env):
                 },
             ),
         },
-        evaluate_markers = lambda requirements: evaluate_markers(
-            requirements = requirements,
-            platforms = {
-                "cp310_linux_x86_64": struct(
-                    env = {"python_full_version": "3.10.0"},
-                ),
-            },
-        ),
     )
 
     env.expect.that_collection(got).contains_exactly([
@@ -1004,14 +978,6 @@ def _test_get_index_urls_all_versions(env):
             ),
         },
         get_index_urls = _get_index_urls,
-        evaluate_markers = lambda requirements: evaluate_markers(
-            requirements = requirements,
-            platforms = {
-                "cp39_linux_x86_64": struct(
-                    env = {"python_full_version": "3.9.0"},
-                ),
-            },
-        ),
     )
 
     env.expect.that_collection(calls).contains_exactly([

Original file line number	Diff line number	Diff line change
`@@ -356,7 +356,6 @@ You cannot use both the additive_build_content and additive_build_content_file a`
`356`	`356`	`simpleapi_cache = simpleapi_cache,`
`357`	`357`	`# TODO @aignas 2025-09-06: do not use kwargs`
`358`	`358`	`minor_mapping = kwargs.get("minor_mapping", MINOR_MAPPING),`
`359`		`- evaluate_markers_fn = kwargs.get("evaluate_markers", None),`
`360`	`359`	`available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),`
`361`	`360`	`logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name, mod = mod),`
`362`	`361`	`)`
Original file line number	Diff line number	Diff line change
`@@ -65,6 +65,5 @@ def pip_parse(`
`65`	`65`	`parallel_download = False,`
`66`	`66`	`experimental_index_url_overrides = {},`
`67`	`67`	`simpleapi_skip = simpleapi_skip,`
`68`		`- _evaluate_markers_srcs = [],`
`69`	`68`	`**kwargs`
`70`	`69`	`)`