diff --git a/src/fromager/constraints.py b/src/fromager/constraints.py index 1d1530a5..82504ad3 100644 --- a/src/fromager/constraints.py +++ b/src/fromager/constraints.py @@ -27,6 +27,23 @@ def _is_blocked_specifier(specifier: SpecifierSet) -> bool: ) +def _format_provenance(provenance: dict[str, list[str]]) -> str: + """Format a per-package provenance dict as a human-readable string. + + Args: + provenance: Mapping of source file to original constraint lines. + + Returns: + Formatted string, e.g. + ``"/path/to/base.txt (>=2.0), /path/to/override.txt (!=2.1.1)"``. + """ + parts: list[str] = [] + for source, lines in provenance.items(): + specifiers = ", ".join(str(Requirement(line).specifier) for line in lines) + parts.append(f"{source} ({specifiers})") + return ", ".join(parts) + + class InvalidConstraintError(ValueError): pass @@ -36,6 +53,8 @@ def __init__(self) -> None: # mapping of canonical names to requirements # NOTE: Requirement.name is not normalized self._data: dict[NormalizedName, Requirement] = {} + # per-package provenance: {canonical_name: {source_file: [original_lines]}} + self._provenance: dict[NormalizedName, dict[str, list[str]]] = {} def __iter__(self) -> Generator[NormalizedName, None, None]: yield from self._data @@ -46,13 +65,21 @@ def __bool__(self) -> bool: def __len__(self) -> int: return len(self._data) - def add_constraint(self, unparsed: str) -> None: - """Add new constraint, must not conflict with any existing constraints + def add_constraint(self, unparsed: str, *, source: str) -> None: + """Add new constraint, must not conflict with any existing constraints. + + Args: + unparsed: Raw constraint string, e.g. ``"foo>=2.0"``. + source: Path or URL of the file that contains this constraint. + Required for provenance tracking. - .. versionchanged: 0.83.0 + .. versionchanged:: 0.83.0 Non-conflicting constraints are now combined. Constraints with conflicts now raise :exc:`InvalidConstraintError`. Inputs without a version specifier or with extras or url are also refused. + + .. versionchanged:: 0.84.0 + Added *source* parameter for provenance tracking. """ req = Requirement(unparsed) canon_name = canonicalize_name(req.name) @@ -81,43 +108,77 @@ def add_constraint(self, unparsed: str) -> None: if previous is not None: prev_blocked = _is_blocked_specifier(previous.specifier) if blocked != prev_blocked: + prev_prov = _format_provenance(self._provenance.get(canon_name, {})) raise InvalidConstraintError( f"Cannot combine blocked and non-blocked constraints " - f"(existing: {previous}, new: {req})" + f"(existing: {previous} from {prev_prov}, " + f"new: {req} from {source})" ) if not blocked: logger.debug("combining constraints %s and %s", previous, req) new_specifier = req.specifier & previous.specifier if new_specifier.is_unsatisfiable(): + prev_prov = _format_provenance(self._provenance.get(canon_name, {})) raise InvalidConstraintError( f"Combined specifier '{new_specifier}' is not satisfiable " - f"(existing: {previous}, new: {req})" + f"(existing: {previous} from {prev_prov}, " + f"new: {req} from {source})" ) req.specifier = new_specifier else: logger.debug(f"adding constraint {req}") self._data[canon_name] = req + pkg_prov = self._provenance.setdefault(canon_name, {}) + pkg_prov.setdefault(source, []).append(unparsed) def load_constraints_file(self, constraints_file: str | pathlib.Path) -> None: - """Load constraints from a constraints file or URL""" + """Load constraints from a constraints file or URL.""" logger.info("loading constraints from %s", constraints_file) + source = str(constraints_file) content = requirements_file.parse_requirements_file(constraints_file) for line in content: - self.add_constraint(line) + self.add_constraint(line, source=source) def dump_constraints(self, output: typing.TextIO) -> None: - """Dump combined constraints to a text stream""" - # sort by normalized name - for _, req in sorted(self._data.items()): - # write requirement without markers. They have been evaluated - # in add_constraint() - output.write(f"{req.name}{req.specifier}\n") + """Dump combined constraints to a text stream. + + Each line includes an inline comment showing which source file(s) + contributed each specifier. + + Args: + output: Writable text stream. + + .. versionchanged:: 0.84.0 + Output now includes per-line provenance comments. + """ + # sort by normalized name, write requirement without markers. + # They have been evaluated in add_constraint() + for name, req in sorted(self._data.items()): + line = f"{req.name}{req.specifier}" + prov = self._provenance.get(name, {}) + if prov: + line = f"{line} # {_format_provenance(prov)}" + output.write(f"{line}\n") def get_constraint(self, name: str) -> Requirement | None: + """Return the merged constraint for *name*, or ``None``.""" return self._data.get(canonicalize_name(name)) + def get_provenance(self, name: str) -> dict[str, list[str]]: + """Return provenance info for *name*. + + Returns: + Mapping of ``{source_file: [original_constraint_lines]}``, + or an empty dict if the package has no constraints. + + .. versionadded:: 0.84.0 + """ + prov = self._provenance.get(canonicalize_name(name), {}) + return {source: list(lines) for source, lines in prov.items()} + def allow_prerelease(self, pkg_name: str) -> bool: + """Return ``True`` if the constraint for *pkg_name* allows prereleases.""" constraint = self.get_constraint(pkg_name) if constraint: return bool(constraint.specifier.prereleases) @@ -131,6 +192,7 @@ def is_blocked(self, pkg_name: str) -> bool: return False def is_satisfied_by(self, pkg_name: str, version: Version) -> bool: + """Return ``True`` if *version* satisfies the constraint for *pkg_name*.""" constraint = self.get_constraint(pkg_name) if constraint: return constraint.specifier.contains(version, prereleases=True) diff --git a/src/fromager/resolver.py b/src/fromager/resolver.py index e1b78a8b..ea00ee39 100644 --- a/src/fromager/resolver.py +++ b/src/fromager/resolver.py @@ -32,7 +32,7 @@ from . import overrides from .candidate import Candidate, Cooldown -from .constraints import Constraints +from .constraints import Constraints, _format_provenance from .extras_provider import ExtrasProvider from .http_retry import RETRYABLE_EXCEPTIONS, retry_on_exception from .request_session import session @@ -287,10 +287,15 @@ def find_all_matching_from_provider( ) except resolvelib.resolvers.ResolverException as err: constraint = provider.constraints.get_constraint(req.name) + provenance = provider.constraints.get_provenance(req.name) provider_desc = provider.get_provider_description() original_msg = str(err) + prov_msg = "" + if provenance: + prov_msg = f" (from {_format_provenance(provenance)})" raise resolvelib.resolvers.ResolverException( - f"Unable to resolve requirement specifier {req} with constraint {constraint} using {provider_desc}: {original_msg}" + f"Unable to resolve requirement specifier {req} with constraint " + f"{constraint}{prov_msg} using {provider_desc}: {original_msg}" ) from err # Materialize candidates so we can iterate more than once if filtering @@ -689,8 +694,13 @@ def is_satisfied_by(self, requirement: Requirement, candidate: Candidate) -> boo if not self.constraints.is_satisfied_by(requirement.name, candidate.version): if DEBUG_RESOLVER: c = self.constraints.get_constraint(requirement.name) + provenance = self.constraints.get_provenance(requirement.name) + prov_msg = "" + if provenance: + prov_msg = f" from {_format_provenance(provenance)}" logger.debug( - f"{requirement.name}: skipping {candidate.version} due to constraint {c}" + f"{requirement.name}: skipping {candidate.version} " + f"due to constraint {c}{prov_msg}" ) return False diff --git a/tests/test_constraints.py b/tests/test_constraints.py index d17485a4..d6f14d98 100644 --- a/tests/test_constraints.py +++ b/tests/test_constraints.py @@ -14,7 +14,7 @@ def test_constraint_is_satisfied_by() -> None: c = Constraints() assert not c - c.add_constraint("foo<=1.1") + c.add_constraint("foo<=1.1", source="test") assert c assert len(c) == 1 assert c.is_satisfied_by("foo", Version("1.1")) @@ -24,7 +24,7 @@ def test_constraint_is_satisfied_by() -> None: def test_constraint_canonical_name() -> None: c = Constraints() - c.add_constraint("flash_attn<=1.1") + c.add_constraint("flash_attn<=1.1", source="test") assert c.is_satisfied_by("flash_attn", Version("1.1")) assert c.is_satisfied_by("flash-attn", Version("1.1")) assert c.is_satisfied_by("Flash-ATTN", Version("1.1")) @@ -33,8 +33,8 @@ def test_constraint_canonical_name() -> None: def test_constraint_not_is_satisfied_by() -> None: c = Constraints() - c.add_constraint("foo<=1.1") - c.add_constraint("bar>=2.0") + c.add_constraint("foo<=1.1", source="test") + c.add_constraint("bar>=2.0", source="test") assert not c.is_satisfied_by("foo", Version("1.2")) assert not c.is_satisfied_by("foo", Version("2.0")) assert not c.is_satisfied_by("bar", Version("1.0")) @@ -45,29 +45,31 @@ def test_add_constraint_conflict() -> None: assert markers.default_environment()["platform_machine"] == "atari" c = Constraints() - c.add_constraint("foo<=1.1") - c.add_constraint("flit_core==2.0rc3") + c.add_constraint("foo<=1.1", source="test") + c.add_constraint("flit_core==2.0rc3", source="test") # Conflicting version, same marker (no marker) should raise error with pytest.raises(InvalidConstraintError): - c.add_constraint("foo>1.1") + c.add_constraint("foo>1.1", source="test") # Conflicting version for flit_core should raise error with pytest.raises(InvalidConstraintError): - c.add_constraint("flit_core>2.0.0") + c.add_constraint("flit_core>2.0.0", source="test") # Normalized name conflict should raise error with pytest.raises(InvalidConstraintError): - c.add_constraint("flit-core>2.0.0") + c.add_constraint("flit-core>2.0.0", source="test") # Constraints for other platforms are ignored c.add_constraint( - "bar==1.0; python_version >= '3.11' and platform_machine == 'amiga'" + "bar==1.0; python_version >= '3.11' and platform_machine == 'amiga'", + source="test", ) assert c.get_constraint("bar") is None c.add_constraint( - "bar==1.0; python_version >= '3.11' and platform_machine == 'atari'" + "bar==1.0; python_version >= '3.11' and platform_machine == 'atari'", + source="test", ) # Make sure correct constraint is added constraint = c.get_constraint("bar") @@ -81,16 +83,17 @@ def test_add_constraint_conflict() -> None: # Different, but equivalent markers should raise error with pytest.raises(InvalidConstraintError): c.add_constraint( - "bar==1.1; platform_machine == 'atari' and python_version >= '3.11'" + "bar==1.1; platform_machine == 'atari' and python_version >= '3.11'", + source="test", ) # Same package with different markers should NOT raise error - c.add_constraint("baz==1.0; platform_machine != 'amiga'") - c.add_constraint("baz==1.1; platform_machine == 'amiga'") + c.add_constraint("baz==1.0; platform_machine != 'amiga'", source="test") + c.add_constraint("baz==1.1; platform_machine == 'amiga'", source="test") # But same package with same marker should raise error with pytest.raises(InvalidConstraintError): - c.add_constraint("foo==1.2; platform_machine != 'amiga'") + c.add_constraint("foo==1.2; platform_machine != 'amiga'", source="test") # Verify multiple constraints for same package are stored assert len(c) == 4 # flit_core, foo, bar, and baz @@ -103,22 +106,23 @@ def test_dump_constraints() -> None: c.dump_constraints(out) assert out.getvalue() == "" - c.add_constraint("foo>=1.0") - c.add_constraint("foo<2.0") - c.add_constraint("bar==1.1") + c.add_constraint("foo>=1.0", source="test") + c.add_constraint("foo<2.0", source="test") + c.add_constraint("bar==1.1", source="test") out = io.StringIO() c.dump_constraints(out) - assert out.getvalue() == "bar==1.1\nfoo<2.0,>=1.0\n" + assert "bar==1.1 # test (==1.1)" in out.getvalue() + assert "foo<2.0,>=1.0 # test (>=1.0, <2.0)" in out.getvalue() def test_allow_prerelease() -> None: c = Constraints() - c.add_constraint("foo>=1.1") + c.add_constraint("foo>=1.1", source="test") assert not c.allow_prerelease("foo") - c.add_constraint("bar>=1.1a0") + c.add_constraint("bar>=1.1a0", source="test") assert c.allow_prerelease("bar") - c.add_constraint("flit_core==2.0rc3") + c.add_constraint("flit_core==2.0rc3", source="test") assert c.allow_prerelease("flit_core") @@ -153,32 +157,32 @@ def test_load_constraints_url() -> None: def test_invalid_constraints() -> None: c = Constraints() with pytest.raises(InvalidConstraintError, match=r".*no specifier"): - c.add_constraint("foo") + c.add_constraint("foo", source="test") with pytest.raises(InvalidConstraintError, match=r".*has extras"): - c.add_constraint("foo[extra]>=1.0") + c.add_constraint("foo[extra]>=1.0", source="test") with pytest.raises(InvalidConstraintError, match=r".*has an url"): - c.add_constraint("foo@https://foo.test") + c.add_constraint("foo@https://foo.test", source="test") def test_unsatisfiable() -> None: c = Constraints() with pytest.raises(InvalidConstraintError): - c.add_constraint("foo<1.0,>2.0") + c.add_constraint("foo<1.0,>2.0", source="test") def test_combine_constraints() -> None: c = Constraints() - c.add_constraint("foo>=1.0") - c.add_constraint("foo<2.0") + c.add_constraint("foo>=1.0", source="test") + c.add_constraint("foo<2.0", source="test") assert c.get_constraint("foo") == Requirement("foo<2.0,>=1.0") - c.add_constraint("foo!=1.1.0") + c.add_constraint("foo!=1.1.0", source="test") assert c.get_constraint("foo") == Requirement("foo<2.0,>=1.0,!=1.1.0") @pytest.mark.parametrize("specifier", ["<0", "<0.0", "<0.0.0"]) def test_blocked_package(specifier: str) -> None: c = Constraints() - c.add_constraint(f"blocked-pkg{specifier}") + c.add_constraint(f"blocked-pkg{specifier}", source="test") assert c.is_blocked("blocked-pkg") assert not c.is_satisfied_by("blocked-pkg", Version("0")) assert not c.is_satisfied_by("blocked-pkg", Version("0.0.1")) @@ -187,18 +191,127 @@ def test_blocked_package(specifier: str) -> None: def test_blocked_then_non_blocked_raises() -> None: c = Constraints() - c.add_constraint("foo<0") + c.add_constraint("foo<0", source="test") with pytest.raises(InvalidConstraintError, match=r"blocked and non-blocked"): - c.add_constraint("foo>=1.0") + c.add_constraint("foo>=1.0", source="test") def test_non_blocked_then_blocked_raises() -> None: c = Constraints() - c.add_constraint("foo>=1.0") + c.add_constraint("foo>=1.0", source="test") with pytest.raises(InvalidConstraintError, match=r"blocked and non-blocked"): - c.add_constraint("foo<0") + c.add_constraint("foo<0", source="test") def test_is_blocked_unknown_package() -> None: c = Constraints() assert not c.is_blocked("unknown") + + +def test_provenance_single_source() -> None: + """Provenance tracks the source for a directly added constraint.""" + c = Constraints() + c.add_constraint("foo>=2.0", source="/path/to/base.txt") + prov = c.get_provenance("foo") + assert prov == {"/path/to/base.txt": ["foo>=2.0"]} + + +def test_provenance_multiple_sources() -> None: + """Provenance records both files when two files constrain the same package.""" + c = Constraints() + c.add_constraint("foo>=2.0", source="/path/to/base.txt") + c.add_constraint("foo!=2.1.1", source="/path/to/override.txt") + prov = c.get_provenance("foo") + assert prov == { + "/path/to/base.txt": ["foo>=2.0"], + "/path/to/override.txt": ["foo!=2.1.1"], + } + + +def test_provenance_same_source_multiple_lines() -> None: + """Multiple constraints from the same file are grouped under one source.""" + c = Constraints() + c.add_constraint("foo>=2.0", source="shared.txt") + c.add_constraint("foo!=2.1.1", source="shared.txt") + prov = c.get_provenance("foo") + assert prov == {"shared.txt": ["foo>=2.0", "foo!=2.1.1"]} + + +def test_provenance_unknown_package() -> None: + """Provenance returns empty dict for unconstrained packages.""" + c = Constraints() + assert c.get_provenance("nonexistent") == {} + + +def test_provenance_load_constraints_file(tmp_path: pathlib.Path) -> None: + """Loading a file records the file path as the provenance source.""" + constraint_file = tmp_path / "constraints-base.txt" + constraint_file.write_text("egg==1.0\ntorch>=2.0\n") + c = Constraints() + c.load_constraints_file(constraint_file) + assert c.get_provenance("egg") == {str(constraint_file): ["egg==1.0"]} + assert c.get_provenance("torch") == {str(constraint_file): ["torch>=2.0"]} + + +def test_provenance_load_multiple_files(tmp_path: pathlib.Path) -> None: + """Loading two files with the same package tracks both sources.""" + base = tmp_path / "base.txt" + base.write_text("foo>=2.0\nbar==1.0\n") + override = tmp_path / "override.txt" + override.write_text("foo!=2.1.1\n") + + c = Constraints() + c.load_constraints_file(base) + c.load_constraints_file(override) + + prov = c.get_provenance("foo") + assert prov == { + str(base): ["foo>=2.0"], + str(override): ["foo!=2.1.1"], + } + assert c.get_provenance("bar") == {str(base): ["bar==1.0"]} + + +def test_provenance_returns_deep_copy() -> None: + """get_provenance returns a deep copy, not a reference to internal state.""" + c = Constraints() + c.add_constraint("foo>=1.0", source="a.txt") + prov = c.get_provenance("foo") + + # mutating the outer dict must not affect internal state + prov["injected.txt"] = ["foo<99"] + assert "injected.txt" not in c.get_provenance("foo") + + # mutating an inner list must not affect internal state + prov2 = c.get_provenance("foo") + prov2["a.txt"].append("foo<99") + assert c.get_provenance("foo") == {"a.txt": ["foo>=1.0"]} + + +def test_dump_constraints_multiple_sources() -> None: + """dump_constraints includes provenance from multiple source files.""" + c = Constraints() + c.add_constraint("foo>=2.0", source="/path/to/base.txt") + c.add_constraint("foo!=2.1.1", source="/path/to/override.txt") + c.add_constraint("bar==1.0", source="/path/to/base.txt") + + out = io.StringIO() + c.dump_constraints(out) + result = out.getvalue() + + assert "bar==1.0 # /path/to/base.txt (==1.0)" in result + assert ( + "foo!=2.1.1,>=2.0 # /path/to/base.txt (>=2.0), /path/to/override.txt (!=2.1.1)" + in result + ) + + +def test_conflict_error_includes_provenance() -> None: + """InvalidConstraintError message includes source file provenance.""" + c = Constraints() + c.add_constraint("foo>=2.0", source="/constraints/base.txt") + with pytest.raises( + InvalidConstraintError, + match=r"(?=.*base\.txt)(?=.*override\.txt)", + ): + c.add_constraint("foo<1.0", source="/constraints/override.txt") diff --git a/tests/test_context.py b/tests/test_context.py index 948ca2a3..53b2d4f8 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -57,7 +57,7 @@ def test_pip_constraints_args(tmp_path: pathlib.Path) -> None: "# auto-generated constraints file", f"# {constraints_file}", "", - "test==1.0", + f"test==1.0 # {constraints_file} (==1.0)", "", ) ) @@ -87,8 +87,8 @@ def test_multiple_constraints_files(tmp_path: pathlib.Path) -> None: f"# {constraints2}", f"# {constraints3}", "", - "foo!=2.1.1,>=2.0", - "test==1.0", + f"foo!=2.1.1,>=2.0 # {constraints2} (>=2.0), {constraints3} (!=2.1.1)", + f"test==1.0 # {constraints1} (==1.0)", "", ) ) diff --git a/tests/test_resolver.py b/tests/test_resolver.py index 690a1288..9a32b63e 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -306,7 +306,7 @@ def test_provider_choose_sdist() -> None: def test_provider_choose_either_with_constraint() -> None: constraint = constraints.Constraints() - constraint.add_constraint("hydra-core==1.3.2") + constraint.add_constraint("hydra-core==1.3.2", source="test") with requests_mock.Mocker() as r: r.get( "https://pypi.org/simple/hydra-core/", @@ -333,7 +333,7 @@ def test_provider_choose_either_with_constraint() -> None: def test_provider_constraint_mismatch() -> None: constraint = constraints.Constraints() - constraint.add_constraint("hydra-core<=1.1") + constraint.add_constraint("hydra-core<=1.1", source="test") with requests_mock.Mocker() as r: r.get( "https://pypi.org/simple/hydra-core/", @@ -350,7 +350,7 @@ def test_provider_constraint_mismatch() -> None: def test_provider_constraint_match() -> None: constraint = constraints.Constraints() - constraint.add_constraint("hydra-core<=1.3") + constraint.add_constraint("hydra-core<=1.3", source="test") with requests_mock.Mocker() as r: r.get( "https://pypi.org/simple/hydra-core/", @@ -766,7 +766,7 @@ def test_resolve_github_override_download_url() -> None: def test_github_constraint_mismatch() -> None: constraint = constraints.Constraints() - constraint.add_constraint("fromager>=1.0") + constraint.add_constraint("fromager>=1.0", source="test") with requests_mock.Mocker() as r: r.get( "https://api.github.com:443/repos/python-wheel-build/fromager", @@ -789,7 +789,7 @@ def test_github_constraint_mismatch() -> None: def test_github_constraint_match() -> None: constraint = constraints.Constraints() - constraint.add_constraint("fromager<0.9") + constraint.add_constraint("fromager<0.9", source="test") with requests_mock.Mocker() as r: r.get( "https://api.github.com:443/repos/python-wheel-build/fromager", @@ -884,7 +884,7 @@ def test_resolve_versionmap_with_constraint() -> None: ) c = constraints.Constraints() - c.add_constraint("testpkg<1.4") + c.add_constraint("testpkg<1.4", source="test") provider = resolver.VersionMapProvider( version_map=version_map, package_name="testpkg", constraints=c @@ -1077,7 +1077,7 @@ def test_resolve_gitlab_override_download_url() -> None: def test_gitlab_constraint_mismatch() -> None: constraint = constraints.Constraints() - constraint.add_constraint("submodlib>=1.0") + constraint.add_constraint("submodlib>=1.0", source="test") with requests_mock.Mocker() as r: r.get( "https://gitlab.com/api/v4/projects/mirrors%2Fgithub%2Fdecile-team%2Fsubmodlib/repository/tags", @@ -1099,7 +1099,7 @@ def test_gitlab_constraint_mismatch() -> None: def test_gitlab_constraint_match() -> None: constraint = constraints.Constraints() - constraint.add_constraint("submodlib<0.0.3") + constraint.add_constraint("submodlib<0.0.3", source="test") with requests_mock.Mocker() as r: r.get( "https://gitlab.com/api/v4/projects/mirrors%2Fgithub%2Fdecile-team%2Fsubmodlib/repository/tags", @@ -1168,7 +1168,7 @@ def test_pep592_support_latest_version_yanked() -> None: def test_pep592_support_constraint_mismatch() -> None: constraint = constraints.Constraints() - constraint.add_constraint("setuptools-scm>=9.0.0") + constraint.add_constraint("setuptools-scm>=9.0.0", source="test") with requests_mock.Mocker() as r: r.get( "https://pypi.org/simple/setuptools-scm/", text=_response_with_data_yanked