Skip to content

Commit a271eb0

Browse files
smoparth and claude committed
feat(sbom): use annotated data types in Pydantic config
Replace generic `str` fields in SbomSettings and PurlConfig with specialized annotated types that validate input and make the schema self-documenting.

New annotated types:
- PurlType: strips, lowercases, rejects empty purl type strings
- RepositoryUrl: validates http/https URL with scheme and host
- UpstreamPurl: validates purl strings via PackageURL.from_string()
- SpdxNamespace: validates SPDX documentNamespace as a URL
- SpdxActor: validates SPDX actor format (Organization/Person/Tool/NOASSERTION)

The @field_validator on PurlConfig.upstream is replaced by the UpstreamPurl annotated type, moving validation into the type itself.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Closes: #1072
Signed-off-by: Shanmukh Pawan <smoparth@redhat.com>
1 parent 05c65ab commit a271eb0

4 files changed

Lines changed: 198 additions & 21 deletions

File tree

src/fromager/packagesettings/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,13 @@
2525
Package,
2626
PackageVersion,
2727
PatchMap,
28+
PurlType,
2829
RawAnnotations,
30+
RepositoryUrl,
31+
SpdxActor,
32+
SpdxNamespace,
2933
Template,
34+
UpstreamPurl,
3035
Variant,
3136
VariantChangelog,
3237
)
@@ -48,12 +53,17 @@
4853
"PatchMap",
4954
"ProjectOverride",
5055
"PurlConfig",
56+
"PurlType",
5157
"RawAnnotations",
58+
"RepositoryUrl",
5259
"ResolverDist",
5360
"SbomSettings",
5461
"Settings",
5562
"SettingsFile",
63+
"SpdxActor",
64+
"SpdxNamespace",
5665
"Template",
66+
"UpstreamPurl",
5767
"Variant",
5868
"VariantChangelog",
5969
"VariantInfo",

src/fromager/packagesettings/_models.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
import pydantic
1212
import yaml
13-
from packageurl import PackageURL
1413
from packaging.requirements import Requirement
1514
from packaging.utils import canonicalize_name
1615
from pydantic import Field
@@ -21,8 +20,13 @@
2120
BuildDirectory,
2221
EnvVars,
2322
Package,
23+
PurlType,
2424
RawAnnotations,
25+
RepositoryUrl,
26+
SpdxActor,
27+
SpdxNamespace,
2528
Template,
29+
UpstreamPurl,
2630
Variant,
2731
VariantChangelog,
2832
)
@@ -46,22 +50,22 @@ class SbomSettings(pydantic.BaseModel):
4650

4751
model_config = MODEL_CONFIG
4852

49-
supplier: str = "NOASSERTION"
53+
supplier: SpdxActor = "NOASSERTION"
5054
"""SPDX supplier field for the wheel package (e.g. ``Organization: ExampleCo``)"""
5155

52-
namespace: str = "https://spdx.org/spdxdocs"
56+
namespace: SpdxNamespace = "https://spdx.org/spdxdocs"
5357
"""Base URL for the SPDX documentNamespace"""
5458

55-
creators: list[str] = Field(default_factory=list)
59+
creators: list[SpdxActor] = Field(default_factory=list)
5660
"""Additional SPDX creator entries (e.g. ``Organization: ExampleCo``)
5761
5862
The fromager tool creator entry is always added automatically.
5963
"""
6064

61-
purl_type: str = "pypi"
65+
purl_type: PurlType = "pypi"
6266
"""Default purl type for all packages (e.g. ``pypi``, ``generic``)"""
6367

64-
repository_url: str | None = None
68+
repository_url: RepositoryUrl | None = None
6569
"""Default purl ``repository_url`` qualifier for all packages
6670
6771
When set, this URL is added to every purl as a qualifier
@@ -89,7 +93,7 @@ class PurlConfig(pydantic.BaseModel):
8993

9094
model_config = MODEL_CONFIG
9195

92-
type: str | None = None
96+
type: PurlType | None = None
9397
"""Override the purl type (e.g. ``generic`` instead of ``pypi``)"""
9498

9599
namespace: str | None = None
@@ -101,13 +105,13 @@ class PurlConfig(pydantic.BaseModel):
101105
version: str | None = None
102106
"""Override the purl version component (defaults to the resolved version)"""
103107

104-
repository_url: str | None = None
108+
repository_url: RepositoryUrl | None = None
105109
"""Per-package override for the purl ``repository_url`` qualifier.
106110
107111
Overrides the global ``sbom.repository_url`` setting for this package.
108112
"""
109113

110-
upstream: str | None = None
114+
upstream: UpstreamPurl | None = None
111115
"""Full purl string identifying the upstream source package.
112116
113117
When set, this is used as the upstream identity in the SBOM's
@@ -118,18 +122,6 @@ class PurlConfig(pydantic.BaseModel):
118122
purl without the ``repository_url`` qualifier.
119123
"""
120124

121-
@pydantic.field_validator("upstream")
122-
@classmethod
123-
def validate_upstream_purl(cls, v: str | None) -> str | None:
124-
"""Validate that upstream is a valid purl string."""
125-
if v is None:
126-
return v
127-
try:
128-
PackageURL.from_string(v)
129-
except ValueError as err:
130-
raise ValueError(f"invalid upstream purl {v!r}") from err
131-
return v
132-
133125

134126
class ResolverDist(pydantic.BaseModel):
135127
"""Packages resolver dist

src/fromager/packagesettings/_typedefs.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
from __future__ import annotations
44

55
import pathlib
6+
import re
67
import typing
78
from collections.abc import Mapping
9+
from urllib.parse import urlparse
810

911
import pydantic
12+
from packageurl import PackageURL
1013
from packaging.utils import NormalizedName, canonicalize_name
1114
from packaging.version import Version
1215
from pydantic_core import CoreSchema, core_schema
@@ -98,6 +101,99 @@ def _validate_envkey(v: typing.Any) -> str:
98101
GlobalChangelog = Mapping[Variant, list[str]]
99102
VariantChangelog = Mapping[PackageVersion, list[str]]
100103

104+
# SPDX actor values are single-line ``<Category>: <name>`` strings.  The
# separator is limited to spaces/tabs and the name may not contain CR/LF, so
# a value spanning several lines cannot match.  ``\Z`` anchors at the true
# end of the string (``$`` would also match just before a trailing newline).
_SPDX_ACTOR_RE = re.compile(r"^(Organization|Person|Tool):[ \t]+\S[^\r\n]*\Z")
105+
106+
107+
def _validate_url(v: str) -> str:
    """Validate that *v* has an ``http`` or ``https`` scheme and a host.

    Surrounding whitespace is stripped before parsing; the stripped string
    is returned.  Raises ``TypeError`` when *v* is not a ``str`` and
    ``ValueError`` when the scheme is not ``http``/``https`` or the URL has
    no host name.
    """
    if not isinstance(v, str):
        # NOTE(review): pydantic v2 documents ValueError/AssertionError as the
        # exceptions it wraps into ValidationError -- confirm that letting a
        # TypeError propagate from this validator is intended.
        raise TypeError(f"expected str, got {type(v)}: {v!r}")
    v = v.strip()
    parsed = urlparse(v)
    if parsed.scheme not in ("http", "https"):
        raise ValueError(f"URL must use http or https scheme, got {v!r}")
    # ``netloc`` is non-empty for inputs such as ``http://:8080`` or
    # ``https://user@`` that carry no host; ``hostname`` is ``None`` for them.
    if not parsed.hostname:
        raise ValueError(f"URL is missing a host/netloc component: {v!r}")
    return v
118+
119+
120+
# purl type (e.g. "pypi", "generic", "github")
121+
def _validate_purl_type(v: str) -> str:
    """Strip, lowercase, and validate a purl type string.

    Returns the normalized type.  Raises ``TypeError`` when *v* is not a
    ``str`` and ``ValueError`` when the normalized value is empty, starts
    with a digit, or contains anything other than ASCII letters, digits,
    ``.``, ``+`` and ``-`` (the character set the purl specification allows
    for the type component).
    """
    if not isinstance(v, str):
        raise TypeError(f"expected str, got {type(v)}: {v!r}")
    v = v.strip().lower()
    if not v:
        raise ValueError("purl type must not be empty")
    # Checked after lowercasing, so only lowercase letters need listing.
    allowed = "abcdefghijklmnopqrstuvwxyz0123456789.+-"
    if v[0] in "0123456789" or any(ch not in allowed for ch in v):
        raise ValueError(
            "purl type must contain only ASCII letters, digits, '.', '+' "
            f"and '-' and must not start with a digit, got {v!r}"
        )
    return v
129+
130+
131+
# ``str`` field type whose raw input is passed through
# ``_validate_purl_type`` as a pydantic "before" validator.
PurlType = typing.Annotated[str, pydantic.BeforeValidator(_validate_purl_type)]
135+
136+
137+
# repository URL used as a purl qualifier
138+
# ``str`` field type whose raw input is passed through ``_validate_url``
# as a pydantic "before" validator.
RepositoryUrl = typing.Annotated[str, pydantic.BeforeValidator(_validate_url)]
142+
143+
144+
# full purl string identifying an upstream source package
145+
def _validate_upstream_purl(v: str) -> str:
    """Return *v* without surrounding whitespace if it parses as a purl.

    Raises ``TypeError`` when *v* is not a ``str``.  A ``ValueError`` from
    ``PackageURL.from_string()`` is re-raised as a ``ValueError`` that names
    the offending value and chains the original error.
    """
    if not isinstance(v, str):
        raise TypeError(f"expected str, got {type(v)}: {v!r}")
    purl_text = v.strip()
    try:
        # Only the parse outcome matters; the parsed object is discarded.
        PackageURL.from_string(purl_text)
    except ValueError as err:
        raise ValueError(f"invalid upstream purl {purl_text!r}: {err}") from err
    else:
        return purl_text
155+
156+
157+
# ``str`` field type whose raw input is passed through
# ``_validate_upstream_purl`` as a pydantic "before" validator.
UpstreamPurl = typing.Annotated[str, pydantic.BeforeValidator(_validate_upstream_purl)]
161+
162+
163+
# SPDX documentNamespace base URL
164+
# ``str`` field type whose raw input is passed through ``_validate_url``
# as a pydantic "before" validator (same check as ``RepositoryUrl``).
SpdxNamespace = typing.Annotated[str, pydantic.BeforeValidator(_validate_url)]
168+
169+
170+
# SPDX actor value for supplier / creator fields
171+
def _validate_spdx_actor(v: str) -> str:
    """Check that *v* is an SPDX 2.3 actor string and return it stripped.

    Accepted values are the literal ``NOASSERTION`` or anything matching
    ``_SPDX_ACTOR_RE``, i.e. ``<Category>: <name>`` with a category of
    ``Organization``, ``Person``, or ``Tool``.  Raises ``TypeError`` for
    non-``str`` input and ``ValueError`` for any other string.
    """
    if not isinstance(v, str):
        raise TypeError(f"expected str, got {type(v)}: {v!r}")
    actor = v.strip()
    if actor == "NOASSERTION" or _SPDX_ACTOR_RE.match(actor):
        return actor
    raise ValueError(
        f"SPDX actor must be 'NOASSERTION' or "
        f"'Organization: <name>' / 'Person: <name>' / 'Tool: <name>', "
        f"got {actor!r}"
    )
189+
190+
191+
# ``str`` field type whose raw input is passed through
# ``_validate_spdx_actor`` as a pydantic "before" validator.
SpdxActor = typing.Annotated[str, pydantic.BeforeValidator(_validate_spdx_actor)]
195+
196+
101197
# Annotations
102198
RawAnnotations = Mapping[str, str]
103199

tests/test_packagesettings.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,14 @@
1717
Package,
1818
PackageBuildInfo,
1919
PackageSettings,
20+
PurlType,
21+
RepositoryUrl,
2022
ResolverDist,
2123
Settings,
2224
SettingsFile,
25+
SpdxActor,
26+
SpdxNamespace,
27+
UpstreamPurl,
2328
Variant,
2429
substitute_template,
2530
)
@@ -490,6 +495,80 @@ def test_type_builddirectory() -> None:
490495
ta.validate_python("/absolute/path")
491496

492497

498+
def test_type_purl_type() -> None:
    """PurlType strips and lowercases input and rejects blank strings."""
    adapter = pydantic.TypeAdapter(PurlType)
    accepted = [
        ("pypi", "pypi"),
        (" Generic ", "generic"),
        ("GITHUB", "github"),
    ]
    for raw, expected in accepted:
        assert adapter.validate_python(raw) == expected
    for blank in ("", " "):
        with pytest.raises(ValueError):
            adapter.validate_python(blank)
508+
509+
510+
def test_type_repository_url() -> None:
    """RepositoryUrl keeps http(s) URLs (stripped) and rejects other input."""
    adapter = pydantic.TypeAdapter(RepositoryUrl)
    accepted = [
        ("https://example.com/simple", "https://example.com/simple"),
        ("http://packages.redhat.com", "http://packages.redhat.com"),
        (" https://example.com ", "https://example.com"),
    ]
    for raw, expected in accepted:
        assert adapter.validate_python(raw) == expected
    for rejected in ("not-a-url", "ftp://files.example.com", ""):
        with pytest.raises(ValueError):
            adapter.validate_python(rejected)
526+
527+
528+
def test_type_upstream_purl() -> None:
    """UpstreamPurl returns valid purls unchanged and rejects non-purls."""
    adapter = pydantic.TypeAdapter(UpstreamPurl)
    for purl in (
        "pkg:pypi/flask@2.0",
        "pkg:github/vllm-project/bart-plugin@v0.2.0",
    ):
        assert adapter.validate_python(purl) == purl
    for rejected in ("invalid-not-purl", ""):
        with pytest.raises(ValueError):
            adapter.validate_python(rejected)
540+
541+
542+
def test_type_spdx_namespace() -> None:
    """SpdxNamespace returns valid URLs unchanged and rejects non-URLs."""
    adapter = pydantic.TypeAdapter(SpdxNamespace)
    for url in ("https://spdx.org/spdxdocs", "https://www.example.com"):
        assert adapter.validate_python(url) == url
    for rejected in ("not-a-url", ""):
        with pytest.raises(ValueError):
            adapter.validate_python(rejected)
553+
554+
555+
def test_type_spdx_actor() -> None:
    """SpdxActor accepts NOASSERTION and ``<Category>: <name>`` values only."""
    adapter = pydantic.TypeAdapter(SpdxActor)
    for actor in (
        "NOASSERTION",
        "Organization: ExampleCo",
        "Person: Jane Doe",
        "Tool: fromager-1.0",
    ):
        assert adapter.validate_python(actor) == actor
    # Missing category, missing name, whitespace-only name, and empty input.
    for rejected in ("ExampleCo", "Organization:", "Organization: ", ""):
        with pytest.raises(ValueError):
            adapter.validate_python(rejected)
570+
571+
493572
def test_global_settings(testdata_path: pathlib.Path) -> None:
494573
filename = testdata_path / "context/overrides/settings.yaml"
495574
gs = SettingsFile.from_file(filename)

0 commit comments

Comments
 (0)