Skip to content

Commit 47eaed9

Browse files
committed
toolshed: enforce cuda_core SPDX license policy
Refactor the SPDX checker so that package-specific license rules can be enforced cleanly while preserving the existing autofix flow. Keep the focused regression tests next to the toolshed script rather than in the routine package test collection.

Made-with: Cursor
1 parent 7738635 commit 47eaed9

File tree

2 files changed

+196
-40
lines changed

2 files changed

+196
-40
lines changed

toolshed/check_spdx.py

Lines changed: 105 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99

1010
import pathspec
1111

12-
# Intentionally puzzling together EXPECTED_SPDX_BYTES so that we don't overlook
13-
# if the identifiers are missing in this file.
14-
EXPECTED_SPDX_BYTES = (
15-
b"-".join((b"SPDX", b"License", b"Identifier: ")),
16-
b"-".join((b"SPDX", b"FileCopyrightText: ")),
17-
)
12+
# The SPDX tag prefixes are intentionally pieced together at runtime so that the
# literal tags do not appear in this file; that way we do not overlook it if
# the real identifiers are missing from this file's own header.
SPDX_LICENSE_IDENTIFIER_PREFIX = b"-".join((b"SPDX", b"License", b"Identifier: "))
SPDX_FILE_COPYRIGHT_TEXT_PREFIX = b"-".join((b"SPDX", b"FileCopyrightText: "))

# Captures the license expression that follows the license tag on the same line.
# The capture is lazy and followed by an optional block-comment closer ("*/" or
# "-->") and optional blanks, so that headers such as
#     /* <tag> Apache-2.0 */      or      <!-- <tag> Apache-2.0 -->
# and lines with trailing whitespace yield just "Apache-2.0". Expressions with
# inner spaces (e.g. "Apache-2.0 OR MIT") are still captured in full because
# the match must extend to the end of the line.
LICENSE_IDENTIFIER_REGEX = re.compile(
    re.escape(SPDX_LICENSE_IDENTIFIER_PREFIX)
    + rb"(?P<license_identifier>[^\r\n]+?)"
    + rb"[ \t]*(?:\*/|-->)?[ \t]*(?=[\r\n]|\Z)"
)

# Pairs of (repo-relative path prefix, license identifier required below it).
EXPECTED_LICENSE_IDENTIFIERS = (("cuda_core/", "Apache-2.0"),)
1820

1921
SPDX_IGNORE_FILENAME = ".spdx-ignore"
2022

@@ -47,51 +49,114 @@ def is_staged(filepath):
4749
return process.stdout.strip() != ""
4850

4951

52+
def normalize_repo_path(filepath):
    """Return ``filepath`` with forward slashes and without leading ``./`` segments."""
    posix_style = "/".join(filepath.split("\\"))
    # Drop every leading "./" so prefix checks see the bare relative path.
    while posix_style[:2] == "./":
        posix_style = posix_style[2:]
    return posix_style
57+
58+
59+
def get_expected_license_identifier(filepath):
    """Return the license identifier required for ``filepath``, or None.

    The path is normalized first; the first entry of
    ``EXPECTED_LICENSE_IDENTIFIERS`` whose prefix matches wins. ``None`` means
    no specific license is enforced for this path.
    """
    repo_path = normalize_repo_path(filepath)
    matching_identifiers = (
        required_identifier
        for path_prefix, required_identifier in EXPECTED_LICENSE_IDENTIFIERS
        if repo_path.startswith(path_prefix)
    )
    return next(matching_identifiers, None)
65+
66+
67+
def validate_required_spdx_field(filepath, blob, expected_bytes):
    """Report whether ``expected_bytes`` occurs in ``blob``.

    Prints a ``MISSING`` line naming the field and the file when it does not.
    """
    found = expected_bytes in blob
    if not found:
        print(f"MISSING {expected_bytes.decode()}{filepath!r}")
    return found
72+
73+
74+
def extract_license_identifier(blob):
    """Return the first SPDX license identifier found in ``blob`` as ``str``.

    Returns ``None`` when ``LICENSE_IDENTIFIER_REGEX`` does not match or when
    the captured bytes are not valid ASCII.
    """
    found = LICENSE_IDENTIFIER_REGEX.search(blob)
    if found is not None:
        try:
            return found["license_identifier"].decode("ascii")
        except UnicodeDecodeError:
            # Non-ASCII identifier: treated the same as no identifier at all.
            pass
    return None
82+
83+
84+
def validate_license_identifier(filepath, blob):
    """Check the SPDX license identifier in ``blob`` against the path policy.

    Prints a diagnostic and returns ``False`` when no identifier can be
    extracted, or when the path has a required license and the identifier
    differs from it. Returns ``True`` otherwise.
    """
    found_identifier = extract_license_identifier(blob)
    if found_identifier is None:
        print(f"MISSING valid SPDX license identifier in {filepath!r}")
        return False

    required_identifier = get_expected_license_identifier(filepath)
    # No policy for this path, or the policy is satisfied.
    if required_identifier is None or found_identifier == required_identifier:
        return True

    print(
        f"INVALID SPDX license identifier {found_identifier!r} "
        f"(expected {required_identifier!r}) in {filepath!r}"
    )
    return False
102+
103+
104+
def validate_or_fix_copyright(filepath, blob, fix):
    """Validate the copyright line in ``blob`` and optionally refresh its years.

    Returns a ``(ok, blob)`` tuple. ``ok`` is ``True`` only when the copyright
    line is valid and needs no update. The returned blob differs from the input
    only when ``fix`` is true and the end year of a staged file is older than
    ``CURRENT_YEAR``; ``ok`` is still ``False`` in that case.
    """
    found = re.search(COPYRIGHT_REGEX, blob)
    if found is None:
        print(f"MISSING valid copyright line in {filepath!r}")
        return False, blob

    years = found.group("years").decode()
    first_year, dash, last_year = years.partition("-")
    if not dash:
        # A single year is both the start and the end of the range.
        last_year = first_year
    elif int(first_year) > int(last_year):
        print(f"INVALID copyright years {years!r} in {filepath!r}")
        return False, blob

    # Only staged files are required to carry the current year.
    needs_update = is_staged(filepath) and int(last_year) < int(CURRENT_YEAR)
    if not needs_update:
        return True, blob

    print(f"OUTDATED copyright {years!r} (expected {CURRENT_YEAR!r}) in {filepath!r}")
    if not fix:
        return False, blob

    refreshed_years = f"{first_year}-{CURRENT_YEAR}"
    replacement = COPYRIGHT_SUB.format(refreshed_years).encode("ascii")
    return False, re.sub(COPYRIGHT_REGEX, replacement, blob)
135+
136+
50137
def find_or_fix_spdx(filepath, fix):
    """Check the SPDX header of ``filepath``; optionally fix outdated years.

    Returns ``True`` when the file is empty (or whitespace only) or when both
    the license identifier and the copyright line pass validation. When the
    copyright helper returns modified contents, they are written back to
    ``filepath``.
    """
    with open(filepath, "rb") as source_file:
        contents = source_file.read()
    if len(contents.strip()) == 0:
        return True

    # Both tags are looked up unconditionally so each missing one is reported.
    license_tag_present = validate_required_spdx_field(filepath, contents, SPDX_LICENSE_IDENTIFIER_PREFIX)
    copyright_tag_present = validate_required_spdx_field(filepath, contents, SPDX_FILE_COPYRIGHT_TEXT_PREFIX)

    license_ok = license_tag_present and validate_license_identifier(filepath, contents)

    copyright_ok = False
    if copyright_tag_present:
        copyright_ok, new_contents = validate_or_fix_copyright(filepath, contents, fix)
        if new_contents != contents:
            with open(filepath, "wb") as target_file:
                target_file.write(new_contents)

    return license_ok and copyright_ok
96161

97162

toolshed/test_check_spdx.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import importlib.util
5+
import sys
6+
from pathlib import Path
7+
from types import ModuleType
8+
9+
10+
def _install_pathspec_stub():
    """Make ``import pathspec`` succeed when the real package is unavailable.

    Does nothing if ``pathspec`` is already imported or can be imported
    normally. Otherwise registers a minimal stand-in module in ``sys.modules``
    whose ``PathSpec.from_lines(...)`` returns an object that matches no file.
    """
    if "pathspec" in sys.modules:
        return
    # Prefer the real package when it is installed: a stub left in sys.modules
    # would shadow it for everything imported later in the same process.
    if importlib.util.find_spec("pathspec") is not None:
        return

    class _StubSpec:
        def match_file(self, _filepath):
            return False

    class _StubPathSpec:
        @staticmethod
        def from_lines(_pattern_type, _lines):
            return _StubSpec()

    module = ModuleType("pathspec")
    module.PathSpec = _StubPathSpec
    sys.modules["pathspec"] = module
26+
27+
28+
def _load_check_spdx():
    """Import ``check_spdx.py`` from this file's directory and return the module.

    The pathspec stub helper is called before the module body is executed.
    """
    script_path = Path(__file__).resolve().parent / "check_spdx.py"
    module_spec = importlib.util.spec_from_file_location("check_spdx", script_path)
    assert module_spec is not None
    loader = module_spec.loader
    assert loader is not None
    _install_pathspec_stub()
    loaded_module = importlib.util.module_from_spec(module_spec)
    loader.exec_module(loaded_module)
    return loaded_module
37+
38+
39+
check_spdx = _load_check_spdx()
40+
41+
42+
def _write_spdx_file(root, relative_path, license_identifier, *, years="2025-2026"):
    """Create ``root / relative_path`` with an SPDX header and return its path.

    Parent directories are created as needed. The file holds a copyright line
    for ``years``, a license line for ``license_identifier``, a blank line and
    a one-line Python body.
    """
    target = root / relative_path
    target.parent.mkdir(parents=True, exist_ok=True)
    file_lines = [
        f"# SPDX-FileCopyrightText: Copyright (c) {years} NVIDIA CORPORATION & AFFILIATES. All rights reserved.",
        f"# SPDX-License-Identifier: {license_identifier}",
        "",
        "print('hello')",
    ]
    target.write_text("\n".join(file_lines) + "\n", encoding="ascii")
    return target
56+
57+
58+
def test_get_expected_license_identifier_normalizes_windows_paths():
    """A backslash-separated, dot-prefixed path still maps to the cuda_core policy."""
    windows_style_path = r".\cuda_core\example.py"
    required_identifier = check_spdx.get_expected_license_identifier(windows_style_path)
    assert required_identifier == "Apache-2.0"
60+
61+
62+
def test_find_or_fix_spdx_rejects_non_apache_license_under_cuda_core(tmp_path, monkeypatch, capsys):
    """A non-Apache license under cuda_core/ fails and names the expected license."""
    relative_path = "cuda_core/example.py"
    monkeypatch.chdir(tmp_path)
    monkeypatch.setattr(check_spdx, "is_staged", lambda _: False)
    _write_spdx_file(tmp_path, relative_path, "LicenseRef-NVIDIA-SOFTWARE-LICENSE")

    passed = check_spdx.find_or_fix_spdx(relative_path, fix=False)
    assert not passed

    report = capsys.readouterr().out
    assert "expected 'Apache-2.0'" in report
70+
71+
72+
def test_find_or_fix_spdx_allows_non_apache_license_outside_cuda_core(tmp_path, monkeypatch):
    """Outside cuda_core/ no specific license is enforced."""
    relative_path = "cuda_bindings/example.py"
    monkeypatch.chdir(tmp_path)
    monkeypatch.setattr(check_spdx, "is_staged", lambda _: False)
    _write_spdx_file(tmp_path, relative_path, "LicenseRef-NVIDIA-SOFTWARE-LICENSE")

    passed = check_spdx.find_or_fix_spdx(relative_path, fix=False)
    assert passed
78+
79+
80+
def test_find_or_fix_spdx_updates_outdated_copyright_when_fix_requested(tmp_path, monkeypatch, capsys):
    """With fix=True a staged file's outdated year is rewritten, yet the check still fails."""
    relative_path = "cuda_core/example.py"
    monkeypatch.chdir(tmp_path)
    monkeypatch.setattr(check_spdx, "CURRENT_YEAR", "2026")
    monkeypatch.setattr(check_spdx, "is_staged", lambda _: True)
    written_path = _write_spdx_file(tmp_path, relative_path, "Apache-2.0", years="2024")

    passed = check_spdx.find_or_fix_spdx(relative_path, fix=True)
    assert not passed

    report = capsys.readouterr().out
    assert "OUTDATED copyright '2024' (expected '2026')" in report

    rewritten_text = written_path.read_text(encoding="ascii")
    assert "Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved." in rewritten_text

0 commit comments

Comments
 (0)