|
9 | 9 |
|
10 | 10 | import pathspec |
11 | 11 |
|
12 | | -# Intentionally puzzling together EXPECTED_SPDX_BYTES so that we don't overlook |
13 | | -# if the identifiers are missing in this file. |
14 | | -EXPECTED_SPDX_BYTES = ( |
15 | | - b"-".join((b"SPDX", b"License", b"Identifier: ")), |
16 | | - b"-".join((b"SPDX", b"FileCopyrightText: ")), |
17 | | -) |
| 12 | +# Intentionally puzzling together SPDX prefixes so that we don't overlook if the |
| 13 | +# identifiers are missing in this file. |
| 14 | +SPDX_LICENSE_IDENTIFIER_PREFIX = b"-".join((b"SPDX", b"License", b"Identifier: ")) |
| 15 | +SPDX_FILE_COPYRIGHT_TEXT_PREFIX = b"-".join((b"SPDX", b"FileCopyrightText: ")) |
| 16 | + |
| 17 | +LICENSE_IDENTIFIER_REGEX = re.compile(re.escape(SPDX_LICENSE_IDENTIFIER_PREFIX) + rb"(?P<license_identifier>[^\r\n]+)") |
| 18 | + |
| 19 | +EXPECTED_LICENSE_IDENTIFIERS = (("cuda_core/", "Apache-2.0"),) |
18 | 20 |
|
19 | 21 | SPDX_IGNORE_FILENAME = ".spdx-ignore" |
20 | 22 |
|
@@ -47,51 +49,114 @@ def is_staged(filepath): |
47 | 49 | return process.stdout.strip() != "" |
48 | 50 |
|
49 | 51 |
|
| 52 | +def normalize_repo_path(filepath): |
| 53 | + normalized_path = filepath.replace("\\", "/") |
| 54 | + while normalized_path.startswith("./"): |
| 55 | + normalized_path = normalized_path[2:] |
| 56 | + return normalized_path |
| 57 | + |
| 58 | + |
| 59 | +def get_expected_license_identifier(filepath): |
| 60 | + normalized_path = normalize_repo_path(filepath) |
| 61 | + for prefix, license_identifier in EXPECTED_LICENSE_IDENTIFIERS: |
| 62 | + if normalized_path.startswith(prefix): |
| 63 | + return license_identifier |
| 64 | + return None |
| 65 | + |
| 66 | + |
| 67 | +def validate_required_spdx_field(filepath, blob, expected_bytes): |
| 68 | + if expected_bytes in blob: |
| 69 | + return True |
| 70 | + print(f"MISSING {expected_bytes.decode()}{filepath!r}") |
| 71 | + return False |
| 72 | + |
| 73 | + |
| 74 | +def extract_license_identifier(blob): |
| 75 | + match = LICENSE_IDENTIFIER_REGEX.search(blob) |
| 76 | + if match is None: |
| 77 | + return None |
| 78 | + try: |
| 79 | + return match.group("license_identifier").decode("ascii") |
| 80 | + except UnicodeDecodeError: |
| 81 | + return None |
| 82 | + |
| 83 | + |
| 84 | +def validate_license_identifier(filepath, blob): |
| 85 | + license_identifier = extract_license_identifier(blob) |
| 86 | + if license_identifier is None: |
| 87 | + print(f"MISSING valid SPDX license identifier in {filepath!r}") |
| 88 | + return False |
| 89 | + |
| 90 | + expected_license_identifier = get_expected_license_identifier(filepath) |
| 91 | + if expected_license_identifier is None: |
| 92 | + return True |
| 93 | + |
| 94 | + if license_identifier != expected_license_identifier: |
| 95 | + print( |
| 96 | + f"INVALID SPDX license identifier {license_identifier!r} " |
| 97 | + f"(expected {expected_license_identifier!r}) in {filepath!r}" |
| 98 | + ) |
| 99 | + return False |
| 100 | + |
| 101 | + return True |
| 102 | + |
| 103 | + |
| 104 | +def validate_or_fix_copyright(filepath, blob, fix): |
| 105 | + match = re.search(COPYRIGHT_REGEX, blob) |
| 106 | + if match is None: |
| 107 | + print(f"MISSING valid copyright line in {filepath!r}") |
| 108 | + return False, blob |
| 109 | + |
| 110 | + years = match.group("years").decode() |
| 111 | + if "-" in years: |
| 112 | + start_year, end_year = years.split("-", 1) |
| 113 | + if int(start_year) > int(end_year): |
| 114 | + print(f"INVALID copyright years {years!r} in {filepath!r}") |
| 115 | + return False, blob |
| 116 | + else: |
| 117 | + start_year = end_year = years |
| 118 | + |
| 119 | + if not is_staged(filepath) or int(end_year) >= int(CURRENT_YEAR): |
| 120 | + return True, blob |
| 121 | + |
| 122 | + print(f"OUTDATED copyright {years!r} (expected {CURRENT_YEAR!r}) in {filepath!r}") |
| 123 | + if not fix: |
| 124 | + return False, blob |
| 125 | + |
| 126 | + new_years = f"{start_year}-{CURRENT_YEAR}" |
| 127 | + return ( |
| 128 | + False, |
| 129 | + re.sub( |
| 130 | + COPYRIGHT_REGEX, |
| 131 | + COPYRIGHT_SUB.format(new_years).encode("ascii"), |
| 132 | + blob, |
| 133 | + ), |
| 134 | + ) |
| 135 | + |
| 136 | + |
50 | 137 | def find_or_fix_spdx(filepath, fix): |
51 | 138 | with open(filepath, "rb") as f: |
52 | 139 | blob = f.read() |
53 | 140 | if len(blob.strip()) == 0: |
54 | 141 | return True |
55 | 142 |
|
56 | 143 | good = True |
57 | | - for expected_bytes in EXPECTED_SPDX_BYTES: |
58 | | - if expected_bytes not in blob: |
59 | | - print(f"MISSING {expected_bytes.decode()}{filepath!r}") |
60 | | - good = False |
61 | | - continue |
62 | | - |
63 | | - match = re.search(COPYRIGHT_REGEX, blob) |
64 | | - if match is None: |
65 | | - print(f"MISSING valid copyright line in {filepath!r}") |
66 | | - good = False |
67 | | - continue |
| 144 | + has_license_identifier = validate_required_spdx_field(filepath, blob, SPDX_LICENSE_IDENTIFIER_PREFIX) |
| 145 | + has_copyright = validate_required_spdx_field(filepath, blob, SPDX_FILE_COPYRIGHT_TEXT_PREFIX) |
68 | 146 |
|
69 | | - years = match.group("years").decode() |
70 | | - if "-" in years: |
71 | | - start_year, end_year = years.split("-", 1) |
72 | | - if int(start_year) > int(end_year): |
73 | | - print(f"INVALID copyright years {years!r} in {filepath!r}") |
74 | | - good = False |
75 | | - continue |
76 | | - else: |
77 | | - start_year = end_year = years |
| 147 | + if not has_license_identifier or not validate_license_identifier(filepath, blob): |
| 148 | + good = False |
78 | 149 |
|
79 | | - staged = is_staged(filepath) |
80 | | - |
81 | | - if staged and int(end_year) < int(CURRENT_YEAR): |
82 | | - print(f"OUTDATED copyright {years!r} (expected {CURRENT_YEAR!r}) in {filepath!r}") |
| 150 | + if not has_copyright: |
| 151 | + good = False |
| 152 | + else: |
| 153 | + copyright_ok, updated_blob = validate_or_fix_copyright(filepath, blob, fix) |
| 154 | + if updated_blob != blob: |
| 155 | + with open(filepath, "wb") as f: |
| 156 | + f.write(updated_blob) |
| 157 | + if not copyright_ok: |
83 | 158 | good = False |
84 | 159 |
|
85 | | - if fix: |
86 | | - new_years = f"{start_year}-{CURRENT_YEAR}" |
87 | | - blob = re.sub( |
88 | | - COPYRIGHT_REGEX, |
89 | | - COPYRIGHT_SUB.format(new_years).encode("ascii"), |
90 | | - blob, |
91 | | - ) |
92 | | - with open(filepath, "wb") as f: |
93 | | - f.write(blob) |
94 | | - |
95 | 160 | return good |
96 | 161 |
|
97 | 162 |
|
|
0 commit comments