Skip to content

Commit e463cd3

Browse files
committed
[cr checker]: fix duplicate header check
1 parent 55ff194 commit e463cd3

4 files changed

Lines changed: 42 additions & 17 deletions

File tree

BUILD

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ package(default_visibility = ["//visibility:public"])
1717

1818
copyright_checker(
1919
name = "copyright",
20+
exclusion = "//cr_checker/resources:exclusion",
2021
srcs = [
2122
#"//tools", # Use full label if src is a package
2223
"//:BUILD",

cr_checker/resources/BUILD

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,3 +26,11 @@ filegroup(
2626
],
2727
visibility = ["//visibility:public"],
2828
)
29+
30+
filegroup(
31+
name = "exclusion",
32+
srcs = [
33+
"exclusion.txt",
34+
],
35+
visibility = ["//visibility:public"],
36+
)

cr_checker/resources/exclusion.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
cr_checker/resources/templates.ini

cr_checker/tool/cr_checker.py

Lines changed: 32 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -408,35 +408,46 @@ def has_copyright(path, template, use_mmap, encoding, offset, config=None):
408408
return False
409409

410410

411-
def has_duplicate_copyright(path, template, use_mmap, encoding, offset):
411+
def has_any_copyright(path, use_mmap, encoding, offset):
412412
"""
413-
Checks if the copyright header appears more than once in the file.
413+
Checks if the file header contains the word "Copyright" followed later by an "SPDX-License-Identifier" tag (case-insensitive), regardless of the exact header format.
414414
415415
Args:
416416
path (Path): A `pathlib.Path` object pointing to the file to check.
417-
template (str): The copyright template to search for.
418417
use_mmap (bool): If True, uses memory-mapped file reading.
419418
encoding (str): Encoding type to use when reading the file.
420419
offset (int): Byte offset to skip (e.g. shebang line).
421420
422421
Returns:
423-
bool: True if the copyright header appears more than once, False otherwise.
422+
bool: True if "Copyright" followed by "SPDX-License-Identifier" is found in the text read from the file, False otherwise.
424423
"""
425424
load_text = load_text_from_file_with_mmap if use_mmap else load_text_from_file
425+
content = load_text(path, BYTES_TO_READ, encoding, offset)
426+
return bool(re.search(r"Copyright.*SPDX-License-Identifier", content, re.IGNORECASE | re.DOTALL))
426427

427-
lines = template.splitlines(keepends=True)
428-
regex_parts = []
429-
for line in lines:
430-
stripped_line = line.rstrip("\n")
431-
if BORDER_FILL_PATTERN.search(stripped_line):
432-
regex_parts.append(line_to_flexible_regex(line))
433-
else:
434-
formatted = line.format(year=r"\\d\{4\}\(-\\d\{4\}\)\?", author=r"\.\*")
435-
regex_parts.append(convert_bre_to_regex(formatted))
436-
template_regex = "\n?".join(regex_parts)
437428

438-
content = load_text(path, 2 * BYTES_TO_READ, encoding, offset)
439-
matches = list(re.finditer(template_regex, content))
429+
def has_duplicate_copyright(path, template, use_mmap, encoding, offset):
430+
"""
431+
Checks if more than one copyright notice is present in the file header.
432+
433+
The check is format-agnostic: it counts occurrences of ``SPDX-License-Identifier``
434+
within a window of twice the template length, so that headers written by different
435+
tools (e.g. REUSE vs. cr_checker) are both counted while string literals that
436+
embed copyright text further into the file are ignored.
437+
438+
Args:
439+
path (Path): A `pathlib.Path` object pointing to the file to check.
440+
template (str): The copyright template; its length defines the search window.
441+
use_mmap (bool): If True, uses memory-mapped file reading.
442+
encoding (str): Encoding type to use when reading the file.
443+
offset (int): Byte offset to skip (e.g. shebang line).
444+
445+
Returns:
446+
bool: True if more than one copyright notice is found, False otherwise.
447+
"""
448+
load_text = load_text_from_file_with_mmap if use_mmap else load_text_from_file
449+
content = load_text(path, 2 * len(template), encoding, offset)
450+
matches = list(re.finditer(r"SPDX-License-Identifier", content, re.IGNORECASE))
440451
if len(matches) > 1:
441452
LOGGER.debug("File %s has %d copyright headers.", path, len(matches))
442453
return True
@@ -658,7 +669,11 @@ def process_files(
658669
elif not has_copyright(
659670
item, templates[key], use_mmap, encoding, effective_offset, config
660671
):
661-
if fix:
672+
if has_any_copyright(item, use_mmap, encoding, effective_offset):
673+
LOGGER.warning(
674+
"Wrong copyright format in: %s, expected format from template", item
675+
)
676+
elif fix:
662677
if remove_offset:
663678
remove_old_header(item, encoding, remove_offset)
664679
fix_result = fix_copyright(

0 commit comments

Comments
 (0)