@@ -408,35 +408,46 @@ def has_copyright(path, template, use_mmap, encoding, offset, config=None):
408408 return False
409409
410410
411- def has_duplicate_copyright (path , template , use_mmap , encoding , offset ):
def has_any_copyright(path, use_mmap, encoding, offset):
    """
    Checks if any copyright notice is present in the file header, regardless of format.

    A notice is recognised when the loaded header text contains both the word
    ``Copyright`` and an ``SPDX-License-Identifier`` tag. Both are matched
    case-insensitively and may appear in either order, so headers that place
    the SPDX tag before the copyright line are detected as well.

    Args:
        path (Path): A `pathlib.Path` object pointing to the file to check.
        use_mmap (bool): If True, uses memory-mapped file reading.
        encoding (str): Encoding type to use when reading the file.
        offset (int): Byte offset to skip (e.g. shebang line).

    Returns:
        bool: True if any copyright notice is found, False otherwise.
    """
    load_text = load_text_from_file_with_mmap if use_mmap else load_text_from_file
    # Only the leading part of the file is loaded; BYTES_TO_READ is passed to the
    # loader as the size limit.
    content = load_text(path, BYTES_TO_READ, encoding, offset)

    # Look for the two markers independently. A single
    # "Copyright.*SPDX-License-Identifier" pattern only matches when the
    # copyright line comes first and misses headers that start with the SPDX tag.
    has_notice = re.search(r"Copyright", content, re.IGNORECASE) is not None
    has_license_tag = (
        re.search(r"SPDX-License-Identifier", content, re.IGNORECASE) is not None
    )
    return has_notice and has_license_tag
426427
427- lines = template .splitlines (keepends = True )
428- regex_parts = []
429- for line in lines :
430- stripped_line = line .rstrip ("\n " )
431- if BORDER_FILL_PATTERN .search (stripped_line ):
432- regex_parts .append (line_to_flexible_regex (line ))
433- else :
434- formatted = line .format (year = r"\\d\{4\}\(-\\d\{4\}\)\?" , author = r"\.\*" )
435- regex_parts .append (convert_bre_to_regex (formatted ))
436- template_regex = "\n ?" .join (regex_parts )
437428
438- content = load_text (path , 2 * BYTES_TO_READ , encoding , offset )
439- matches = list (re .finditer (template_regex , content ))
429+ def has_duplicate_copyright (path , template , use_mmap , encoding , offset ):
430+ """
431+ Checks if more than one copyright notice is present in the file header.
432+
433+ The check is format-agnostic: it counts occurrences of ``SPDX-License-Identifier``
434+ within a window of twice the template length, so that headers written by different
435+ tools (e.g. REUSE vs. cr_checker) are both counted while string literals that
436+ embed copyright text further into the file are ignored.
437+
438+ Args:
439+ path (Path): A `pathlib.Path` object pointing to the file to check.
440+ template (str): The copyright template; its length defines the search window.
441+ use_mmap (bool): If True, uses memory-mapped file reading.
442+ encoding (str): Encoding type to use when reading the file.
443+ offset (int): Byte offset to skip (e.g. shebang line).
444+
445+ Returns:
446+ bool: True if more than one copyright notice is found, False otherwise.
447+ """
448+ load_text = load_text_from_file_with_mmap if use_mmap else load_text_from_file
449+ content = load_text (path , 2 * len (template ), encoding , offset )
450+ matches = list (re .finditer (r"SPDX-License-Identifier" , content , re .IGNORECASE ))
440451 if len (matches ) > 1 :
441452 LOGGER .debug ("File %s has %d copyright headers." , path , len (matches ))
442453 return True
@@ -658,7 +669,11 @@ def process_files(
658669 elif not has_copyright (
659670 item , templates [key ], use_mmap , encoding , effective_offset , config
660671 ):
661- if fix :
672+ if has_any_copyright (item , use_mmap , encoding , effective_offset ):
673+ LOGGER .warning (
674+ "Wrong copyright format in: %s, expected format from template" , item
675+ )
676+ elif fix :
662677 if remove_offset :
663678 remove_old_header (item , encoding , remove_offset )
664679 fix_result = fix_copyright (
0 commit comments