@@ -408,35 +408,50 @@ def has_copyright(path, template, use_mmap, encoding, offset, config=None):
408408 return False
409409
410410
411- def has_duplicate_copyright (path , template , use_mmap , encoding , offset ):
def has_any_copyright(path, use_mmap, encoding, offset):
    """
    Report whether the start of a file carries a copyright/SPDX notice.

    The first ``BYTES_TO_READ`` of the file (after ``offset``) are loaded and
    searched for the word ``Copyright`` followed, anywhere later in that text,
    by ``SPDX-License-Identifier``. The search is case-insensitive and may span
    multiple lines; the exact layout of the notice is not compared against any
    template.

    Args:
        path (Path): A `pathlib.Path` object pointing to the file to check.
        use_mmap (bool): If True, uses memory-mapped file reading.
        encoding (str): Encoding type to use when reading the file.
        offset (int): Byte offset to skip (e.g. shebang line).

    Returns:
        bool: True if such a notice is found, False otherwise.
    """
    # Pick the reader once; both loaders are called with the same arguments.
    reader = load_text_from_file_with_mmap if use_mmap else load_text_from_file
    header_text = reader(path, BYTES_TO_READ, encoding, offset)

    # DOTALL lets ".*" cross newlines, so the two markers may sit on
    # different lines of the header.
    notice = re.search(
        r"Copyright.*SPDX-License-Identifier",
        header_text,
        flags=re.IGNORECASE | re.DOTALL,
    )
    return notice is not None
426431
427- lines = template .splitlines (keepends = True )
428- regex_parts = []
429- for line in lines :
430- stripped_line = line .rstrip ("\n " )
431- if BORDER_FILL_PATTERN .search (stripped_line ):
432- regex_parts .append (line_to_flexible_regex (line ))
433- else :
434- formatted = line .format (year = r"\\d\{4\}\(-\\d\{4\}\)\?" , author = r"\.\*" )
435- regex_parts .append (convert_bre_to_regex (formatted ))
436- template_regex = "\n ?" .join (regex_parts )
437432
438- content = load_text (path , 2 * BYTES_TO_READ , encoding , offset )
439- matches = list (re .finditer (template_regex , content ))
433+ def has_duplicate_copyright (path , template , use_mmap , encoding , offset ):
434+ """
435+ Checks if more than one copyright notice is present in the file header.
436+
437+ The check is format-agnostic: it counts occurrences of ``SPDX-License-Identifier``
438+ within a window of twice the template length, so that headers written by different
439+ tools (e.g. REUSE vs. cr_checker) are both counted while string literals that
440+ embed copyright text further into the file are ignored.
441+
442+ Args:
443+ path (Path): A `pathlib.Path` object pointing to the file to check.
444+ template (str): The copyright template; its length defines the search window.
445+ use_mmap (bool): If True, uses memory-mapped file reading.
446+ encoding (str): Encoding type to use when reading the file.
447+ offset (int): Byte offset to skip (e.g. shebang line).
448+
449+ Returns:
450+ bool: True if more than one copyright notice is found, False otherwise.
451+ """
452+ load_text = load_text_from_file_with_mmap if use_mmap else load_text_from_file
453+ content = load_text (path , 2 * len (template ), encoding , offset )
454+ matches = list (re .finditer (r"SPDX-License-Identifier" , content , re .IGNORECASE ))
440455 if len (matches ) > 1 :
441456 LOGGER .debug ("File %s has %d copyright headers." , path , len (matches ))
442457 return True
@@ -500,6 +515,10 @@ def collect_inputs(inputs, exts=None):
500515 ):
501516 LOGGER .debug ("Processing file: %s" , item )
502517 all_files .append (item )
518+ elif item .is_file ():
519+ LOGGER .debug (
520+ "Skipped (no configuration for file extension): %s" , item
521+ )
503522 else :
504523 LOGGER .warning ("Skipped (input is not a valid file or directory): %s" , item )
505524 return all_files
@@ -658,7 +677,11 @@ def process_files(
658677 elif not has_copyright (
659678 item , templates [key ], use_mmap , encoding , effective_offset , config
660679 ):
661- if fix :
680+ if has_any_copyright (item , use_mmap , encoding , effective_offset ):
681+ LOGGER .warning (
682+ "Wrong copyright format in: %s, expected format from template" , item
683+ )
684+ elif fix :
662685 if remove_offset :
663686 remove_old_header (item , encoding , remove_offset )
664687 fix_result = fix_copyright (
0 commit comments