# Number of bytes read from the start of a file when looking for the header.
BYTES_TO_READ = 4 * 1024
DEFAULT_AUTHOR = "Contributors to the Eclipse Foundation"

# Single source of truth for the characters that may draw a comment border.
# Both patterns below are derived from it so they cannot drift apart.
_FILL_CHAR_CLASS = r"[/*#'\-=+]"
# A run of five or more *identical* fill characters (one captured character
# followed by at least four repetitions of it) marks a border line.
BORDER_FILL_PATTERN = re.compile("(" + _FILL_CHAR_CLASS + r")\1{4,}")
# Regex fragment substituted for such a run: one or more fill characters of
# any kind, so a border drawn with a different character is still accepted.
FILL_CHARS_REGEX = _FILL_CHAR_CLASS + "+"

# Root logger (``getLogger()`` without a name returns the root logger).
LOGGER = logging.getLogger()
3336
3437COLORS = {
@@ -139,6 +142,28 @@ def convert_bre_to_regex(template: str) -> str:
139142 return escaped
140143
141144
def line_to_flexible_regex(line: str) -> str:
    r"""
    Convert a border line to a regex that accepts any fill characters.

    Every run matched by ``BORDER_FILL_PATTERN`` (five or more identical fill
    characters, e.g. ``*****``) is replaced with ``FILL_CHARS_REGEX``
    (``[/*#'\-=+]+``) so that alternative styles (e.g. ``/////``) are also
    accepted. Text outside those runs is escaped and must match literally.

    Args:
        line (str): A single template line, optionally terminated by a newline.

    Returns:
        str: A regex fragment for the line. If ``line`` ended with a newline,
        the fragment ends with a literal newline as well.
    """
    has_newline = line.endswith("\n")
    # Strip only the line terminator: ``str.rstrip`` treats its argument as a
    # set of characters, so including a space there would also drop trailing
    # spaces that belong to the template text.
    body = line.rstrip("\n")

    pieces = []
    cursor = 0
    for run in BORDER_FILL_PATTERN.finditer(body):
        # Literal text between the previous run and this one.
        pieces.append(re.escape(body[cursor : run.start()]))
        pieces.append(FILL_CHARS_REGEX)
        cursor = run.end()
    # Literal text after the last run (the whole line when there is no run).
    pieces.append(re.escape(body[cursor:]))

    if has_newline:
        pieces.append("\n")
    return "".join(pieces)
165+
166+
142167def load_templates (path ):
143168 """
144169 Loads the copyright templates from a configuration file.
@@ -196,7 +221,7 @@ def load_exclusion(path):
196221 path (str): Path to the exclusion file.
197222
198223 Returns:
199- tuple(list, bool): a list of files that are excluded from the coypright check and a boolean indicating whether
224+ tuple(list, bool): a list of files that are excluded from the copyright check and a boolean indicating whether
200225 all paths listed in the exclusion file exist and are files.
201226 """
202227
@@ -362,13 +387,18 @@ def has_copyright(path, template, use_mmap, encoding, offset, config=None):
362387 IOError: If there is an error opening or reading the file.
363388 """
364389
365- load_text = load_text_from_file
366- if use_mmap :
367- load_text = load_text_from_file_with_mmap
390+ load_text = load_text_from_file_with_mmap if use_mmap else load_text_from_file
368391
369- template_regex = convert_bre_to_regex (
370- template .format (year = r"\\d\{4\}" , author = r"\.\*" )
371- )
392+ lines = template .splitlines (keepends = True )
393+ regex_parts = []
394+ for line in lines :
395+ stripped_line = line .rstrip ("\n " )
396+ if BORDER_FILL_PATTERN .search (stripped_line ):
397+ regex_parts .append (line_to_flexible_regex (line ))
398+ else :
399+ formatted = line .format (year = r"\\d\{4\}" , author = r"\.\*" )
400+ regex_parts .append (convert_bre_to_regex (formatted ))
401+ template_regex = "" .join (regex_parts ) + "\n ?"
372402
373403 if re .match (template_regex , load_text (path , BYTES_TO_READ , encoding , offset )):
374404 LOGGER .debug ("File %s has copyright." , path )
@@ -378,6 +408,41 @@ def has_copyright(path, template, use_mmap, encoding, offset, config=None):
378408 return False
379409
380410
def has_duplicate_copyright(path, template, use_mmap, encoding, offset):
    """
    Checks if the copyright header appears more than once in the file.

    The template is turned into a regex line by line: lines containing a
    border run (see ``BORDER_FILL_PATTERN``) go through
    ``line_to_flexible_regex``; all other lines get their ``year`` / ``author``
    placeholders filled in and are passed to ``convert_bre_to_regex``. Matches
    are searched anywhere in the first ``2 * BYTES_TO_READ`` units of text
    requested from the loader, so a repeated header beyond that window is not
    detected.

    Args:
        path (Path): A `pathlib.Path` object pointing to the file to check.
        template (str): The copyright template to search for.
        use_mmap (bool): If True, uses memory-mapped file reading.
        encoding (str): Encoding type to use when reading the file.
        offset (int): Byte offset to skip (e.g. shebang line).

    Returns:
        bool: True if the copyright header appears more than once, False otherwise.
        Always False for an empty or whitespace-only template.
    """
    # An empty or whitespace-only template would compile to a pattern that
    # matches at (almost) every position and flag every file as a duplicate.
    if not template.strip():
        return False

    load_text = load_text_from_file_with_mmap if use_mmap else load_text_from_file

    regex_parts = []
    for line in template.splitlines(keepends=True):
        if BORDER_FILL_PATTERN.search(line.rstrip("\n")):
            regex_parts.append(line_to_flexible_regex(line))
        else:
            # Placeholders are substituted before conversion; presumably
            # convert_bre_to_regex turns them into real regex tokens
            # (NOTE(review): confirm against that helper).
            formatted = line.format(year=r"\\d\{4\}", author=r"\.\*")
            regex_parts.append(convert_bre_to_regex(formatted))
    # Each part comes from a line split with keepends=True, so the parts are
    # concatenated directly, as has_copyright does, without an extra separator.
    template_regex = "".join(regex_parts)

    content = load_text(path, 2 * BYTES_TO_READ, encoding, offset)
    # Count non-overlapping matches without materializing the match objects.
    header_count = sum(1 for _ in re.finditer(template_regex, content))
    if header_count > 1:
        LOGGER.debug("File %s has %d copyright headers.", path, header_count)
        return True
    return False
444+
445+
381446def get_files_from_dir (directory , exts = None ):
382447 """
383448 Finds files in the specified directories. Filters by extensions if provided.
@@ -520,6 +585,7 @@ def fix_copyright(path, copyright_text, encoding, offset, config=None) -> bool:
520585 copyright_text .format (
521586 year = datetime .now ().year , author = get_author_from_config (config )
522587 )
588+ + "\n "
523589 )
524590 for chunk in iter (lambda : temp .read (4096 ), "" ):
525591 handle .write (chunk )
@@ -562,7 +628,7 @@ def process_files(
562628 Returns:
563629 int: The number of files that do not contain the required copyright text.
564630 """
565- results = {"no_copyright" : 0 , "fixed" : 0 }
631+ results = {"no_copyright" : 0 , "fixed" : 0 , "duplicate_copyright" : 0 }
566632 for item in files :
567633 name = Path (item ).name
568634 key = name if name == "BUILD" else Path (item ).suffix [1 :]
@@ -584,7 +650,12 @@ def process_files(
584650 shebang_offset = detect_shebang_offset (item , encoding )
585651 effective_offset = offset + shebang_offset if offset == 0 else offset
586652
587- if not has_copyright (
653+ if has_duplicate_copyright (
654+ item , templates [key ], use_mmap , encoding , effective_offset
655+ ):
656+ LOGGER .error ("Duplicate copyright header in: %s" , item )
657+ results ["duplicate_copyright" ] += 1
658+ elif not has_copyright (
588659 item , templates [key ], use_mmap , encoding , effective_offset , config
589660 ):
590661 if fix :
@@ -771,6 +842,7 @@ def main(argv=None):
771842 )
772843 total_no = results ["no_copyright" ]
773844 total_fixes = results ["fixed" ]
845+ total_duplicates = results ["duplicate_copyright" ]
774846
775847 LOGGER .info ("=" * 64 )
776848 LOGGER .info ("Process completed." )
@@ -780,6 +852,12 @@ def main(argv=None):
780852 total_no ,
781853 COLORS ["ENDC" ],
782854 )
855+ LOGGER .info (
856+ "Total files with duplicate copyright: %s%d%s" ,
857+ COLORS ["RED" ] if total_duplicates > 0 else COLORS ["GREEN" ],
858+ total_duplicates ,
859+ COLORS ["ENDC" ],
860+ )
783861 if not exclusion_valid :
784862 LOGGER .info ("The exclusion file contains paths that do not exist." )
785863 if args .fix :
@@ -798,7 +876,7 @@ def main(argv=None):
798876 )
799877 LOGGER .info ("=" * 64 )
800878
801- return 0 if (total_no == 0 and exclusion_valid ) else 1
879+ return 0 if (total_no == 0 and total_duplicates == 0 and exclusion_valid ) else 1
802880
803881
804882if __name__ == "__main__" :
0 commit comments