1+ #!/usr/bin/env python3
2+
3+ #
4+ # Copyright OpenSearch Contributors
5+ # SPDX-License-Identifier: Apache-2.0
6+ #
7+ # The OpenSearch Contributors require contributions made to
8+ # this file be licensed under the Apache-2.0 license or a
9+ # compatible open source license.
10+ #
11+
r"""
License Header Compliance Checker for OpenSearch Data Prepper

This script checks that files contain the required license headers
as specified in CONTRIBUTING.md.

Usage:
    # Pass the files to check as arguments ...
    python check-license-headers.py file1.java file2.py ...

    # ... or, when no arguments are given, pipe one path per line on stdin.
    printf 'file1.java\nfile2.py\n' | python check-license-headers.py
"""
22+
23+ import os
24+ import sys
25+ from pathlib import Path
26+ from typing import List
27+
# File suffixes (lowercase, including the leading dot) that require a
# license header.
SUPPORTED_EXTENSIONS = {
    # Java ecosystem
    '.java',
    '.groovy',
    '.gradle',
    # Python
    '.py',
    # Shell scripts
    '.sh',
    '.bash',
    '.zsh',
    # YAML files
    '.yaml',
    '.yml',
    # Properties files
    '.properties',
}
36+
def needs_license_header(file_path: str) -> bool:
    """Report whether *file_path* is of a type that must carry a license header.

    The decision is based solely on the path's final suffix, lowercased and
    looked up in ``SUPPORTED_EXTENSIONS``; the file itself is not opened.
    """
    suffix = Path(file_path).suffix
    return suffix.lower() in SUPPORTED_EXTENSIONS
41+
def check_file_header(file_path: str, max_lines: int = 15) -> bool:
    """Check whether a file carries the complete license header.

    Only the first ``max_lines`` lines of the file are searched. Each of the
    five required header phrases must appear somewhere in that window; the
    comment syntax surrounding them is not checked.

    Args:
        file_path: Path of the file to inspect.
        max_lines: Number of leading lines to search for the header.

    Returns:
        ``False`` only when the file was read and at least one required
        phrase is absent from its first ``max_lines`` lines. ``True`` when
        every phrase is present, and also when the file does not exist or
        cannot be read, so that such files are skipped instead of reported.
    """
    required_components = (
        'Copyright OpenSearch Contributors',
        'SPDX-License-Identifier: Apache-2.0',
        'The OpenSearch Contributors require contributions made to',
        'this file be licensed under the Apache-2.0 license or a',
        'compatible open source license.',
    )

    try:
        # errors='ignore' drops undecodable bytes, so binary or non-UTF-8
        # content cannot raise a decoding error here.
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            # zip() stops as soon as range() is exhausted, so at most
            # max_lines lines are pulled from the file.
            head = ''.join(line for _, line in zip(range(max_lines), f))
    except (FileNotFoundError, NotADirectoryError, ValueError):
        # File might have been deleted, skip. ValueError covers a path with
        # an embedded NUL, which cannot name an existing file either.
        return True
    except OSError as e:
        print(f"Error reading file {file_path} : {e} ", file=sys.stderr)
        return True  # Skip files we can't read

    # All components must be present
    return all(component in head for component in required_components)
77+
def get_file_type_description(file_path: str) -> str:
    """Return a short human-readable label for the kind of file at *file_path*.

    The label is selected by the path's lowercased final suffix. Any suffix
    without an entry in the table below is labelled ``'Source file'``.
    """
    labels_by_suffix = {
        '.java': 'Java/Gradle file',
        '.groovy': 'Java/Gradle file',
        '.gradle': 'Java/Gradle file',
        '.py': 'Python file',
        '.sh': 'Shell script',
        '.bash': 'Shell script',
        '.zsh': 'Shell script',
        '.yaml': 'YAML file',
        '.yml': 'YAML file',
        '.properties': 'Properties file',
    }
    suffix = Path(file_path).suffix.lower()
    return labels_by_suffix.get(suffix, 'Source file')
95+
def get_files_to_check() -> List[str]:
    """Collect the paths to check from the command line or, failing that, stdin.

    When any command-line arguments are present they are returned as given.
    Otherwise stdin is read to the end, one path per line; surrounding
    whitespace is stripped and blank lines are dropped.
    """
    cli_paths = sys.argv[1:]
    if cli_paths:
        return cli_paths

    stripped_lines = (raw_line.strip() for raw_line in sys.stdin)
    return [entry for entry in stripped_lines if entry]
109+
def _write_github_output(name: str, value: str) -> None:
    """Append output *name* with *value* to the ``GITHUB_OUTPUT`` file, if set.

    Does nothing when the ``GITHUB_OUTPUT`` environment variable is unset or
    empty. An empty value is written as ``name=``; any other value is written
    in the delimiter ("heredoc") form so that it may span several lines.
    """
    output_path = os.environ.get('GITHUB_OUTPUT')
    if not output_path:
        return

    if value:
        # GitHub Actions ends a multiline value at the first line equal to
        # the delimiter, so the delimiter must sit alone on its line and must
        # not collide with any line of the value (paths given on the command
        # line may contain newlines).
        delimiter = 'EOF'
        value_lines = value.splitlines()
        while delimiter in value_lines:
            delimiter += '_EOF'
        record = f"{name}<<{delimiter}\n{value}\n{delimiter}\n"
    else:
        record = f"{name}=\n"

    # Explicit UTF-8: the default text encoding is platform dependent and
    # file paths may contain non-ASCII characters.
    with open(output_path, 'a', encoding='utf-8') as f:
        f.write(record)


def main():
    """Check every requested file and report those missing the license header.

    Paths come from ``get_files_to_check()``. Paths that do not exist, or
    whose type needs no header, are reported and skipped. Progress is printed
    to stdout. The ``violations`` output is written for GitHub Actions when
    ``GITHUB_OUTPUT`` is set: a Markdown-style bullet list of offending files,
    or empty when there are none.

    Returns normally when there is nothing to check or every file passes;
    calls ``sys.exit(1)`` when at least one violation is found.
    """
    files_to_check = get_files_to_check()

    if not files_to_check:
        print("No files to check", file=sys.stderr)
        return

    print(f"Checking {len(files_to_check)} files for license headers...")

    violations = []

    for file_path in files_to_check:
        print(f"Checking: {file_path} ")

        if not Path(file_path).exists():
            print(f" File not found: {file_path} ")
            continue

        # Skip if doesn't need header
        if not needs_license_header(file_path):
            print(f" Skipped (no header needed): {file_path} ")
            continue

        # Check header
        if check_file_header(file_path):
            print(f" ✅ Header OK: {file_path} ")
            continue

        file_type = get_file_type_description(file_path)
        violations.append(f"- `{file_path} ` ({file_type} missing license header)")
        print(f" ❌ Missing license header: {file_path} ")

    # Output results
    if not violations:
        print("\n ✅ All files have proper license headers!")
        # Set empty output for GitHub Actions
        _write_github_output('violations', '')
        return

    print(f"\n ❌ Found {len(violations)} license header violations:")

    # Set output for GitHub Actions: one violation per line, no padding, so
    # the recorded value is exactly the bullet list.
    _write_github_output('violations', '\n'.join(violations))

    print("\n Violations:")
    for violation in violations:
        print(f" {violation} ")

    sys.exit(1)
166+
# Run the checker only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments