Skip to content

Commit 4864d56

Browse files
committed
GitHub Action to verify that newly added files have the license header.
Signed-off-by: David Venable <dlv@amazon.com>
1 parent 502f601 commit 4864d56

3 files changed

Lines changed: 305 additions & 0 deletions

File tree

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#!/usr/bin/env python3
2+
3+
#
4+
# Copyright OpenSearch Contributors
5+
# SPDX-License-Identifier: Apache-2.0
6+
#
7+
# The OpenSearch Contributors require contributions made to
8+
# this file be licensed under the Apache-2.0 license or a
9+
# compatible open source license.
10+
#
11+
12+
"""
13+
License Header Compliance Checker for OpenSearch Data Prepper
14+
15+
This script checks that files contain the required license headers
16+
as specified in CONTRIBUTING.md.
17+
18+
Usage:
19+
python check-license-headers.py file1.java file2.py ...
20+
    printf 'file1.java\nfile2.py\n' | python check-license-headers.py
21+
"""
22+
23+
import os
24+
import sys
25+
from pathlib import Path
26+
from typing import List
27+
28+
# File extensions that require license headers
29+
# Suffixes (lower-case, including the leading dot) of the file types that
# must carry the license header.
SUPPORTED_EXTENSIONS = {
    # Java ecosystem
    '.java',
    '.groovy',
    '.gradle',
    # Python
    '.py',
    # Shell scripts
    '.sh',
    '.bash',
    '.zsh',
    # YAML files
    '.yaml',
    '.yml',
    # Properties files
    '.properties',
}
36+
37+
def needs_license_header(file_path: str) -> bool:
    """Return True when the file's suffix is one that requires a header.

    The suffix is compared case-insensitively against SUPPORTED_EXTENSIONS.
    """
    suffix = Path(file_path).suffix
    return suffix.lower() in SUPPORTED_EXTENSIONS
41+
42+
def check_file_header(file_path: str) -> bool:
    """Report whether the top of a file carries the full license header.

    Only the first 15 lines are inspected. Every one of the five header
    phrases must appear somewhere in that window.

    Returns True when the header is complete, and also when the file does
    not exist or cannot be read (such files are skipped, not flagged).
    Returns False when at least one phrase is missing.
    """
    # A path that is gone (for example, deleted) is not a violation.
    if not Path(file_path).exists():
        return True

    header_phrases = (
        'Copyright OpenSearch Contributors',
        'SPDX-License-Identifier: Apache-2.0',
        'The OpenSearch Contributors require contributions made to',
        'this file be licensed under the Apache-2.0 license or a',
        'compatible open source license.',
    )

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
            # zip() stops once range(15) is exhausted, capping the window
            # at the first 15 lines.
            head = ''.join(text for _, text in zip(range(15), handle))
        return all(phrase in head for phrase in header_phrases)
    except Exception as e:
        print(f"Error reading file {file_path}: {e}", file=sys.stderr)
        return True  # Unreadable files are skipped rather than reported
77+
78+
def get_file_type_description(file_path: str) -> str:
    """Map a file's suffix to a short human-readable label.

    The suffix is matched case-insensitively. Anything not listed below is
    labelled 'Source file'.
    """
    labels_by_suffix = {
        '.java': 'Java/Gradle file',
        '.groovy': 'Java/Gradle file',
        '.gradle': 'Java/Gradle file',
        '.py': 'Python file',
        '.sh': 'Shell script',
        '.bash': 'Shell script',
        '.zsh': 'Shell script',
        '.yaml': 'YAML file',
        '.yml': 'YAML file',
        '.properties': 'Properties file',
    }
    suffix = Path(file_path).suffix.lower()
    return labels_by_suffix.get(suffix, 'Source file')
95+
96+
def get_files_to_check() -> List[str]:
    """Collect the paths to check.

    Command-line arguments take precedence. When none are given, paths are
    read from stdin, one per line, with surrounding whitespace stripped and
    blank lines dropped.
    """
    cli_paths = sys.argv[1:]
    if cli_paths:
        return cli_paths

    stripped_lines = (raw.strip() for raw in sys.stdin)
    return [entry for entry in stripped_lines if entry]
109+
110+
def main():
    """Check every requested file and report license header violations.

    Prints one status line per file. When the GITHUB_OUTPUT environment
    variable is set, a `violations` output is appended to that file: empty
    when everything passes, otherwise a multi-line list. Exits with status 1
    if any checked file is missing the header.
    """
    paths = get_files_to_check()
    if not paths:
        print("No files to check", file=sys.stderr)
        return

    print(f"Checking {len(paths)} files for license headers...")

    failures = []
    for candidate in paths:
        print(f"Checking: {candidate}")

        if not Path(candidate).exists():
            print(f" File not found: {candidate}")
        elif not needs_license_header(candidate):
            print(f" Skipped (no header needed): {candidate}")
        elif check_file_header(candidate):
            print(f" ✅ Header OK: {candidate}")
        else:
            kind = get_file_type_description(candidate)
            failures.append(f"- `{candidate}` ({kind} missing license header)")
            print(f" ❌ Missing license header: {candidate}")

    # Path of the GitHub Actions step-output file, if present.
    output_path = os.environ.get('GITHUB_OUTPUT')

    if not failures:
        print("\n✅ All files have proper license headers!")
        if output_path:
            # Publish an explicitly empty output.
            with open(output_path, 'a') as sink:
                sink.write("violations=\n")
        return

    print(f"\n❌ Found {len(failures)} license header violations:")

    if output_path:
        joined = '\n'.join(failures)
        # Multi-line step outputs use the name<<DELIMITER ... DELIMITER form.
        with open(output_path, 'a') as sink:
            sink.write(f"violations<<EOF\n{joined}\nEOF\n")

    print("\nViolations:")
    for entry in failures:
        print(f" {entry}")

    sys.exit(1)


if __name__ == "__main__":
    main()

.github/scripts/get-new-files.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env python3
2+
3+
#
4+
# Copyright OpenSearch Contributors
5+
# SPDX-License-Identifier: Apache-2.0
6+
#
7+
# The OpenSearch Contributors require contributions made to
8+
# this file be licensed under the Apache-2.0 license or a
9+
# compatible open source license.
10+
#
11+
12+
"""
13+
Get newly added files from Git for license header checking.
14+
15+
This script identifies files added in the current PR and outputs them
16+
one per line to stdout.
17+
"""
18+
19+
import os
20+
import subprocess
21+
import sys
22+
23+
def get_newly_added_files():
    """Return the paths of files added on this branch relative to the base.

    The base branch name comes from the GITHUB_BASE_REF environment variable
    and falls back to 'main'. Files are those that `git diff` reports as
    added (`--diff-filter=A`) between the merge base with
    `origin/<base>` and HEAD.

    Returns:
        A list of path strings, one per added file. The list is empty when
        nothing was added or when the git command fails (the error is
        printed to stderr).
    """
    base_ref = os.environ.get('GITHUB_BASE_REF', 'main')

    git_command = [
        'git',
        # By default git C-quotes and octal-escapes paths that contain
        # non-ASCII bytes. Such quoted names do not match any file on disk,
        # so the downstream checker would skip them instead of checking them.
        '-c', 'core.quotepath=off',
        'diff', '--name-only', '--diff-filter=A',
        f'origin/{base_ref}...HEAD',
    ]

    try:
        result = subprocess.run(
            git_command, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error getting changed files: {e}", file=sys.stderr)
        return []

    # One path per line; drop blank lines and surrounding whitespace.
    return [name.strip() for name in result.stdout.split('\n') if name.strip()]
41+
42+
def main():
    """Print each newly added file on its own line.

    When there are no newly added files, a note is written to stderr and the
    process exits with status 0.
    """
    added = get_newly_added_files()

    if not added:
        print("No newly added files found", file=sys.stderr)
        sys.exit(0)

    # One path per line on stdout.
    print('\n'.join(added))


if __name__ == "__main__":
    main()
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#
2+
# Copyright OpenSearch Contributors
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# The OpenSearch Contributors require contributions made to
6+
# this file be licensed under the Apache-2.0 license or a
7+
# compatible open source license.
8+
#
9+
10+
name: License Header Check

on:
  pull_request:
    types: [opened, synchronize, reopened]

jobs:
  license-header-check:
    runs-on: ubuntu-latest
    name: Check License Headers on New Files

    # Least privilege: checkout needs to read the repository, and the comment
    # step needs to write a comment on the pull request.
    permissions:
      contents: read
      pull-requests: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history so the diff against origin/<base> can be computed.
          fetch-depth: 0

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Run License Header Check
        id: license-check
        run: |
          python .github/scripts/get-new-files.py | python .github/scripts/check-license-headers.py

      - name: Comment on PR
        if: failure() && steps.license-check.outputs.violations != ''
        uses: actions/github-script@v7
        env:
          # Pass the step output through the environment instead of
          # interpolating it into the script source. The value contains
          # backticks and PR-controlled file names, which would otherwise
          # terminate the JavaScript template literal and allow injection.
          VIOLATIONS: ${{ steps.license-check.outputs.violations }}
        with:
          script: |
            const violations = process.env.VIOLATIONS;

            const body = `## ⚠️ License Header Violations Found

            The following newly added files are missing required license headers:

            ${violations}

            Please add the appropriate license header to each file and push your changes.

            **See the license header requirements:** https://github.com/opensearch-project/data-prepper/blob/main/CONTRIBUTING.md#license-headers`;

            // Find existing bot comment
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });

            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('License Header Violations Found')
            );

            if (botComment) {
              // Update existing comment
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: body
              });
            } else {
              // Create new comment
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: body
              });
            }

0 commit comments

Comments
 (0)