Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions .github/scripts/check-license-headers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env python3

#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

"""
License Header Compliance Checker for OpenSearch Data Prepper

This script checks that files contain the required license headers
as specified in CONTRIBUTING.md.

Usage:
python check-license-headers.py file1.java file2.py ...
printf '%s\n' file1.java file2.py | python check-license-headers.py
"""

import os
import sys
from pathlib import Path
from typing import List

# Extensions (lower-case, with leading dot) of files that must carry a license header
SUPPORTED_EXTENSIONS = {
    # Java ecosystem
    '.java', '.groovy', '.gradle',
    # Python
    '.py',
    # Shell scripts
    '.sh', '.bash', '.zsh',
    # YAML files
    '.yaml', '.yml',
    # Properties files
    '.properties',
}


def needs_license_header(file_path: str) -> bool:
    """Return True when the file's final suffix is one that requires a header.

    The suffix is lower-cased before the lookup, so the match is
    case-insensitive.
    """
    extension = Path(file_path).suffix.lower()
    return extension in SUPPORTED_EXTENSIONS

def check_file_header(file_path: str, max_lines: int = 15) -> bool:
    """Check if a file has the required complete license header.

    Only the first ``max_lines`` lines of the file are inspected, and every
    required phrase must appear somewhere in that window.

    Args:
        file_path: Path of the file to inspect.
        max_lines: Number of leading lines to search for the header.

    Returns:
        True if all header phrases are present, or if the file is missing or
        cannot be read (such files are skipped rather than reported).
        False if at least one phrase is absent.
    """
    from itertools import islice

    # All 5 components must be present for the header to count as complete
    required_components = (
        'Copyright OpenSearch Contributors',
        'SPDX-License-Identifier: Apache-2.0',
        'The OpenSearch Contributors require contributions made to',
        'this file be licensed under the Apache-2.0 license or a',
        'compatible open source license.',
    )

    try:
        # errors='ignore' drops undecodable bytes, so binary or mis-encoded
        # content never raises while the header window is being read.
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = ''.join(islice(f, max_lines))
    except FileNotFoundError:
        return True  # File might have been deleted, skip
    except (OSError, ValueError) as e:
        # OSError: directory, permission problem, I/O failure.
        # ValueError: path that cannot be opened at all (e.g. embedded NUL).
        print(f"Error reading file {file_path}: {e}", file=sys.stderr)
        return True  # Skip files we can't read

    return all(component in content for component in required_components)

def get_file_type_description(file_path: str) -> str:
    """Map a file's (case-insensitive) suffix to a human-readable label.

    Suffixes without a dedicated label fall back to ``'Source file'``.
    """
    labels_by_suffix = {
        '.java': 'Java/Gradle file',
        '.groovy': 'Java/Gradle file',
        '.gradle': 'Java/Gradle file',
        '.py': 'Python file',
        '.sh': 'Shell script',
        '.bash': 'Shell script',
        '.zsh': 'Shell script',
        '.yaml': 'YAML file',
        '.yml': 'YAML file',
        '.properties': 'Properties file',
    }
    suffix = Path(file_path).suffix.lower()
    return labels_by_suffix.get(suffix, 'Source file')

def get_files_to_check() -> List[str]:
    """Collect the paths to check.

    Command line arguments win when any are given; otherwise stdin is read
    one path per line, with surrounding whitespace stripped and blank lines
    dropped.
    """
    cli_paths = sys.argv[1:]
    if cli_paths:
        return cli_paths

    stripped_lines = (raw_line.strip() for raw_line in sys.stdin)
    return [entry for entry in stripped_lines if entry]

def _set_violations_output(violation_text: str) -> None:
    """Record the ``violations`` step output when running under GitHub Actions.

    Does nothing unless the GITHUB_OUTPUT environment variable names a file.
    A non-empty value is written in the multi-line ``name<<EOF`` form; an
    empty value is written as ``violations=``.
    """
    github_output = os.environ.get('GITHUB_OUTPUT')
    if not github_output:
        return

    # Explicit UTF-8 so paths with non-ASCII characters are written the same
    # way regardless of the locale the script runs under.
    with open(github_output, 'a', encoding='utf-8') as f:
        if violation_text:
            f.write(f"violations<<EOF\n{violation_text}\nEOF\n")
        else:
            f.write("violations=\n")


def main():
    """Check every requested file and report missing license headers.

    Prints a per-file status line, records the list of violations as the
    ``violations`` step output, and exits with status 1 when at least one
    file is missing its header. Returns normally when there is nothing to
    check or every checked file is compliant.
    """
    files_to_check = get_files_to_check()

    if not files_to_check:
        print("No files to check", file=sys.stderr)
        return

    print(f"Checking {len(files_to_check)} files for license headers...")

    violations = []

    for file_path in files_to_check:
        print(f"Checking: {file_path}")

        if not Path(file_path).exists():
            print(f" File not found: {file_path}")
            continue

        # Skip if doesn't need header
        if not needs_license_header(file_path):
            print(f" Skipped (no header needed): {file_path}")
            continue

        # Check header
        if check_file_header(file_path):
            print(f" ✅ Header OK: {file_path}")
            continue

        file_type = get_file_type_description(file_path)
        violations.append(f"- `{file_path}` ({file_type} missing license header)")
        print(f" ❌ Missing license header: {file_path}")

    # Output results
    if not violations:
        print("\n✅ All files have proper license headers!")
        # Set empty output for GitHub Actions
        _set_violations_output('')
        return

    print(f"\n❌ Found {len(violations)} license header violations:")

    # Set output for GitHub Actions
    _set_violations_output('\n'.join(violations))

    print("\nViolations:")
    for violation in violations:
        print(f" {violation}")

    sys.exit(1)


if __name__ == "__main__":
    main()
55 changes: 55 additions & 0 deletions .github/scripts/get-new-files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env python3

#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

"""
Get newly added files from Git for license header checking.

This script identifies files added in the current PR and outputs them
one per line to stdout.
"""

import os
import subprocess
import sys

def get_newly_added_files():
    """Get list of files added in this PR.

    Runs ``git diff --name-only --diff-filter=A origin/<base>...HEAD`` where
    ``<base>`` comes from the GITHUB_BASE_REF environment variable, falling
    back to ``main`` when it is unset or empty.

    Returns:
        The added paths, one list entry per path. An empty list is returned
        when git fails or cannot be started; the error is reported on stderr.
    """
    # Get the base branch (usually main). `or` also covers an empty value,
    # which would otherwise produce the invalid ref "origin/".
    base_ref = os.environ.get('GITHUB_BASE_REF') or 'main'

    command = [
        'git',
        # By default git quotes and escapes non-ASCII paths; the escaped form
        # does not match the file on disk, so the header check would skip it.
        '-c', 'core.quotepath=off',
        'diff', '--name-only', '--diff-filter=A',
        f'origin/{base_ref}...HEAD',
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error getting changed files: {e}", file=sys.stderr)
        if e.stderr:
            # Surface git's own explanation; it is captured, not inherited.
            print(e.stderr.strip(), file=sys.stderr)
        return []
    except OSError as e:
        # git is not installed or could not be executed
        print(f"Error getting changed files: {e}", file=sys.stderr)
        return []

    return [f.strip() for f in result.stdout.split('\n') if f.strip()]

def main():
    """Print every newly added file on its own line.

    When nothing was added, a note goes to stderr and the process exits with
    status 0 without writing anything to stdout.
    """
    added_paths = get_newly_added_files()

    if len(added_paths) == 0:
        print("No newly added files found", file=sys.stderr)
        sys.exit(0)

    # One path per line on stdout
    print('\n'.join(added_paths))


if __name__ == "__main__":
    main()
79 changes: 79 additions & 0 deletions .github/workflows/license-header-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

name: License Header Check

on:
  pull_request:
    types: [opened, synchronize, reopened]

# Least-privilege token: read the repository, write the PR comment.
# NOTE(review): pull requests from forks normally get a read-only token, so
# the comment step may still be unable to post there - confirm the desired
# behavior for fork PRs.
permissions:
  contents: read
  pull-requests: write

jobs:
  license-header-check:
    runs-on: ubuntu-latest
    name: Check License Headers on New Files

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history: get-new-files.py diffs origin/<base>...HEAD
          fetch-depth: 0

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Run License Header Check
        id: license-check
        run: |
          python .github/scripts/get-new-files.py | python .github/scripts/check-license-headers.py

      - name: Comment on PR
        if: failure() && steps.license-check.outputs.violations != ''
        uses: actions/github-script@v7
        env:
          # Passed through the environment instead of being expanded into the
          # script text: the value contains backticks and PR-controlled file
          # names, which would otherwise end the template literal or inject code.
          VIOLATIONS: ${{ steps.license-check.outputs.violations }}
        with:
          script: |
            const violations = process.env.VIOLATIONS;

            const body = `## ⚠️ License Header Violations Found

            The following newly added files are missing required license headers:

            ${violations}

            Please add the appropriate license header to each file and push your changes.

            **See the license header requirements:** https://github.com/opensearch-project/data-prepper/blob/main/CONTRIBUTING.md#license-headers`;

            // Paginate so an earlier bot comment is found even on long threads.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });

            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('License Header Violations Found')
            );

            if (botComment) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: body
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: body
              });
            }
18 changes: 18 additions & 0 deletions config/docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

# Compose definition with a single Data Prepper service.
version: '3.8'

services:
  data-prepper:
    # NOTE(review): `latest` is a floating tag - confirm an unpinned image is intended.
    image: opensearchproject/data-prepper:latest
    ports:
      # Publishes port 4900 under the same port number on the host
      - "4900:4900"
    volumes:
      # Mounts the pipelines.yaml located next to this file into the container
      - ./pipelines.yaml:/usr/share/data-prepper/pipelines/pipelines.yaml
5 changes: 5 additions & 0 deletions config/environments/dev.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Development environment configuration
#
# NOTE(review): this file carries no license header although `.properties` is
# one of the extensions covered by the license header check - confirm whether
# it is an intentional negative fixture for that check.
environment=development
debug.enabled=true
log.level=DEBUG
data-prepper.workers=2
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package org.opensearch.dataprepper.core;

/**
 * This file is missing the license header completely.
 *
 * <p>NOTE(review): presumably an intentional negative fixture for the license
 * header check - confirm before merging.
 */
public class NewFeature {

    /**
     * Prints a fixed message to standard output.
     */
    public void doSomething() {
        System.out.println("This file has no license header!");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.dataprepper.plugins.test;

import org.opensearch.dataprepper.model.processor.Processor;

/**
 * Empty {@link Processor} implementation that declares no members of its own.
 *
 * <p>NOTE(review): no Processor methods are implemented here - confirm the
 * interface has no abstract methods, otherwise this class will not compile.
 */
public class TestProcessor implements Processor {
    // This is a good Java file with complete license header
}
13 changes: 13 additions & 0 deletions deployment/kubernetes/data-prepper-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# ConfigMap holding a pipelines.yaml document as a literal block.
# NOTE(review): this file carries no license header although `.yaml` is one of
# the extensions covered by the license header check - confirm whether it is
# an intentional negative fixture for that check.
apiVersion: v1
kind: ConfigMap
metadata:
  name: data-prepper-config
  namespace: opensearch
data:
  # Embedded pipeline: an `http` source on port 2021 and a `stdout` sink.
  # (Comments must stay outside the block below; inside it they become content.)
  pipelines.yaml: |
    simple-pipeline:
      source:
        http:
          port: 2021
      sink:
        - stdout:
13 changes: 13 additions & 0 deletions examples/trace-analytics/application.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

# Application configuration for trace analytics example
# NOTE(review): presumably read by the example application - confirm which
# component consumes each key below.
server.port=8080
# DEBUG logging for everything under the org.opensearch.dataprepper logger
logging.level.org.opensearch.dataprepper=DEBUG
data-prepper.pipeline.workers=4
Loading
Loading