Skip to content

Commit 23afc35

Browse files
committed
Merge branch 'master' into NUTCH-2455
2 parents a50c958 + 89e6ec1 commit 23afc35

80 files changed

Lines changed: 5709 additions & 771 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/junit-report.yml

Lines changed: 24 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -31,48 +31,40 @@ jobs:
3131
name: junit-test-results-ubuntu-latest
3232
workflow: master-build.yml
3333
run_id: ${{ github.event.workflow_run.id }}
34-
path: ./results-ubuntu
3534
continue-on-error: true
36-
- name: Download Test Report (macOS)
37-
uses: dawidd6/action-download-artifact@v11
38-
with:
39-
name: junit-test-results-macos-latest
40-
workflow: master-build.yml
41-
run_id: ${{ github.event.workflow_run.id }}
42-
path: ./results-macos
43-
continue-on-error: true
44-
- name: Debug XML files
45-
if: always()
35+
- name: Resolve PR number
36+
id: pr
4637
run: |
47-
echo "=== Listing downloaded artifacts ==="
48-
find ./results-ubuntu ./results-macos -name "TEST-*.xml" 2>/dev/null | head -20 || echo "No files found"
49-
echo ""
50-
echo "=== TestCommonCrawlDataDumper.xml (macOS) ==="
51-
cat ./results-macos/test/TEST-org.apache.nutch.tools.TestCommonCrawlDataDumper.xml 2>/dev/null || echo "File not found"
52-
echo ""
53-
echo "=== TestCommonCrawlDataDumper.xml (Ubuntu) ==="
54-
cat ./results-ubuntu/test/TEST-org.apache.nutch.tools.TestCommonCrawlDataDumper.xml 2>/dev/null || echo "File not found"
55-
echo ""
56-
echo "=== TestPrefixStringMatcher.xml (Ubuntu) ==="
57-
cat ./results-ubuntu/test/TEST-org.apache.nutch.util.TestPrefixStringMatcher.xml 2>/dev/null || echo "File not found"
58-
echo ""
59-
echo "=== TestPrefixStringMatcher.xml (macOS) ==="
60-
cat ./results-macos/test/TEST-org.apache.nutch.util.TestPrefixStringMatcher.xml 2>/dev/null || echo "File not found"
38+
PR_NUMBER="${{ github.event.workflow_run.pull_requests[0].number }}"
39+
if [ -z "$PR_NUMBER" ]; then  # event payload carried no PR number; fall back to looking it up from the head commit
40+
PR_NUMBER=$(gh api \
41+
"repos/${{ github.repository }}/commits/${{ github.event.workflow_run.head_sha }}/pulls" \
42+
--jq '.[0].number // empty' || true)
43+
fi  # "|| true" keeps the lookup best-effort: a failed gh call leaves PR_NUMBER empty (has_pr=false) instead of aborting the step
44+
echo "number=${PR_NUMBER:-}" >> "$GITHUB_OUTPUT"
45+
if [ -n "$PR_NUMBER" ]; then
46+
echo "has_pr=true" >> "$GITHUB_OUTPUT"
47+
else
48+
echo "has_pr=false" >> "$GITHUB_OUTPUT"
49+
fi
50+
env:
51+
GH_TOKEN: ${{ github.token }}
6152
- name: Publish Test Report
62-
uses: mikepenz/action-junit-report@v5
53+
uses: mikepenz/action-junit-report@v6
6354
with:
6455
report_paths: |-
65-
./results-ubuntu/**/TEST-*.xml
66-
./results-macos/**/TEST-*.xml
56+
./test/TEST-*.xml
57+
./**/test/TEST-*.xml
6758
check_name: |-
6859
JUnit Test Report
6960
JUnit Test Report Plugins
7061
commit: ${{ github.event.workflow_run.head_sha }}
7162
fail_on_failure: false
72-
fail_on_parse_error: false
63+
fail_on_parse_error: true
7364
require_tests: true
7465
require_passed_tests: true
7566
include_passed: false
67+
include_skipped: true
7668
check_annotations: true
7769
annotate_notice: true
7870
job_summary: true
@@ -81,10 +73,10 @@ jobs:
8173
skip_success_summary: true
8274
include_time_in_summary: true
8375
group_suite: true
84-
comment: true
85-
updateComment: true
76+
comment: ${{ steps.pr.outputs.has_pr == 'true' }}
77+
updateComment: ${{ steps.pr.outputs.has_pr == 'true' }}
8678
skip_comment_without_tests: true
8779
job_name: tests
8880
truncate_stack_traces: false
8981
annotations_limit: 50
90-
pr_id: ${{ github.event.workflow_run.pull_requests[0].number || '' }}
82+
pr_id: ${{ steps.pr.outputs.number }}

.github/workflows/master-build.yml

Lines changed: 130 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ on:
2020
pull_request:
2121
types: [opened, synchronize, reopened]
2222
branches: [master]
23+
24+
# Java Version Strategy:
25+
# - BUILD: Requires Java 17+ (JUnit 6 dependency)
26+
# - RUNTIME: Supports Java 11+ (javac.version=11 produces Java 11 bytecode)
27+
#
28+
# The 'build' job verifies bytecode compilation for both Java 11 and 17 targets.
29+
# The 'runtime-java11' job verifies the built artifacts actually run on Java 11.
30+
# The 'tests' job runs on JDK 17 (required by JUnit 6) with the default
31+
# javac.version=11 bytecode target for backward compatibility.
32+
2333
jobs:
2434
javadoc:
2535
strategy:
@@ -43,6 +53,7 @@ jobs:
4353
${{ runner.os }}-ivy-
4454
- name: Javadoc
4555
run: ant clean javadoc -buildfile build.xml
56+
4657
rat:
4758
strategy:
4859
matrix:
@@ -73,19 +84,108 @@ jobs:
7384
- name: Fail if any unknown licenses
7485
if: ${{ env.UNKNOWN_LICENSES != '0 Unknown Licenses' }}
7586
run: exit 1
87+
88+
# Build verification with Java bytecode target matrix
89+
# Verifies bytecode compatibility for both Java 11 and Java 17 targets
90+
build:
91+
strategy:
92+
fail-fast: false
93+
matrix:
94+
javac-version: ['11', '17']
95+
os: [ubuntu-latest]
96+
runs-on: ${{ matrix.os }}
97+
name: build (javac.version=${{ matrix.javac-version }})
98+
steps:
99+
- uses: actions/checkout@v5
100+
- name: Set up JDK 17
101+
uses: actions/setup-java@v5
102+
with:
103+
java-version: '17'
104+
distribution: 'temurin'
105+
- name: Cache Ivy dependencies
106+
uses: actions/cache@v4
107+
with:
108+
path: ~/.ivy2/cache
109+
key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
110+
restore-keys: |
111+
${{ runner.os }}-ivy-
112+
- name: Build with javac.version=${{ matrix.javac-version }}
113+
run: ant clean runtime -Djavac.version=${{ matrix.javac-version }} -buildfile build.xml
114+
- name: Verify bytecode version
115+
run: |
116+
# Extract and verify the bytecode version of compiled classes
117+
# Java 11 = major version 55, Java 17 = major version 61
118+
EXPECTED_VERSION=${{ matrix.javac-version == '11' && '55' || '61' }}
119+
echo "Expected major version: $EXPECTED_VERSION (Java ${{ matrix.javac-version }})"
120+
121+
# Find a real class file (exclude package-info.class which may have different version)
122+
cd build/classes
123+
CLASS_FILE=$(find . -name "*.class" ! -name "package-info.class" | head -1)
124+
if [ -n "$CLASS_FILE" ]; then
125+
echo "Checking: $CLASS_FILE"
126+
ACTUAL_VERSION=$(javap -verbose "$CLASS_FILE" 2>/dev/null | grep "major version" | awk '{print $NF}')
127+
echo "Actual major version: $ACTUAL_VERSION"
128+
if [ "$ACTUAL_VERSION" != "$EXPECTED_VERSION" ]; then
129+
echo "ERROR: Bytecode version mismatch!"
130+
exit 1
131+
fi
132+
echo "Bytecode version verified successfully"
133+
else
134+
echo "ERROR: No class files found"
135+
exit 1
136+
fi
137+
138+
# Verify runtime compatibility on Java 11
139+
# This ensures the built artifacts can actually run on Java 11
140+
runtime-java11:
141+
needs: build
142+
runs-on: ubuntu-latest
143+
steps:
144+
- uses: actions/checkout@v5
145+
- name: Set up JDK 17 for building
146+
uses: actions/setup-java@v5
147+
with:
148+
java-version: '17'
149+
distribution: 'temurin'
150+
- name: Cache Ivy dependencies
151+
uses: actions/cache@v4
152+
with:
153+
path: ~/.ivy2/cache
154+
key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
155+
restore-keys: |
156+
${{ runner.os }}-ivy-
157+
- name: Build with Java 11 target
158+
run: ant clean runtime -Djavac.version=11 -buildfile build.xml
159+
- name: Set up JDK 11 for runtime verification
160+
uses: actions/setup-java@v5
161+
with:
162+
java-version: '11'
163+
distribution: 'temurin'
164+
- name: Verify runtime on Java 11
165+
run: |
166+
echo "Verifying Nutch can run on Java 11..."
167+
java -version
168+
cd runtime/local
169+
# Run showproperties (org.apache.nutch.tools.ShowProperties) so real classes are loaded; capture to a file because piping into head would report head's exit status and hide a bin/nutch failure
170+
bin/nutch showproperties > "${RUNNER_TEMP}/showproperties.out"
171+
head -20 "${RUNNER_TEMP}/showproperties.out"
172+
echo "Java 11 runtime verification complete"
173+
174+
# Tests run on JDK 17 (required by JUnit 6) with default javac.version=11
175+
# Java 11 runtime compatibility is verified by the runtime-java11 job
76176
tests:
77177
strategy:
178+
fail-fast: false
78179
matrix:
79-
java: ['17']
80180
os: [ubuntu-latest, macos-latest]
81181
runs-on: ${{ matrix.os }}
82182
timeout-minutes: 45
83183
steps:
84184
- uses: actions/checkout@v5
85-
- name: Set up JDK ${{ matrix.java }}
185+
- name: Set up JDK 17
86186
uses: actions/setup-java@v5
87187
with:
88-
java-version: ${{ matrix.java }}
188+
java-version: '17'
89189
distribution: 'temurin'
90190
- name: Cache Ivy dependencies
91191
uses: actions/cache@v4
@@ -104,6 +204,8 @@ jobs:
104204
- 'src/testresources/**'
105205
plugins:
106206
- 'src/plugin/**'
207+
indexer_plugins:
208+
- 'src/plugin/indexer-*/**'
107209
buildconf:
108210
- 'build.xml'
109211
- 'ivy/ivy.xml'
@@ -120,16 +222,35 @@ jobs:
120222
- name: test plugins
121223
if: ${{ steps.filter.outputs.plugins == 'true' && steps.filter.outputs.core == 'false' && steps.filter.outputs.buildconf == 'false' }}
122224
run: ant clean test-plugins -buildfile build.xml
123-
# fallback: run all tests if no specific filter matched (e.g., docs-only changes)
124-
- name: test all (fallback)
125-
if: ${{ steps.filter.outputs.buildconf == 'false' && steps.filter.outputs.core == 'false' && steps.filter.outputs.plugins == 'false' }}
126-
run: ant clean test -buildfile build.xml
225+
# run indexer integration tests when indexer plugin files change (Docker required, ubuntu-latest only)
226+
- name: test indexer integration
227+
if: ${{ steps.filter.outputs.indexer_plugins == 'true' && matrix.os == 'ubuntu-latest' }}
228+
run: ant clean test-indexer-integration -buildfile build.xml
229+
- name: Check for test results
230+
id: check_tests
231+
if: always() && matrix.os == 'ubuntu-latest'
232+
run: |
233+
shopt -s globstar nullglob  # globstar: ** recurses into subdirectories; nullglob: unmatched patterns expand to nothing
234+
files=(./build/test/TEST-*.xml ./build/**/test/TEST-*.xml)  # same patterns as the Upload Test Report step
235+
if [ "${#files[@]}" -gt 0 ]; then
236+
echo "has_results=true" >> "$GITHUB_OUTPUT"
237+
else
238+
echo "has_results=false" >> "$GITHUB_OUTPUT"
239+
fi
127240
- name: Upload Test Report
128241
uses: actions/upload-artifact@v4
129-
if: always()
242+
if: always() && matrix.os == 'ubuntu-latest' && steps.check_tests.outputs.has_results == 'true'
130243
with:
131244
name: junit-test-results-${{ matrix.os }}
132245
path: |
133246
./build/test/TEST-*.xml
134247
./build/**/test/TEST-*.xml
135-
retention-days: 1
248+
retention-days: 1
249+
- name: Upload Coverage Data
250+
uses: actions/upload-artifact@v4
251+
if: always() && matrix.os == 'ubuntu-latest'
252+
with:
253+
name: coverage-data
254+
path: ./build/coverage/*.exec
255+
retention-days: 1
256+
if-no-files-found: ignore

.github/workflows/sonarcloud.yml

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
name: sonarcloud
17+
on:
18+
workflow_run:
19+
workflows: [master pull request ci]
20+
types: [completed]
21+
jobs:
22+
analysis:
23+
if: github.event.workflow_run.conclusion == 'success'
24+
runs-on: ubuntu-latest
25+
steps:
26+
- uses: actions/checkout@v5  # check out the commit that the triggering workflow run built
27+
with:
28+
repository: ${{ github.event.workflow_run.head_repository.full_name }}  # head repository of the triggering run; presumably a fork for fork PRs — verify
29+
ref: ${{ github.event.workflow_run.head_sha }}  # NOTE(review): this job later runs ant on this code and passes SONAR_TOKEN to the scan steps — confirm building PR-head code in a workflow_run job is acceptable
30+
fetch-depth: 0  # full history instead of a shallow clone (per actions/checkout docs) — presumably for Sonar SCM/blame data; confirm
31+
- name: Set up JDK 17
32+
uses: actions/setup-java@v5
33+
with:
34+
java-version: '17'
35+
distribution: 'temurin'
36+
- name: Cache Ivy dependencies
37+
uses: actions/cache@v4
38+
with:
39+
path: ~/.ivy2/cache
40+
key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }}
41+
restore-keys: |
42+
${{ runner.os }}-ivy-
43+
- name: Compile (no tests)
44+
run: ant compile compile-plugins -buildfile build.xml
45+
- name: Download coverage data
46+
uses: dawidd6/action-download-artifact@v11
47+
with:
48+
name: coverage-data
49+
workflow: master-build.yml
50+
run_id: ${{ github.event.workflow_run.id }}
51+
path: ./build/coverage/
52+
continue-on-error: true
53+
- name: Download test reports
54+
uses: dawidd6/action-download-artifact@v11
55+
with:
56+
name: junit-test-results-ubuntu-latest
57+
workflow: master-build.yml
58+
run_id: ${{ github.event.workflow_run.id }}
59+
path: ./build/test/
60+
continue-on-error: true
61+
- name: Generate JaCoCo XML report
62+
run: ant jacoco-report -buildfile build.xml
63+
continue-on-error: true
64+
- name: Resolve PR number
65+
id: pr
66+
run: |
67+
if [ "${{ github.event.workflow_run.event }}" != "pull_request" ]; then
68+
echo "is_pr=false" >> "$GITHUB_OUTPUT"
69+
exit 0
70+
fi
71+
PR_NUMBER=$(gh api \
72+
"repos/${{ github.repository }}/commits/${{ github.event.workflow_run.head_sha }}/pulls" \
73+
--jq '.[0].number // empty')
74+
echo "number=${PR_NUMBER:-}" >> "$GITHUB_OUTPUT"
75+
if [ -n "$PR_NUMBER" ]; then
76+
echo "is_pr=true" >> "$GITHUB_OUTPUT"
77+
else
78+
echo "is_pr=false" >> "$GITHUB_OUTPUT"
79+
fi
80+
env:
81+
GH_TOKEN: ${{ github.token }}
82+
- name: SonarCloud Scan (PR)
83+
if: steps.pr.outputs.is_pr == 'true'
84+
uses: SonarSource/sonarqube-scan-action@v6
85+
with:
86+
args: >
87+
-Dsonar.pullrequest.key=${{ steps.pr.outputs.number }}
88+
-Dsonar.pullrequest.branch=${{ github.event.workflow_run.head_branch }}
89+
-Dsonar.pullrequest.base=master
90+
env:
91+
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
92+
SONAR_HOST_URL: https://sonarcloud.io
93+
- name: SonarCloud Scan (branch)
94+
if: steps.pr.outputs.is_pr == 'false'
95+
uses: SonarSource/sonarqube-scan-action@v6
96+
with:
97+
args: >
98+
-Dsonar.branch.name=${{ github.event.workflow_run.head_branch }}
99+
env:
100+
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
101+
SONAR_HOST_URL: https://sonarcloud.io

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
conf/*.txt
22
conf/*.xml
3+
conf/*.mmdb
34
!conf/nutch-default.xml
45
conf/hadoop-env.sh
56
conf/slaves

0 commit comments

Comments
 (0)