# Source: pull request "Implement NIF-based citation extraction with precise character offsets" (#80)

# This workflow runs the long-running extraction tests for the DIEF server.
# The server takes significant time to start and the tests to execute.
#
# Flow: build DIEF with Maven -> rewrite the server language list -> start the
# server in the background -> wait for port 9999 -> run the test scripts ->
# always stop the server.
name: DIEF Long-Running Extraction Tests

on:
  workflow_dispatch:
  pull_request:
  schedule:
    # Runs once a week (Sunday at 02:00 UTC)
    - cron: '0 2 * * 0'

jobs:
  long-running-tests:
    runs-on: ubuntu-latest
    env:
      # JAVA_TOOL_OPTIONS is read by JVMs at startup; here it sets http.agent
      # and the extract.wikiapi.customUserAgent.* properties to curl/8.6.0.
      JAVA_TOOL_OPTIONS: '-Dhttp.agent=curl/8.6.0 -Dextract.wikiapi.customUserAgent.enabled=true -Dextract.wikiapi.customUserAgent.text=curl/8.6.0'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # setup-java v2+ requires an explicit distribution; zulu is the vendor
      # that setup-java@v1 installed by default. The version is quoted so YAML
      # does not parse it as a number.
      - name: Set up JDK 1.8
        uses: actions/setup-java@v4
        with:
          distribution: zulu
          java-version: '8'

      # Build only: tests and source jars are skipped, and the wagon HTTP
      # options disable connection pooling/keep-alive and allow 3 retries.
      - name: Build DIEF with Maven
        run: |
          mvn clean install \
            -Dskip=true \
            -DskipTests \
            -Dmaven.source.skip=true \
            -Dhttp.keepAlive=false \
            -Dmaven.wagon.http.pool=false \
            -Dmaven.wagon.http.retryHandler.count=3

      # TODO: fi,et,lt are causing issues, i.e. NoSuchElementException: no mapping namespace for language fi
      # sed -i 's/^languages=.*/languages=wikidata,en,de,fr,es,it,pt,ru,ja,ko,ar,hi,nl,sv,pl,uk,cs,sk,sl,sr,hr,ro,bg,hu,tr,el,fi,et,lv,lt,ga,cy,eo,ca,gl,eu,be,az,hy,am,bn,ur,vi,id,mk,commons/' server.default.properties
      #
      # Rewrites the `languages=` line of server/server.default.properties in
      # place (same list as above, minus fi, et and lt).
      - name: Create temporary server config with languages (CI-only)
        run: |
          cd server
          sed -i 's/^languages=.*/languages=wikidata,en,de,fr,es,it,pt,ru,ja,ko,ar,hi,nl,sv,pl,uk,cs,sk,sl,sr,hr,ro,bg,hu,tr,el,lv,ga,cy,eo,ca,gl,eu,be,az,hy,am,bn,ur,vi,id,mk,commons/' server.default.properties

      # Starts the server in the background, logging to server/server.log and
      # recording the PID of the backgrounded mvn process in server/server.pid
      # so that later steps can inspect and stop it.
      - name: Start DBpedia Server
        run: |
          cd server
          mvn scala:run \
            -Dlauncher=server \
            > server.log 2>&1 &
          echo $! > server.pid
          sleep 10

      # Early sanity check: fail immediately (with the log) if the process
      # has already died shortly after launch.
      - name: Debug - Check server status immediately
        run: |
          echo "=== Checking server process ==="
          if [ ! -f server/server.pid ]; then
            echo "❌ server.pid not found"
            exit 1
          fi
          PID=$(cat server/server.pid)
          if ps -p "$PID" > /dev/null; then
            echo "✅ Server process $PID is running"
          else
            echo "❌ Server process crashed"
            echo "===== server.log ====="
            tail -200 server/server.log || true
            exit 1
          fi
          echo ""
          echo "=== Last 100 lines of server.log ==="
          tail -100 server/server.log || true

      # Polls port 9999 every 5 seconds, up to 120 attempts (10 minutes).
      # Aborts early if the recorded process exits, instead of polling a dead
      # server until the attempts run out.
      - name: Wait for server to be ready (port 9999)
        run: |
          echo "Waiting for DIEF server on localhost:9999..."
          PID=$(cat server/server.pid)
          for i in {1..120}; do
            if nc -z localhost 9999; then
              echo "✅ Server is listening"
              exit 0
            fi
            if ! ps -p "$PID" > /dev/null; then
              echo "❌ Server process $PID exited before listening on port 9999"
              echo "===== server.log ====="
              tail -200 server/server.log || true
              exit 1
            fi
            echo "Attempt $i: Server not ready yet, waiting..."
            sleep 5
          done
          echo "❌ Server never started listening on port 9999"
          echo "===== server.log ====="
          tail -200 server/server.log || true
          exit 1

      - name: Run Wikidata extractor tests
        run: |
          cd scripts/src/main/bash
          ./test-wikidata-extractors.sh

      - name: Run Multi-Extractor Combination Tests
        run: |
          cd scripts/src/main/bash
          ./test-extraction-combinations.sh

      - name: Run Stats and Redirects Tests
        run: |
          cd scripts/src/main/bash
          ./stats-redirects-test.sh

      # Runs even when earlier steps failed. Cleanup is best-effort: the
      # process may already be gone, and that must not fail this step.
      - name: Stop DBpedia Server
        if: always()
        run: |
          if [ -f server/server.pid ]; then
            PID=$(cat server/server.pid)
            echo "Stopping server $PID"
            kill -TERM "$PID" 2>/dev/null || kill -KILL "$PID" 2>/dev/null || true
            rm -f server/server.pid
          fi