# Implement NIF-based citation extraction with precise character offsets #80
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# This workflow runs long-running extraction tests for DIEF Server.
# These tests take significant time to start and execute.
#
# Flow: build the project with Maven, rewrite the server language list for CI,
# launch the server in the background, wait for port 9999, run the bash test
# suites against it, and finally stop the server (even when earlier steps fail).
name: DIEF Long-Running Extraction Tests

on:
  workflow_dispatch:
  pull_request:
  schedule:
    # Runs once a week (Sunday at 02:00 UTC)
    - cron: '0 2 * * 0'

jobs:
  long-running-tests:
    runs-on: ubuntu-latest
    env:
      # Applied to every JVM started in this job (Maven build and the server).
      JAVA_TOOL_OPTIONS: '-Dhttp.agent=curl/8.6.0 -Dextract.wikiapi.customUserAgent.enabled=true -Dextract.wikiapi.customUserAgent.text=curl/8.6.0'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up JDK 1.8
        # setup-java@v1 is deprecated; v2+ requires an explicit distribution.
        # Zulu keeps the same JDK vendor that v1 installed.
        uses: actions/setup-java@v4
        with:
          distribution: 'zulu'
          # Quoted so YAML does not read the version as a float.
          java-version: '8'

      - name: Build DIEF with Maven
        run: |
          mvn clean install \
            -Dskip=true \
            -DskipTests \
            -Dmaven.source.skip=true \
            -Dhttp.keepAlive=false \
            -Dmaven.wagon.http.pool=false \
            -Dmaven.wagon.http.retryHandler.count=3

      # TODO: fi,et,lt are causing issues, i.e. NoSuchElementException: no mapping namespace for language fi
      # sed -i 's/^languages=.*/languages=wikidata,en,de,fr,es,it,pt,ru,ja,ko,ar,hi,nl,sv,pl,uk,cs,sk,sl,sr,hr,ro,bg,hu,tr,el,fi,et,lv,lt,ga,cy,eo,ca,gl,eu,be,az,hy,am,bn,ur,vi,id,mk,commons/' server.default.properties
      - name: Create temporary server config with languages (CI-only)
        run: |
          cd server
          sed -i 's/^languages=.*/languages=wikidata,en,de,fr,es,it,pt,ru,ja,ko,ar,hi,nl,sv,pl,uk,cs,sk,sl,sr,hr,ro,bg,hu,tr,el,lv,ga,cy,eo,ca,gl,eu,be,az,hy,am,bn,ur,vi,id,mk,commons/' server.default.properties

      - name: Start DBpedia Server
        run: |
          cd server
          # Run in the background; all output goes to server/server.log.
          mvn scala:run \
            -Dlauncher=server \
            > server.log 2>&1 &
          # Record the PID of the background mvn process for later steps.
          echo $! > server.pid
          sleep 10

      - name: Debug - Check server status immediately
        run: |
          echo "=== Checking server process ==="
          if [ ! -f server/server.pid ]; then
            echo "❌ server.pid not found"
            exit 1
          fi
          PID=$(cat server/server.pid)
          if ps -p "$PID" > /dev/null; then
            echo "✅ Server process $PID is running"
          else
            echo "❌ Server process crashed"
            echo "===== server.log ====="
            tail -200 server/server.log || true
            exit 1
          fi
          echo ""
          echo "=== Last 100 lines of server.log ==="
          tail -100 server/server.log || true

      - name: Wait for server to be ready (port 9999)
        run: |
          echo "Waiting for DIEF server on localhost:9999..."
          PID=$(cat server/server.pid)
          # Up to 120 attempts, 5 seconds apart (10 minutes in total).
          for i in {1..120}; do
            if nc -z localhost 9999; then
              echo "✅ Server is listening"
              exit 0
            fi
            # Fail fast instead of polling for the full timeout when the
            # launcher process has already exited.
            if ! ps -p "$PID" > /dev/null; then
              echo "❌ Server process $PID exited before listening on port 9999"
              echo "===== server.log ====="
              tail -200 server/server.log || true
              exit 1
            fi
            echo "Attempt $i: Server not ready yet, waiting..."
            sleep 5
          done
          echo "❌ Server never started listening on port 9999"
          echo "===== server.log ====="
          tail -200 server/server.log || true
          exit 1

      - name: Run Wikidata extractor tests
        run: |
          cd scripts/src/main/bash
          ./test-wikidata-extractors.sh

      - name: Run Multi-Extractor Combination Tests
        run: |
          cd scripts/src/main/bash
          ./test-extraction-combinations.sh

      - name: Run Stats and Redirects Tests
        run: |
          cd scripts/src/main/bash
          ./stats-redirects-test.sh

      - name: Stop DBpedia Server
        # Always run so the background server is stopped after failures too.
        if: always()
        run: |
          if [ -f server/server.pid ]; then
            PID=$(cat server/server.pid)
            echo "Stopping server $PID"
            # Best-effort: the process may already be gone (e.g. it crashed),
            # in which case both kills fail and must not fail the cleanup step.
            kill -TERM "$PID" 2>/dev/null || kill -KILL "$PID" 2>/dev/null || true
            rm -f server/server.pid
          fi