# fix(hybrid): activate --hybrid-fallback on server-absent path (PDFDLOSP-21) #430
#
# Workflow file for this run
# CI workflow: builds and tests the project, then runs the benchmark job.
name: Test & Benchmark

# Triggers: pull requests targeting main that touch any of the listed paths,
# plus manual runs via workflow_dispatch.
on:
  pull_request:
    branches: [main]
    paths:
      - 'java/**'
      - 'python/**'
      - 'node/**'
      - 'scripts/**'
      - '.github/workflows/**'
  workflow_dispatch:

# One concurrency group per pull request (or per ref when there is no PR
# number, e.g. manual runs); a newer run cancels the one still in progress.
concurrency:
  group: ci-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# The workflow token only gets read access to repository contents.
permissions:
  contents: read
jobs:
  # Builds and tests everything through scripts/build-all.sh, then publishes
  # the coverage report and the CLI jar(s) for the benchmark job.
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Setup Java
        uses: actions/setup-java@v5
        with:
          distribution: 'temurin'
          java-version: '21'

      - name: Setup uv
        uses: astral-sh/setup-uv@v7

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: '20'

      - name: Setup pnpm
        run: npm install -g pnpm

      - name: Build & Test All
        run: ./scripts/build-all.sh

      # fail_ci_if_error is false, so a Codecov upload problem is not meant
      # to fail this job.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          files: java/opendataloader-pdf-core/target/site/jacoco/jacoco.xml
          fail_ci_if_error: false
          token: ${{ secrets.CODECOV_TOKEN }}

      # Hand-off to the benchmark job, which downloads "java-build" instead
      # of rebuilding; kept for one day only.
      - name: Upload build artifacts
        uses: actions/upload-artifact@v7
        with:
          name: java-build
          path: java/opendataloader-pdf-cli/target/*.jar
          retention-days: 1

  # Runs the benchmark against the jar(s) built by the test job and writes a
  # Markdown results table to the job summary.
  benchmark:
    needs: test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Setup Java
        uses: actions/setup-java@v5
        with:
          distribution: 'temurin'
          java-version: '21'

      # Restores the jar(s) into the same directory the test job uploaded
      # them from.
      - name: Download build artifacts
        uses: actions/download-artifact@v8
        with:
          name: java-build
          path: java/opendataloader-pdf-cli/target/

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      - name: Setup uv
        uses: astral-sh/setup-uv@v7

      - name: Run benchmark
        run: ./scripts/bench.sh --skip-build --check-regression

      # always(): the summary is written even when the benchmark step failed.
      # The script itself exits 0 when results are missing or unparsable.
      - name: Benchmark summary
        if: always()
        run: |
          python3 << 'PYEOF'
          import json, os, sys
          from pathlib import Path

          eval_path = Path("/tmp/opendataloader-bench/prediction/opendataloader/evaluation.json")
          thresh_path = Path("/tmp/opendataloader-bench/thresholds.json")
          # Falls back to /dev/null when GITHUB_STEP_SUMMARY is not set.
          summary_file = os.environ.get("GITHUB_STEP_SUMMARY", "/dev/null")

          # Nothing to report: note it in the summary and stop without failing.
          if not eval_path.exists() or not thresh_path.exists():
              with open(summary_file, "a") as f:
                  f.write("## Benchmark Results\n\nBenchmark did not produce evaluation results.\n")
              sys.exit(0)

          try:
              with open(eval_path) as f:
                  eval_data = json.load(f)
              with open(thresh_path) as f:
                  thresholds = json.load(f)
          except json.JSONDecodeError as e:
              with open(summary_file, "a") as f:
                  f.write(f"## Benchmark Results\n\nFailed to parse results: {e}\n")
              sys.exit(0)

          scores = eval_data.get("metrics", {}).get("score", {})
          table_detection = eval_data.get("table_detection", {})
          speed = eval_data.get("speed", {})
          triage = eval_data.get("triage", {})
          # Allowed slack below each "higher is better" threshold.
          tol = thresholds.get("regression_tolerance", 0)

          rows = []

          # Quality metrics pass when score >= threshold - tolerance. A metric
          # is skipped when either its score or its threshold is absent.
          # NOTE(review): scores print with 4 decimals but the effective
          # threshold with 2, so the displayed bound may be rounded - confirm
          # this is intended.
          for key, label, src in [
              ("nid", "NID", scores.get("nid_mean")),
              ("teds", "TEDS", scores.get("teds_mean")),
              ("mhs", "MHS", scores.get("mhs_mean")),
              ("table_detection_f1", "Table Detection F1", table_detection.get("f1")),
          ]:
              t = thresholds.get(key)
              if src is not None and t is not None:
                  effective = t - tol
                  status = "✅" if src >= effective else "❌"
                  rows.append(f"| {label} | {src:.4f} | ≥ {effective:.2f} | {status} |")

          # Speed is "lower is better" and is compared without the tolerance.
          elapsed = speed.get("elapsed_per_doc")
          elapsed_thresh = thresholds.get("elapsed_per_doc")
          if elapsed is not None and elapsed_thresh is not None:
              status = "✅" if elapsed <= elapsed_thresh else "❌"
              rows.append(f"| Speed | {elapsed:.2f}s/doc | ≤ {elapsed_thresh}s/doc | {status} |")

          # Triage rows are only considered when the evaluation has triage data.
          if triage:
              tr_recall = triage.get("recall")
              tr_thresh = thresholds.get("triage_recall")
              if tr_recall is not None and tr_thresh is not None:
                  effective = tr_thresh - tol
                  status = "✅" if tr_recall >= effective else "❌"
                  rows.append(f"| Triage Recall | {tr_recall:.4f} | ≥ {effective:.2f} | {status} |")

              tr_fn = triage.get("fn_count")
              tr_fn_max = thresholds.get("triage_fn_max")
              if tr_fn is not None and tr_fn_max is not None:
                  status = "✅" if tr_fn <= tr_fn_max else "❌"
                  rows.append(f"| Triage FN | {tr_fn} | ≤ {tr_fn_max} | {status} |")

          # Append the Markdown table; a placeholder row is written when no
          # metric produced a row.
          with open(summary_file, "a") as f:
              f.write("## Benchmark Results\n\n")
              f.write("| Metric | Score | Threshold | Status |\n")
              f.write("|--------|-------|-----------|--------|\n")
              for row in rows:
                  f.write(row + "\n")
              if not rows:
                  f.write("| (no metrics found) | | | |\n")
          PYEOF

      # always(): the raw evaluation file is uploaded even after a failed
      # benchmark step.
      - name: Upload evaluation results
        uses: actions/upload-artifact@v7
        if: always()
        with:
          name: benchmark-results
          path: /tmp/opendataloader-bench/prediction/opendataloader/evaluation.json