-
Notifications
You must be signed in to change notification settings - Fork 2
103 lines (93 loc) · 4.61 KB
/
llmxive-real-call-tests.yml
File metadata and controls
103 lines (93 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
name: llmXive Real-Call Tests

# Trigger: pull requests only, and only when the change touches at least one
# of the path globs below (package source, agents, tests, the llmXive
# workflow files themselves, packaging metadata, or contracts).
on:
  pull_request:
    paths:
      - 'src/llmxive/**'
      - 'agents/**'
      - 'tests/**'
      - '.github/workflows/llmxive-*.yml'
      - 'pyproject.toml'
      - 'contracts/**'
# Workflow token scope: read access to repository contents is the only
# permission requested here.
permissions:
  contents: read
jobs:
  # Single gate job: installs the package and a TeX toolchain, runs the cheap
  # pre-checks and contract tests, then runs the real-call test suite.
  real-call:
    runs-on: ubuntu-latest
    # Spec 013 added heavy real-LLM e2e tests (implementer drives a
    # multi-task edit loop with a real Dartmouth call + lualatex compile per
    # task; publisher hits the real Zenodo Sandbox). The full real-call
    # suite no longer fits in 30 min on the standard runner — it was getting
    # cancelled mid-run. 60 min gives the suite headroom to complete and
    # print its full pass/fail summary.
    timeout-minutes: 60
    # NOTE(review): job-level `env` is visible to every step below, including
    # dependency installation. Consider scoping the secrets to the steps that
    # actually need them — confirm which checks/tests read which variable.
    env:
      LLMXIVE_REAL_TESTS: "1"
      DARTMOUTH_CHAT_API_KEY: ${{ secrets.DARTMOUTH_CHAT_API_KEY }}
      DARTMOUTH_API_KEY: ${{ secrets.DARTMOUTH_API_KEY }}
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      # Spec 013: the paper_publisher real-call test (SC-006 / SC-008)
      # publishes to Zenodo Sandbox. The sandbox token is a SEPARATE
      # credential from production (sandbox.zenodo.org is its own service);
      # without it the test skips gracefully. ZENODO_API_TOKEN is the
      # production token (not used by the sandbox test, but wired here so
      # any future production-path real-call test can find it).
      ZENODO_API_TOKEN: ${{ secrets.ZENODO_API_TOKEN }}
      ZENODO_SANDBOX_API_TOKEN: ${{ secrets.ZENODO_SANDBOX_API_TOKEN }}
    steps:
      # No `ref:` override — use actions/checkout's default
      # merge-commit-SHA fetch for pull_request events. A previous
      # branch-name fetch raced against PR auto-merge --delete-branch
      # and every PR failed CI here with "exit 1" on checkout. None of
      # the steps below need the source branch name; they're all
      # Python checks and pytest. SHA-based checkout is race-safe.
      - uses: actions/checkout@v5

      - uses: actions/setup-python@v6
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: "3.11"
          cache: pip

      - name: Install package and dev deps
        run: pip install -e ".[dev]"

      # The publisher real-call test (SC-006) and any compile-dependent
      # e2e test need a real TeX toolchain: the llmxive.cls full compile
      # runs lualatex + bibtex + fontspec. Without TeX Live the publisher's
      # `_compile_full` fails, the deposit never happens, and the test can't
      # reach `posted`. Install lualatex + the package set llmxive.cls pulls
      # in (fontspec, geometry, xcolor, microtype, adjustbox, tabularray,
      # tcolorbox, caption, booktabs, listings, natbib, hyperref, fancyhdr,
      # fontawesome5, tikz, algorithms, …) via the texlive-* meta-packages.
      - name: Install TeX Live (lualatex + bibtex) for paper compiles
        run: |
          sudo apt-get update
          sudo apt-get install -y --no-install-recommends \
            texlive-luatex texlive-latex-recommended texlive-latex-extra \
            texlive-fonts-recommended texlive-fonts-extra texlive-science \
            texlive-bibtex-extra texlive-pictures texlive-plain-generic \
            texlive-lang-english fonts-noto-core
          lualatex --version | head -1
          bibtex --version | head -1

      # Install the house fonts (Fraunces serif + JetBrains Mono) so
      # fontspec resolves them and the real-call compile renders the true
      # llmXive style. The class falls back to TeX Gyre Pagella / Inconsolata
      # via \IfFontExistsTF when these are absent, but the published PDF
      # should match production.
      - name: Install llmXive house fonts
        run: |
          mkdir -p "$HOME/.fonts"
          cp papers/.style/fonts/*.ttf "$HOME/.fonts/"
          fc-cache -f >/dev/null
          # Test grep's result directly instead of piping it through `head`:
          # this step runs without pipefail, so a `... | grep | head || echo`
          # pipeline reports head's status (0 even on no match) and the
          # warning below could never be printed.
          house_fonts="$(fc-list | grep -iE "fraunces|jetbrains" || true)"
          if [ -n "$house_fonts" ]; then
            printf '%s\n' "$house_fonts" | head
          else
            echo "WARN: house fonts not registered (class will fall back)"
          fi

      # FR-031 / T114: Per-PR pre-checks BEFORE running real-call tests.
      # If any of these fail, the gate fails fast — saving real-LLM minutes.
      - name: Prompt existence check (every prompt referenced by registry exists and parses)
        run: python -m llmxive.checks.prompts

      - name: Spec Kit script executability check
        run: python -m llmxive.checks.speckit_scripts

      - name: Backend reachability check (Dartmouth + HF list_models())
        run: python -m llmxive.checks.backends

      # The contract suite validates every schema in contracts/.
      - name: Contract tests
        run: pytest tests/contract -v

      # The real-call suite is the canonical Constitution III gate.
      - name: Real-call tests (Dartmouth + HF + Spec Kit scripts + e2e)
        run: pytest tests/real_call -v