-
Notifications
You must be signed in to change notification settings - Fork 0
254 lines (236 loc) · 11.2 KB
/
Copy pathnightly-try-integration.yml
File metadata and controls
254 lines (236 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
name: Nightly Try Integration
# Live integration vs https://try.getaxonflow.com.
#
# Layered on top of the docker-compose integration job in integration.yml,
# which validates the SDK <-> agent wire on every main merge. This nightly
# adds coverage that docker-compose can't provide:
#
#   - Full-config stack: planning engine + LLM providers configured the
#     way a real community SaaS deployment is, not the bare minimum a
#     local docker-compose ships with.
#   - Production canary: try.getaxonflow.com is the hosted sandbox real
#     users hit; if it regresses, every download-and-try newcomer hits a
#     broken stack. Nightly cron makes that loud.
#
# Operates entirely through the documented register-flow path:
#   1. POST /api/v1/register -> ephemeral tenant_id + secret
#   2. Run integration suite against the live endpoint with those creds
#
# Non-destructive against try.getaxonflow.com. Apart from the register
# call that mints the ephemeral tenant, all operations are LLM queries
# through the gateway, policy evaluation, or read endpoints (health,
# list_connectors, generate_plan).
on:
  # Daily at 06:00 UTC — a low-traffic hour that lines up with the
  # weekly-cron slot integration.yml already uses for its
  # docker-compose run.
  schedule:
    - cron: '0 6 * * *'

  # Manual runs from the Actions tab; no inputs.
  workflow_dispatch: {}

  # Self-validating trigger: a PR that touches this workflow file, the
  # integration tests it runs, or the live example scripts it exercises
  # gets one pre-merge run. The path filter is deliberately narrow —
  # this is not a per-PR gate, only a guarantee that changes to the
  # live-stack-exercising surface land working.
  pull_request:
    paths:
      - .github/workflows/nightly-try-integration.yml
      - tests/test_integration.py
      - examples/quickstart.py
      - examples/gateway_mode.py
# Workflow-wide token scopes. `issues: write` exists solely so that a
# failed scheduled run can open or comment on a tracking issue.
# pull_request and workflow_dispatch runs skip that step and only need
# read access.
permissions:
  contents: read
  issues: write
# One concurrency group per ref. With cancel-in-progress disabled, a run
# that is already executing is never cancelled by a newer run on the
# same ref.
concurrency:
  group: nightly-try-${{ github.ref }}
  cancel-in-progress: false
jobs:
  # Single job: probe the hosted sandbox, register an ephemeral tenant,
  # run the integration suite and the example scripts against it, and —
  # on a failed scheduled run — open or comment on a tracking issue.
  nightly-try:
    name: Live integration vs try.getaxonflow.com
    runs-on: ubuntu-latest
    timeout-minutes: 25
    env:
      # Suppress SDK telemetry pings from this CI run. We hit
      # try.getaxonflow.com as the integration target, so without an
      # explicit opt-out the SDK would fire an anonymous heartbeat
      # against the production checkpoint endpoint and pollute external
      # adoption metrics. AXONFLOW_TELEMETRY=off is the canonical and
      # only opt-out as of v7.0.0 — DO_NOT_TRACK is no longer honored.
      AXONFLOW_TELEMETRY: 'off'
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install SDK + dev deps
        run: pip install -e ".[dev,all]"

      # Capture the resolved dependency graph so a failure two months
      # from now doesn't leave us guessing which transitive bumped.
      - name: Snapshot resolved deps
        run: pip freeze > /tmp/pip-freeze.txt

      # Fail fast with a clear message when the sandbox itself is down,
      # before attempting registration or running any tests.
      - name: Probe try.getaxonflow.com /health
        run: |
          if ! curl -sSf --max-time 15 https://try.getaxonflow.com/health > /tmp/health.json; then
            echo "FAIL: https://try.getaxonflow.com/health unreachable"
            exit 1
          fi
          cat /tmp/health.json
          echo

      # Mint a throwaway tenant via POST /api/v1/register and expose
      # tenant_id / secret as step outputs. The secret is registered as
      # a log mask before it is written to the outputs file.
      - name: Register ephemeral tenant on try.getaxonflow.com
        id: register
        run: |
          set -euo pipefail
          response=$(curl -sSf --max-time 20 \
            -H 'Content-Type: application/json' \
            -d "{\"label\":\"sdk-python-nightly-${GITHUB_RUN_ID}\"}" \
            https://try.getaxonflow.com/api/v1/register)
          tenant_id=$(printf '%s' "$response" | jq -r 'try .tenant_id // empty')
          secret=$(printf '%s' "$response" | jq -r 'try .secret // empty')
          if [ -z "$tenant_id" ] || [ -z "$secret" ]; then
            echo "FAIL: register response missing tenant_id and/or secret"
            # Don't echo $response verbatim — it may contain a freshly
            # minted secret if the response shape is partially valid.
            # Wrap the redaction in `try ... catch` so a non-object
            # response (string, array, malformed JSON) falls back to a
            # safe placeholder rather than printing whatever jq received.
            printf '%s' "$response" \
              | jq 'try (with_entries(select(.key != "secret"))) catch "<unparseable register response>"' \
              || echo "<jq failed to parse register response>"
            exit 1
          fi
          echo "::add-mask::$secret"
          {
            echo "tenant_id=$tenant_id"
            echo "secret=$secret"
          } >> "$GITHUB_OUTPUT"

      - name: Run integration tests against try
        env:
          RUN_INTEGRATION_TESTS: '1'
          AXONFLOW_AGENT_URL: https://try.getaxonflow.com
          AXONFLOW_CLIENT_ID: ${{ steps.register.outputs.tenant_id }}
          AXONFLOW_CLIENT_SECRET: ${{ steps.register.outputs.secret }}
          # Bump the test client timeout for the hosted SaaS path —
          # try.getaxonflow.com runs real LLM providers whose tail
          # latency exceeds the 30s default that's adequate for a bare
          # docker-compose stack with no LLM configured. Plan generation
          # routinely lands close to 60s; 120s gives headroom without
          # masking real wire / agent regressions.
          AXONFLOW_TEST_TIMEOUT: '120.0'
          # MAP timeout is separate — plan generation makes multiple
          # LLM calls (one per decomposed step), so the worst case is
          # N × per-call latency. Ollama on a small instance lands
          # 4-step plans at 150-200s. 240s is the smallest value that
          # both passes today AND would catch a regression that
          # doubles per-call latency.
          AXONFLOW_TEST_MAP_TIMEOUT: '240.0'
          # try.getaxonflow.com now has the planning engine reachable
          # within the 300s ALB idle timeout (community-saas re-deploy
          # for axonflow-enterprise#1751). test_generate_plan exercises
          # the planning surface against the live SaaS — drop back to
          # '0' if the SaaS regresses (the test will skip cleanly with
          # a clear reason rather than 504-ing every nightly).
          AXONFLOW_HAS_PLANNING: '1'
          # try.getaxonflow.com's agent task now sets SQLI_ACTION=block
          # explicitly (axonflow-enterprise#1747); SQLi requests return
          # response.blocked=True with a sys_sqli_* policy hit. Strict
          # mode flips test_proxy_llm_call_sql_injection from
          # "engine engaged" tolerance to "blocked is required" — a
          # SaaS regression that drops back to alert mode trips the
          # nightly instead of silently passing.
          AXONFLOW_STRICT_SQLI_BLOCK: '1'
        run: pytest tests/test_integration.py -v --no-cov

      # The example scripts run from examples/ with the same ephemeral
      # credentials, each capped at 120 seconds of wall-clock time.
      - name: Run quickstart example
        env:
          AXONFLOW_AGENT_URL: https://try.getaxonflow.com
          AXONFLOW_CLIENT_ID: ${{ steps.register.outputs.tenant_id }}
          AXONFLOW_CLIENT_SECRET: ${{ steps.register.outputs.secret }}
        run: |
          cd examples
          timeout 120 python quickstart.py

      - name: Run gateway_mode example
        env:
          AXONFLOW_AGENT_URL: https://try.getaxonflow.com
          AXONFLOW_CLIENT_ID: ${{ steps.register.outputs.tenant_id }}
          AXONFLOW_CLIENT_SECRET: ${{ steps.register.outputs.secret }}
        run: |
          cd examples
          timeout 120 python gateway_mode.py

      # Persist the dependency snapshot on failure so the tracking
      # issue can pin the resolved graph at the time of the regression.
      - name: Upload dep snapshot on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: pip-freeze-${{ github.run_id }}
          path: /tmp/pip-freeze.txt
          if-no-files-found: ignore

      # Auto-file an issue on scheduled-run failure. Skipped on
      # workflow_dispatch and pull_request runs — those are intentional
      # ad-hoc or pre-merge work, not a regression signal worth filing.
      - name: Open / comment tracking issue on scheduled failure
        if: failure() && github.event_name == 'schedule'
        uses: actions/github-script@v7
        with:
          script: |
            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            const title = 'Nightly try.getaxonflow.com integration failing';
            const body = [
              `Scheduled nightly run against \`https://try.getaxonflow.com\` failed.`,
              ``,
              `**Run:** ${runUrl}`,
              `**Commit:** ${context.sha}`,
              ``,
              `try.getaxonflow.com is the production canary for the Python SDK. Investigate:`,
              ``,
              `1. Is the endpoint itself reachable / healthy?`,
              `2. Did \`POST /api/v1/register\` change shape (expecting \`tenant_id\` + \`secret\`)?`,
              `3. Did \`tests/test_integration.py\` regress vs the live stack?`,
              `4. Did \`examples/quickstart.py\` or \`examples/gateway_mode.py\` regress?`,
              ``,
              `Pinned dependency graph for this run is uploaded as the \`pip-freeze-${context.runId}\` artifact.`,
              `Logs and reproduction details in the run link above.`,
            ].join('\n');
            // Prefer label-based dedup: search is eventually-consistent and
            // can miss a sister-run issue created seconds earlier. Falling
            // through to the search query covers older issues that may
            // pre-date the canary label set.
            let existingIssueNumber = null;
            const labelMatches = await github.rest.issues.listForRepo({
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              labels: 'nightly-canary',
              per_page: 10,
            });
            // The issues list endpoint also returns pull requests (they
            // carry a `pull_request` key). Skip them so a labelled PR is
            // never mistaken for the tracking issue.
            const labelledIssue = labelMatches.data.find((item) => !item.pull_request);
            if (labelledIssue) {
              existingIssueNumber = labelledIssue.number;
            } else {
              const search = await github.rest.search.issuesAndPullRequests({
                q: `repo:${context.repo.owner}/${context.repo.repo} is:issue is:open in:title "${title}"`,
              });
              if (search.data.total_count > 0) {
                existingIssueNumber = search.data.items[0].number;
              }
            }
            if (existingIssueNumber) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: existingIssueNumber,
                body: `Another scheduled run failed: ${runUrl}`,
              });
              core.notice(`Commented on existing issue #${existingIssueNumber}`);
            } else {
              const created = await github.rest.issues.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title,
                body,
                labels: ['nightly-canary', 'ci-failure'],
              });
              core.notice(`Opened tracking issue #${created.data.number}`);
            }