Skip to content

Commit 79b9d93

Browse files
Merge branch 'release/v0.101.0' into hotfix/table-cell-click-issue
2 parents b64f25a + 7df30ca commit 79b9d93

20 files changed

Lines changed: 492 additions & 54 deletions

File tree

.github/workflows/41-railway-setup.yml

Lines changed: 36 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,10 +73,45 @@ jobs:
7373
chmod +x hosting/railway/oss/scripts/*.sh
7474
# shellcheck source=/dev/null
7575
source hosting/railway/oss/scripts/preview-resolve-env.sh
76-
hosting/railway/oss/scripts/bootstrap.sh
76+
77+
# Persist the full bootstrap output so the "Upload setup log" step can
78+
# publish it as an artifact, regardless of live-log truncation.
79+
log_file="${GITHUB_WORKSPACE:-$PWD}/railway-setup-${PR_NUMBER:-unknown}.log"
80+
81+
set +e
82+
hosting/railway/oss/scripts/bootstrap.sh 2>&1 | tee "$log_file"
83+
setup_status=${PIPESTATUS[0]}
84+
set -e
85+
7786
echo "project_name=${RAILWAY_PROJECT_NAME}" >> "$GITHUB_OUTPUT"
7887
echo "environment_name=${RAILWAY_ENVIRONMENT_NAME}" >> "$GITHUB_OUTPUT"
7988
89+
if [ "$setup_status" -ne 0 ]; then
90+
{
91+
echo "### Railway Preview Setup — Failed"
92+
echo
93+
echo "<details><summary>Setup log (last 100 lines)</summary>"
94+
echo
95+
echo '```'
96+
tail -n 100 "$log_file" 2>/dev/null
97+
echo '```'
98+
echo "</details>"
99+
} >> "$GITHUB_STEP_SUMMARY"
100+
exit "$setup_status"
101+
fi
102+
103+
- name: Upload setup log
104+
if: always()
105+
# Diagnostics only: a failed/duplicate upload must never fail the job.
106+
continue-on-error: true
107+
uses: actions/upload-artifact@v4
108+
with:
109+
name: railway-setup-log-${{ inputs.pr_number }}
110+
path: railway-setup-*.log
111+
if-no-files-found: ignore
112+
overwrite: true
113+
retention-days: 7
114+
80115
- name: Summary
81116
run: |
82117
{

.github/workflows/43-railway-deploy.yml

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,10 @@ jobs:
112112
# shellcheck source=/dev/null
113113
source hosting/railway/oss/scripts/preview-resolve-env.sh
114114
115-
log_file="$(mktemp)"
116-
cleanup() {
117-
rm -f "$log_file"
118-
}
119-
trap cleanup EXIT
115+
# Keep the log in the workspace so the "Upload deploy log" step can
116+
# publish it as an artifact. GitHub's live log can truncate streamed
117+
# output, so we always persist a full copy.
118+
log_file="${GITHUB_WORKSPACE:-$PWD}/railway-deploy-${PR_NUMBER:-unknown}.log"
120119
121120
project="$RAILWAY_PROJECT_NAME"
122121
environment_name="$RAILWAY_ENVIRONMENT_NAME"
@@ -177,13 +176,58 @@ jobs:
177176
echo "environment_name=${environment_name}" >> "$GITHUB_OUTPUT"
178177
echo "railway_logs_url=${railway_logs_url}" >> "$GITHUB_OUTPUT"
179178
180-
trap - EXIT
181-
cleanup
179+
# Best-effort diagnostics; never let these change the step outcome.
180+
set +e
181+
# On failure, pull the tail of the key services' Railway logs into
182+
# this job so the root cause (e.g. a Postgres crash-loop) is visible
183+
# here instead of only in the Railway dashboard.
184+
if [ "$deploy_failed" = "true" ]; then
185+
# Tee into the persisted log so the uploaded artifact and the
186+
# step-summary tail include the Railway service logs too, not just
187+
# the (possibly truncated) live Actions log.
188+
dump_railway_logs 2>&1 | tee -a "$log_file"
189+
fi
190+
191+
status_label="Deployed"
192+
[ "$deploy_failed" = "true" ] && status_label="Failed"
193+
{
194+
echo "### Railway Preview Deploy"
195+
echo
196+
echo "| Item | Value |"
197+
echo "| --- | --- |"
198+
echo "| PR | \`${PR_NUMBER}\` |"
199+
echo "| Image tag | \`${IMAGE_TAG}\` |"
200+
echo "| Status | ${status_label} |"
201+
[ -n "$url" ] && echo "| Preview URL | ${url} |"
202+
[ -n "$railway_logs_url" ] && echo "| Railway logs | [Open logs](${railway_logs_url}) |"
203+
if [ "$deploy_failed" = "true" ]; then
204+
echo
205+
echo "<details><summary>Deploy log (last 100 lines)</summary>"
206+
echo
207+
echo '```'
208+
tail -n 100 "$log_file" 2>/dev/null
209+
echo '```'
210+
echo "</details>"
211+
fi
212+
} >> "$GITHUB_STEP_SUMMARY"
213+
set -e
182214
183215
if [ "$deploy_failed" = "true" ]; then
184216
exit 1
185217
fi
186218
219+
- name: Upload deploy log
220+
if: always()
221+
# Diagnostics only: a failed/duplicate upload must never fail the job.
222+
continue-on-error: true
223+
uses: actions/upload-artifact@v4
224+
with:
225+
name: railway-deploy-log-${{ inputs.pr_number }}
226+
path: railway-deploy-*.log
227+
if-no-files-found: ignore
228+
overwrite: true
229+
retention-days: 7
230+
187231
- name: Post preview URL as PR comment
188232
if: inputs.pr_number != '' && steps.deploy.outputs.preview_url != ''
189233
uses: actions/github-script@v7

.github/workflows/44-railway-tests.yml

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,11 @@ jobs:
235235
needs: prepare
236236
if: needs.prepare.outputs.needs_deployment == 'true'
237237
runs-on: ubuntu-latest
238+
# Backstop only: the real hangs are bounded per-step (wait_for caps at ~10m,
239+
# the browser install retries with a 180s/attempt cap). This guards against
240+
# an unexpected hang without cutting off a legitimately slow run (deploy
241+
# readiness wait + cold browser install can take ~20m).
242+
timeout-minutes: 30
238243
steps:
239244
- name: Wait for deployed web and API
240245
env:
@@ -281,7 +286,7 @@ jobs:
281286
if: steps.auth_bootstrap.outputs.enabled == 'true'
282287
uses: actions/setup-node@v4
283288
with:
284-
node-version: "24"
289+
node-version: "22"
285290

286291
- name: Install pnpm
287292
if: steps.auth_bootstrap.outputs.enabled == 'true'
@@ -294,10 +299,50 @@ jobs:
294299
working-directory: web
295300
run: pnpm install --no-frozen-lockfile --filter agenta-web-tests...
296301

302+
# Cache the downloaded browsers. On a cache hit `playwright install` is a
303+
# no-op, which skips the chromium download and unpack entirely — and that step
304+
# is what stalls: the debug trace showed the 170 MiB transfer hitting 100%
305+
# in ~2s, then the install hanging (no progress) until killed. apt deps
306+
# were never the problem (they finished in ~10s).
307+
- name: Cache Playwright browsers
308+
id: pw-cache
309+
if: steps.auth_bootstrap.outputs.enabled == 'true'
310+
uses: actions/cache@v4
311+
with:
312+
path: ~/.cache/ms-playwright
313+
key: playwright-${{ runner.os }}-${{ hashFiles('web/pnpm-lock.yaml') }}
314+
restore-keys: |
315+
playwright-${{ runner.os }}-
316+
317+
# OS libraries for chromium. This is the fast, reliable part (~10s); kept
318+
# as its own step so a browser-download stall can't be confused with it.
319+
- name: Install Playwright system dependencies
320+
if: steps.auth_bootstrap.outputs.enabled == 'true'
321+
working-directory: web/tests
322+
run: pnpm exec playwright install-deps chromium
323+
324+
# Browser binaries. Root cause of the original ~6h hang: Playwright 1.59's
325+
# zip extraction deadlocks on Node 24 (reproduced locally: Node 22/23
326+
# extract in ~16s, Node 24 hangs indefinitely). The job is pinned to Node
327+
# 22 (LTS) above, which fixes it. The cache makes this a no-op on a hit,
328+
# and the retry + per-attempt timeout guard against any transient stall.
297329
- name: Install Playwright browser
298330
if: steps.auth_bootstrap.outputs.enabled == 'true'
299331
working-directory: web/tests
300-
run: pnpm exec playwright install --with-deps chromium
332+
run: |
333+
for attempt in 1 2 3; do
334+
echo "::group::playwright install chromium (attempt ${attempt}/3)"
335+
if timeout 180 pnpm exec playwright install chromium; then
336+
echo "::endgroup::"
337+
echo "browser install succeeded on attempt ${attempt}"
338+
exit 0
339+
fi
340+
echo "::endgroup::"
341+
echo "attempt ${attempt} stalled or failed; retrying after 5s..."
342+
sleep 5
343+
done
344+
echo "playwright browser install failed after 3 attempts" >&2
345+
exit 1
301346
302347
- name: Bootstrap auth with global setup
303348
if: steps.auth_bootstrap.outputs.enabled == 'true'
@@ -587,7 +632,7 @@ jobs:
587632
- name: Setup Node.js
588633
uses: actions/setup-node@v4
589634
with:
590-
node-version: "24"
635+
node-version: "22"
591636

592637
- name: Install pnpm
593638
uses: pnpm/action-setup@v4

api/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "api"
3-
version = "0.100.9"
3+
version = "0.101.0"
44
description = "Agenta API"
55
requires-python = ">=3.11,<3.14"
66
authors = [

api/uv.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "agenta-client"
3-
version = "0.100.9"
3+
version = "0.101.0"
44
description = "Fern-generated Python client for the Agenta API."
55
requires-python = ">=3.11,<3.14"
66
authors = [

clients/python/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hosting/kubernetes/helm/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ apiVersion: v2
22
name: agenta
33
description: A Helm chart for deploying Agenta (OSS or EE) on Kubernetes
44
type: application
5-
version: 0.100.9
6-
appVersion: "v0.100.9"
5+
version: 0.101.0
6+
appVersion: "v0.101.0"
77
keywords:
88
- agenta
99
- llm

hosting/railway/oss/scripts/bootstrap.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
77
# shellcheck source=lib.sh
88
source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
99

10+
install_error_trap
11+
1012
PROJECT_NAME="${RAILWAY_PROJECT_NAME:-agenta-oss-railway}"
1113
ENV_NAME="${RAILWAY_ENVIRONMENT_NAME:-staging}"
1214
SOURCE_COMPOSE_FILE="${RAILWAY_SOURCE_COMPOSE_FILE:-$(railway_source_compose_file "$ROOT_DIR")}"
@@ -37,9 +39,19 @@ require_railway_auth() {
3739

3840
# Verify the token actually works. A revoked or invalid token will cause
3941
# every subsequent call to fail with a confusing "Unauthorized" error.
42+
# Distinguish a genuine auth failure from rate-limiting / transient network
43+
# errors (where the token is fine) so the log points at the real cause.
4044
local whoami_output
4145
whoami_output="$(railway_call whoami 2>&1)" || {
42-
printf "Railway authentication failed. The token appears to be invalid or revoked.\n" >&2
46+
if printf "%s" "$whoami_output" | grep -qiE "rate.?limit"; then
47+
printf "Railway auth check could not complete: the API is rate-limiting requests (retries exhausted).\n" >&2
48+
printf "This is throttling, not a bad token. Re-run once the rate-limit window clears.\n" >&2
49+
elif printf "%s" "$whoami_output" | grep -qiE "timed out|error sending request|failed to fetch|connection (reset|refused|closed)|temporarily unavailable|service unavailable|bad gateway|gateway time-?out"; then
50+
printf "Railway auth check could not complete: transient network error reaching the Railway API.\n" >&2
51+
printf "The token is likely fine; this is usually temporary. Re-run.\n" >&2
52+
else
53+
printf "Railway authentication failed. The token appears to be invalid or revoked.\n" >&2
54+
fi
4355
printf "Output: %s\n" "$whoami_output" >&2
4456
exit 1
4557
}

0 commit comments

Comments
 (0)