fix: increase timeout for db-backup #909

Workflow file for this run

	name: Claude

	on:
	issue_comment:
	types: [created]
	issues:
	types: [opened, assigned]
	pull_request_review_comment:
	types: [created]
	pull_request:
	types: [opened, synchronize, ready_for_review]

	jobs:
	# @claude (Playwright MCP) or @claude chrome (Chrome DevTools MCP).
	# @claude review routes to the review job below, not this one.
	# Implements features end-to-end: code + browser verification + tests.
	implement:
	name: Claude (implement)
	if: >-
	((github.event_name == 'issue_comment' \|\| github.event_name == 'pull_request_review_comment')
	&& contains(github.event.comment.body \|\| '', '@claude')
	&& !contains(github.event.comment.body \|\| '', '@claude review')
	&& contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.comment.author_association))
	\|\|
	(github.event_name == 'issues'
	&& (contains(github.event.issue.body \|\| '', '@claude') \|\| contains(github.event.issue.title \|\| '', '@claude'))
	&& !contains(github.event.issue.body \|\| '', '@claude review')
	&& !contains(github.event.issue.title \|\| '', '@claude review')
	&& contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.issue.author_association))
	runs-on: ubuntu-latest
	env:
	GITHUB_TOKEN: ${{ secrets.PAT }}
	VERCEL_GIT_COMMIT_REF: claude/${{ github.ref_name }}
	CHROME_DEVTOOLS_MCP_NO_USAGE_STATISTICS: '1'
	DATABASE_READONLY_URL: ${{ secrets.DATABASE_READONLY_URL }}
	DATABASE_DRIVER: neon
	DATABASE_SSL: 'true'
	permissions:
	contents: write
	pull-requests: write
	issues: write
	actions: read

	steps:
	- name: Detect mode
	id: detect
	env:
	BODY: ${{ github.event.comment.body \|\| github.event.issue.body \|\| '' }}
	TITLE: ${{ github.event.issue.title \|\| '' }}
	run: \|
	set -euo pipefail
	if printf '%s\n%s' "$BODY" "$TITLE" \| grep -q '@claude chrome'; then
	MODE=chrome
	TRIGGER='@claude chrome'
	else
	MODE=frontend
	TRIGGER='@claude'
	fi
	echo "mode=$MODE" >> "$GITHUB_OUTPUT"
	echo "trigger=$TRIGGER" >> "$GITHUB_OUTPUT"
	echo "Detected mode: $MODE (trigger=$TRIGGER)"

	- name: Checkout repository
	uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	fetch-depth: 0
	token: ${{ secrets.PAT }}

	- name: Setup pnpm
	uses: pnpm/action-setup@8912a9102ac27614460f54aedde9e1e7f9aec20d # v6.0.5

	- name: Setup Node.js
	uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	with:
	node-version: '24'
	cache: 'pnpm'

	- name: Cache Playwright browsers
	if: steps.detect.outputs.mode == 'frontend'
	id: playwright-cache
	uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: ~/.cache/ms-playwright
	key: playwright-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }}
	restore-keys: \|
	playwright-${{ runner.os }}-

	- name: Install dependencies
	run: pnpm install --frozen-lockfile

	- name: Install Playwright browsers and dependencies
	if: steps.detect.outputs.mode == 'frontend' && steps.playwright-cache.outputs.cache-hit != 'true'
	run: npx -y playwright install --with-deps chromium

	- name: Install Playwright system dependencies
	if: steps.detect.outputs.mode == 'frontend' && steps.playwright-cache.outputs.cache-hit == 'true'
	run: npx -y playwright install-deps chromium

	- name: Verify Chrome is available
	if: steps.detect.outputs.mode == 'chrome'
	run: \|
	set -euo pipefail
	if command -v google-chrome >/dev/null 2>&1; then
	google-chrome --version
	elif command -v google-chrome-stable >/dev/null 2>&1; then
	google-chrome-stable --version
	else
	echo "ERROR: Chrome not found on runner."
	echo "Install Chrome stable or set --executable-path for chrome-devtools-mcp."
	exit 1
	fi

	- name: Cache Cypress binary
	id: cypress-cache
	uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: ~/.cache/Cypress
	key: cypress-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }}
	restore-keys: \|
	cypress-${{ runner.os }}-

	- name: Install Cypress binary
	if: steps.cypress-cache.outputs.cache-hit != 'true'
	run: pnpm --filter @semianalysisai/inferencex-app exec cypress install

	- name: Start dev server
	id: devserver
	continue-on-error: true
	run: \|
	set -euo pipefail

	LOG=/tmp/next-dev.log
	echo "log=$LOG" >> "$GITHUB_OUTPUT"

	pnpm run dev > "$LOG" 2>&1 &
	DEV_PID=$!
	echo "pid=$DEV_PID" >> "$GITHUB_OUTPUT"

	for i in {1..60}; do
	if curl -sSf http://localhost:3000 >/dev/null; then
	echo "Dev server is up"
	echo "up=true" >> "$GITHUB_OUTPUT"
	exit 0
	fi
	if ! kill -0 "$DEV_PID" 2>/dev/null; then
	echo "Dev server process exited early"
	break
	fi
	sleep 2
	done

	echo "Dev server failed to start (best effort; continuing)."
	echo "up=false" >> "$GITHUB_OUTPUT"
	tail -n 200 "$LOG" \|\| true
	kill "$DEV_PID" 2>/dev/null \|\| true
	exit 0

	- name: Compose Playwright prompt + tools
	if: steps.detect.outputs.mode == 'frontend'
	run: \|
	{
	echo 'PROMPT_BODY<<__CLAUDE_EOF__'
	cat <<'INNER_EOF'
	You are a Frontend agent for the InferenceX dashboard.

	## Ground truth — READ FIRST
	Architecture, conventions, and tab structure live in the repo, not this prompt. Before writing code, read:
	1. `AGENTS.md` (root) — project overview, tab list, mandatory unofficial-run support, analytics convention, "add a new model/GPU" workflows.
	2. `docs/index.md` — index into subsystem deep-dives (architecture, d3-charts, data-pipeline, pitfalls, testing, state-ownership, gpu-specs, tco-calculator, adding-entities, blog).
	3. Open the actual files you intend to touch (`rg`, `ls`). If any doc disagrees with code, trust the code and call out the drift.

	This prompt is runtime context. Do not assume any file path from it without verifying.

	## Browser tool: Playwright MCP (server name "playwright")
	- DOM interactions, screenshots, and coordinate-based mouse wheel + drag (needed for D3 zoom/pan).
	- App runs at http://localhost:3000.
	- For docs or external URLs, use WebFetch instead.

	## Definition of done — NON-NEGOTIABLE
	- Feature verified end-to-end via Playwright MCP in a real browser session, with screenshots saved as evidence.
	- Charts MUST render real data. If you see "No data available" or "Please change the model, sequence, precision, date range or GPU", the task is NOT complete — keep debugging.
	- If you touched inference or evaluation, the unofficial-run overlay path is verified too. AGENTS.md §"Unofficial Run Support" lists the exact code paths and the `?unofficialrun=<id>` URL pattern; do not skip it.
	- New interactive elements have a `track()` call per AGENTS.md §"Analytics Requirement" (`[section]_[action]` naming).
	- Tests added per `docs/testing.md`. Missing or low-quality tests are blocking.
	- `pnpm typecheck && pnpm lint && pnpm test:unit && pnpm test:e2e` all green before commit.

	## Dev server status (best-effort start happened before this prompt)
	- DEV_SERVER_UP=${DEV_SERVER_UP:-unknown}
	- DEV_SERVER_LOG=${DEV_SERVER_LOG:-/tmp/next-dev.log}
	- DEV_SERVER_PID=${DEV_SERVER_PID:-unknown}

	If DEV_SERVER_UP is not "true":
	1. `tail -n 200 "$DEV_SERVER_LOG"` and fix the underlying issue in the repo.
	2. Restart: `pnpm run dev > /tmp/next-dev.log 2>&1 &`
	3. Confirm reachable: `curl -sSf http://localhost:3000 >/dev/null`

	## Workflow
	1. Read AGENTS.md and the relevant `docs/*.md` for the feature area.
	2. Implement, keeping changes scoped to the request — no drive-by refactors.
	3. Add or update tests (unit tests colocated as `<module>.test.ts`, E2E tests in `packages/app/cypress/e2e/`).
	4. Run typecheck, lint, unit, E2E.
	5. Verify in Playwright MCP: take screenshots, check console for errors, confirm real data. For inference/evaluation, also verify with `?unofficialrun=<github-actions-run-id>` appended to the URL.
	6. Commit and push — pushing triggers a Vercel preview automatically.

	## Reminder
	Pushing a commit triggers a Vercel preview deployment. If local and Vercel diverge, the Vercel preview is the final verification target.
	INNER_EOF
	echo '__CLAUDE_EOF__'
	} >> "$GITHUB_ENV"
	{
	echo 'MCP_CONFIG={"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"playwright":{"command":"npx","args":["-y","@playwright/mcp@latest","--headless","--caps=vision"]}}}'
	echo 'CLAUDE_TOOLS=Write,Edit,Read,Glob,Grep,WebFetch,mcp__github__,mcp__github_inline_comment__create_inline_comment,mcp__github_ci__,mcp__fetch__,mcp__playwright__,Bash'
	} >> "$GITHUB_ENV"

	- name: Compose Chrome prompt + tools
	if: steps.detect.outputs.mode == 'chrome'
	run: \|
	{
	echo 'PROMPT_BODY<<__CLAUDE_EOF__'
	cat <<'INNER_EOF'
	You are a Frontend agent for the InferenceX dashboard.

	## Ground truth — READ FIRST
	Architecture, conventions, and tab structure live in the repo, not this prompt. Before writing code, read:
	1. `AGENTS.md` (root) — project overview, tab list, mandatory unofficial-run support, analytics convention, "add a new model/GPU" workflows.
	2. `docs/index.md` — index into subsystem deep-dives (architecture, d3-charts, data-pipeline, pitfalls, testing, state-ownership, gpu-specs, tco-calculator, adding-entities, blog).
	3. Open the actual files you intend to touch (`rg`, `ls`). If any doc disagrees with code, trust the code and call out the drift.

	This prompt is runtime context. Do not assume any file path from it without verifying.

	## Browser tool: Chrome DevTools MCP (server name "chrome")
	Chrome MCP is uid-driven:
	1. Always call `mcp__chrome__take_snapshot` BEFORE clicking, filling, or hovering. The snapshot is an accessibility-tree dump that assigns each element a `uid`. UIDs change after navigation or major rerenders — re-snapshot if a uid fails.
	2. Use uids from the latest snapshot with `mcp__chrome__click` / `fill` / `hover` / `upload_file`. Use `press_key` for keyboard shortcuts.
	3. Screenshot for verification: `mcp__chrome__take_screenshot` after page load and after every key interaction. `fullPage: true` for layout debugging.
	4. After interactions, debug with `list_console_messages`, `get_console_message`, `list_network_requests`, `get_network_request`, and `evaluate_script` for runtime DOM checks.
	5. Navigation: `new_page` / `navigate_page`. Use `wait_for` with key text before interacting.
	6. Zoom/pan: Chrome MCP has no coordinate wheel tool. Verify chart state via screenshots and via `evaluate_script` dispatching synthetic events on the chart container's uid. If synthetic events don't drive the D3 zoom, document the limitation and verify data renders + no console errors.

	App runs at http://localhost:3000. For docs or external URLs, use WebFetch.

	## Definition of done — NON-NEGOTIABLE
	- Feature verified end-to-end via Chrome MCP in a real browser session: take_snapshot + take_screenshot evidence, no blocking console errors.
	- Charts MUST render real data. If you see "No data available" or "Please change the model, sequence, precision, date range or GPU", the task is NOT complete — keep debugging.
	- If you touched inference or evaluation, the unofficial-run overlay path is verified too. AGENTS.md §"Unofficial Run Support" lists the exact code paths and the `?unofficialrun=<id>` URL pattern; do not skip it.
	- New interactive elements have a `track()` call per AGENTS.md §"Analytics Requirement" (`[section]_[action]` naming).
	- Tests added per `docs/testing.md`. Missing or low-quality tests are blocking.
	- `pnpm typecheck && pnpm lint && pnpm test:unit && pnpm test:e2e` all green before commit.

	## Dev server status (best-effort start happened before this prompt)
	- DEV_SERVER_UP=${DEV_SERVER_UP:-unknown}
	- DEV_SERVER_LOG=${DEV_SERVER_LOG:-/tmp/next-dev.log}
	- DEV_SERVER_PID=${DEV_SERVER_PID:-unknown}

	If DEV_SERVER_UP is not "true":
	1. `tail -n 200 "$DEV_SERVER_LOG"` and fix the underlying issue in the repo.
	2. Restart: `pnpm run dev > /tmp/next-dev.log 2>&1 &`
	3. Confirm reachable: `curl -sSf http://localhost:3000 >/dev/null`

	## Workflow
	1. Read AGENTS.md and the relevant `docs/*.md` for the feature area.
	2. Implement, keeping changes scoped to the request — no drive-by refactors.
	3. Add or update tests (unit tests colocated as `<module>.test.ts`, E2E tests in `packages/app/cypress/e2e/`).
	4. Run typecheck, lint, unit, E2E.
	5. Verify in Chrome MCP: take_snapshot + take_screenshot, check console + network, confirm real data. For inference/evaluation, also verify with `?unofficialrun=<github-actions-run-id>` appended to the URL.
	6. Commit and push — pushing triggers a Vercel preview automatically.

	## Reminder
	Pushing a commit triggers a Vercel preview deployment. If local and Vercel diverge, the Vercel preview is the final verification target.
	INNER_EOF
	echo '__CLAUDE_EOF__'
	} >> "$GITHUB_ENV"
	{
	echo 'MCP_CONFIG={"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"chrome":{"command":"npx","args":["-y","chrome-devtools-mcp@latest","--headless=true","--isolated=true"]}}}'
	echo 'CLAUDE_TOOLS=Write,Edit,Read,Glob,Grep,WebFetch,mcp__github__,mcp__github_inline_comment__create_inline_comment,mcp__github_ci__,mcp__fetch__,mcp__chrome__,Bash'
	} >> "$GITHUB_ENV"

	- name: Run Claude Code
	id: claude
	if: ${{ always() }}
	uses: anthropics/claude-code-action@fefa07e9c665b7320f08c3b525980457f22f58aa # v1.0.111
	env:
	GH_TOKEN: ${{ secrets.PAT }}
	GITHUB_TOKEN: ${{ secrets.PAT }}
	BASH_DEFAULT_TIMEOUT_MS: '1800000'
	BASH_MAX_TIMEOUT_MS: '3600000'
	DEV_SERVER_UP: ${{ steps.devserver.outputs.up }}
	DEV_SERVER_PID: ${{ steps.devserver.outputs.pid }}
	DEV_SERVER_LOG: ${{ steps.devserver.outputs.log }}
	with:
	anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
	github_token: ${{ secrets.GITHUB_TOKEN }}
	trigger_phrase: ${{ steps.detect.outputs.trigger }}
	track_progress: true
	allowed_bots: ''

	additional_permissions: \|
	actions: read

	claude_args: \|
	--model 'claude-opus-4-7'
	--mcp-config '${{ env.MCP_CONFIG }}'
	--allowedTools "${{ env.CLAUDE_TOOLS }}"

	prompt: ${{ env.PROMPT_BODY }}

	# @claude review or auto-on-PR-open. Review-only: no app build, narrow tools.
	review:
	name: Claude (review)
	if: >-
	(github.event_name == 'pull_request'
	&& !github.event.pull_request.draft
	&& contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.pull_request.author_association))
	\|\|
	((github.event_name == 'issue_comment' \|\| github.event_name == 'pull_request_review_comment')
	&& contains(github.event.comment.body \|\| '', '@claude review')
	&& contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.comment.author_association))
	runs-on: ubuntu-latest
	permissions:
	contents: read
	pull-requests: write
	actions: read
	id-token: write
	concurrency:
	group: claude-review-${{ github.event.pull_request.number \|\| github.event.issue.number }}
	cancel-in-progress: false
	steps:
	- name: Checkout repository
	uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	fetch-depth: 0

	- name: PR Review with Claude
	uses: anthropics/claude-code-action@fefa07e9c665b7320f08c3b525980457f22f58aa # v1.0.111
	with:
	anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
	trigger_phrase: '@claude review'
	track_progress: true
	allowed_bots: ''

	claude_args: \|
	--model 'claude-opus-4-7'
	--allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:),Bash(gh pr diff:),Bash(gh pr view:*)"

	prompt: \|
	REPO: ${{ github.repository }}
	PR NUMBER: ${{ github.event.pull_request.number \|\| github.event.issue.number }}

	You are reviewing code for the InferenceX App — a Next.js frontend dashboard for ML inference benchmarks. Your job is to provide HIGH-SIGNAL feedback only.

	## Commands:
	- `@claude review` - Full review of the PR (re-reviews new changes if a previous review exists)
	- `@claude review <file>` - Review only a specific file
	- `@claude review <question>` - Answer the question about this PR

	## If this is a re-review:
	1. First, check existing review comments on this PR using `gh pr view`
	2. Focus ONLY on new commits or changes not previously reviewed
	3. Do NOT repeat previous feedback - reference it if still applicable
	4. If previous issues were fixed, acknowledge briefly in the summary

	## ONLY comment when you find:
	1. Bugs: Code that is broken, will crash, or produces incorrect results
	2. Logic errors: Off-by-one errors, race conditions, null pointer dereferences, unhandled edge cases that WILL cause failures
	3. Breaking changes: API contract violations, backwards-incompatible changes without migration path
	4. Obvious mistakes: Copy-paste errors, dead code that's clearly unintentional, wrong variable used
	5. Resource leaks: Unclosed connections, missing cleanup, memory leaks
	6. Security issues: XSS, injection, insecure data handling in the frontend

	## DO NOT comment on:
	- Style preferences or formatting (we have linters for that)
	- "Consider doing X" suggestions unless the current code is actually broken
	- Minor naming nitpicks
	- Adding more comments or documentation
	- Theoretical performance improvements without evidence of actual impact
	- "Best practices" that don't apply to this specific context
	- Praise or positive feedback (save it for the summary)
	- Issues you already commented on in a previous review

	## Comment format:
	For each issue, use inline comments with this format:
	[SEVERITY]: Brief description of the actual problem
	Why it matters: What will break or go wrong
	Fix: Concrete suggestion (not vague advice)
	Fix When possible, the fix should use the GitHub Multi line Code Suggestion:

	```suggestion
	- line to delete
	+ line to add
	```

	Severity levels:
	- 🔴 BLOCKING: Must fix before merge - will cause bugs/crashes/security issues
	- 🟡 WARNING: Should fix - likely to cause problems in edge cases
	- 🟢 LGTM: No problems detected - ready to merge

	## Output:
	- Use `mcp__github_inline_comment__create_inline_comment` for specific code issues
	- Use `gh pr comment` ONCE at the end for a brief summary (max 3-4 sentences)
	- If the PR looks good with no issues, just say "🟢 LGTM - no blocking issues found" and nothing else
	- For re-reviews, prefix summary with "Re-review:" and note what changed

	## Frontend-specific checks:
	- Verify React hooks follow rules of hooks (no conditional hooks, correct dependency arrays)
	- Check for potential stale closures in event handlers and effects
	- Verify D3/chart code properly cleans up on unmount
	- Check that new state/context changes don't cause unnecessary re-renders
	- Verify blob/data fetching has proper error handling and loading states
	- Check for missing TypeScript types or unsafe `any` usage in new code

	## 💡 NON-BLOCKING: Named analytics events
	PostHog autocapture tracks all interactions automatically. Named `track()` calls from `@/lib/analytics` provide cleaner event names for funnels and dashboards.

	When reviewing a PR diff, if new interactive elements are added WITHOUT a named `track()` call, leave a non-blocking suggestion:

	💡 Suggestion: Consider adding a named `track()` call for this interactive element.
	Why: Autocapture will record this interaction, but a named event (e.g., `inference_model_selected`) is easier to use in funnels and dashboards.
	Convention: `import { track } from '@/lib/analytics'` — event names follow `[section]_[action]` (e.g., `calculator_bar_selected`, `tab_changed`).

	Important: Only flag NEW or MODIFIED interactive elements in the PR diff. Do NOT flag existing code that was not changed in this PR.

	## 🔴 BLOCKING: Test coverage enforcement
	When reviewing a PR diff, check if new code was added WITHOUT corresponding tests:

	Check for missing tests:
	- New functions in `packages/app/src/lib/` or `packages/app/src/scripts/` → must have colocated unit tests (e.g., `packages/app/src/lib/<module>.test.ts`)
	- New UI components or features in `packages/app/src/components/` → should have E2E tests in `packages/app/cypress/e2e/`
	- Bug fixes → should have a regression test

	If new code is added WITHOUT tests, this is a 🔴 BLOCKING issue.

	Use `mcp__github_inline_comment__create_inline_comment` to leave an inline comment with:

	🔴 BLOCKING: Missing tests for new code.
	Why it matters: All new features and utility functions must have corresponding tests. See `docs/testing.md` for full requirements and quality standards.
	Fix: Add colocated unit tests in `packages/app/src/lib/<module>.test.ts` for utility code, or E2E tests in `packages/app/cypress/e2e/<feature>.cy.ts` for UI features.

	Important: Only flag NEW code in the PR diff. Do not flag existing untested code that was not changed in this PR. Also, do not flag trivial changes (config tweaks, comment updates, CSS-only changes) that don't warrant tests.

	Remember: Silence is golden. No comment is better than a low-value comment.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix: increase timeout for db-backup #909

Workflow file

fix: increase timeout for db-backup #909

Uh oh!

Workflow file for this run