integ

integ #158

Workflow file for this run

	name: integ

	# Phase-0 deploy-then-verify integration tests (issue #236). integ-runner
	# deploys a trimmed Task API stack into the shared account behind
	# secrets.AWS_ROLE_TO_ASSUME, runs the create-and-persist smoke assertions, then
	# tears the stack down.
	#
	# Trigger model mirrors deploy.yml: build.yml completes -> workflow_run picks it
	# up in the trusted base-repo context (secrets/OIDC available even for fork PRs)
	# -> we resolve whether the PR touches cdk/ or agent/ -> an admin approves
	# the `integ` environment gate -> deploy/assert/destroy runs against the shared
	# account -> a commit status `integ-smoke` is posted back to the PR head so it
	# shows up as a (required) check that blocks merge.
	#
	# Local dev path is unchanged: run `mise //cdk:integ` with your own AWS creds.
	#
	# Nightly schedule was intentionally dropped (previously 07:00 UTC) — the per-PR
	# path plus manual dispatch is the agreed coverage; this is not an oversight.
	on:
	  # zizmor: ignore[dangerous-triggers] — intentional; workflow_run is required so
	  # fork PRs can run against the shared account (a fork `pull_request` job gets no
	  # secrets/OIDC). Mitigations: build-success guard, path-filter, `integ`
	  # environment approval gate (admin reviews fork test code before it runs with
	  # the privileged role), least-privilege role, status-only tokens per job.
	  workflow_run:
	    # Fires whenever the `build` workflow finishes; the success check lives on
	    # the `resolve` job's `if:` because `types` cannot filter by conclusion.
	    workflows: [build]
	    types: [completed]
	  # Manual runs take no inputs.
	  workflow_dispatch: {}

	# Only one integ run at a time against the shared account — overlapping deploys
	# would collide on the single hardcoded `backgroundagent-integ` stack name.
	# cancel-in-progress is false so a run that is mid-deploy is never interrupted.
	#
	# NOTE(review): GitHub appears to keep at most one *pending* run per concurrency
	# group, cancelling an older queued run when a newer one arrives even with
	# cancel-in-progress: false — confirm that a displaced PR still gets its
	# `integ-smoke` status resolved rather than left without one.
	concurrency:
	  group: cdk-integ
	  cancel-in-progress: false

	# Deny every token scope at workflow level; each job below declares only the
	# permissions it needs.
	permissions: {}

	jobs:
	  # Decides whether this PR needs the integ run (touches cdk/ or agent/) and
	  # posts the gating `integ-smoke` status. Always runs on a successful build so
	  # docs/cli-only PRs get an immediate green (skipped) status and never deadlock
	  # the required check.
	  #
	  # Outputs:
	  #   applicable - 'true' when the integ job should run, 'false' otherwise.
	  #   head_sha   - commit to check out and to post statuses against.
	  #   head_repo  - owner/name of the repository that holds head_sha.
	  resolve:
	    # Manual dispatch is restricted to main (defence in depth — the `integ`
	    # environment approval is the primary gate). PR runs come via workflow_run.
	    if: >-
	      (github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/main') ||
	      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
	    runs-on: ubuntu-latest
	    permissions:
	      statuses: write
	      pull-requests: read
	    outputs:
	      applicable: ${{ steps.decide.outputs.applicable }}
	      head_sha: ${{ steps.decide.outputs.head_sha }}
	      head_repo: ${{ steps.decide.outputs.head_repo }}
	    steps:
	      - name: Resolve applicability and post pending status
	        id: decide
	        env:
	          GH_TOKEN: ${{ github.token }}
	          REPO: ${{ github.repository }}
	          EVENT_NAME: ${{ github.event_name }}
	          # Empty for workflow_dispatch.
	          PR_NUMBER_FROM_EVENT: ${{ github.event.workflow_run.pull_requests[0].number }}
	          WF_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
	          WF_HEAD_REPO: ${{ github.event.workflow_run.head_repository.full_name }}
	        run: |
	          set -euo pipefail

	          # Link shown next to the commit status; built from the runner's
	          # default environment variables so nothing is template-expanded
	          # into the script text.
	          RUN_URL="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}"

	          # workflow_dispatch: no PR context — run against the dispatched ref
	          # (the job's own checkout defaults). Mark applicable, skip status post.
	          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
	            echo "applicable=true" >> "$GITHUB_OUTPUT"
	            echo "head_sha=${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
	            echo "head_repo=${REPO}" >> "$GITHUB_OUTPUT"
	            echo "Manual dispatch — running integ against ${GITHUB_SHA}."
	            exit 0
	          fi

	          HEAD_SHA="$WF_HEAD_SHA"
	          echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
	          echo "head_repo=${WF_HEAD_REPO}" >> "$GITHUB_OUTPUT"

	          # Track whether we've posted an integ-smoke status. If the job
	          # dies (failed API call, runner crash) before reaching one, the EXIT
	          # trap posts `error` so the required check resolves instead of hanging
	          # pending forever and silently blocking merge.
	          STATUS_POSTED=""

	          post_status() {
	            # $1=state $2=description
	            gh api -X POST "repos/$REPO/statuses/$HEAD_SHA" \
	              -f context=integ-smoke \
	              -f state="$1" \
	              -f description="$2" \
	              -f target_url="$RUN_URL" \
	              >/dev/null
	            STATUS_POSTED="yes"
	          }

	          on_exit() {
	            rc=$?
	            # Only meaningful in the workflow_run (PR) context and only if we have
	            # a SHA to post against and haven't already posted a status.
	            # Best-effort: a failure here must not mask the original exit code.
	            if [[ $rc -ne 0 && -z "$STATUS_POSTED" && -n "${HEAD_SHA:-}" ]]; then
	              gh api -X POST "repos/$REPO/statuses/$HEAD_SHA" \
	                -f context=integ-smoke \
	                -f state=error \
	                -f description="resolve step failed before gating" \
	                -f target_url="$RUN_URL" \
	                >/dev/null 2>&1 || true
	            fi
	          }
	          trap on_exit EXIT

	          # Prefer the PR number carried by the event; otherwise ask the API
	          # which PR the head commit belongs to. Prints nothing if none is found.
	          resolve_pr_number() {
	            if [[ -n "$PR_NUMBER_FROM_EVENT" ]]; then
	              echo "$PR_NUMBER_FROM_EVENT"
	              return
	            fi
	            gh api "repos/$REPO/commits/$HEAD_SHA/pulls" --jq '.[0].number // empty' 2>/dev/null || true
	          }
	          PR_NUMBER=$(resolve_pr_number)

	          if [[ -z "$PR_NUMBER" ]]; then
	            echo "::warning::No PR resolved for $HEAD_SHA — nothing to gate; skipping."
	            echo "applicable=false" >> "$GITHUB_OUTPUT"
	            exit 0
	          fi

	          # Fork-PR safety: only run fork-authored code after a maintainer has
	          # applied the `safe-to-test` label (defence in depth on top of the
	          # `integ` environment approval). If it's absent, leave the status
	          # pending and don't run — re-trigger once the label is added.
	          if [[ "$WF_HEAD_REPO" != "$REPO" ]]; then
	            if ! LABELS=$(gh api "repos/$REPO/issues/$PR_NUMBER/labels" --jq '.[].name'); then
	              echo "::error::Failed to read labels for PR #$PR_NUMBER."
	              exit 1
	            fi
	            # Here-string instead of `echo | grep -q`: under pipefail a grep
	            # that exits on its first match can SIGPIPE the writer and make
	            # the whole pipeline report failure despite a match.
	            if ! grep -qx 'safe-to-test' <<<"$LABELS"; then
	              post_status pending "awaiting safe-to-test label on fork PR"
	              echo "applicable=false" >> "$GITHUB_OUTPUT"
	              echo "Fork PR #$PR_NUMBER lacks safe-to-test label — not running."
	              exit 0
	            fi
	          fi

	          # Path-filter must happen here (not on.pull_request.paths) because the
	          # trigger is workflow_run. Fail loud on API error: a failed or truncated
	          # /files response must NOT fall through to a false-green skip. With
	          # `set -e`, an assignment inside an `if !` condition does not trip
	          # errexit, so we handle the failure explicitly and let the EXIT trap
	          # post `error`.
	          if ! CHANGED=$(gh api "repos/$REPO/pulls/$PR_NUMBER/files" --paginate --jq '.[].filename'); then
	            echo "::error::Failed to list changed files for PR #$PR_NUMBER."
	            exit 1
	          fi

	          # The /files endpoint returns at most 3000 entries. At the cap we
	          # cannot prove the PR avoids cdk/ or agent/, so treat it as applicable
	          # rather than risk a false-green skip.
	          FILES_API_CAP=3000
	          # grep -c exits 1 on a zero count; `|| true` keeps the printed "0".
	          FILE_COUNT=$(grep -c . <<<"$CHANGED" || true)

	          # Here-string, not `echo | grep -q`: with pipefail, an early-exiting
	          # grep on a large file list could SIGPIPE echo, fail the pipeline and
	          # send a matching PR down the "skipped" branch.
	          if grep -Eq '^(cdk|agent)/' <<<"$CHANGED"; then
	            post_status pending "awaiting admin approval / running"
	            echo "applicable=true" >> "$GITHUB_OUTPUT"
	            echo "PR #$PR_NUMBER touches cdk/ or agent/ — integ applies."
	          elif (( FILE_COUNT >= FILES_API_CAP )); then
	            post_status pending "awaiting admin approval / running"
	            echo "applicable=true" >> "$GITHUB_OUTPUT"
	            echo "::warning::PR #$PR_NUMBER file list hit the ${FILES_API_CAP}-file API cap — treating as applicable."
	          else
	            post_status success "skipped — no cdk/ or agent/ changes"
	            echo "applicable=false" >> "$GITHUB_OUTPUT"
	            echo "PR #$PR_NUMBER has no cdk/ or agent/ changes — integ skipped (green)."
	          fi

	# The admin-gated deploy -> assert -> destroy. The `integ` environment's
	# required reviewer is the approval gate; while it waits, the integ-smoke
	# status stays pending and merge stays blocked.
	  integ:
	    needs: resolve
	    # `applicable` is a string output of the resolve job, hence the quoted
	    # comparison.
	    if: needs.resolve.outputs.applicable == 'true'
	    name: CDK integ smoke (Task API)
	    runs-on: ubuntu-latest
	    environment: integ
	    timeout-minutes: 45
	    permissions:
	      # id-token lets the job request an OIDC token for the AWS role
	      # assumption below; contents: read is for the checkout.
	      id-token: write
	      contents: read
	    env:
	      CI: "true"
	      MISE_EXPERIMENTAL: "1"
	    steps:
	      - name: Checkout PR head (incl. forks)
	        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
	        with:
	          # Approving the `integ` environment authorizes this fork-authored test
	          # code to run with the privileged role — the approver MUST review
	          # cdk/test/integ/** changes before approving.
	          repository: ${{ needs.resolve.outputs.head_repo }}
	          ref: ${{ needs.resolve.outputs.head_sha }}
	          # Do not leave the checkout token in the local git config for the
	          # PR-authored code that runs in later steps.
	          persist-credentials: false

	      - name: Configure AWS credentials
	        uses: aws-actions/configure-aws-credentials@e7f100cf4c008499ea8adda475de1042d6975c7b # v6.2.0
	        with:
	          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
	          # Fall back to us-east-1 if the repo variable is unset, so the action
	          # never runs region-less (which would fail credential resolution).
	          aws-region: ${{ vars.AWS_REGION || 'us-east-1' }}

	      - name: Install mise
	        uses: jdx/mise-action@dba19683ed58901619b14f395a24841710cb4925 # v4.1.0
	        with:
	          cache: true

	      - name: Setup Node.js
	        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	        with:
	          node-version: 22.x

	      - name: Install dependencies
	        run: yarn install --immutable

	      - name: Run integ tests (deploy → assert → destroy)
	        run: mise //cdk:integ

	      # Safety net: integ-runner forces teardown on success and failure, but if
	      # the run is cancelled or crashes mid-deploy the stack can be stranded in
	      # the shared account. Delete it directly via CloudFormation so we never
	      # leak billable resources.
	      #
	      # NOTE: `cdk destroy backgroundagent-integ` would NOT work here — it
	      # synthesizes the main app (src/main.ts), which does not contain the integ
	      # stack, so it exits 0 having deleted nothing. Target the stack by its
	      # literal CloudFormation name instead. delete-stack is idempotent (no-op if
	      # already gone), so `|| true` only guards transient API errors.
	      - name: Ensure stack torn down
	        if: always()
	        env:
	          # Same region fallback as the credentials step, exported under both
	          # variable names for this step's AWS CLI calls.
	          AWS_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
	          AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
	        run: |
	          set -euo pipefail
	          aws cloudformation delete-stack --stack-name backgroundagent-integ || true
	          # No `|| true` on the wait: a DELETE_FAILED must surface loudly so we
	          # never silently leak billable resources in the shared account.
	          aws cloudformation wait stack-delete-complete --stack-name backgroundagent-integ

	# Post the final integ-smoke status back to the PR head so the check flips from
	# pending to success/failure. Skipped for workflow_dispatch (no PR to gate).
	  report:
	    needs: [resolve, integ]
	    # always() keeps this job eligible when integ failed or was cancelled, so
	    # the pending status posted by resolve still gets a final state.
	    if: >-
	      always() &&
	      needs.resolve.result == 'success' &&
	      needs.resolve.outputs.applicable == 'true' &&
	      github.event_name == 'workflow_run'
	    runs-on: ubuntu-latest
	    permissions:
	      statuses: write
	    steps:
	      - name: Post final integ-smoke status
	        env:
	          GH_TOKEN: ${{ github.token }}
	          REPO: ${{ github.repository }}
	          HEAD_SHA: ${{ needs.resolve.outputs.head_sha }}
	          INTEG_RESULT: ${{ needs.integ.result }}
	        run: |
	          set -euo pipefail

	          # Any integ result other than `success` is reported as a failure,
	          # with the raw result echoed in the description.
	          if [[ "$INTEG_RESULT" == "success" ]]; then
	            STATE=success
	            DESC="deploy → assert → destroy passed"
	          else
	            STATE=failure
	            DESC="integ run ${INTEG_RESULT}"
	          fi

	          # Link back to this run, built from the runner's default environment
	          # variables rather than template-expanded into the script text.
	          gh api -X POST "repos/$REPO/statuses/$HEAD_SHA" \
	            -f context=integ-smoke \
	            -f state="$STATE" \
	            -f description="$DESC" \
	            -f target_url="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}" \
	            >/dev/null

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

integ #158

Workflow file

integ #158

Uh oh!

Workflow file for this run