# Skip to content

# test(handlers): expand deploy + stack handler coverage (#153) #134

# test(handlers): expand deploy + stack handler coverage (#153)

# test(handlers): expand deploy + stack handler coverage (#153) #134

# Workflow file for this run

# instant.dev/api — Auto-deploy on push to master
#
# Why this exists:
# Until 2026-05-15, "shipped to master" did NOT mean "running in prod" —
# an operator had to manually `docker buildx build && kubectl set image`.
# A worker fix landed but never deployed; a user got a broken expiry email
# twice as a result. This workflow eliminates that gap.
#
# Build context note:
# The Dockerfile expects to be invoked from the parent of api/, with
# sibling common/ and proto/ directories present (CLAUDE.md convention).
# In CI we mirror that by checking out:
# . (workspace root)
# ├── api/ (this repo)
# ├── common/ (sibling repo)
# └── proto/ (sibling repo)
# then `docker buildx build -f api/Dockerfile .` from the workspace root.
#
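# Required repo secrets:
#   KUBECONFIG_B64    — base64-encoded kubeconfig with permission to
#                       `kubectl set image deployment/instant-api -n instant`.
#                       See CLAUDE.md "Local Kubernetes Setup" for the cluster.
#   REPO_ACCESS_TOKEN — fine-grained PAT with read access to the sibling
#                       common/ and proto/ repos; used by their checkout steps.
#   GHCR_PUSH_TOKEN   — classic PAT with write:packages; used by the GHCR
#                       login step to push the image.
#
# GHCR auth does NOT use the per-job GITHUB_TOKEN: it is scoped to this repo
# and was rejected (403) when pushing the org-owned package, so the login
# step authenticates with GHCR_PUSH_TOKEN instead.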
name: Deploy

# Triggers: pushes to master (docs-only pushes excluded) and manual runs.
on:
  push:
    branches:
      - master
    # CI-minute savings (2026-05-21): skip Deploy on docs-only commits.
    # Markdown, CLAUDE.md, runbooks, design docs, and the BUGBASH ledger
    # never change the binary — they don't need a 7-min test step + a 3-min
    # image build + rollout. A push is ignored only when EVERY changed path
    # matches one of these globs; if a real code change also touches a .md
    # file in the same commit, the non-ignored path triggers Deploy normally.
    paths-ignore:
      - '**.md'
      - 'docs/**'
      - 'CLAUDE.md'
      - '.gitignore'
      - 'LICENSE'
      - 'BUGBASH-*/**'
  # Manual trigger from the Actions tab / API; takes no inputs.
  workflow_dispatch:
# At most one Deploy per workflow + ref; a newer run cancels the older one.
concurrency:
  # CI-minute savings (2026-05-21): rapid-fire pushes now cancel the prior
  # in-flight Deploy instead of running both to completion. The 5-pushes-
  # in-10-minutes pattern that doubled today's burn now costs the duration
  # of one final Deploy, not five.
  group: 'deploy-${{ github.workflow }}-${{ github.ref }}'
  cancel-in-progress: true
# Scopes granted to the per-job GITHUB_TOKEN.
# NOTE(review): the GHCR login step authenticates with GHCR_PUSH_TOKEN, so
# `packages: write` may no longer be needed — confirm before tightening.
permissions:
  contents: read
  packages: write
# Workflow-wide environment, read by the build, rollout and verify steps.
env:
  # Image repository; the build step tags it with the version and `latest`.
  IMAGE_REPO: ghcr.io/instanode-dev/instant-api
  # Target of `kubectl set image` / `kubectl rollout status`.
  K8S_NAMESPACE: instant
  K8S_DEPLOYMENT: instant-api
  K8S_CONTAINER: api
  # Public endpoint polled after rollout to confirm the new SHA is live.
  HEALTHZ_URL: https://api.instanode.dev/healthz
jobs:
  # Single job: check out api + sibling repos, run the Go test suite against
  # the service containers, build and push the image, roll it out with
  # kubectl, then verify the deployment image and the live /healthz response.
  deploy:
    runs-on: ubuntu-latest
    # 2026-05-15: api unit tests require a real Postgres + Redis
    # (testhelpers.SetupTestDB / SetupTestRedis). First auto-deploy
    # run failed because no DB was reachable from the runner. These
    # service containers match the defaults in
    # api/internal/testhelpers/testhelpers.go:
    #   defaultTestDBURL    = postgres://postgres:postgres@localhost:5432/instant_dev_test
    #   defaultTestRedisURL = redis://localhost:6379/15
    services:
      postgres:
        image: postgres:17-alpine
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: instant_dev_test
        ports:
          - '5432:5432'
        options: >-
          --health-cmd "pg_isready -U postgres"
          --health-interval 5s
          --health-timeout 3s
          --health-retries 12
      redis:
        image: redis:7-alpine
        ports:
          - '6379:6379'
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 5s
          --health-timeout 3s
          --health-retries 12
    steps:
      - name: Checkout api (this repo) into ./api
        uses: actions/checkout@v4
        with:
          path: api

      - name: Checkout common sibling into ./common
        uses: actions/checkout@v4
        with:
          repository: ${{ vars.COMMON_REPO || format('{0}/common', github.repository_owner) }}
          # 2026-05-15: GITHUB_TOKEN is scoped to THIS repo only and 404s
          # on private sibling repos in the same org. REPO_ACCESS_TOKEN
          # is a fine-grained PAT with read access to
          # InstaNode-dev/{common,proto}. Set via
          # `gh secret set REPO_ACCESS_TOKEN --repo InstaNode-dev/<name>`.
          token: ${{ secrets.REPO_ACCESS_TOKEN }}
          path: common

      - name: Checkout proto sibling into ./proto
        uses: actions/checkout@v4
        with:
          repository: ${{ vars.PROTO_REPO || format('{0}/proto', github.repository_owner) }}
          token: ${{ secrets.REPO_ACCESS_TOKEN }}
          path: proto

      # Exposes short_sha, build_time and version as step outputs; the build,
      # rollout and verify steps read them via steps.meta.outputs.*.
      - name: Compute build metadata
        id: meta
        run: |
          SHORT_SHA="${GITHUB_SHA:0:7}"
          BUILD_TIME="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
          VERSION="master-${SHORT_SHA}"
          echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
          echo "build_time=${BUILD_TIME}" >> "$GITHUB_OUTPUT"
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "Built ${VERSION} (${BUILD_TIME})"

      - name: Set up Go (for unit tests + go.mod replace directives)
        uses: actions/setup-go@v5
        with:
          go-version: '1.25'

      - name: Stage sibling repos for go.mod replace (../common, ../proto)
        # The api repo's go.mod uses `replace instant.dev/common => ../common`
        # and `replace instant.dev/proto => ../proto`. When `go test` runs
        # inside ./api, the relative paths resolve to ./common and ./proto
        # in the workspace root — which is already correct. No mv needed;
        # this step only lists the workspace so the layout shows in the log.
        run: ls -la

      - name: Apply DB migrations to the test database
        # 2026-05-16: before this step CI ran tests against a BARE Postgres
        # whose schema came ONLY from testhelpers.runMigrations — a
        # hand-maintained mirror of the prod schema. Every migration that
        # added a table/column without a matching mirror edit silently broke
        # this gate (email_events, pending_deletions, deployment_events,
        # deployments.private, …). This step applies the REAL migration
        # files, the same ones `make test-db-up` applies locally, so CI runs
        # against the same schema developers do. runMigrations still runs
        # (all IF NOT EXISTS) as a harmless backstop. The TestRunMigrations-
        # MirrorsEveryMigrationTable guard keeps the mirror itself honest.
        env:
          PGPASSWORD: postgres
        run: |
          # Expand the glob into an array instead of parsing `ls` output.
          # Bash sorts glob results, so migrations still apply in filename
          # order; nullglob turns "no matches" into an empty array so it can
          # be reported instead of silently applying nothing.
          shopt -s nullglob
          migrations=(api/internal/db/migrations/*.sql)
          if [ "${#migrations[@]}" -eq 0 ]; then
            echo "::error::no migration files found under api/internal/db/migrations"
            exit 1
          fi
          for f in "${migrations[@]}"; do
            echo "→ applying $(basename "$f")"
            # Without ON_ERROR_STOP psql keeps going after a failed statement
            # and still exits 0, so a broken migration would pass this step
            # and the tests would run against a partial schema.
            psql -v ON_ERROR_STOP=1 -h localhost -U postgres -d instant_dev_test -f "$f" >/dev/null
          done
          echo "all migrations applied to instant_dev_test"
          # The db provider's local backend (internal/providers/db/local.go)
          # CREATEs a customer database per /db/new. In tests it connects to
          # TEST_POSTGRES_CUSTOMERS_URL — which testhelpers defaults to a
          # localhost:5434 instance that does NOT exist on the CI runner, so
          # every postgres provision (TestDBNew_*, TestBulkTwin_*) 503'd.
          # Create that database on the same service container; the test
          # step points the env var at it. It needs no migrations — it is
          # only the admin connection target for CREATE DATABASE / CREATE USER.
          psql -v ON_ERROR_STOP=1 -h localhost -U postgres -d postgres -c "CREATE DATABASE instant_customers" >/dev/null
          echo "created instant_customers (db-provider admin target)"

      - name: Run unit tests (short, no integration deps)
        working-directory: api
        env:
          # Match the postgres/redis service containers of this job.
          # testhelpers default would also work since localhost:5432 is the
          # same, but setting these explicitly survives any future
          # default-URL drift.
          TEST_DATABASE_URL: postgres://postgres:postgres@localhost:5432/instant_dev_test?sslmode=disable
          TEST_REDIS_URL: redis://localhost:6379/15
          # db-provider admin target (created by the migrations step).
          # Without this the default is an unreachable localhost:5434 and
          # every postgres-provisioning test fails with 503.
          TEST_POSTGRES_CUSTOMERS_URL: postgres://postgres:postgres@localhost:5432/instant_customers?sslmode=disable
        # 2026-05-16: the previous -skip list (TestOpenAPI_CoversAll-
        # RegisteredRoutes | TestCrossTeam_ | TestCustomDomainCreate_) was
        # removed once their real causes were fixed: the OpenAPI test had a
        # stale internal-route whitelist, TestCrossTeam_ never needed a
        # second DB at all, and TestCustomDomainCreate_ had a stale 5-column
        # sqlmock row. The whole `./...` suite passes — keep it that way; do
        # not re-add a -skip list.
        #
        # `-p 1` is load-bearing: every package shares the single
        # instant_dev_test DB + redis/15. With the default parallelism,
        # `go test ./...` runs ~25 package binaries at once and they corrupt
        # each other's DB/redis state mid-test (a handler test CREATEs a real
        # DB while a models test TRUNCATEs, a middleware test's rate-limit
        # counter is FLUSHed by another package, …). The Makefile's
        # `test-unit` target sidesteps this by running per-package; `-p 1`
        # serialises package execution for the same effect in one invocation.
        run: |
          go test ./... -short -count=1 -p 1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GHCR
        # 2026-05-17: the per-job GITHUB_TOKEN (even with packages: write)
        # is scoped to THIS repo and is not authorised to push the
        # org-owned package ghcr.io/instanode-dev/instant-api — every push
        # 403'd. GHCR_PUSH_TOKEN is a classic PAT with write:packages owned
        # by a user who has write access to that package. See task #121.
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GHCR_PUSH_TOKEN }}

      - name: Build and push image
        # Build context = workspace root so Dockerfile's
        # `COPY proto/`, `COPY common/`, `COPY api/` all resolve.
        run: |
          docker buildx build \
            --platform linux/amd64 \
            -f api/Dockerfile \
            --build-arg GIT_SHA="${{ steps.meta.outputs.short_sha }}" \
            --build-arg BUILD_TIME="${{ steps.meta.outputs.build_time }}" \
            --build-arg VERSION="${{ steps.meta.outputs.version }}" \
            -t "${IMAGE_REPO}:${{ steps.meta.outputs.version }}" \
            -t "${IMAGE_REPO}:latest" \
            --push \
            .

      - name: Set up kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'latest'

      - name: Configure kubeconfig from KUBECONFIG_B64 secret
        env:
          KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
        run: |
          # Fail with an actionable message when the secret is missing,
          # rather than letting kubectl fail later without a kubeconfig.
          if [ -z "${KUBECONFIG_B64}" ]; then
            echo "::error::KUBECONFIG_B64 repo secret is not set. Add it under Settings → Secrets → Actions."
            exit 1
          fi
          mkdir -p "$HOME/.kube"
          echo "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
          chmod 600 "$HOME/.kube/config"
          kubectl version --client=true

      - name: Roll out new image
        run: |
          IMAGE="${IMAGE_REPO}:${{ steps.meta.outputs.version }}"
          echo "Setting ${K8S_DEPLOYMENT}.${K8S_CONTAINER} to ${IMAGE}"
          kubectl set image \
            "deployment/${K8S_DEPLOYMENT}" \
            "${K8S_CONTAINER}=${IMAGE}" \
            -n "${K8S_NAMESPACE}"
          # Block until the rollout completes; fails the step after 180s.
          kubectl rollout status \
            "deployment/${K8S_DEPLOYMENT}" \
            -n "${K8S_NAMESPACE}" \
            --timeout=180s

      - name: Verify rolled-out image tag matches built version
        run: |
          # Read the image configured for the named container in the
          # deployment's pod template and compare it to the tag just built.
          ROLLED=$(kubectl get deployment "${K8S_DEPLOYMENT}" -n "${K8S_NAMESPACE}" \
            -o jsonpath="{.spec.template.spec.containers[?(@.name=='${K8S_CONTAINER}')].image}")
          EXPECTED="${IMAGE_REPO}:${{ steps.meta.outputs.version }}"
          echo "Live image: ${ROLLED}"
          echo "Expected: ${EXPECTED}"
          if [ "${ROLLED}" != "${EXPECTED}" ]; then
            echo "::error::Rolled image (${ROLLED}) != expected (${EXPECTED})"
            exit 1
          fi

      - name: Curl live /healthz and confirm new SHA is reported
        run: |
          SHORT_SHA="${{ steps.meta.outputs.short_sha }}"
          # Allow up to ~30s for the new pod to start serving the public URL.
          for i in 1 2 3 4 5 6; do
            # `|| echo ""` keeps a failed curl from aborting the retry loop.
            BODY=$(curl -fsSL --max-time 5 "${HEALTHZ_URL}" || echo "")
            echo "Attempt ${i}: ${BODY}"
            if echo "${BODY}" | grep -q "${SHORT_SHA}"; then
              echo "Confirmed live /healthz reports commit_id=${SHORT_SHA}"
              exit 0
            fi
            sleep 5
          done
          echo "::error::live /healthz never reported commit_id=${SHORT_SHA}"
          exit 1