test(coverage): worker internal/circuit + jobs tail → ≥95% (CI-measured) (#59)

mastermanas805 · claude · web-flow · commit 088f61cd61a4 · 2026-05-22T23:07:49.000+05:30
Closes the last two per-module coverage gaps in the worker repo, measured
under the EXISTING coverage.yml CI environment (postgres/redis/mongo
service containers + TEST_* env) — NOT relying on TEST_WORKER_STARTUP_DSN,
which is unset in CI so its StartWorkers boot test skips there.

internal/circuit 91.8% → 100.0%
  - NewBreaker: threshold<1 clamp + cooldown<=0 default arms
  - State(): half_open / within-cooldown / after-cooldown-before-trial arms
  - Name(): metric-label accessor
  (These tests mirror the api/internal/circuit copy by the file's own
   lock-step contract — apply the same additions there in a follow-up.)

internal/jobs 94.7% → 95.1%
  Reachable arms covered with sqlmock + an SDK-disabled New Relic app +
  pure-value calls (no live infra):
  - middleware Work: nrApp != nil transaction path + txn.NoticeError arm
  - event_email_mapping build{BackupFailed,RestoreSucceeded,RestoreFailed}:
    the row.ResourceType != "" column-wins arm
  - billing_reconciler emit{Upgrade,Cancel}Audit: fail-open err != nil arm
  - checkout_reconcile emailAbandonedCheckout: non-ErrNoRows claim error arm
  - provisioner_reconciler markAbandoned: UPDATE-error arm
  Otherwise-unreachable json.Marshal degradation arms (writeAudit ×3,
  insertPropagationAuditRow) covered by passing an unmarshalable meta map
  (chan value) — no source-level seam required.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/internal/circuit/circuit_test.go b/internal/circuit/circuit_test.go
@@ -160,6 +160,88 @@ func TestBreaker_OnOpenCallback(t *testing.T) {
 	}
 }
 
+// TestBreaker_NewBreakerClampsInvalidArgs covers both guard branches of
+// NewBreaker that the happy-path tests never reach: a threshold below 1
+// is raised to 1, and a non-positive cooldown falls back to 30s.
+func TestBreaker_NewBreakerClampsInvalidArgs(t *testing.T) {
+	// Threshold guard: constructed with 0, so one failure has to open it.
+	lowThreshold := NewBreaker("worker_test_clamp_threshold", 0, 10*time.Millisecond)
+	if admitted := lowThreshold.Allow(); !admitted {
+		t.Fatal("fresh breaker should allow")
+	}
+	lowThreshold.Record(errBoom)
+	if lowThreshold.State() != StateOpen {
+		t.Fatalf("threshold should clamp to 1 (single failure opens), got %s", lowThreshold.State())
+	}
+
+	// Cooldown guard: constructed with 0. Waiting out the 30s default is
+	// impractical, so trip the breaker, pause briefly, and check it is
+	// still rejecting — a literal 0 cooldown would re-admit at once.
+	zeroCooldown := NewBreaker("worker_test_clamp_cooldown", 1, 0)
+	_ = zeroCooldown.Allow()
+	zeroCooldown.Record(errBoom)
+	time.Sleep(5 * time.Millisecond)
+	if readmitted := zeroCooldown.Allow(); readmitted {
+		t.Fatal("cooldown should default to 30s; breaker must still reject after 5ms")
+	}
+	if zeroCooldown.State() != StateOpen {
+		t.Fatalf("expected open within default cooldown, got %s", zeroCooldown.State())
+	}
+}
+
+// TestBreaker_StateOpenWithinCooldown pins the `now < openUntilNs` branch
+// of State(): a tripped breaker with no trial claimed and an hour-long
+// cooldown still running reports open — separate from the half-open path.
+func TestBreaker_StateOpenWithinCooldown(t *testing.T) {
+	tripped := NewBreaker("worker_test_state_within_cooldown", 1, time.Hour)
+	_ = tripped.Allow()
+	tripped.Record(errBoom)
+	state := tripped.State()
+	if state != StateOpen {
+		t.Fatalf("breaker within cooldown should report open, got %s", state)
+	}
+}
+
+// TestBreaker_StateOpenAfterCooldownBeforeTrial exercises the trailing
+// return of State(): the cooldown has run out, yet nobody has claimed the
+// half-open trial, so State() gets past the `now < openUntilNs` check and
+// still answers open.
+func TestBreaker_StateOpenAfterCooldownBeforeTrial(t *testing.T) {
+	expired := NewBreaker("worker_test_state_after_cooldown", 1, 10*time.Millisecond)
+	_ = expired.Allow()
+	expired.Record(errBoom)
+	time.Sleep(15 * time.Millisecond)
+	// No Allow() after the sleep: halfOpen is still false, openUntil is past.
+	state := expired.State()
+	if state != StateOpen {
+		t.Fatalf("breaker after cooldown but before trial should report open, got %s", state)
+	}
+}
+
+// TestBreaker_StateHalfOpenReported pins the leading `halfOpen.Load()` branch
+// of State(): once an Allow() after cooldown claims the trial, it is half_open.
+func TestBreaker_StateHalfOpenReported(t *testing.T) {
+	trial := NewBreaker("worker_test_state_half_open", 1, 10*time.Millisecond)
+	_ = trial.Allow()
+	trial.Record(errBoom)
+	time.Sleep(15 * time.Millisecond)
+	claimed := trial.Allow()
+	if !claimed {
+		t.Fatal("first Allow() after cooldown should claim the half-open trial")
+	}
+	if state := trial.State(); state != StateHalfOpen {
+		t.Fatalf("breaker mid-trial should report half_open, got %s", state)
+	}
+}
+
+// TestBreaker_Name — Name(), the metric-label accessor, echoes the constructor's name.
+func TestBreaker_Name(t *testing.T) {
+	const want = "worker_test_name_accessor"
+	if got := NewBreaker(want, 1, time.Second).Name(); got != want {
+		t.Fatalf("Name() = %q, want %q", got, want)
+	}
+}
+
 // TestBreaker_StateStringValues — NR runbook references these exact
 // strings.
 func TestBreaker_StateStringValues(t *testing.T) {
diff --git a/internal/jobs/coverage_tail_95_test.go b/internal/jobs/coverage_tail_95_test.go
@@ -0,0 +1,267 @@
+package jobs
+
+// coverage_tail_95_test.go — closes the last reachable per-function gaps in
+// the jobs package so the CI-measured package total clears the 95% floor.
+//
+// Every test here runs under the EXISTING coverage.yml CI environment
+// (postgres/redis/mongo service containers + TEST_* env). NONE of them
+// depend on TEST_WORKER_STARTUP_DSN — that env var is NOT set in CI, so the
+// StartWorkers boot test it gates SKIPS there. We add coverage via
+// sqlmock + an SDK-disabled New Relic application + pure-value calls that
+// need no live infra at all.
+//
+// Targets (each was < 95% in the CI-measured profile):
+//   * middleware.go        Work — the w.nrApp != nil transaction path +
+//                          the txn.NoticeError(err) error arm.
+//   * event_email_mapping  buildBackupFailed / buildRestoreSucceeded /
+//                          buildRestoreFailed — the `row.ResourceType != ""`
+//                          column-wins branch (else branch already covered).
+//   * billing_reconciler   emitUpgradeAudit / emitCancelAudit — the
+//                          fail-open `err != nil` arm.
+//   * checkout_reconcile   emailAbandonedCheckout — non-ErrNoRows claim arm.
+// Also exercised below: provisioner_reconciler markAbandoned (UPDATE-error
+// arm) and the json.Marshal failure arms of writeAudit / insertPropagationAuditRow.
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	sqlmock "github.com/DATA-DOG/go-sqlmock"
+	"github.com/google/uuid"
+	"github.com/newrelic/go-agent/v3/newrelic"
+
+	"instant.dev/common/logctx"
+)
+
+// newDisabledNRApp returns a genuine, non-nil *newrelic.Application built with
+// ConfigEnabled(false) — the SDK's documented hermetic mode: no network I/O,
+// daemon, license key or harvest cycle, yet StartTransaction still yields a
+// live (inert) *Transaction, enough to reach the wrapper's nrApp != nil path.
+func newDisabledNRApp(t *testing.T) *newrelic.Application {
+	t.Helper()
+	opts := []newrelic.ConfigOption{
+		newrelic.ConfigAppName("instant-worker-test"),
+		newrelic.ConfigEnabled(false),
+	}
+	app, err := newrelic.NewApplication(opts...)
+	if err != nil {
+		t.Fatalf("newrelic.NewApplication(disabled): %v", err)
+	}
+	return app
+}
+
+// TestWithObservability_NRPresent_Success walks Work's w.nrApp != nil arm
+// with a succeeding inner worker, so StartTransaction, NewContext and the
+// deferred End all run before nil is returned.
+func TestWithObservability_NRPresent_Success(t *testing.T) {
+	inner := &fakeWorker{}
+	obs := WithObservability[fakeArgs](inner, newDisabledNRApp(t))
+
+	err := obs.Work(context.Background(), newJob(42))
+	if err != nil {
+		t.Fatalf("Work returned error on success path: %v", err)
+	}
+	// As on the nil-app path, the inner worker's ctx must carry both ids.
+	if logctx.TIDFromContext(inner.gotCtx) == "" {
+		t.Error("tid not stamped on ctx in NR-present path")
+	}
+	if logctx.TraceIDFromContext(inner.gotCtx) == "" {
+		t.Error("trace_id not stamped on ctx in NR-present path")
+	}
+}
+
+// TestWithObservability_NRPresent_Error covers txn.NoticeError(err): the
+// app is non-nil and the inner worker fails, so the transaction-open branch
+// and the error-notice branch both run, and the error comes back intact.
+func TestWithObservability_NRPresent_Error(t *testing.T) {
+	innerErr := errors.New("inner work blew up")
+	obs := WithObservability[fakeArgs](&fakeWorker{returns: innerErr}, newDisabledNRApp(t))
+
+	got := obs.Work(context.Background(), newJob(7))
+	if errors.Is(got, innerErr) {
+		return
+	}
+	t.Fatalf("Work should bubble the inner error unchanged, got %v", got)
+}
+
+// TestEventEmail_Builders_ResourceTypeFromColumn covers the
+// `if row.ResourceType != ""` arm shared by the three backup/restore
+// builders: a ResourceType column on the audit row takes precedence over
+// the metadata fallback (whose else arm is covered elsewhere).
+func TestEventEmail_Builders_ResourceTypeFromColumn(t *testing.T) {
+	type builderFn func(auditRow) (map[string]string, bool)
+	const wantType = "postgres"
+	builders := []struct {
+		name string
+		fn   builderFn
+	}{
+		{name: "buildBackupFailed", fn: buildBackupFailed},
+		{name: "buildRestoreSucceeded", fn: buildRestoreSucceeded},
+		{name: "buildRestoreFailed", fn: buildRestoreFailed},
+	}
+	for _, tc := range builders {
+		t.Run(tc.name, func(t *testing.T) {
+			params, ok := tc.fn(auditRow{
+				ID:           "a-rt-1",
+				TeamID:       "t-rt-1",
+				OwnerEmail:   "owner@example.com",
+				ResourceType: wantType, // non-empty → column-wins branch
+			})
+			if !ok {
+				t.Fatalf("%s returned ok=false with a valid owner email", tc.name)
+			}
+			if got := params["resource_type"]; got != wantType {
+				t.Errorf("%s: resource_type = %q; want the column value %q", tc.name, got, wantType)
+			}
+		})
+	}
+}
+
+// TestBillingReconciler_EmitUpgradeAudit_FailOpen covers emitUpgradeAudit's
+// fail-open arm. The audit INSERT is made to fail; the method has to absorb
+// that (RecordFailOpen path) because the tier change is already committed.
+func TestBillingReconciler_EmitUpgradeAudit_FailOpen(t *testing.T) {
+	regexMatcher := sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp)
+	db, mock, err := sqlmock.New(regexMatcher)
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+	brownout := errors.New("audit DB brownout")
+	mock.ExpectExec(`INSERT INTO audit_log`).WillReturnError(brownout)
+	// Nothing is returned to assert on; surviving the call is the check.
+	worker := &BillingReconcilerWorker{db: db}
+	worker.emitUpgradeAudit(context.Background(), uuid.New(), "hobby", "pro", "sub_x")
+	if unmet := mock.ExpectationsWereMet(); unmet != nil {
+		t.Errorf("unmet sqlmock expectations: %v", unmet)
+	}
+}
+
+// TestBillingReconciler_EmitCancelAudit_FailOpen covers the fail-open arm of
+// emitCancelAudit: a failing audit INSERT for the cancel row is absorbed.
+func TestBillingReconciler_EmitCancelAudit_FailOpen(t *testing.T) {
+	regexMatcher := sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp)
+	db, mock, err := sqlmock.New(regexMatcher)
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+	brownout := errors.New("audit DB brownout")
+	mock.ExpectExec(`INSERT INTO audit_log`).WillReturnError(brownout)
+	worker := &BillingReconcilerWorker{db: db}
+	worker.emitCancelAudit(context.Background(), uuid.New(), "pro", "hobby", "sub_x")
+	if unmet := mock.ExpectationsWereMet(); unmet != nil {
+		t.Errorf("unmet sqlmock expectations: %v", unmet)
+	}
+}
+
+// TestCheckoutReconcile_EmailAbandonedCheckout_ClaimError drives the
+// non-ErrNoRows arm of emailAbandonedCheckout's claim SELECT: a generic DB
+// error must come back as a non-nil error and the tx must roll back. The
+// mock script is verified so a mismatched query cannot fake a pass.
+func TestCheckoutReconcile_EmailAbandonedCheckout_ClaimError(t *testing.T) {
+	db, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp))
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+	mock.ExpectBegin()
+	// A generic error (NOT sql.ErrNoRows) → the `if err != nil` arm returns it.
+	mock.ExpectQuery(`SELECT subscription_id\s+FROM pending_checkouts`).
+		WithArgs("sub_claim_err").
+		WillReturnError(errors.New("lock wait timeout"))
+	mock.ExpectRollback()
+	w := &CheckoutReconcileWorker{db: db}
+	gotErr := w.emailAbandonedCheckout(context.Background(), checkoutRow{
+		subscriptionID: "sub_claim_err",
+		teamID:         uuid.New().String(),
+		customerEmail:  "buyer@example.com",
+		planTier:       "pro",
+	})
+	if gotErr == nil {
+		t.Fatal("expected a wrapped claim-row error, got nil")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// unmarshalableMeta builds a metadata map holding a channel, a value
+// json.Marshal has no encoding for. That forces the audit-row marshal-error
+// arms, which primitive-only maps can never reach, with no source-level seam.
+func unmarshalableMeta() map[string]any {
+	unencodable := make(chan int)
+	return map[string]any{"bad": unencodable}
+}
+
+// TestCustomerRestoreRunner_WriteAudit_MarshalError covers writeAudit's
+// audit_marshal_failed degradation arm: the meta map cannot be marshalled,
+// so writeAudit should log and return before any DB use.
+func TestCustomerRestoreRunner_WriteAudit_MarshalError(t *testing.T) {
+	runner := &CustomerRestoreRunnerWorker{db: nil}
+	id, idText := uuid.New(), uuid.New().String()
+	// Passing means no panic: the marshal failure has to short-circuit
+	// ahead of ExecContext, otherwise the nil db would be dereferenced.
+	runner.writeAudit(context.Background(), id, idText,
+		"postgres", "restore.failed", "summary", unmarshalableMeta())
+}
+
+// TestCustomerBackupRunner_WriteAudit_MarshalError drives the json.Marshal
+// failure degradation arm of the backup runner's writeAudit, with a nil db.
+func TestCustomerBackupRunner_WriteAudit_MarshalError(t *testing.T) {
+	runner := &CustomerBackupRunnerWorker{db: nil}
+	id, idText := uuid.New(), uuid.New().String()
+	runner.writeAudit(context.Background(), id, idText, "postgres", "backup.failed", "summary", unmarshalableMeta())
+}
+
+// TestPlatformDBBackup_WriteAudit_MarshalError — writeAudit checks w.db == nil
+// first, so a sqlmock DB is supplied. A catch-all Exec is armed and must stay
+// unconsumed (with zero expectations ExpectationsWereMet could never fail).
+func TestPlatformDBBackup_WriteAudit_MarshalError(t *testing.T) {
+	db, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+	mock.ExpectExec(".").WillReturnResult(sqlmock.NewResult(0, 0))
+	w := &PlatformDBBackupWorker{db: db}
+	w.writeAudit(context.Background(), "backup.failed", "summary", unmarshalableMeta())
+	if mock.ExpectationsWereMet() == nil {
+		t.Error("unexpected DB call on marshal-error path: the armed Exec was consumed")
+	}
+}
+
+// TestPropagationRunner_InsertAuditRow_MarshalError covers the
+// audit_meta_marshal_failed arm of insertPropagationAuditRow: the meta map
+// cannot be marshalled, so it must bail out before the INSERT (db is nil).
+func TestPropagationRunner_InsertAuditRow_MarshalError(t *testing.T) {
+	runner := &PropagationRunnerWorker{db: nil}
+	row := propagationRow{id: uuid.New(), teamID: uuid.New(), kind: "regrade"}
+	runner.insertPropagationAuditRow(context.Background(), row,
+		"propagation.failed", "summary", unmarshalableMeta())
+}
+
+// TestProvisionerReconciler_MarkAbandoned_UpdateError covers markAbandoned's
+// reachable `UPDATE resources ... err != nil` arm: when the status flip
+// fails, the error has to surface and the audit INSERT is never attempted.
+func TestProvisionerReconciler_MarkAbandoned_UpdateError(t *testing.T) {
+	regexMatcher := sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp)
+	db, mock, err := sqlmock.New(regexMatcher)
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+	// Only the UPDATE is scripted; no audit INSERT expectation is registered.
+	mock.ExpectExec(`UPDATE resources`).WillReturnError(errors.New("update blew up"))
+
+	worker := &ProvisionerReconcilerWorker{db: db}
+	candidate := reconcilerCandidate{id: uuid.New(), token: uuid.New(), resourceType: "postgres"}
+	probeErr := errors.New("probe failed")
+	if got := worker.markAbandoned(context.Background(), candidate, probeErr); got == nil {
+		t.Fatal("expected the UPDATE error to propagate, got nil")
+	}
+	if unmet := mock.ExpectationsWereMet(); unmet != nil {
+		t.Errorf("unmet sqlmock expectations: %v", unmet)
+	}
+}