@@ -7,21 +7,38 @@ package handlers_test
77// and a LATER one errors, so we seed a team-owned stack on the pooled DB and
88// run the handler over a fault DB sharing the same postgres DSN.
99//
10- // * UpdateEnv GetStackEnvVars error → fetch_failed (stack.go L1185-1188, failAfter=2)
11- // * UpdateEnv UpdateStackEnvVars error → persist_failed (L1216-1219, failAfter=3)
12- // * Family GetStackBySlug error → fetch_failed (L1885, failAfter=1)
10+ // * UpdateEnv MergeStackEnvVars error → persist_failed (bug-bash #10)
11+ // * Family GetStackBySlug error → fetch_failed (failAfter=1)
12+ //
13+ // bug-bash #10 (2026-06-04): UpdateEnv's old GetStackEnvVars → merge-in-Go →
14+ // UpdateStackEnvVars sequence was a non-atomic read-modify-write that lost keys
15+ // under concurrency. It was replaced by a single atomic models.MergeStackEnvVars
16+ // call (one row-locked transaction). That collapsed the two distinct DB-error
17+ // arms (fetch_failed for the read, persist_failed for the write) into ONE
18+ // surface: any error out of MergeStackEnvVars — including the tx's internal
19+ // SELECT ... FOR UPDATE and the UPDATE — maps to persist_failed 503. (Note the
20+ // merge's tx-internal queries DO fault through the shared faultConn: lib/pq's
21+ // driver.Tx has no QueryerContext, so sql.Tx.QueryContext/ExecContext fall back
22+ // to the conn-level faultConn.QueryContext/ExecContext, which honor the
23+ // failAfter budget.) Both former tests now assert the single persist_failed
24+ // surface at the two query depths (the in-tx SELECT and the in-tx UPDATE).
1325
1426import (
27+ "context"
1528 "database/sql"
29+ "database/sql/driver"
1630 "errors"
31+ "io"
1732 "net/http"
1833 "net/http/httptest"
1934 "os"
2035 "strings"
36+ "sync"
2137 "testing"
2238
2339 "github.com/gofiber/fiber/v2"
2440 "github.com/google/uuid"
41+ "github.com/lib/pq"
2542 "github.com/stretchr/testify/assert"
2643 "github.com/stretchr/testify/require"
2744
@@ -82,7 +99,12 @@ func patchStackEnvF2(t *testing.T, app *fiber.App, slug, jwt, body string) (int,
8299 return resp .StatusCode , string (raw [:n ])
83100}
84101
85- func TestStackFinal2_UpdateEnv_FetchEnvFailed (t * testing.T ) {
102+ // TestStackFinal2_UpdateEnv_MergeSelectFailed faults the merge tx's internal
103+ // SELECT ... FOR UPDATE. team(1)+GetStackBySlug(2) ok, then MergeStackEnvVars
104+ // begins a tx and its SELECT (3rd conn-level query) errors → persist_failed.
105+ // (Pre-bug-bash-#10 this depth was the GetStackEnvVars "fetch_failed" arm; the
106+ // atomic merge collapsed it into the single persist_failed surface.)
107+ func TestStackFinal2_UpdateEnv_MergeSelectFailed (t * testing.T ) {
86108 stackNeedDB (t )
87109 seedDB , clean := testhelpers .SetupTestDB (t )
88110 defer clean ()
@@ -91,13 +113,16 @@ func TestStackFinal2_UpdateEnv_FetchEnvFailed(t *testing.T) {
91113 _ , slug := seedStack (t , seedDB , & teamID , "healthy" )
92114 jwt := testhelpers .MustSignSessionJWT (t , uuid .NewString (), teamIDStr , "stkf2@example.com" )
93115
94- // team(1)+GetStackBySlug(2) ok, GetStackEnvVars (3) errors.
116+ // team(1)+GetStackBySlug(2) ok, merge tx SELECT ... FOR UPDATE (3) errors.
95117 app := stackFaultApp (t , openFaultDB (t , 2 ))
96118 status , body := patchStackEnvF2 (t , app , slug , jwt , `{"env":{"FOO":"bar"}}` )
97119 assert .Equal (t , http .StatusServiceUnavailable , status )
98- assert .Contains (t , body , "fetch_failed " )
120+ assert .Contains (t , body , "persist_failed " )
99121}
100122
123+ // TestStackFinal2_UpdateEnv_PersistFailed faults the merge tx's internal UPDATE.
124+ // team(1)+slug(2)+merge SELECT(3) ok, the merge tx UPDATE(4) errors →
125+ // persist_failed.
101126func TestStackFinal2_UpdateEnv_PersistFailed (t * testing.T ) {
102127 stackNeedDB (t )
103128 seedDB , clean := testhelpers .SetupTestDB (t )
@@ -107,7 +132,7 @@ func TestStackFinal2_UpdateEnv_PersistFailed(t *testing.T) {
107132 _ , slug := seedStack (t , seedDB , & teamID , "healthy" )
108133 jwt := testhelpers .MustSignSessionJWT (t , uuid .NewString (), teamIDStr , "stkf2@example.com" )
109134
110- // team(1)+slug(2)+GetStackEnvVars (3) ok, UpdateStackEnvVars (4) errors.
135+ // team(1)+slug(2)+merge SELECT FOR UPDATE (3) ok, merge UPDATE (4) errors.
111136 app := stackFaultApp (t , openFaultDB (t , 3 ))
112137 status , body := patchStackEnvF2 (t , app , slug , jwt , `{"env":{"FOO":"bar"}}` )
113138 assert .Equal (t , http .StatusServiceUnavailable , status )
@@ -131,3 +156,103 @@ func TestStackFinal2_Family_FetchFailed(t *testing.T) {
131156 defer resp .Body .Close ()
132157 assert .Equal (t , http .StatusServiceUnavailable , resp .StatusCode )
133158}
159+
160+ // ── merge-NotFound (TOCTOU) arm coverage ──────────────────────────────────────
161+ //
162+ // The UpdateEnv handler maps a models.ErrStackNotFound out of MergeStackEnvVars
163+ // to a 404 — the row vanished between GetStackBySlug and the merge tx's
164+ // SELECT ... FOR UPDATE (a genuine TOCTOU that can't be timed deterministically
165+ // against a live row). To exercise that handler arm deterministically we proxy
166+ // the real pq driver and return ZERO rows ONLY for the merge's
167+ // `SELECT ... FOR UPDATE` query: GetStackBySlug (no FOR UPDATE) still finds the
168+ // row, so the handler reaches the merge, whose SELECT then sees no rows →
169+ // ErrStackNotFound → 404 not_found.
170+
171+ type forUpdateVanishConn struct { inner driver.Conn }
172+
173+ func (c * forUpdateVanishConn ) Prepare (q string ) (driver.Stmt , error ) { return c .inner .Prepare (q ) }
174+ func (c * forUpdateVanishConn ) Close () error { return c .inner .Close () }
175+ func (c * forUpdateVanishConn ) Begin () (driver.Tx , error ) { return c .inner .Begin () } //nolint:staticcheck
176+
177+ func (c * forUpdateVanishConn ) BeginTx (ctx context.Context , opts driver.TxOptions ) (driver.Tx , error ) {
178+ if bt , ok := c .inner .(driver.ConnBeginTx ); ok {
179+ return bt .BeginTx (ctx , opts )
180+ }
181+ return c .inner .Begin () //nolint:staticcheck
182+ }
183+
184+ func (c * forUpdateVanishConn ) QueryContext (ctx context.Context , query string , args []driver.NamedValue ) (driver.Rows , error ) {
185+ if strings .Contains (query , "FOR UPDATE" ) {
186+ // Simulate the row vanishing: return an empty result set so
187+ // MergeStackEnvVars' Scan yields sql.ErrNoRows → ErrStackNotFound.
188+ return & emptyRows {}, nil
189+ }
190+ if qc , ok := c .inner .(driver.QueryerContext ); ok {
191+ return qc .QueryContext (ctx , query , args )
192+ }
193+ return nil , driver .ErrSkip
194+ }
195+
196+ func (c * forUpdateVanishConn ) ExecContext (ctx context.Context , query string , args []driver.NamedValue ) (driver.Result , error ) {
197+ if ec , ok := c .inner .(driver.ExecerContext ); ok {
198+ return ec .ExecContext (ctx , query , args )
199+ }
200+ return nil , driver .ErrSkip
201+ }
202+
203+ // emptyRows is a zero-row driver.Rows for the single env_vars column.
204+ type emptyRows struct {}
205+
206+ func (* emptyRows ) Columns () []string { return []string {"coalesce" } }
207+ func (* emptyRows ) Close () error { return nil }
208+ func (* emptyRows ) Next (_ []driver.Value ) error { return io .EOF }
209+
210+ type forUpdateVanishDriver struct { dsn string }
211+
212+ func (d * forUpdateVanishDriver ) Open (_ string ) (driver.Conn , error ) {
213+ inner , err := pq .Open (d .dsn )
214+ if err != nil {
215+ return nil , err
216+ }
217+ return & forUpdateVanishConn {inner : inner }, nil
218+ }
219+
220+ var fuvRegMu sync.Mutex
221+ var fuvRegN int
222+
223+ func openForUpdateVanishDB (t * testing.T ) * sql.DB {
224+ t .Helper ()
225+ dsn := os .Getenv ("TEST_DATABASE_URL" )
226+ if dsn == "" {
227+ t .Skip ("TEST_DATABASE_URL not set" )
228+ }
229+ fuvRegMu .Lock ()
230+ fuvRegN ++
231+ name := "fuvpq_" + itoaFault (fuvRegN )
232+ sql .Register (name , & forUpdateVanishDriver {dsn : dsn })
233+ fuvRegMu .Unlock ()
234+ db , err := sql .Open (name , dsn )
235+ require .NoError (t , err )
236+ db .SetMaxOpenConns (1 )
237+ db .SetMaxIdleConns (1 )
238+ t .Cleanup (func () { _ = db .Close () })
239+ return db
240+ }
241+
242+ // TestStackFinal2_UpdateEnv_MergeRowVanished_404 covers the handler's
243+ // ErrStackNotFound→404 mapping on the atomic merge: GetStackBySlug finds the
244+ // row, but the merge tx's SELECT ... FOR UPDATE sees none (simulated vanish).
245+ func TestStackFinal2_UpdateEnv_MergeRowVanished_404 (t * testing.T ) {
246+ stackNeedDB (t )
247+ seedDB , clean := testhelpers .SetupTestDB (t )
248+ defer clean ()
249+ teamIDStr := testhelpers .MustCreateTeamDB (t , seedDB , "pro" )
250+ teamID := uuid .MustParse (teamIDStr )
251+ _ , slug := seedStack (t , seedDB , & teamID , "healthy" )
252+ jwt := testhelpers .MustSignSessionJWT (t , uuid .NewString (), teamIDStr , "vanish@example.com" )
253+
254+ app := stackFaultApp (t , openForUpdateVanishDB (t ))
255+ status , body := patchStackEnvF2 (t , app , slug , jwt , `{"env":{"FOO":"bar"}}` )
256+ assert .Equal (t , http .StatusNotFound , status )
257+ assert .Contains (t , body , "not_found" )
258+ }
0 commit comments