Skip to content

Commit 7461f4a

Browse files
sql: enforce SERIALIZABLE isolation for DDL in stored procedures (#170733)
sql: enforce SERIALIZABLE isolation for DDL in stored procedures
2 parents 3bf642e + 4e2351f commit 7461f4a

7 files changed

Lines changed: 546 additions & 89 deletions

File tree

pkg/sql/conn_executor.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,6 +1991,23 @@ type connExecutor struct {
19911991
// responds to user queries or an internal one.
19921992
executorType executorType
19931993

1994+
// forceNextTxnSerializable is a one-shot override consumed when the next
1995+
// implicit txn is opened. It is set by maybeAutoCommitBeforeDDL only
1996+
// after handleAutoCommit confirms the current txn was committed for a
1997+
// CALL whose body contains DDL under weaker isolation; the restart must
1998+
// then begin at SERIALIZABLE so the body's DDL runs safely.
1999+
//
2000+
// The flag is cleared in two places to bound its lifetime to the very
2001+
// next txn: implicitTxnIsoLevel applies and clears it when the restart's
2002+
// implicit txn opens; the BEGIN branch of execStmtInNoTxnState discards
2003+
// it if the user opens an explicit txn first, preventing leakage into
2004+
// a later unrelated implicit txn.
2005+
//
2006+
// Lives outside extraTxnState because that struct is reset per-txn and
2007+
// this flag must survive the boundary between the auto-committed txn
2008+
// and its restart.
2009+
forceNextTxnSerializable bool
2010+
19942011
// hasCreatedTemporarySchema is set if the executor has created a
19952012
// temporary schema, which requires special cleanup on close.
19962013
hasCreatedTemporarySchema bool
@@ -3821,6 +3838,18 @@ var allowBufferedWritesForWeakIsolation = settings.RegisterBoolSetting(
38213838
),
38223839
)
38233840

3841+
// implicitTxnIsoLevel returns the isolation level to use for a new implicit
3842+
// transaction. It honors and clears forceNextTxnSerializable: when set, the
3843+
// next implicit txn starts at SERIALIZABLE regardless of the session default.
3844+
// The flag is consumed exactly once.
3845+
func (ex *connExecutor) implicitTxnIsoLevel(ctx context.Context) isolation.Level {
3846+
if ex.forceNextTxnSerializable {
3847+
ex.forceNextTxnSerializable = false
3848+
return isolation.Serializable
3849+
}
3850+
return ex.txnIsolationLevelToKV(ctx, tree.UnspecifiedIsolation)
3851+
}
3852+
38243853
func (ex *connExecutor) txnIsolationLevelToKV(
38253854
ctx context.Context, level tree.IsolationLevel,
38263855
) isolation.Level {

pkg/sql/conn_executor_ddl.go

Lines changed: 200 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ import (
1313
"github.com/cockroachdb/cockroach/pkg/settings"
1414
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
1515
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
16+
"github.com/cockroachdb/cockroach/pkg/sql/parser"
1617
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgnotice"
18+
plpgsqlparser "github.com/cockroachdb/cockroach/pkg/sql/plpgsql/parser"
1719
"github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scrun"
20+
"github.com/cockroachdb/cockroach/pkg/sql/sem/plpgsqltree"
1821
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
1922
"github.com/cockroachdb/cockroach/pkg/util/fsm"
2023
"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -33,19 +36,31 @@ var defaultAutocommitBeforeDDL = settings.RegisterBoolSetting(
3336
// auto-committed before processing a DDL statement. If so, it auto-commits the
3437
// transaction and advances the state machine so that the current command gets
3538
// processed again.
39+
//
40+
// Two conditions can trigger an auto-commit:
41+
//
42+
// 1. The statement itself is a DDL (CanModifySchema) and either we are in an
43+
// explicit transaction or a prior statement has already executed; the
44+
// auto-commit is gated on the autocommit_before_ddl session setting.
45+
//
46+
// 2. The statement is a CALL whose procedure body contains DDL, the txn
47+
// isolation tolerates write skew, and the txn is implicit. In-place
48+
// isolation upgrade is unavailable in the routine body (the txn is
49+
// already active by the time CALL planning completes), so we
50+
// auto-commit and arrange for the next implicit txn to start at
51+
// SERIALIZABLE via forceNextTxnSerializable. This case is gated on
52+
// implicit txns only — explicit-txn CALLs with DDL are handled
53+
// elsewhere (#170019) or rejected by the in-routine safety net.
3654
func (ex *connExecutor) maybeAutoCommitBeforeDDL(
3755
ctx context.Context, ast tree.Statement,
3856
) (fsm.Event, fsm.EventPayload) {
39-
// We will auto-commit if all of the following conditions are met:
40-
// - The query is not internal.
41-
// - Auto-commit before DDL is enabled.
42-
// - We have an explicit transaction or have executed at least one
43-
// statement in the transaction. The former check ensures that we commit
44-
// if we haven't executed the first statement, which can happen if only the
45-
// BEGIN statement was executed.
57+
if ex.executorType == executorTypeInternal {
58+
return nil, nil
59+
}
4660
explicitTxn := !ex.implicitTxn()
47-
if ex.executorType != executorTypeInternal &&
48-
tree.CanModifySchema(ast) &&
61+
62+
// Case 1: top-level DDL with autocommit_before_ddl enabled.
63+
if tree.CanModifySchema(ast) &&
4964
ex.sessionData().AutoCommitBeforeDDL &&
5065
(explicitTxn || ex.extraTxnState.firstStmtExecuted) {
5166
if err := ex.planner.SendClientNotice(
@@ -55,17 +70,188 @@ func (ex *connExecutor) maybeAutoCommitBeforeDDL(
5570
); err != nil {
5671
return ex.makeErrEvent(err, ast)
5772
}
58-
retEv, retPayload := ex.handleAutoCommit(ctx, ast)
59-
if _, committed := retEv.(eventTxnFinishCommitted); committed && retPayload == nil {
60-
// Use eventTxnCommittedDueToDDL so that the current statement gets
61-
// picked up again when the state machine advances.
62-
retEv = eventTxnCommittedDueToDDL{}
73+
return ex.autoCommitForDDL(ctx, ast)
74+
}
75+
76+
// Case 2: implicit-txn CALL whose body contains DDL under weak isolation.
77+
// Skip when:
78+
// - A prior statement in the same implicit-txn batch has already
79+
// executed (mirrors Case 1's reluctance to split a multi-statement
80+
// batch with an auto-commit; the in-routine safety net rejects
81+
// instead).
82+
// - The planner is not bound to the current txn (prepare path:
83+
// descriptor resolution there hits a stale planner state).
84+
// - There are active portals: auto-committing would invalidate them,
85+
// and the extended-query protocol does not handle a portal
86+
// disappearing mid-execute. Users hitting this path fall back to
87+
// the in-routine safety net.
88+
if call, ok := ast.(*tree.Call); ok &&
89+
!explicitTxn &&
90+
!ex.extraTxnState.firstStmtExecuted &&
91+
ex.state.mu.txn != nil &&
92+
ex.planner.txn == ex.state.mu.txn &&
93+
!ex.extraTxnState.prepStmtsNamespace.HasActivePortals() &&
94+
ex.state.mu.txn.IsoLevel().ToleratesWriteSkew() {
95+
hasDDL, err := ex.callBodyContainsDDL(ctx, call)
96+
if err != nil {
97+
// Any error from the pre-plan walker (descriptor lookup, body
98+
// parse, etc.) is swallowed: the same error will be raised by
99+
// normal planning with proper context, and surfacing it from
100+
// the auto-commit hook would replace a clearer planning error
101+
// with one tagged at the wrong source line.
102+
log.VEventf(ctx, 2, "callBodyContainsDDL: %v", err)
103+
return nil, nil
104+
}
105+
if !hasDDL {
106+
return nil, nil
107+
}
108+
if err := ex.planner.SendClientNotice(
109+
ctx,
110+
pgnotice.Newf("setting transaction isolation level to SERIALIZABLE due to schema change"),
111+
false, /* immediateFlush */
112+
); err != nil {
113+
return ex.makeErrEvent(err, ast)
114+
}
115+
// Only set forceNextTxnSerializable after confirming the auto-commit
116+
// rewrote the event to eventTxnCommittedDueToDDL — meaning the
117+
// statement will be re-processed in a fresh txn that should pick up
118+
// the override. Otherwise the flag would leak into an unrelated
119+
// future implicit txn.
120+
retEv, retPayload := ex.autoCommitForDDL(ctx, ast)
121+
if _, ok := retEv.(eventTxnCommittedDueToDDL); ok && retPayload == nil {
122+
ex.forceNextTxnSerializable = true
63123
}
64124
return retEv, retPayload
65125
}
126+
66127
return nil, nil
67128
}
68129

130+
// autoCommitForDDL commits the current txn and rewrites a successful commit
131+
// into eventTxnCommittedDueToDDL so the current statement is picked up again
132+
// once the state machine advances.
133+
func (ex *connExecutor) autoCommitForDDL(
134+
ctx context.Context, ast tree.Statement,
135+
) (fsm.Event, fsm.EventPayload) {
136+
retEv, retPayload := ex.handleAutoCommit(ctx, ast)
137+
if _, committed := retEv.(eventTxnFinishCommitted); committed && retPayload == nil {
138+
retEv = eventTxnCommittedDueToDDL{}
139+
}
140+
return retEv, retPayload
141+
}
142+
143+
// callBodyContainsDDL resolves the procedure named by the CALL and returns
144+
// true if any of its overload bodies contains a DDL statement reachable
145+
// from the top-level body. Nested CALLs inside the body are not followed;
146+
// if such a nested CALL has DDL in its own body, the in-routine reject in
147+
// routineGenerator.startInternal catches it at runtime.
148+
//
149+
// The function may activate the txn (descriptor reads); callers must be
150+
// prepared to auto-commit. If a procedure has multiple overloads, any
151+
// overload with DDL triggers the upgrade — we cannot identify the selected
152+
// overload pre-planning without re-doing argument type-checking.
153+
//
154+
// The AST's resolved function reference is restored before returning, so
155+
// the subsequent optbuilder pass re-resolves against the post-restart txn
156+
// instead of inheriting a stale cached descriptor from this peek.
157+
func (ex *connExecutor) callBodyContainsDDL(ctx context.Context, call *tree.Call) (bool, error) {
158+
// Resolve mutates both FunctionReference (always) and ReferenceByName
159+
// (in the *UnresolvedName branch). Snapshot the whole struct so the
160+
// post-restart re-plan re-resolves against the new txn instead of
161+
// inheriting cached state from this peek.
162+
savedRef := call.Proc.Func
163+
defer func() { call.Proc.Func = savedRef }()
164+
def, err := call.Proc.Func.Resolve(ctx, ex.planner.semaCtx.SearchPath, ex.planner.semaCtx.FunctionResolver)
165+
if err != nil {
166+
return false, err
167+
}
168+
for _, ov := range def.Overloads {
169+
if ov.Type != tree.ProcedureRoutine {
170+
continue
171+
}
172+
// Resolve typically returns a signature-only overload cached on the
173+
// schema descriptor. Fetch the full descriptor by OID to obtain the
174+
// body, mirroring the path the optbuilder takes later.
175+
full := ov.Overload
176+
if ov.UDFContainsOnlySignature || ov.Body == "" {
177+
_, fullOv, resolveErr := ex.planner.ResolveFunctionByOID(ctx, ov.Oid)
178+
if resolveErr != nil {
179+
return false, resolveErr
180+
}
181+
full = fullOv
182+
}
183+
if full == nil || full.Body == "" {
184+
continue
185+
}
186+
hasDDL, err := routineBodyContainsDDL(full.Body, full.Language)
187+
if err != nil {
188+
return false, err
189+
}
190+
if hasDDL {
191+
return true, nil
192+
}
193+
}
194+
return false, nil
195+
}
196+
197+
// routineBodyContainsDDL parses the routine body and returns true if any
198+
// SQL statement reachable from the top-level body is DDL. PL/pgSQL bodies
199+
// are walked via plpgsqltree so that SQL statements embedded as plain
200+
// statements inside the block (wrapped in *plpgsqltree.Execute) are
201+
// inspected.
202+
func routineBodyContainsDDL(body string, lang tree.RoutineLanguage) (bool, error) {
203+
switch lang {
204+
case tree.RoutineLangSQL:
205+
stmts, err := parser.Parse(body)
206+
if err != nil {
207+
return false, err
208+
}
209+
for _, s := range stmts {
210+
if tree.CanModifySchema(s.AST) {
211+
return true, nil
212+
}
213+
}
214+
return false, nil
215+
case tree.RoutineLangPLpgSQL:
216+
parsed, err := plpgsqlparser.Parse(body)
217+
if err != nil {
218+
return false, err
219+
}
220+
v := &plpgsqlDDLFinder{}
221+
plpgsqltree.Walk(v, parsed.AST)
222+
return v.found, nil
223+
default:
224+
return false, errors.AssertionFailedf("unknown routine language %q", lang)
225+
}
226+
}
227+
228+
// plpgsqlDDLFinder is a plpgsqltree.StatementVisitor that sets found to true
229+
// the first time it encounters a PL/pgSQL statement wrapping a DDL SQL
230+
// statement. Note: *plpgsqltree.Execute is the AST node for any plain SQL
231+
// statement embedded in a PL/pgSQL block (e.g. a top-level CREATE TABLE),
232+
// not for the PL/pgSQL EXECUTE keyword (that is *plpgsqltree.DynamicExecute).
233+
// Dynamic SQL is not inspected because the SQL string is opaque at parse
234+
// time. Other constructs that wrap queries — CursorDeclaration, Open,
235+
// ReturnQuery — never carry DDL, so they are not inspected.
236+
type plpgsqlDDLFinder struct {
237+
found bool
238+
}
239+
240+
var _ plpgsqltree.StatementVisitor = (*plpgsqlDDLFinder)(nil)
241+
242+
func (v *plpgsqlDDLFinder) Visit(stmt plpgsqltree.Statement) (plpgsqltree.Statement, bool) {
243+
if v.found {
244+
return stmt, false
245+
}
246+
if e, ok := stmt.(*plpgsqltree.Execute); ok && e.SqlStmt != nil {
247+
if tree.CanModifySchema(e.SqlStmt) {
248+
v.found = true
249+
return stmt, false
250+
}
251+
}
252+
return stmt, true
253+
}
254+
69255
// maybeAdjustTxnForDDL checks if the statement is a schema change and adjusts
70256
// the txn if it is. The following adjustments will be performed:
71257
// - upgrading to serializable isolation. If the txn contains multiple

pkg/sql/conn_executor_exec.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3865,6 +3865,11 @@ func (ex *connExecutor) execStmtInNoTxnState(
38653865
ex.incrementExecutedStmtCounter(ast)
38663866
}
38673867
}()
3868+
// The implicit-CALL+DDL upgrade override (forceNextTxnSerializable)
3869+
// is meant for the very next implicit txn; an intervening explicit
3870+
// BEGIN discards it so it can't silently upgrade an unrelated
3871+
// future implicit txn.
3872+
ex.forceNextTxnSerializable = false
38683873
mode, sqlTs, historicalTs, err := ex.beginTransactionTimestampsAndReadMode(ctx, s)
38693874
if err != nil {
38703875
return ex.makeErrEvent(err, s)
@@ -3913,7 +3918,7 @@ func (ex *connExecutor) execStmtInNoTxnState(
39133918
historicalTs,
39143919
ex.transitionCtx,
39153920
ex.QualityOfService(),
3916-
ex.txnIsolationLevelToKV(ctx, tree.UnspecifiedIsolation),
3921+
ex.implicitTxnIsoLevel(ctx),
39173922
ex.omitInRangefeeds(),
39183923
ex.bufferedWritesEnabled(implicitTxn),
39193924
ex.rng.internal,
@@ -3948,7 +3953,7 @@ func (ex *connExecutor) beginImplicitTxn(
39483953
historicalTs,
39493954
ex.transitionCtx,
39503955
qos,
3951-
ex.txnIsolationLevelToKV(ctx, tree.UnspecifiedIsolation),
3956+
ex.implicitTxnIsoLevel(ctx),
39523957
ex.omitInRangefeeds(),
39533958
ex.bufferedWritesEnabled(implicitTxn),
39543959
ex.rng.internal,

pkg/sql/logictest/testdata/logic_test/procedure_dcl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,18 +318,29 @@ BEGIN
318318
END
319319
$$;
320320

321+
# Under SERIALIZABLE the nested CALL succeeds. Under weak isolation the
322+
# pre-plan body walker does not recurse into nested CALLs, so the inner
323+
# GRANT hits the in-routine safety net and the CALL is rejected. See the
324+
# analogous nested-CALL case in procedure_ddl for the same trade-off.
325+
skipif config local-read-committed local-repeatable-read
321326
statement ok
322327
CALL p_outer_dcl();
323328

329+
skipif config local-read-committed local-repeatable-read
324330
query TTTTTB colnames
325331
SHOW GRANTS ON t_dcl FOR r1
326332
----
327333
database_name schema_name table_name grantee privilege_type is_grantable
328334
test public t_dcl r1 SELECT false
329335

336+
skipif config local-read-committed local-repeatable-read
330337
statement ok
331338
REVOKE SELECT ON t_dcl FROM r1;
332339

340+
onlyif config local-read-committed local-repeatable-read
341+
statement error pgcode 0A000 to use multi-statement transactions involving a schema change under weak isolation levels
342+
CALL p_outer_dcl();
343+
333344
statement ok
334345
DROP PROCEDURE p_outer_dcl;
335346
DROP PROCEDURE p_inner_dcl;

0 commit comments

Comments
 (0)