Skip to content

Commit 754e131

Browse files
mw5h and roachdev-claude committed
schemaexpr: inline UDF calls in computed column expressions
Contexts that bypass the optimizer — IMPORT and schema change backfill — cannot evaluate user-defined functions with SQL bodies. The eval.Expr path returns an unimplemented error for functions where fn.Body != "" because it lacks the optimizer and execbuilder infrastructure needed to produce RoutineExpr plan generators.

Fix this by inlining UDF calls within MakeComputedExprs, the shared entry point for both IMPORT and backfill computed column evaluation. For each FuncExpr that references a UDF (fn.Body != ""), inlineUDFCalls:

1. Parses the SQL body and extracts the result expression.
2. Substitutes parameter references (named, ColumnItem, and ordinal $1/$2 placeholders) with the actual call-site argument expressions.
3. Wraps the result in a CASE expression for strict (RETURNS NULL ON NULL INPUT) functions to preserve null-handling semantics.
4. Type-checks the inlined expression against the function's return type.

This provides parity with non-UDF computed column expressions for the constructs allowed in immutable UDF bodies: arithmetic, string operations, type casts, CASE/COALESCE, immutable builtin calls, and array constructors. Immutable UDFs cannot reference relations, so their bodies are restricted to the same scalar expression language available in non-UDF computed columns.

Limitations during backfill/IMPORT (the optimizer handles these fine in CREATE TABLE and INSERT):

- Multi-statement SQL UDFs: cannot be inlined to a single expression
- PL/pgSQL UDFs: cannot be inlined (different language)
- UDFs with OUT/INOUT parameters: not supported by the inliner
- Nested UDF calls: the DistSQLFunctionResolver resolves UDFs by OID only; inlined body expressions reference functions by name, which only resolves builtins
- UDF bodies containing subqueries, CTEs, or FROM clauses: cannot be evaluated outside the optimizer

All of these produce explicit unimplemented errors during backfill.
Fixes: #157195
Informs: #147472

Release note (bug fix): Fixed a bug where IMPORT INTO and schema change backfills would fail when the destination table had computed columns defined using user-defined functions, reporting "function <oid> not found". Computed columns referencing immutable single-expression SQL UDFs now work correctly with IMPORT INTO and ALTER TABLE ADD COLUMN. Multi-statement UDFs, PL/pgSQL UDFs, and nested UDF calls in computed columns are not yet supported in these contexts.

Co-Authored-By: roachdev-claude <roachdev-claude-bot@cockroachlabs.com>
1 parent 988abf9 commit 754e131

4 files changed

Lines changed: 623 additions & 15 deletions

File tree

pkg/sql/catalog/schemaexpr/computed_column.go

Lines changed: 193 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,8 @@ func MakeComputedExprs(
323323
continue
324324
}
325325

326-
// Collect all column IDs that are referenced in the partial index
327-
// predicate expression.
326+
// Collect all column IDs that are referenced in the computed
327+
// column expression.
328328
colIDs, err := ExtractColumnIDs(tableDesc, exprs[compExprIdx])
329329
if err != nil {
330330
return nil, refColIDs, err
@@ -341,6 +341,16 @@ func MakeComputedExprs(
341341
return nil, catalog.TableColSet{}, err
342342
}
343343

344+
// Inline any UDF calls so that the expression can be evaluated
345+
// by eval.Expr. Contexts that bypass the optimizer (IMPORT,
346+
// backfill) cannot evaluate functions with SQL bodies directly.
347+
// This must happen before the assignment cast so the cast
348+
// operates on the resolved expression, not a FuncExpr wrapper.
349+
typedExpr, err = inlineUDFCalls(ctx, typedExpr, semaCtx)
350+
if err != nil {
351+
return nil, catalog.TableColSet{}, err
352+
}
353+
344354
// If the expression has a type that is not identical to the
345355
// column's type, wrap the computed column expression in an assignment cast.
346356
typedExpr, err = wrapWithAssignmentCast(ctx, typedExpr, col, semaCtx)
@@ -357,3 +367,184 @@ func MakeComputedExprs(
357367
}
358368
return computedExprs, refColIDs, nil
359369
}
370+
371+
// inlineUDFCalls walks a TypedExpr tree and replaces FuncExpr nodes that
372+
// reference user-defined functions (those with SQL bodies) with the inlined
373+
// body expression. This is necessary because eval.Expr cannot evaluate
374+
// functions with SQL bodies — that capability is only available through
375+
// the optimizer. Contexts that evaluate computed column expressions without
376+
// the optimizer (IMPORT, schema change backfill) rely on this inlining.
377+
//
378+
// Only single-statement SQL-language UDFs can be inlined. Multi-statement
379+
// or PL/pgSQL functions will produce an error.
380+
func inlineUDFCalls(
381+
ctx context.Context, expr tree.TypedExpr, semaCtx *tree.SemaContext,
382+
) (tree.TypedExpr, error) {
383+
newExpr, err := tree.SimpleVisit(expr, func(e tree.Expr) (recurse bool, newExpr tree.Expr, err error) {
384+
funcExpr, ok := e.(*tree.FuncExpr)
385+
if !ok {
386+
return true, e, nil
387+
}
388+
fn := funcExpr.ResolvedOverload()
389+
if fn == nil || fn.Body == "" {
390+
return true, e, nil
391+
}
392+
if fn.Language != tree.RoutineLangSQL {
393+
return false, nil, unimplemented.Newf(
394+
"computed_column_plpgsql_udf",
395+
"PL/pgSQL user-defined functions in computed columns "+
396+
"cannot be evaluated in this context",
397+
)
398+
}
399+
400+
// Parse the function body.
401+
stmts, err := parserutils.Parse(fn.Body)
402+
if err != nil {
403+
return false, nil, errors.Wrap(err, "parsing UDF body for inlining")
404+
}
405+
if len(stmts) != 1 {
406+
return false, nil, unimplemented.Newf(
407+
"computed_column_multi_stmt_udf",
408+
"multi-statement user-defined functions in computed columns "+
409+
"cannot be evaluated in this context",
410+
)
411+
}
412+
413+
// Extract the result expression from the SELECT statement.
414+
sel, ok := stmts[0].AST.(*tree.Select)
415+
if !ok {
416+
return false, nil, errors.Newf(
417+
"expected SELECT in UDF body, got %T", stmts[0].AST,
418+
)
419+
}
420+
if sel.With != nil {
421+
return false, nil, unimplemented.Newf(
422+
"computed_column_cte_udf",
423+
"user-defined functions with CTEs in computed columns "+
424+
"cannot be evaluated in this context",
425+
)
426+
}
427+
selClause, ok := sel.Select.(*tree.SelectClause)
428+
if !ok || len(selClause.Exprs) != 1 {
429+
return false, nil, errors.Newf(
430+
"expected single-expression SELECT in UDF body",
431+
)
432+
}
433+
if len(selClause.From.Tables) > 0 {
434+
return false, nil, unimplemented.Newf(
435+
"computed_column_from_udf",
436+
"user-defined functions with FROM clauses in computed columns "+
437+
"cannot be evaluated in this context",
438+
)
439+
}
440+
bodyExpr := selClause.Exprs[0].Expr
441+
442+
// Build a mapping from named parameters to the actual argument
443+
// expressions from the call site. Unnamed parameters (those
444+
// with empty names) are only referenceable via ordinal $1/$2
445+
// placeholders, so they are excluded from this map.
446+
paramMap := make(map[string]tree.Expr, len(fn.RoutineParams))
447+
for i, p := range fn.RoutineParams {
448+
if tree.IsOutParamClass(p.Class) {
449+
return false, nil, unimplemented.Newf(
450+
"computed_column_out_param_udf",
451+
"user-defined functions with OUT or INOUT parameters "+
452+
"in computed columns cannot be evaluated in this context",
453+
)
454+
}
455+
if p.Name != "" && i < len(funcExpr.Exprs) {
456+
paramMap[string(p.Name)] = funcExpr.Exprs[i]
457+
}
458+
}
459+
460+
// Substitute parameter references in the body. Named
461+
// parameters appear as UnresolvedName or ColumnItem nodes;
462+
// unnamed parameters use ordinal placeholders ($1, $2, etc.).
463+
inlined, err := tree.SimpleVisit(
464+
bodyExpr,
465+
func(inner tree.Expr) (bool, tree.Expr, error) {
466+
if name, ok := inner.(*tree.UnresolvedName); ok &&
467+
name.NumParts == 1 {
468+
if arg, exists := paramMap[name.Parts[0]]; exists {
469+
return false, arg, nil
470+
}
471+
}
472+
if ci, ok := inner.(*tree.ColumnItem); ok {
473+
if arg, exists := paramMap[string(ci.ColumnName)]; exists {
474+
return false, arg, nil
475+
}
476+
}
477+
if ph, ok := inner.(*tree.Placeholder); ok {
478+
idx := int(ph.Idx)
479+
if idx < len(funcExpr.Exprs) {
480+
return false, funcExpr.Exprs[idx], nil
481+
}
482+
}
483+
return true, inner, nil
484+
},
485+
)
486+
if err != nil {
487+
return false, nil, err
488+
}
489+
490+
// Verify the inlined body contains no constructs that cannot
491+
// be evaluated outside the optimizer.
492+
if _, err := tree.SimpleVisit(inlined, func(inner tree.Expr) (bool, tree.Expr, error) {
493+
if _, ok := inner.(*tree.Subquery); ok {
494+
return false, inner, unimplemented.Newf(
495+
"computed_column_subquery_udf",
496+
"user-defined functions with subqueries in computed columns "+
497+
"cannot be evaluated in this context",
498+
)
499+
}
500+
// Check for unreplaced parameter references. These
501+
// indicate a parameter substitution bug or an
502+
// unsupported reference pattern.
503+
if ph, ok := inner.(*tree.Placeholder); ok {
504+
return false, inner, errors.Newf(
505+
"unresolved parameter reference $%d in inlined UDF body", ph.Idx+1,
506+
)
507+
}
508+
return true, inner, nil
509+
}); err != nil {
510+
return false, nil, err
511+
}
512+
513+
// For strict functions (RETURNS NULL ON NULL INPUT), wrap the
514+
// inlined body to return NULL when any argument is NULL,
515+
// preserving the function's null-handling semantics.
516+
if !fn.CalledOnNullInput && len(funcExpr.Exprs) > 0 {
517+
var nullCheck tree.TypedExpr
518+
for i, arg := range funcExpr.Exprs {
519+
isNull := &tree.IsNullExpr{Expr: arg}
520+
if i == 0 {
521+
nullCheck = isNull
522+
} else {
523+
nullCheck = tree.NewTypedOrExpr(nullCheck, isNull)
524+
}
525+
}
526+
inlined = &tree.CaseExpr{
527+
Whens: []*tree.When{{
528+
Cond: nullCheck,
529+
Val: tree.DNull,
530+
}},
531+
Else: inlined,
532+
}
533+
}
534+
535+
// Type-check the inlined expression against the function's
536+
// return type.
537+
typedInlined, err := tree.TypeCheck(
538+
ctx, inlined, semaCtx, funcExpr.ResolvedType(),
539+
)
540+
if err != nil {
541+
return false, nil, errors.Wrap(err, "type-checking inlined UDF body")
542+
}
543+
544+
return false, typedInlined, nil
545+
})
546+
if err != nil {
547+
return nil, err
548+
}
549+
return newExpr.(tree.TypedExpr), nil
550+
}

pkg/sql/importer/import_stmt_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,41 @@ END
12521252
}
12531253
}
12541254

1255+
// TestImportIntoComputedColumnWithUDF verifies that IMPORT INTO works when
1256+
// the destination table has computed columns defined using user-defined
1257+
// functions. Regression test for #157195.
1258+
func TestImportIntoComputedColumnWithUDF(t *testing.T) {
1259+
defer leaktest.AfterTest(t)()
1260+
defer log.Scope(t).Close(t)
1261+
ctx := context.Background()
1262+
baseDir, cleanup := testutils.TempDir(t)
1263+
defer cleanup()
1264+
tc := serverutils.StartCluster(
1265+
t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}})
1266+
defer tc.Stopper().Stop(ctx)
1267+
conn := tc.ServerConn(0)
1268+
sqlDB := sqlutils.MakeSQLRunner(conn)
1269+
1270+
// Helper that writes CSV data and runs IMPORT INTO.
1271+
importCSV := func(t *testing.T, table, intoCols, csvData string) {
1272+
t.Helper()
1273+
f, err := os.CreateTemp(baseDir, "data")
1274+
require.NoError(t, err)
1275+
_, err = f.Write([]byte(csvData))
1276+
require.NoError(t, err)
1277+
require.NoError(t, f.Close())
1278+
sqlDB.Exec(t, fmt.Sprintf(
1279+
`IMPORT INTO %s (%s) CSV DATA ($1)`, table, intoCols,
1280+
), fmt.Sprintf("nodelocal://1/%s", filepath.Base(f.Name())))
1281+
}
1282+
1283+
sqlDB.Exec(t, `CREATE FUNCTION double_val(x INT) RETURNS INT IMMUTABLE LANGUAGE SQL AS $$ SELECT x * 2 $$`)
1284+
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT AS (double_val(a)) STORED)`)
1285+
importCSV(t, "t", "a", "1\n2\n3\n")
1286+
sqlDB.CheckQueryResults(t, `SELECT a, b FROM t ORDER BY a`,
1287+
[][]string{{"1", "2"}, {"2", "4"}, {"3", "6"}})
1288+
}
1289+
12551290
func TestImportRowLimit(t *testing.T) {
12561291
defer leaktest.AfterTest(t)()
12571292
defer log.Scope(t).Close(t)

0 commit comments

Comments
 (0)