Skip to content

Commit ecb294e

Browse files
mw5h and roachdev-claude committed
schemaexpr: inline UDF calls in computed column expressions
Contexts that bypass the optimizer — IMPORT and schema change backfill — cannot evaluate user-defined functions with SQL bodies. The eval.Expr path returns an unimplemented error for functions where fn.Body != "" because it lacks the optimizer and execbuilder infrastructure needed to produce RoutineExpr plan generators. Fix this by inlining UDF calls within MakeComputedExprs, the shared entry point for both IMPORT and backfill computed column evaluation. For each FuncExpr that references a UDF (fn.Body != ""), inlineUDFCalls: 1. Parses the SQL body and extracts the result expression. 2. Substitutes parameter name references with the actual call-site argument expressions. 3. Wraps the result in a CASE expression for strict (RETURNS NULL ON NULL INPUT) functions to preserve null-handling semantics. 4. Type-checks the inlined expression against the function's return type. This is safe because computed columns only permit immutable functions, and CockroachDB currently restricts UDFs in computed columns to single-statement SQL-language functions — exactly the class of UDFs that can be mechanically inlined. Fixes: #157195 Informs: #147472 Release note (bug fix): Fixed a bug where IMPORT INTO would fail when the destination table had computed columns defined using user-defined functions, reporting "function <oid> not found". Computed columns referencing immutable single-statement SQL UDFs now work correctly with IMPORT INTO. Co-Authored-By: roachdev-claude <roachdev-claude-bot@cockroachlabs.com>
1 parent 0f0cf0d commit ecb294e

3 files changed

Lines changed: 239 additions & 5 deletions

File tree

pkg/sql/catalog/schemaexpr/computed_column.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,16 @@ func MakeComputedExprs(
341341
return nil, catalog.TableColSet{}, err
342342
}
343343

344+
// Inline any UDF calls so that the expression can be evaluated
345+
// by eval.Expr. Contexts that bypass the optimizer (IMPORT,
346+
// backfill) cannot evaluate functions with SQL bodies directly.
347+
// This must happen before the assignment cast so the cast
348+
// operates on the resolved expression, not a FuncExpr wrapper.
349+
typedExpr, err = inlineUDFCalls(ctx, typedExpr, semaCtx)
350+
if err != nil {
351+
return nil, catalog.TableColSet{}, err
352+
}
353+
344354
// If the expression has a type that is not identical to the
345355
// column's type, wrap the computed column expression in an assignment cast.
346356
typedExpr, err = wrapWithAssignmentCast(ctx, typedExpr, col, semaCtx)
@@ -357,3 +367,132 @@ func MakeComputedExprs(
357367
}
358368
return computedExprs, refColIDs, nil
359369
}
370+
371+
// inlineUDFCalls walks a TypedExpr tree and replaces FuncExpr nodes that
372+
// reference user-defined functions (those with SQL bodies) with the inlined
373+
// body expression. This is necessary because eval.Expr cannot evaluate
374+
// functions with SQL bodies — that capability is only available through
375+
// the optimizer. Contexts that evaluate computed column expressions without
376+
// the optimizer (IMPORT, schema change backfill) rely on this inlining.
377+
//
378+
// Only single-statement SQL-language UDFs can be inlined. Multi-statement
379+
// or PL/pgSQL functions will produce an error.
380+
func inlineUDFCalls(
381+
ctx context.Context, expr tree.TypedExpr, semaCtx *tree.SemaContext,
382+
) (tree.TypedExpr, error) {
383+
newExpr, err := tree.SimpleVisit(expr, func(e tree.Expr) (bool, tree.Expr, error) {
384+
funcExpr, ok := e.(*tree.FuncExpr)
385+
if !ok {
386+
return true, e, nil
387+
}
388+
fn := funcExpr.ResolvedOverload()
389+
if fn == nil || fn.Body == "" {
390+
return true, e, nil
391+
}
392+
if fn.Language != tree.RoutineLangSQL {
393+
return false, nil, unimplemented.Newf(
394+
"computed_column_plpgsql_udf",
395+
"PL/pgSQL user-defined functions in computed columns "+
396+
"cannot be evaluated in this context",
397+
)
398+
}
399+
400+
// Parse the function body.
401+
stmts, err := parserutils.Parse(fn.Body)
402+
if err != nil {
403+
return false, nil, errors.Wrap(err, "parsing UDF body for inlining")
404+
}
405+
if len(stmts) != 1 {
406+
return false, nil, unimplemented.Newf(
407+
"computed_column_multi_stmt_udf",
408+
"multi-statement user-defined functions in computed columns "+
409+
"cannot be evaluated in this context",
410+
)
411+
}
412+
413+
// Extract the result expression from the SELECT statement.
414+
sel, ok := stmts[0].AST.(*tree.Select)
415+
if !ok {
416+
return false, nil, errors.Newf(
417+
"expected SELECT in UDF body, got %T", stmts[0].AST,
418+
)
419+
}
420+
selClause, ok := sel.Select.(*tree.SelectClause)
421+
if !ok || len(selClause.Exprs) != 1 {
422+
return false, nil, errors.Newf(
423+
"expected single-expression SELECT in UDF body",
424+
)
425+
}
426+
bodyExpr := selClause.Exprs[0].Expr
427+
428+
// Build a mapping from parameter names to the actual argument
429+
// expressions from the call site. The body references parameters
430+
// by name (e.g. "x"), and we replace those with the
431+
// already-typed argument expressions (e.g. IndexedVar).
432+
paramMap := make(map[string]tree.Expr, len(fn.RoutineParams))
433+
for i, p := range fn.RoutineParams {
434+
if i < len(funcExpr.Exprs) {
435+
paramMap[string(p.Name)] = funcExpr.Exprs[i]
436+
}
437+
}
438+
439+
// Substitute parameter references in the body.
440+
inlined, err := tree.SimpleVisit(
441+
bodyExpr,
442+
func(inner tree.Expr) (bool, tree.Expr, error) {
443+
if name, ok := inner.(*tree.UnresolvedName); ok &&
444+
name.NumParts == 1 {
445+
if arg, exists := paramMap[name.Parts[0]]; exists {
446+
return false, arg, nil
447+
}
448+
}
449+
if ci, ok := inner.(*tree.ColumnItem); ok {
450+
if arg, exists := paramMap[string(ci.ColumnName)]; exists {
451+
return false, arg, nil
452+
}
453+
}
454+
return true, inner, nil
455+
},
456+
)
457+
if err != nil {
458+
return false, nil, err
459+
}
460+
461+
// For strict functions (RETURNS NULL ON NULL INPUT), wrap the
462+
// inlined body to return NULL when any argument is NULL,
463+
// preserving the function's null-handling semantics.
464+
if !fn.CalledOnNullInput && len(funcExpr.Exprs) > 0 {
465+
var nullCheck tree.TypedExpr
466+
for i, arg := range funcExpr.Exprs {
467+
isNull := &tree.IsNullExpr{Expr: arg}
468+
if i == 0 {
469+
nullCheck = isNull
470+
} else {
471+
nullCheck = tree.NewTypedOrExpr(nullCheck, isNull)
472+
}
473+
}
474+
inlined = &tree.CaseExpr{
475+
Whens: []*tree.When{{
476+
Cond: nullCheck,
477+
Val: tree.DNull,
478+
}},
479+
Else: inlined,
480+
}
481+
}
482+
483+
// Type-check the inlined expression against the function's
484+
// return type.
485+
typedInlined, err := tree.TypeCheck(
486+
ctx, inlined, semaCtx, funcExpr.ResolvedType(),
487+
)
488+
if err != nil {
489+
return false, nil, errors.Wrap(err, "type-checking inlined UDF body")
490+
}
491+
492+
return false, typedInlined, nil
493+
})
494+
if err != nil {
495+
return nil, err
496+
}
497+
return newExpr.(tree.TypedExpr), nil
498+
}

pkg/sql/importer/import_stmt_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,41 @@ END
12521252
}
12531253
}
12541254

1255+
// TestImportIntoComputedColumnWithUDF verifies that IMPORT INTO works when
1256+
// the destination table has computed columns defined using user-defined
1257+
// functions. Regression test for #157195.
1258+
func TestImportIntoComputedColumnWithUDF(t *testing.T) {
1259+
defer leaktest.AfterTest(t)()
1260+
defer log.Scope(t).Close(t)
1261+
ctx := context.Background()
1262+
baseDir, cleanup := testutils.TempDir(t)
1263+
defer cleanup()
1264+
tc := serverutils.StartCluster(
1265+
t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}})
1266+
defer tc.Stopper().Stop(ctx)
1267+
conn := tc.ServerConn(0)
1268+
sqlDB := sqlutils.MakeSQLRunner(conn)
1269+
1270+
// Helper that writes CSV data and runs IMPORT INTO.
1271+
importCSV := func(t *testing.T, table, intoCols, csvData string) {
1272+
t.Helper()
1273+
f, err := os.CreateTemp(baseDir, "data")
1274+
require.NoError(t, err)
1275+
_, err = f.Write([]byte(csvData))
1276+
require.NoError(t, err)
1277+
require.NoError(t, f.Close())
1278+
sqlDB.Exec(t, fmt.Sprintf(
1279+
`IMPORT INTO %s (%s) CSV DATA ($1)`, table, intoCols,
1280+
), fmt.Sprintf("nodelocal://1/%s", filepath.Base(f.Name())))
1281+
}
1282+
1283+
sqlDB.Exec(t, `CREATE FUNCTION double_val(x INT) RETURNS INT IMMUTABLE LANGUAGE SQL AS $$ SELECT x * 2 $$`)
1284+
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT AS (double_val(a)) STORED)`)
1285+
importCSV(t, "t", "a", "1\n2\n3\n")
1286+
sqlDB.CheckQueryResults(t, `SELECT a, b FROM t ORDER BY a`,
1287+
[][]string{{"1", "2"}, {"2", "4"}, {"3", "6"}})
1288+
}
1289+
12551290
func TestImportRowLimit(t *testing.T) {
12561291
defer leaktest.AfterTest(t)()
12571292
defer log.Scope(t).Close(t)

pkg/sql/logictest/testdata/logic_test/udf_unsupported

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,85 @@ CREATE FUNCTION test_tbl_f() RETURNS INT IMMUTABLE LANGUAGE SQL AS $$ SELECT 1 $
66
statement ok
77
CREATE TABLE test_tbl_t (a INT PRIMARY KEY, b INT);
88

9-
# Insert a row to verify that backfills that use UDFs are blocked without internal errors.
9+
# Insert rows to verify that backfills that use UDFs produce correct values.
1010
statement ok
11-
INSERT INTO test_tbl_t VALUES (1, 1);
11+
INSERT INTO test_tbl_t VALUES (1, 10), (2, 20), (3, 30);
1212

13-
statement error pgcode 0A000 unimplemented: cannot evaluate function in this context
13+
# UDFs in computed columns are supported via expression inlining.
14+
statement ok
1415
ALTER TABLE test_tbl_t ADD COLUMN c int AS (test_tbl_f()) stored;
1516

17+
# Verify backfilled values are correct.
18+
query III rowsort
19+
SELECT * FROM test_tbl_t
20+
----
21+
1 10 1
22+
2 20 1
23+
3 30 1
24+
1625
statement error pgcode 0A000 unimplemented: cannot evaluate function in this context
17-
ALTER TABLE test_tbl_t ADD COLUMN c int DEFAULT (test_tbl_f());
26+
ALTER TABLE test_tbl_t ADD COLUMN d int DEFAULT (test_tbl_f());
1827

1928
statement error pgcode 0A000 unimplemented: cannot evaluate function in this context
2029
CREATE INDEX t_idx_partial ON test_tbl_t(b) WHERE test_tbl_f() > 0;
2130

22-
statement error pgcode 0A000 unimplemented: cannot evaluate function in this context
31+
# UDFs in expression indexes are supported via expression inlining.
32+
statement ok
2333
CREATE INDEX idx_b ON test_tbl_t (test_tbl_f());
2434

2535
subtest end
2636

2737

38+
subtest backfill_udf_computed_column
39+
40+
# Test that backfilling a computed column that references a UDF with
41+
# parameters produces correct values.
42+
statement ok
43+
CREATE FUNCTION bf_double(x INT) RETURNS INT IMMUTABLE LANGUAGE SQL AS $$ SELECT x * 2 $$;
44+
45+
statement ok
46+
CREATE TABLE bf_t (a INT PRIMARY KEY);
47+
48+
statement ok
49+
INSERT INTO bf_t VALUES (1), (2), (3), (4), (5);
50+
51+
statement ok
52+
ALTER TABLE bf_t ADD COLUMN b INT AS (bf_double(a)) STORED;
53+
54+
query II rowsort
55+
SELECT * FROM bf_t
56+
----
57+
1 2
58+
2 4
59+
3 6
60+
4 8
61+
5 10
62+
63+
# Test with a strict UDF (RETURNS NULL ON NULL INPUT) on a nullable
64+
# column. Rows with NULL input should produce NULL output.
65+
statement ok
66+
CREATE FUNCTION bf_strict(x INT) RETURNS INT IMMUTABLE RETURNS NULL ON NULL INPUT LANGUAGE SQL AS $$ SELECT x + 100 $$;
67+
68+
statement ok
69+
CREATE TABLE bf_strict_t (a INT PRIMARY KEY, b INT);
70+
71+
statement ok
72+
INSERT INTO bf_strict_t VALUES (1, 10), (2, NULL), (3, 30), (4, NULL);
73+
74+
statement ok
75+
ALTER TABLE bf_strict_t ADD COLUMN c INT AS (bf_strict(b)) STORED;
76+
77+
query III rowsort
78+
SELECT * FROM bf_strict_t
79+
----
80+
1 10 110
81+
2 NULL NULL
82+
3 30 130
83+
4 NULL NULL
84+
85+
subtest end
86+
87+
2888
subtest cross_db
2989

3090
statement ok

0 commit comments

Comments
 (0)