Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 10cf398

Browse files
wrap more nodes in ctes
1 parent b7d360a commit 10cf398

File tree

44 files changed

+570
-452
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+570
-452
lines changed

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ def _compile_result_node(root: nodes.ResultNode) -> str:
113113
# Probably, should defer even further
114114
root = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(root))
115115

116-
sqlglot_ir_obj = compile_node(rewrite.as_sql_nodes(root), uid_gen)
116+
# TODO: Bake all IDs in tree, stop passing uid_gen to emitters
117+
sqlglot_ir_obj = compile_node(rewrite.as_sql_nodes(root, uid_gen), uid_gen)
117118
return sqlglot_ir_obj.sql
118119

119120

bigframes/core/rewrite/as_sql.py

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from bigframes.core import (
2020
agg_expressions,
2121
expression,
22+
guid,
2223
identifiers,
2324
nodes,
2425
ordering,
@@ -222,31 +223,59 @@ def _as_sql_node(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
222223
return node
223224

224225

225-
def _extract_ctes(root: nodes.BigFrameNode) -> nodes.BigFrameNode:
226-
topological_ctes = list(
227-
filter(lambda n: isinstance(n, nodes.CteNode), root.iter_nodes_topo())
226+
# In the future, we will have sql nodes for each of these node types.
227+
_LOGICAL_NODE_TYPES_TO_WRAP = (
228+
nodes.ReadLocalNode,
229+
nodes.ExplodeNode,
230+
nodes.InNode,
231+
nodes.AggregateNode,
232+
nodes.FromRangeNode,
233+
nodes.ConcatNode,
234+
)
235+
236+
237+
def _extract_nodes_as_ctes(
238+
root: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator
239+
) -> nodes.BigFrameNode:
240+
# CTE nodes we must wrap, and logical nodes we want to wrap for style
241+
topological_extracted_nodes = list(
242+
filter(
243+
lambda n: isinstance(n, (nodes.CteNode, *_LOGICAL_NODE_TYPES_TO_WRAP)),
244+
root.iter_nodes_topo(),
245+
)
246+
)
247+
cte_names = tuple(
248+
next(uid_gen.get_uid_stream("bfcte_"))
249+
for _ in range(len(topological_extracted_nodes))
228250
)
229-
cte_names = tuple(f"bfcte_{i}" for i in range(len(topological_ctes)))
230251

231-
if len(topological_ctes) == 0:
252+
if len(topological_extracted_nodes) == 0:
232253
return root
233254

234255
mapping = {
235-
cte_node: sql_nodes.SqlCteRefNode(cte_name, tuple(cte_node.fields))
236-
for cte_node, cte_name in zip(topological_ctes, cte_names)
256+
extracted_node: sql_nodes.SqlCteRefNode(cte_name, tuple(extracted_node.fields))
257+
for extracted_node, cte_name in zip(topological_extracted_nodes, cte_names)
237258
}
238259

260+
def _maybe_unwrap_cte(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
261+
"""Unwrap CTE nodes, but not logical nodes."""
262+
if isinstance(node, nodes.CteNode):
263+
return node.child
264+
return node
265+
239266
# Replace all CTEs with CTE references and wrap the new root in a WITH clause
240267
return sql_nodes.SqlWithCtesNode(
241268
root.top_down(lambda x: mapping.get(x, x)),
242269
cte_names,
243270
tuple(
244-
cte.child.top_down(lambda x: mapping.get(x, x)) for cte in topological_ctes # type: ignore
271+
_maybe_unwrap_cte(extracted_node).top_down(lambda x: mapping.get(x, x) if x != extracted_node else x) for extracted_node in topological_extracted_nodes # type: ignore
245272
),
246273
)
247274

248275

249-
def as_sql_nodes(root: nodes.BigFrameNode) -> nodes.BigFrameNode:
250-
# TODO: Aggregations, Unions, Joins, raw data sources
251-
root = _extract_ctes(root)
252-
return nodes.bottom_up(root, _as_sql_node)
276+
def as_sql_nodes(
277+
root: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator
278+
) -> nodes.BigFrameNode:
279+
root = _extract_nodes_as_ctes(root, uid_gen)
280+
root = nodes.bottom_up(root, _as_sql_node)
281+
return root

bigframes/session/bq_caching_executor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,10 @@ def _export_gbq(
308308

309309
ir = sqlglot_ir.SQLGlotIR.from_unparsed_query(sql)
310310
if spec.if_exists == "append":
311-
sql = sg_sql.to_sql(sg_sql.insert(ir.expr, spec.table))
311+
sql = sg_sql.to_sql(sg_sql.insert(ir.expr.as_select_all(), spec.table))
312312
else: # for "replace"
313313
assert spec.if_exists == "replace"
314-
sql = sg_sql.to_sql(sg_sql.replace(ir.expr, spec.table))
314+
sql = sg_sql.to_sql(sg_sql.replace(ir.expr.as_select_all(), spec.table))
315315
else:
316316
dispositions = {
317317
"fail": bigquery.WriteDisposition.WRITE_EMPTY,
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
SELECT
2-
`bfcol_2` AS `corr_col`
3-
FROM (
1+
WITH `bfcte_0` AS (
42
SELECT
53
CORR(`int64_col`, `float64_col`) AS `bfcol_2`
64
FROM (
@@ -9,4 +7,7 @@ FROM (
97
`float64_col`
108
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
119
)
12-
)
10+
)
11+
SELECT
12+
`bfcol_2` AS `corr_col`
13+
FROM `bfcte_0`
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
SELECT
2-
`bfcol_2` AS `cov_col`
3-
FROM (
1+
WITH `bfcte_0` AS (
42
SELECT
53
COVAR_SAMP(`int64_col`, `float64_col`) AS `bfcol_2`
64
FROM (
@@ -9,4 +7,7 @@ FROM (
97
`float64_col`
108
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
119
)
12-
)
10+
)
11+
SELECT
12+
`bfcol_2` AS `cov_col`
13+
FROM `bfcte_0`
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
SELECT
2-
`bfcol_32` AS `size`
3-
FROM (
1+
WITH `bfcte_0` AS (
42
SELECT
53
COUNT(1) AS `bfcol_32`
64
FROM (
75
SELECT
86
*
97
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
108
)
11-
)
9+
)
10+
SELECT
11+
`bfcol_32` AS `size`
12+
FROM `bfcte_0`
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
SELECT
2-
`bfcol_1` AS `int64_col`
3-
FROM (
1+
WITH `bfcte_0` AS (
42
SELECT
53
ARRAY_AGG(`int64_col` IGNORE NULLS ORDER BY `int64_col` IS NULL ASC, `int64_col` ASC) AS `bfcol_1`
64
FROM (
75
SELECT
86
`int64_col`
97
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
108
)
11-
)
9+
)
10+
SELECT
11+
`bfcol_1` AS `int64_col`
12+
FROM `bfcte_0`

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_ordered_unary_compiler/test_string_agg/out.sql

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
SELECT
2-
`bfcol_1` AS `string_col`
3-
FROM (
1+
WITH `bfcte_0` AS (
42
SELECT
53
COALESCE(
64
STRING_AGG(`string_col`, ','
@@ -14,4 +12,7 @@ FROM (
1412
`string_col`
1513
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
1614
)
17-
)
15+
)
16+
SELECT
17+
`bfcol_1` AS `string_col`
18+
FROM `bfcte_0`
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
SELECT
2-
`bfcol_2` AS `bool_col`,
3-
`bfcol_3` AS `int64_col`
4-
FROM (
1+
WITH `bfcte_0` AS (
52
SELECT
63
COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_2`,
74
COALESCE(LOGICAL_AND(`int64_col` <> 0), TRUE) AS `bfcol_3`
@@ -11,4 +8,8 @@ FROM (
118
`int64_col`
129
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
1310
)
14-
)
11+
)
12+
SELECT
13+
`bfcol_2` AS `bool_col`,
14+
`bfcol_3` AS `int64_col`
15+
FROM `bfcte_0`
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
SELECT
2-
`bfcol_2` AS `bool_col`,
3-
`bfcol_3` AS `int64_col`
4-
FROM (
1+
WITH `bfcte_0` AS (
52
SELECT
63
COALESCE(LOGICAL_OR(`bool_col`), FALSE) AS `bfcol_2`,
74
COALESCE(LOGICAL_OR(`int64_col` <> 0), FALSE) AS `bfcol_3`
@@ -11,4 +8,8 @@ FROM (
118
`int64_col`
129
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
1310
)
14-
)
11+
)
12+
SELECT
13+
`bfcol_2` AS `bool_col`,
14+
`bfcol_3` AS `int64_col`
15+
FROM `bfcte_0`

0 commit comments

Comments
 (0)