Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit e6043dd

Browse files
avoid rewrapping nodes, wrapping root
1 parent 28865fa commit e6043dd

File tree

40 files changed

+415
-376
lines changed

40 files changed

+415
-376
lines changed

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ def _compile_result_node(root: nodes.ResultNode) -> str:
107107
# of nodes using the same generator.
108108
uid_gen = guid.SequentialUIDGenerator()
109109
root = _remap_variables(root, uid_gen)
110+
# Remap variables creates too mayn new
111+
# root = rewrite.select_pullup(root, prefer_source_names=False)
110112
root = typing.cast(nodes.ResultNode, rewrite.defer_selection(root))
111113

112114
# Have to bind schema as the final step before compilation.

bigframes/core/rewrite/as_sql.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -231,51 +231,63 @@ def _as_sql_node(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
231231
nodes.AggregateNode,
232232
nodes.FromRangeNode,
233233
nodes.ConcatNode,
234+
sql_nodes.SqlSelectNode,
234235
)
235236

236237

237-
def _extract_nodes_as_ctes(
238+
def _insert_cte_markers(root: nodes.BigFrameNode) -> nodes.BigFrameNode:
239+
# important not to wrap nodes that are already wrapped
240+
wrapped_nodes = set(
241+
node.child for node in root.unique_nodes() if isinstance(node, nodes.CteNode)
242+
)
243+
244+
def maybe_insert_cte_marker(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
245+
if node == root:
246+
return node
247+
if isinstance(node, _LOGICAL_NODE_TYPES_TO_WRAP) and node not in wrapped_nodes:
248+
wrapped_nodes.add(node)
249+
return nodes.CteNode(node)
250+
return node
251+
252+
return root.top_down(maybe_insert_cte_marker)
253+
254+
255+
def _extract_ctes_to_with_expr(
238256
root: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator
239257
) -> nodes.BigFrameNode:
240-
# CTE nodes we must wrap, and logical nodes we want to wrap for style
241-
topological_extracted_nodes = list(
258+
topological_ctes = list(
242259
filter(
243-
lambda n: isinstance(n, (nodes.CteNode, *_LOGICAL_NODE_TYPES_TO_WRAP)),
260+
lambda n: isinstance(n, nodes.CteNode),
244261
root.iter_nodes_topo(),
245262
)
246263
)
247264
cte_names = tuple(
248-
next(uid_gen.get_uid_stream("bfcte_"))
249-
for _ in range(len(topological_extracted_nodes))
265+
next(uid_gen.get_uid_stream("bfcte_")) for _ in range(len(topological_ctes))
250266
)
251267

252-
if len(topological_extracted_nodes) == 0:
268+
if len(topological_ctes) == 0:
253269
return root
254270

255271
mapping = {
256-
extracted_node: sql_nodes.SqlCteRefNode(cte_name, tuple(extracted_node.fields))
257-
for extracted_node, cte_name in zip(topological_extracted_nodes, cte_names)
272+
cte_node: sql_nodes.SqlCteRefNode(cte_name, tuple(cte_node.fields))
273+
for cte_node, cte_name in zip(topological_ctes, cte_names)
258274
}
259275

260-
def _maybe_unwrap_cte(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
261-
"""Unwrap CTE nodes, but not logical nodes."""
262-
if isinstance(node, nodes.CteNode):
263-
return node.child
264-
return node
265-
266276
# Replace all CTEs with CTE references and wrap the new root in a WITH clause
267277
return sql_nodes.SqlWithCtesNode(
268278
root.top_down(lambda x: mapping.get(x, x)),
269279
cte_names,
270280
tuple(
271-
_maybe_unwrap_cte(extracted_node).top_down(lambda x: mapping.get(x, x) if x != extracted_node else x) for extracted_node in topological_extracted_nodes # type: ignore
281+
cte_node.child.top_down(lambda x: mapping.get(x, x)) for cte_node in topological_ctes # type: ignore
272282
),
273283
)
274284

275285

276286
def as_sql_nodes(
277287
root: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator
278288
) -> nodes.BigFrameNode:
279-
root = _extract_nodes_as_ctes(root, uid_gen)
280289
root = nodes.bottom_up(root, _as_sql_node)
290+
# Insert CTE markers to indicate where we want to split the query.
291+
root = _insert_cte_markers(root)
292+
root = _extract_ctes_to_with_expr(root, uid_gen)
281293
return root
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col`,
4+
`float64_col`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
6+
), `bfcte_1` AS (
27
SELECT
38
CORR(`int64_col`, `float64_col`) AS `bfcol_2`
4-
FROM (
5-
SELECT
6-
`int64_col`,
7-
`float64_col`
8-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
9-
)
9+
FROM `bfcte_0`
1010
)
1111
SELECT
1212
`bfcol_2` AS `corr_col`
13-
FROM `bfcte_0`
13+
FROM `bfcte_1`
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col`,
4+
`float64_col`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
6+
), `bfcte_1` AS (
27
SELECT
38
COVAR_SAMP(`int64_col`, `float64_col`) AS `bfcol_2`
4-
FROM (
5-
SELECT
6-
`int64_col`,
7-
`float64_col`
8-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
9-
)
9+
FROM `bfcte_0`
1010
)
1111
SELECT
1212
`bfcol_2` AS `cov_col`
13-
FROM `bfcte_0`
13+
FROM `bfcte_1`
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
*
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
5+
), `bfcte_1` AS (
26
SELECT
37
COUNT(1) AS `bfcol_32`
4-
FROM (
5-
SELECT
6-
*
7-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
8-
)
8+
FROM `bfcte_0`
99
)
1010
SELECT
1111
`bfcol_32` AS `size`
12-
FROM `bfcte_0`
12+
FROM `bfcte_1`
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
5+
), `bfcte_1` AS (
26
SELECT
37
ARRAY_AGG(`int64_col` IGNORE NULLS ORDER BY `int64_col` IS NULL ASC, `int64_col` ASC) AS `bfcol_1`
4-
FROM (
5-
SELECT
6-
`int64_col`
7-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
8-
)
8+
FROM `bfcte_0`
99
)
1010
SELECT
1111
`bfcol_1` AS `int64_col`
12-
FROM `bfcte_0`
12+
FROM `bfcte_1`
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
5+
), `bfcte_1` AS (
26
SELECT
37
COALESCE(
48
STRING_AGG(`string_col`, ','
@@ -7,12 +11,8 @@ WITH `bfcte_0` AS (
711
`string_col` ASC),
812
''
913
) AS `bfcol_1`
10-
FROM (
11-
SELECT
12-
`string_col`
13-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
14-
)
14+
FROM `bfcte_0`
1515
)
1616
SELECT
1717
`bfcol_1` AS `string_col`
18-
FROM `bfcte_0`
18+
FROM `bfcte_1`
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col`,
4+
`int64_col`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
6+
), `bfcte_1` AS (
27
SELECT
38
COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_2`,
49
COALESCE(LOGICAL_AND(`int64_col` <> 0), TRUE) AS `bfcol_3`
5-
FROM (
6-
SELECT
7-
`bool_col`,
8-
`int64_col`
9-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
10-
)
10+
FROM `bfcte_0`
1111
)
1212
SELECT
1313
`bfcol_2` AS `bool_col`,
1414
`bfcol_3` AS `int64_col`
15-
FROM `bfcte_0`
15+
FROM `bfcte_1`
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col`,
4+
`int64_col`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
6+
), `bfcte_1` AS (
27
SELECT
38
COALESCE(LOGICAL_OR(`bool_col`), FALSE) AS `bfcol_2`,
49
COALESCE(LOGICAL_OR(`int64_col` <> 0), FALSE) AS `bfcol_3`
5-
FROM (
6-
SELECT
7-
`bool_col`,
8-
`int64_col`
9-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
10-
)
10+
FROM `bfcte_0`
1111
)
1212
SELECT
1313
`bfcol_2` AS `bool_col`,
1414
`bfcol_3` AS `int64_col`
15-
FROM `bfcte_0`
15+
FROM `bfcte_1`
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
5+
), `bfcte_1` AS (
26
SELECT
37
ANY_VALUE(`int64_col`) AS `bfcol_1`
4-
FROM (
5-
SELECT
6-
`int64_col`
7-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
8-
)
8+
FROM `bfcte_0`
99
)
1010
SELECT
1111
`bfcol_1` AS `int64_col`
12-
FROM `bfcte_0`
12+
FROM `bfcte_1`

0 commit comments

Comments
 (0)