Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 75e02cc

Browse files
fix id issues preventing cte factoring
1 parent 5f2ed0d commit 75e02cc

File tree

16 files changed

+207
-336
lines changed

16 files changed

+207
-336
lines changed

bigframes/core/bigframe_node.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -330,22 +330,12 @@ def top_down(
330330
"""
331331
Perform a top-down transformation of the BigFrameNode tree.
332332
"""
333-
to_process = [self]
334-
results: Dict[BigFrameNode, BigFrameNode] = {}
335333

336-
while to_process:
337-
item = to_process.pop()
338-
if item not in results.keys():
339-
item_result = transform(item)
340-
results[item] = item_result
341-
to_process.extend(item_result.child_nodes)
334+
@functools.cache
335+
def recursive_transform(node: BigFrameNode) -> BigFrameNode:
336+
return transform(node).transform_children(recursive_transform)
342337

343-
to_process = [self]
344-
# for each processed item, replace its children
345-
for item in reversed(list(results.keys())):
346-
results[item] = results[item].transform_children(lambda x: results[x])
347-
348-
return results[self]
338+
return recursive_transform(self)
349339

350340
def bottom_up(
351341
self: BigFrameNode,

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def _remap_variables(
9898
result_node, _ = rewrite.remap_variables(
9999
node, map(identifiers.ColumnId, uid_gen.get_uid_stream("bfcol_"))
100100
)
101+
result_node.validate_tree()
101102
return typing.cast(nodes.ResultNode, result_node)
102103

103104

bigframes/core/rewrite/ctes.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,17 @@ def extract_ctes(root: nodes.BigFrameNode) -> nodes.BigFrameNode:
2525
for child in parent.child_nodes:
2626
node_parents[child] += 1
2727

28-
# we just mark in place, rather than pull out of the tree.
28+
# everywhere a multi-parent node is referenced, wrap it in a CTE node
2929
def insert_cte_markers(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
30-
if node_parents[node] > 1:
31-
return nodes.CteNode(node)
32-
return node
30+
def _add_cte_if_needed(child: nodes.BigFrameNode) -> nodes.BigFrameNode:
31+
if node_parents[child] > 1:
32+
return nodes.CteNode(child)
33+
return child
34+
35+
if isinstance(node, nodes.CteNode):
36+
# don't re-wrap CTE nodes
37+
return node
38+
39+
return node.transform_children(_add_cte_if_needed)
3340

3441
return root.top_down(insert_cte_markers)

bigframes/core/rewrite/identifiers.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ def _create_mapping_operator(
3838

3939
def _mapping_operator(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
4040
# Step 1: Get the local remapping for the current node.
41-
local_def_remaps = id_def_remapping_by_node.get(node, {})
42-
local_ref_remaps = id_ref_remapping_by_node.get(node, {})
41+
local_def_remaps = id_def_remapping_by_node[node]
42+
local_ref_remaps = id_ref_remapping_by_node[node]
4343

44-
node = node.remap_vars(local_def_remaps)
45-
node = node.remap_refs(local_ref_remaps)
44+
result = node.remap_vars(local_def_remaps)
45+
result = result.remap_refs(local_ref_remaps)
4646

47-
return node
47+
return result
4848

4949
return _mapping_operator
5050

@@ -80,19 +80,33 @@ def remap_variables(
8080
id_ref_remaps: dict[
8181
nodes.BigFrameNode, dict[identifiers.ColumnId, identifiers.ColumnId]
8282
] = {}
83-
for node in root.iter_nodes_topo(): # bottom_up
83+
for node in root.iter_nodes_topo(): # bottom up
8484
local_def_remaps = {
8585
col_id: next(id_generator) for col_id in node.node_defined_ids
8686
}
8787
id_def_remaps[node] = local_def_remaps
8888

8989
local_ref_remaps = {}
90-
for child in node.child_nodes: # inherit ref and def mappings from children
90+
91+
# InNode is special case as ID scope inherited purely from left side
92+
inheriting_nodes = (
93+
[node.child_nodes[0]]
94+
if isinstance(node, nodes.InNode)
95+
else node.child_nodes
96+
)
97+
for child in inheriting_nodes: # inherit ref and def mappings from children
98+
if not child.defines_namespace: # these nodes represent new id spaces
99+
local_ref_remaps.update(
100+
{
101+
old_id: new_id
102+
for old_id, new_id in id_ref_remaps[child].items()
103+
if old_id in child.ids
104+
}
105+
)
91106
local_ref_remaps.update(id_def_remaps[child])
92-
if not child.defines_namespace:
93-
local_ref_remaps.update(id_ref_remaps[child])
94107
id_ref_remaps[node] = local_ref_remaps
95108

109+
# have to do top down to preserve node identities
96110
return (
97111
root.top_down(_create_mapping_operator(id_def_remaps, id_ref_remaps)),
98112
id_def_remaps[root],
Lines changed: 26 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,48 @@
11
WITH `bfcte_0` AS (
2-
SELECT
3-
`rowindex` AS `bfcol_18`,
4-
`rowindex` AS `bfcol_19`,
5-
`int64_col` AS `bfcol_20`,
6-
`string_col` AS `bfcol_21`
7-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
8-
), `bfcte_1` AS (
92
SELECT
103
`rowindex` AS `bfcol_3`,
114
`rowindex` AS `bfcol_4`,
125
`int64_col` AS `bfcol_5`,
136
`string_col` AS `bfcol_6`
147
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
15-
), `bfcte_2` AS (
16-
SELECT
17-
*
18-
FROM `bfcte_0`
198
)
209
SELECT
21-
`bfcol_30` AS `rowindex`,
22-
`bfcol_31` AS `rowindex_1`,
23-
`bfcol_32` AS `int64_col`,
24-
`bfcol_33` AS `string_col`
10+
`bfcol_23` AS `rowindex`,
11+
`bfcol_24` AS `rowindex_1`,
12+
`bfcol_25` AS `int64_col`,
13+
`bfcol_26` AS `string_col`
2514
FROM (
2615
SELECT
27-
`bfcol_9` AS `bfcol_30`,
28-
`bfcol_10` AS `bfcol_31`,
29-
`bfcol_11` AS `bfcol_32`,
30-
`bfcol_12` AS `bfcol_33`,
31-
`bfcol_13` AS `bfcol_34`,
32-
`bfcol_14` AS `bfcol_35`
16+
`bfcol_17` AS `bfcol_23`,
17+
`bfcol_18` AS `bfcol_24`,
18+
`bfcol_19` AS `bfcol_25`,
19+
`bfcol_20` AS `bfcol_26`,
20+
`bfcol_21` AS `bfcol_27`,
21+
`bfcol_22` AS `bfcol_28`
3322
FROM (
3423
(
3524
SELECT
36-
`bfcol_3` AS `bfcol_9`,
37-
`bfcol_4` AS `bfcol_10`,
38-
`bfcol_5` AS `bfcol_11`,
39-
`bfcol_6` AS `bfcol_12`,
40-
0 AS `bfcol_13`,
41-
ROW_NUMBER() OVER () - 1 AS `bfcol_14`
42-
FROM `bfcte_1`
25+
`bfcol_3` AS `bfcol_17`,
26+
`bfcol_4` AS `bfcol_18`,
27+
`bfcol_5` AS `bfcol_19`,
28+
`bfcol_6` AS `bfcol_20`,
29+
0 AS `bfcol_21`,
30+
ROW_NUMBER() OVER () - 1 AS `bfcol_22`
31+
FROM `bfcte_0`
4332
)
4433
UNION ALL
4534
(
4635
SELECT
47-
`bfcol_18` AS `bfcol_24`,
48-
`bfcol_19` AS `bfcol_25`,
49-
`bfcol_20` AS `bfcol_26`,
50-
`bfcol_21` AS `bfcol_27`,
51-
1 AS `bfcol_28`,
52-
ROW_NUMBER() OVER () - 1 AS `bfcol_29`
53-
FROM `bfcte_2`
36+
`bfcol_3` AS `bfcol_11`,
37+
`bfcol_4` AS `bfcol_12`,
38+
`bfcol_5` AS `bfcol_13`,
39+
`bfcol_6` AS `bfcol_14`,
40+
1 AS `bfcol_15`,
41+
ROW_NUMBER() OVER () - 1 AS `bfcol_16`
42+
FROM `bfcte_0`
5443
)
5544
)
5645
)
5746
ORDER BY
58-
`bfcol_34` ASC NULLS LAST,
59-
`bfcol_35` ASC NULLS LAST
47+
`bfcol_27` ASC NULLS LAST,
48+
`bfcol_28` ASC NULLS LAST
Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,63 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`float64_col` AS `bfcol_23`,
4-
`int64_col` AS `bfcol_24`
3+
`float64_col` AS `bfcol_5`,
4+
`int64_col` AS `bfcol_6`
55
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
66
), `bfcte_1` AS (
77
SELECT
8-
`float64_col` AS `bfcol_2`,
9-
`int64_col` AS `bfcol_3`
10-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
11-
), `bfcte_2` AS (
12-
SELECT
13-
`float64_col` AS `bfcol_34`,
14-
`int64_too` AS `bfcol_35`
15-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
16-
WHERE
17-
`bool_col`
18-
), `bfcte_3` AS (
19-
SELECT
20-
`float64_col` AS `bfcol_13`,
21-
`int64_too` AS `bfcol_14`
8+
`float64_col` AS `bfcol_7`,
9+
`int64_too` AS `bfcol_8`
2210
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
2311
WHERE
2412
`bool_col`
25-
), `bfcte_4` AS (
26-
SELECT
27-
*
28-
FROM `bfcte_0`
29-
), `bfcte_5` AS (
30-
SELECT
31-
*
32-
FROM `bfcte_2`
3313
)
3414
SELECT
35-
`bfcol_42` AS `float64_col`,
36-
`bfcol_43` AS `int64_col`
15+
`bfcol_33` AS `float64_col`,
16+
`bfcol_34` AS `int64_col`
3717
FROM (
3818
SELECT
39-
`bfcol_6` AS `bfcol_42`,
40-
`bfcol_7` AS `bfcol_43`,
41-
`bfcol_8` AS `bfcol_44`,
42-
`bfcol_9` AS `bfcol_45`
19+
`bfcol_21` AS `bfcol_33`,
20+
`bfcol_22` AS `bfcol_34`,
21+
`bfcol_23` AS `bfcol_35`,
22+
`bfcol_24` AS `bfcol_36`
4323
FROM (
4424
(
4525
SELECT
46-
`bfcol_2` AS `bfcol_6`,
47-
`bfcol_3` AS `bfcol_7`,
48-
0 AS `bfcol_8`,
49-
ROW_NUMBER() OVER (ORDER BY `bfcol_3` ASC NULLS LAST) - 1 AS `bfcol_9`
50-
FROM `bfcte_1`
26+
`bfcol_5` AS `bfcol_21`,
27+
`bfcol_6` AS `bfcol_22`,
28+
0 AS `bfcol_23`,
29+
ROW_NUMBER() OVER (ORDER BY `bfcol_6` ASC NULLS LAST) - 1 AS `bfcol_24`
30+
FROM `bfcte_0`
5131
)
5232
UNION ALL
5333
(
5434
SELECT
55-
`bfcol_13` AS `bfcol_17`,
56-
`bfcol_14` AS `bfcol_18`,
57-
1 AS `bfcol_19`,
58-
ROW_NUMBER() OVER () - 1 AS `bfcol_20`
59-
FROM `bfcte_3`
35+
`bfcol_7` AS `bfcol_29`,
36+
`bfcol_8` AS `bfcol_30`,
37+
1 AS `bfcol_31`,
38+
ROW_NUMBER() OVER () - 1 AS `bfcol_32`
39+
FROM `bfcte_1`
6040
)
6141
UNION ALL
6242
(
6343
SELECT
64-
`bfcol_23` AS `bfcol_27`,
65-
`bfcol_24` AS `bfcol_28`,
66-
2 AS `bfcol_29`,
67-
ROW_NUMBER() OVER (ORDER BY `bfcol_24` ASC NULLS LAST) - 1 AS `bfcol_30`
68-
FROM `bfcte_4`
44+
`bfcol_5` AS `bfcol_17`,
45+
`bfcol_6` AS `bfcol_18`,
46+
2 AS `bfcol_19`,
47+
ROW_NUMBER() OVER (ORDER BY `bfcol_6` ASC NULLS LAST) - 1 AS `bfcol_20`
48+
FROM `bfcte_0`
6949
)
7050
UNION ALL
7151
(
7252
SELECT
73-
`bfcol_34` AS `bfcol_38`,
74-
`bfcol_35` AS `bfcol_39`,
75-
3 AS `bfcol_40`,
76-
ROW_NUMBER() OVER () - 1 AS `bfcol_41`
77-
FROM `bfcte_5`
53+
`bfcol_7` AS `bfcol_25`,
54+
`bfcol_8` AS `bfcol_26`,
55+
3 AS `bfcol_27`,
56+
ROW_NUMBER() OVER () - 1 AS `bfcol_28`
57+
FROM `bfcte_1`
7858
)
7959
)
8060
)
8161
ORDER BY
82-
`bfcol_44` ASC NULLS LAST,
83-
`bfcol_45` ASC NULLS LAST
62+
`bfcol_35` ASC NULLS LAST,
63+
`bfcol_36` ASC NULLS LAST

0 commit comments

Comments
 (0)