Skip to content

Commit 479e44d

Browse files
fix(bigframes): Fix bugs compiling ambiguous ids and in subqueries (#16617)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/google-cloud-python/issues) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
1 parent d80f278 commit 479e44d

File tree

11 files changed

+72
-55
lines changed

11 files changed

+72
-55
lines changed

packages/bigframes/bigframes/core/compile/compiled.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ def isin_join(
381381
new_column = (
382382
(left_table[conditions[0]])
383383
.isin((right_table[conditions[1]]))
384+
.fillna(False)
384385
.name(indicator_col)
385386
)
386387

packages/bigframes/bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -385,9 +385,13 @@ def isin_join(
385385
)
386386
)
387387
else:
388-
new_column = sge.In(
389-
this=conditions[0].expr,
390-
expressions=[right._as_subquery()],
388+
new_column = sge.func(
389+
"COALESCE",
390+
sge.In(
391+
this=conditions[0].expr,
392+
expressions=[right._as_subquery()],
393+
),
394+
sql.literal(False, dtypes.BOOL_DTYPE),
391395
)
392396

393397
new_column = sge.Alias(

packages/bigframes/bigframes/session/_io/bigquery/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -516,19 +516,21 @@ def to_query(
516516
) -> str:
517517
"""Compile query_or_table with conditions(filters, wildcards) to query."""
518518
if is_query(query_or_table):
519-
sub_query = f"({query_or_table})"
519+
from_item = f"({query_or_table})"
520520
else:
521521
# Table ID can have 1, 2, 3, or 4 parts. Quoting all parts to be safe.
522522
# See: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers
523523
parts = query_or_table.split(".")
524-
sub_query = ".".join(f"`{part}`" for part in parts)
524+
from_item = ".".join(f"`{part}`" for part in parts)
525525

526526
# TODO(b/338111344): Generate an index based on DefaultIndexKind if we
527527
# don't have index columns specified.
528528
if columns:
529529
# We only reduce the selection if columns is set, but we always
530530
# want to make sure index_cols is also included.
531-
select_clause = "SELECT " + ", ".join(f"`{column}`" for column in columns)
531+
select_clause = "SELECT " + ", ".join(
532+
f"`_bf_source`.`{column}`" for column in columns
533+
)
532534
else:
533535
select_clause = "SELECT *"
534536

@@ -545,7 +547,7 @@ def to_query(
545547

546548
return (
547549
f"{select_clause} "
548-
f"FROM {sub_query}"
550+
f"FROM {from_item} AS _bf_source"
549551
f"{time_travel_clause}{where_clause}{limit_clause}"
550552
)
551553

packages/bigframes/bigframes/session/polars_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def _is_node_polars_executable(node: nodes.BigFrameNode):
122122
return False
123123
for expr in node._node_expressions:
124124
if isinstance(expr, agg_expressions.Aggregation):
125-
if not type(expr.op) in _COMPATIBLE_AGG_OPS:
125+
if type(expr.op) not in _COMPATIBLE_AGG_OPS:
126126
return False
127127
if isinstance(expr, expression.Expression):
128128
if not set(map(type, _get_expr_ops(expr))).issubset(_COMPATIBLE_SCALAR_OPS):

packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin_not_nullable/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ WITH `bfcte_0` AS (
2020
), `bfcte_4` AS (
2121
SELECT
2222
*,
23-
`bfcol_4` IN ((
23+
COALESCE(`bfcol_4` IN ((
2424
SELECT
2525
*
2626
FROM `bfcte_3`
27-
)) AS `bfcol_5`
27+
)), FALSE) AS `bfcol_5`
2828
FROM `bfcte_1`
2929
)
3030
SELECT

packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/16/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ WITH `bfcte_0` AS (
5151
), `bfcte_6` AS (
5252
SELECT
5353
*,
54-
`bfcol_58` IN ((
54+
COALESCE(`bfcol_58` IN ((
5555
SELECT
5656
*
5757
FROM `bfcte_5`
58-
)) AS `bfcol_59`
58+
)), FALSE) AS `bfcol_59`
5959
FROM `bfcte_4`
6060
), `bfcte_7` AS (
6161
SELECT

packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/18/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ WITH `bfcte_0` AS (
4444
), `bfcte_7` AS (
4545
SELECT
4646
*,
47-
`bfcol_4` IN ((
47+
COALESCE(`bfcol_4` IN ((
4848
SELECT
4949
*
5050
FROM `bfcte_6`
51-
)) AS `bfcol_14`
51+
)), FALSE) AS `bfcol_14`
5252
FROM `bfcte_2`
5353
), `bfcte_8` AS (
5454
SELECT

packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/20/out.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,11 @@ WITH `bfcte_0` AS (
8585
), `bfcte_10` AS (
8686
SELECT
8787
*,
88-
`bfcol_2` IN ((
88+
COALESCE(`bfcol_2` IN ((
8989
SELECT
9090
*
9191
FROM `bfcte_8`
92-
)) AS `bfcol_37`
92+
)), FALSE) AS `bfcol_37`
9393
FROM `bfcte_1`
9494
), `bfcte_11` AS (
9595
SELECT
@@ -127,11 +127,11 @@ WITH `bfcte_0` AS (
127127
), `bfcte_15` AS (
128128
SELECT
129129
*,
130-
`bfcol_41` IN ((
130+
COALESCE(`bfcol_41` IN ((
131131
SELECT
132132
*
133133
FROM `bfcte_14`
134-
)) AS `bfcol_62`
134+
)), FALSE) AS `bfcol_62`
135135
FROM `bfcte_7`
136136
)
137137
SELECT

packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/22/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,11 @@ WITH `bfcte_0` AS (
9292
), `bfcte_12` AS (
9393
SELECT
9494
*,
95-
`bfcol_61` IN ((
95+
COALESCE(`bfcol_61` IN ((
9696
SELECT
9797
*
9898
FROM `bfcte_6`
99-
)) AS `bfcol_64`
99+
)), FALSE) AS `bfcol_64`
100100
FROM `bfcte_11`
101101
), `bfcte_13` AS (
102102
SELECT

packages/bigframes/tests/unit/session/test_io_bigquery.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
344344
2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc
345345
),
346346
(
347-
"SELECT `row_index`, `string_col` FROM `test_table` "
347+
"SELECT `_bf_source`.`row_index`, `_bf_source`.`string_col` FROM `test_table` AS _bf_source "
348348
"FOR SYSTEM_TIME AS OF CAST('2024-05-14T12:42:36.125125+00:00' AS TIMESTAMP) "
349349
"WHERE `rowindex` NOT IN (0, 6) OR `string_col` IN ('Hello, World!', "
350350
"'こんにちは') LIMIT 123"
@@ -369,11 +369,11 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
369369
2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc
370370
),
371371
(
372-
"""SELECT `rowindex`, `string_col` FROM (SELECT
372+
"""SELECT `_bf_source`.`rowindex`, `_bf_source`.`string_col` FROM (SELECT
373373
rowindex,
374374
string_col,
375375
FROM `test_table` AS t
376-
) """
376+
) AS _bf_source """
377377
"FOR SYSTEM_TIME AS OF CAST('2024-05-14T12:42:36.125125+00:00' AS TIMESTAMP) "
378378
"WHERE `rowindex` < 4 AND `string_col` = 'Hello, World!' "
379379
"LIMIT 123"
@@ -386,7 +386,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
386386
[],
387387
None, # max_results
388388
None, # time_travel_timestampe
389-
"SELECT `col_a`, `col_b` FROM `test_table`",
389+
"SELECT `_bf_source`.`col_a`, `_bf_source`.`col_b` FROM `test_table` AS _bf_source",
390390
id="table-columns",
391391
),
392392
pytest.param(
@@ -395,7 +395,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
395395
[("date_col", ">", "2022-10-20")],
396396
None, # max_results
397397
None, # time_travel_timestampe
398-
"SELECT * FROM `test_table` WHERE `date_col` > '2022-10-20'",
398+
"SELECT * FROM `test_table` AS _bf_source WHERE `date_col` > '2022-10-20'",
399399
id="table-filter",
400400
),
401401
pytest.param(
@@ -404,7 +404,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
404404
[],
405405
None, # max_results
406406
None, # time_travel_timestampe
407-
"SELECT * FROM `test_table*`",
407+
"SELECT * FROM `test_table*` AS _bf_source",
408408
id="wildcard-no_params",
409409
),
410410
pytest.param(
@@ -413,7 +413,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
413413
[("_TABLE_SUFFIX", ">", "2022-10-20")],
414414
None, # max_results
415415
None, # time_travel_timestampe
416-
"SELECT * FROM `test_table*` WHERE `_TABLE_SUFFIX` > '2022-10-20'",
416+
"SELECT * FROM `test_table*` AS _bf_source WHERE `_TABLE_SUFFIX` > '2022-10-20'",
417417
id="wildcard-filter",
418418
),
419419
],

0 commit comments

Comments
 (0)