Skip to content

Commit 2be7b15

Browse files
committed
chore(bigframes): update the isin_join sqlglot implementation for better performance
1 parent 1111e1c commit 2be7b15

File tree

2 files changed

+19
-27
lines changed
  • packages/bigframes
    • bigframes/core/compile/sqlglot
    • tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin

2 files changed

+19
-27
lines changed

packages/bigframes/bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -357,34 +357,25 @@ def isin_join(
357357
or conditions[1].dtype == dtypes.FLOAT_DTYPE
358358
):
359359
force_float_domain = True
360-
part1_id = sql.identifier("bfpart1")
361-
part2_id = sql.identifier("bfpart2")
362360
left_expr1, left_expr2 = _value_to_non_null_identity(
363361
conditions[0], force_float_domain
364362
)
365-
left_as_struct = sge.Struct(
366-
expressions=[
367-
sge.PropertyEQ(this=part1_id, expression=left_expr1),
368-
sge.PropertyEQ(this=part2_id, expression=left_expr2),
369-
]
370-
)
371363
right_expr1, right_expr2 = _value_to_non_null_identity(
372364
conditions[1], force_float_domain
373365
)
374-
right_select = right.expr.select(
375-
*[
376-
sge.Struct(
377-
expressions=[
378-
sge.PropertyEQ(this=part1_id, expression=right_expr1),
379-
sge.PropertyEQ(this=part2_id, expression=right_expr2),
380-
]
381-
)
382-
],
383-
)
384366

385-
new_column = sge.In(
386-
this=left_as_struct,
387-
expressions=[right_select.subquery()],
367+
# Use EXISTS for better performance.
368+
# Compare the COALESCE-based non-null identity pairs of both sides in the WHERE clause so that a NULL on the left matches only a NULL on the right.
369+
new_column = sge.Exists(
370+
this=sge.Select()
371+
.select(sge.convert(1))
372+
.from_(right.expr.as_from_item())
373+
.where(
374+
sge.and_(
375+
sge.EQ(this=left_expr1, expression=right_expr1),
376+
sge.EQ(this=left_expr2, expression=right_expr2),
377+
)
378+
)
388379
)
389380
else:
390381
new_column = sge.In(

packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin/out.sql

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@ WITH `bfcte_0` AS (
2020
), `bfcte_4` AS (
2121
SELECT
2222
*,
23-
STRUCT(COALESCE(`bfcol_4`, 0) AS `bfpart1`, COALESCE(`bfcol_4`, 1) AS `bfpart2`) IN (
24-
(
25-
SELECT
26-
STRUCT(COALESCE(`bfcol_0`, 0) AS `bfpart1`, COALESCE(`bfcol_0`, 1) AS `bfpart2`)
27-
FROM `bfcte_3`
28-
)
23+
EXISTS(
24+
SELECT
25+
1
26+
FROM `bfcte_3`
27+
WHERE
28+
COALESCE(`bfcol_4`, 0) = COALESCE(`bfcol_0`, 0)
29+
AND COALESCE(`bfcol_4`, 1) = COALESCE(`bfcol_0`, 1)
2930
) AS `bfcol_5`
3031
FROM `bfcte_1`
3132
)

0 commit comments

Comments
 (0)