@@ -56,8 +56,7 @@ def __init__(
5656 column .resolve (table ) # type:ignore
5757 # TODO(https://github.com/ibis-project/ibis/issues/7613): use
5858 # public API to refer to Deferred type.
59- if isinstance (column , ibis_deferred .Deferred )
60- else column
59+ if isinstance (column , ibis_deferred .Deferred ) else column
6160 for column in columns
6261 )
6362 # To allow for more efficient lookup by column name, create a
@@ -363,35 +362,40 @@ def isin_join(
363362 The joined expression.
364363 """
365364 left_table = self ._to_ibis_expr ()
366- right_table = right ._to_ibis_expr ()
367- if join_nulls : # nullsafe isin join must actually use "exists" subquery
368- new_column = (
369- (
370- _join_condition (
371- left_table [conditions [0 ]],
372- right_table [conditions [1 ]],
373- nullsafe = True ,
374- )
375- )
376- .any ()
377- .name (indicator_col )
378- )
365+ # Distinct right table to avoid duplicating rows in left join
366+ right_table = right ._to_ibis_expr ().distinct ()
367+
368+ # Rename right column to avoid name clash with left table
369+ right_key_renamed = "__isin_right_key__"
370+ right_table = right_table .select (
371+ right_table [conditions [1 ]].name (right_key_renamed )
372+ )
379373
380- else : # Can do simpler "in" subquery
381- new_column = (
382- ( left_table [conditions [0 ]])
383- . isin (( right_table [conditions [ 1 ]]))
384- . name ( indicator_col )
374+ join_conditions = [
375+ _join_condition (
376+ left_table [conditions [0 ]],
377+ right_table [right_key_renamed ],
378+ nullsafe = join_nulls ,
385379 )
380+ ]
381+
382+ combined_table = bigframes_vendored .ibis .join (
383+ left_table ,
384+ right_table ,
385+ predicates = join_conditions ,
386+ how = "left" ,
387+ )
388+
389+ new_column = combined_table [right_key_renamed ].notnull ().name (indicator_col )
386390
387391 columns = tuple (
388392 itertools .chain (
389- (left_table [col .get_name ()] for col in self .columns ), (new_column ,)
393+ (combined_table [col .get_name ()] for col in self .columns ), (new_column ,)
390394 )
391395 )
392396
393397 return UnorderedIR (
394- left_table ,
398+ combined_table ,
395399 columns = columns ,
396400 )
397401
@@ -461,23 +465,36 @@ def is_window(column: ibis_types.Value) -> bool:
def _string_cast_join_cond(
    lvalue: ibis_types.Column, rvalue: ibis_types.Column
) -> ibis_types.BooleanColumn:
    """Build an equality predicate on string casts that also matches NULL to NULL.

    Both operands are cast to string and compared twice: once with missing
    values replaced by "0" and once with missing values replaced by "1".
    The two comparisons are AND-ed together, so:

    * two NULLs compare equal under both placeholders and therefore match;
    * a NULL against a real value can agree with at most one placeholder
      and therefore never matches;
    * two non-NULL values match exactly when their string casts are equal.

    Args:
        lvalue: Left-hand column of the comparison.
        rvalue: Right-hand column of the comparison.

    Returns:
        The combined boolean expression, typed as a ``BooleanColumn``.
    """
    # Function-scope import, as in the original change.
    import bigframes_vendored.ibis as ibis

    left_str = lvalue.cast(ibis_dtypes.str)
    right_str = rvalue.cast(ibis_dtypes.str)

    def _equal_with_placeholder(placeholder: str):
        # Substitute the same placeholder on both sides, then compare.
        left_filled = ibis.coalesce(left_str, ibis_types.literal(placeholder))
        right_filled = ibis.coalesce(right_str, ibis_types.literal(placeholder))
        return left_filled == right_filled

    # Two different placeholders are required: with a single one, a NULL would
    # wrongly match a real value that happens to equal the placeholder.
    result = _equal_with_placeholder("0") & _equal_with_placeholder("1")
    return typing.cast(ibis_types.BooleanColumn, result)
472480
473481
474482def _numeric_join_cond (
475483 lvalue : ibis_types .Column , rvalue : ibis_types .Column
476484) -> ibis_types .BooleanColumn :
477- lvalue1 = lvalue .fill_null (ibis_types .literal (0 ))
478- lvalue2 = lvalue .fill_null (ibis_types .literal (1 ))
479- rvalue1 = rvalue .fill_null (ibis_types .literal (0 ))
480- rvalue2 = rvalue .fill_null (ibis_types .literal (1 ))
485+ if lvalue .type ().is_floating ():
486+ lvalue1 = lvalue .fill_null (ibis_types .literal (0.0 ))
487+ lvalue2 = lvalue .fill_null (ibis_types .literal (1.0 ))
488+ else :
489+ lvalue1 = lvalue .fill_null (ibis_types .literal (0 ))
490+ lvalue2 = lvalue .fill_null (ibis_types .literal (1 ))
491+
492+ if rvalue .type ().is_floating ():
493+ rvalue1 = rvalue .fill_null (ibis_types .literal (0.0 ))
494+ rvalue2 = rvalue .fill_null (ibis_types .literal (1.0 ))
495+ else :
496+ rvalue1 = rvalue .fill_null (ibis_types .literal (0 ))
497+ rvalue2 = rvalue .fill_null (ibis_types .literal (1 ))
481498 if lvalue .type ().is_floating () and rvalue .type ().is_floating ():
482499 # NaN aren't equal so need to coalesce as well with diff constants
483500 lvalue1 = (
@@ -507,13 +524,9 @@ def _numeric_join_cond(
def _join_condition(
    lvalue: ibis_types.Column, rvalue: ibis_types.Column, nullsafe: bool
) -> ibis_types.BooleanColumn:
    """Choose the equality predicate used to join ``lvalue`` against ``rvalue``.

    Args:
        lvalue: Left-hand join key column.
        rvalue: Right-hand join key column.
        nullsafe: When true, build a predicate via ``_string_cast_join_cond``
            instead of using plain ``==``.

    Returns:
        A boolean column expression suitable as a join predicate.
    """
    # Both sides must be inspected; checking only ``lvalue`` twice would send
    # a float/non-float pair down the float-only path.
    both_floating = lvalue.type().is_floating() and rvalue.type().is_floating()
    if both_floating:
        # Need to always make safe join condition to handle nan, even if no nulls
        return _numeric_join_cond(lvalue, rvalue)

    if nullsafe:
        # Every non-float type pair goes through the string-cast comparison.
        return _string_cast_join_cond(lvalue, rvalue)

    return typing.cast(ibis_types.BooleanColumn, lvalue == rvalue)
0 commit comments