diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index dd27587433..5bb278e882 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -663,7 +663,7 @@ def datetime_to_integer_label_non_fixed_frequency( .else_((x_int - first - 1) // us + 1) # type: ignore .end() ) - elif rule_code == "ME": # Monthly + elif rule_code in ("M", "ME"): # Monthly x_int = x.year() * 12 + x.month() - 1 # type: ignore first = y.year() * 12 + y.month() - 1 # type: ignore x_int_label = ( @@ -672,7 +672,7 @@ def datetime_to_integer_label_non_fixed_frequency( .else_((x_int - first - 1) // n + 1) # type: ignore .end() ) - elif rule_code == "QE-DEC": # Quarterly + elif rule_code in ("Q-DEC", "QE-DEC"): # Quarterly x_int = x.year() * 4 + x.quarter() - 1 # type: ignore first = y.year() * 4 + y.quarter() - 1 # type: ignore x_int_label = ( @@ -681,7 +681,7 @@ def datetime_to_integer_label_non_fixed_frequency( .else_((x_int - first - 1) // n + 1) # type: ignore .end() ) - elif rule_code == "YE-DEC": # Yearly + elif rule_code in ("A-DEC", "Y-DEC", "YE-DEC"): # Yearly x_int = x.year() # type: ignore first = y.year() # type: ignore x_int_label = ( @@ -749,7 +749,7 @@ def integer_label_to_datetime_op_non_fixed_frequency( .cast(ibis_dtypes.Timestamp(timezone="UTC")) .cast(y.type()) ) - elif rule_code == "ME": # Monthly + elif rule_code in ("M", "ME"): # Monthly one = ibis_types.literal(1) twelve = ibis_types.literal(12) first = y.year() * twelve + y.month() - one # type: ignore @@ -769,7 +769,7 @@ def integer_label_to_datetime_op_non_fixed_frequency( 0, ) x_label = next_month_date - ibis_api.interval(days=1) - elif rule_code == "QE-DEC": # Quarterly + elif rule_code in ("Q-DEC", "QE-DEC"): # Quarterly one = ibis_types.literal(1) three = ibis_types.literal(3) four = ibis_types.literal(4) @@ -792,7 +792,7 @@ def integer_label_to_datetime_op_non_fixed_frequency( ) x_label = next_month_date - ibis_api.interval(days=1) - elif rule_code == "YE-DEC": # Yearly + elif rule_code in ("A-DEC", "Y-DEC", "YE-DEC"): # Yearly one = ibis_types.literal(1) first = y.year() # type: ignore x = x * n + first # type: ignore diff --git a/bigframes/core/compile/sqlglot/aggregate_compiler.py b/bigframes/core/compile/sqlglot/aggregate_compiler.py index f86e2af0de..9f72e1c794 100644 --- a/bigframes/core/compile/sqlglot/aggregate_compiler.py +++ b/bigframes/core/compile/sqlglot/aggregate_compiler.py @@ -70,7 +70,5 @@ def compile_analytic( aggregate.arg.output_type, ) return unary_compiler.compile(aggregate.op, column, window) - elif isinstance(aggregate, agg_expressions.BinaryAggregation): - raise NotImplementedError("binary analytic operations not yet supported") else: raise ValueError(f"Unexpected analytic operation: {aggregate}") diff --git a/bigframes/core/compile/sqlglot/expressions/array_ops.py b/bigframes/core/compile/sqlglot/expressions/array_ops.py index eb7582cb16..b2c8c1c568 100644 --- a/bigframes/core/compile/sqlglot/expressions/array_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/array_ops.py @@ -105,31 +105,6 @@ def _coerce_bool_to_int(typed_expr: TypedExpr) -> sge.Expression: return typed_expr.expr -def _string_slice(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression: - # local name for each element in the array - el = sg.to_identifier("el") - # local name for the index in the array - slice_idx = sg.to_identifier("slice_idx") - - conditions: typing.List[sge.Predicate] = [slice_idx >= op.start] - if op.stop is not None: - conditions.append(slice_idx < op.stop) - - selected_elements = ( - sge.select(el) - .from_( - sge.Unnest( - expressions=[expr.expr], - alias=sge.TableAlias(columns=[el]), - offset=slice_idx, - ) - ) - .where(*conditions) - ) - - return sge.array(selected_elements) - - def _array_slice(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression: # local name for each element in the array el = sg.to_identifier("el") diff --git a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py index 21f8b39e7d..4e0a75e699 100644 --- a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py @@ -125,7 +125,7 @@ def _datetime_to_integer_label_non_fixed_frequency( expression=sge.convert(1), ), ) - elif rule_code == "ME": # Monthly + elif rule_code in ("M", "ME"): # Monthly x_int = sge.Paren( # type: ignore this=sge.Add( this=sge.Mul( @@ -182,7 +182,7 @@ def _datetime_to_integer_label_non_fixed_frequency( expression=sge.convert(1), ), ) - elif rule_code == "QE-DEC": # Quarterly + elif rule_code in ("Q-DEC", "QE-DEC"): # Quarterly x_int = sge.Paren( # type: ignore this=sge.Add( this=sge.Mul( @@ -239,7 +239,7 @@ def _datetime_to_integer_label_non_fixed_frequency( expression=sge.convert(1), ), ) - elif rule_code == "YE-DEC": # Yearly + elif rule_code in ("A-DEC", "Y-DEC", "YE-DEC"): # Yearly x_int = sge.Extract(this=sge.Identifier(this="YEAR"), expression=x.expr) first = sge.Extract(this=sge.Identifier(this="YEAR"), expression=y.expr) return sge.Case( diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_op_registration.py b/tests/unit/core/compile/sqlglot/aggregations/test_op_registration.py index c6c1c21151..7d4f53254d 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_op_registration.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_op_registration.py @@ -42,3 +42,23 @@ def test_func(input: sge.Expression) -> sge.Expression: ValueError, match=r".*first parameter must be a window operator.*" ): test_func(sge.to_identifier("A")) + + +def test_register_already_registered_raise_error(): + reg = op_registration.OpRegistration() + + @reg.register(agg_ops.SizeOp) + def test_func1(op, input): + return input + + with pytest.raises(ValueError, match=r".*is already registered.*"): + + @reg.register(agg_ops.SizeOp) + def test_func2(op, input): + return input + + +def test_getitem_not_registered_raise_error(): + reg = op_registration.OpRegistration() + with pytest.raises(ValueError, match=r".*is not registered.*"): + _ = reg[agg_ops.SizeOp()] diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_index/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_index/out.sql index 4200470b65..a1f089424a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_index/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_index/out.sql @@ -1,3 +1,4 @@ SELECT - `string_list_col`[SAFE_OFFSET(1)] AS `string_list_col` -FROM `bigframes-dev`.`sqlglot_test`.`repeated_types` AS `bft_0` \ No newline at end of file + IF(SUBSTRING(`string_col`, 2, 1) <> '', SUBSTRING(`string_col`, 2, 1), NULL) AS `string_index`, + [`int64_col`, `int64_too`][SAFE_OFFSET(1)] AS `array_index` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_reduce_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_reduce_op/out.sql index 26fc32f68d..1053ec1c2c 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_reduce_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_reduce_op/out.sql @@ -18,5 +18,10 @@ SELECT SELECT COALESCE(LOGICAL_OR(bf_arr_reduce_uid), FALSE) FROM UNNEST(`bool_list_col`) AS bf_arr_reduce_uid - ) AS `any_bool` + ) AS `any_bool`, + ( + SELECT + ARRAY_AGG(bf_arr_reduce_uid IGNORE NULLS) + FROM UNNEST(`string_list_col`) AS bf_arr_reduce_uid + ) AS `array_agg_str` FROM `bigframes-dev`.`sqlglot_test`.`repeated_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice/out.sql new file mode 100644 index 0000000000..ffec3b8e93 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice/out.sql @@ -0,0 +1,17 @@ +SELECT + SUBSTRING(`string_col`, 2, 4) AS `string_slice`, + ARRAY( + SELECT + el + FROM UNNEST([`int64_col`, `int64_too`]) AS el WITH OFFSET AS slice_idx + WHERE + slice_idx >= 1 + ) AS `slice_only_start`, + ARRAY( + SELECT + el + FROM UNNEST([`int64_col`, `int64_too`]) AS el WITH OFFSET AS slice_idx + WHERE + slice_idx >= 1 AND slice_idx < 5 + ) AS `slice_start_stop` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_only_start/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_only_start/out.sql deleted file mode 100644 index c37e27b2cf..0000000000 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_only_start/out.sql +++ /dev/null @@ -1,9 +0,0 @@ -SELECT - ARRAY( - SELECT - el - FROM UNNEST(`string_list_col`) AS el WITH OFFSET AS slice_idx - WHERE - slice_idx >= 1 - ) AS `string_list_col` -FROM `bigframes-dev`.`sqlglot_test`.`repeated_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_start_and_stop/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_start_and_stop/out.sql deleted file mode 100644 index 70417daf5c..0000000000 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_start_and_stop/out.sql +++ /dev/null @@ -1,9 +0,0 @@ -SELECT - ARRAY( - SELECT - el - FROM UNNEST(`string_list_col`) AS el WITH OFFSET AS slice_idx - WHERE - slice_idx >= 1 AND slice_idx < 5 - ) AS `string_list_col` -FROM `bigframes-dev`.`sqlglot_test`.`repeated_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql new file mode 100644 index 0000000000..2e8b60230f --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql @@ -0,0 +1,3 @@ +SELECT + OBJ.GET_ACCESS_URL(`string_col`, 'READ', INTERVAL 3600 MICROSECOND) AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql new file mode 100644 index 0000000000..dc84b3bec1 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql @@ -0,0 +1,3 @@ +SELECT + OBJ.MAKE_REF(`string_col`) AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql index 7afe926ab4..d6f6587ead 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql @@ -4,5 +4,6 @@ SELECT `int64_col`, `int64_col` & `int64_col` AS `int_and_int`, `bool_col` AND `bool_col` AS `bool_and_bool`, - IF(`bool_col` = FALSE, `bool_col`, NULL) AS `bool_and_null` + IF(`bool_col` = FALSE, `bool_col`, NULL) AS `bool_and_null`, + IF(`bool_col` = FALSE, `bool_col`, NULL) AS `null_and_bool` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql index 89a80b05a8..dad4cee9d0 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql @@ -4,5 +4,6 @@ SELECT `int64_col`, `int64_col` | `int64_col` AS `int_and_int`, `bool_col` OR `bool_col` AS `bool_and_bool`, - IF(`bool_col` = TRUE, `bool_col`, NULL) AS `bool_and_null` + IF(`bool_col` = TRUE, `bool_col`, NULL) AS `bool_and_null`, + IF(`bool_col` = TRUE, `bool_col`, NULL) AS `null_and_bool` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql index 74a8e81081..4be3b9f94a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql @@ -13,5 +13,11 @@ SELECT ) OR ( NOT `bool_col` AND CAST(NULL AS BOOLEAN) - ) AS `bool_and_null` + ) AS `bool_and_null`, + ( + `bool_col` AND NOT CAST(NULL AS BOOLEAN) + ) + OR ( + NOT `bool_col` AND CAST(NULL AS BOOLEAN) + ) AS `null_and_bool` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql index 37554c77e0..7827731881 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql @@ -5,6 +5,7 @@ SELECT `int64_col` = `int64_col` AS `int_eq_int`, `int64_col` = 1 AS `int_eq_1`, `int64_col` IS NULL AS `int_eq_null`, + `int64_col` IS NULL AS `null_eq_int`, `int64_col` = CAST(`bool_col` AS INT64) AS `int_eq_bool`, CAST(`bool_col` AS INT64) = `int64_col` AS `bool_eq_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ge_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ge_numeric/out.sql index f66e8435eb..5903cf0369 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ge_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ge_numeric/out.sql @@ -4,6 +4,7 @@ SELECT `bool_col`, `int64_col` >= `int64_col` AS `int_ge_int`, `int64_col` >= 1 AS `int_ge_1`, + NULL AS `null_ge_int`, `int64_col` >= CAST(`bool_col` AS INT64) AS `int_ge_bool`, CAST(`bool_col` AS INT64) >= `int64_col` AS `bool_ge_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_gt_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_gt_numeric/out.sql index d97f9d1d42..42bf029240 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_gt_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_gt_numeric/out.sql @@ -4,6 +4,7 @@ SELECT `bool_col`, `int64_col` > `int64_col` AS `int_gt_int`, `int64_col` > 1 AS `int_gt_1`, + NULL AS `null_gt_int`, `int64_col` > CAST(`bool_col` AS INT64) AS `int_gt_bool`, CAST(`bool_col` AS INT64) > `int64_col` AS `bool_gt_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql index d1af7c57ae..b6d860d472 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql @@ -10,5 +10,10 @@ SELECT COALESCE(`int64_col` IN (123456), FALSE) AS `ints_wo_match_nulls`, ( `float64_col` IS NULL - ) OR `float64_col` IN (1, 2, 3) AS `float_in_ints` + ) OR `float64_col` IN (1, 2, 3) AS `float_in_ints`, + ( + `int64_col` IS NULL + ) OR `int64_col` IN (2) AS `mixed_with_null`, + COALESCE(CAST(`bool_col` AS INT64) IN (1, 2.5), FALSE) AS `bool_in_mixed`, + `int64_col` IS NULL AS `only_null_match` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_le_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_le_numeric/out.sql index e4e542d1c5..c6c8651010 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_le_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_le_numeric/out.sql @@ -4,6 +4,7 @@ SELECT `bool_col`, `int64_col` <= `int64_col` AS `int_le_int`, `int64_col` <= 1 AS `int_le_1`, + NULL AS `null_le_int`, `int64_col` <= CAST(`bool_col` AS INT64) AS `int_le_bool`, CAST(`bool_col` AS INT64) <= `int64_col` AS `bool_le_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_lt_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_lt_numeric/out.sql index d616aecc8c..ec5c317a8e 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_lt_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_lt_numeric/out.sql @@ -4,6 +4,7 @@ SELECT `bool_col`, `int64_col` < `int64_col` AS `int_lt_int`, `int64_col` < 1 AS `int_lt_1`, + NULL AS `null_lt_int`, `int64_col` < CAST(`bool_col` AS INT64) AS `int_lt_bool`, CAST(`bool_col` AS INT64) < `int64_col` AS `bool_lt_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql index abef6f93d6..448a614629 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql @@ -7,6 +7,9 @@ SELECT ( `int64_col` ) IS NOT NULL AS `int_ne_null`, + ( + `int64_col` + ) IS NOT NULL AS `null_ne_int`, `int64_col` <> CAST(`bool_col` AS INT64) AS `int_ne_bool`, CAST(`bool_col` AS INT64) <> `int64_col` AS `bool_ne_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_datetime_to_integer_label/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_datetime_to_integer_label/out.sql index 8654f94270..4b0696386c 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_datetime_to_integer_label/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_datetime_to_integer_label/out.sql @@ -5,6 +5,13 @@ SELECT 86400000000 ) ) AS INT64) AS `fixed_freq`, + CAST(FLOOR(IEEE_DIVIDE(UNIX_MICROS(CAST(`datetime_col` AS TIMESTAMP)) - 0, 86400000000)) AS INT64) AS `origin_epoch`, + CAST(FLOOR( + IEEE_DIVIDE( + UNIX_MICROS(CAST(`datetime_col` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`timestamp_col` AS DATE) AS TIMESTAMP)), + 86400000000 + ) + ) AS INT64) AS `origin_start_day`, CASE WHEN UNIX_MICROS( CAST(TIMESTAMP_TRUNC(`datetime_col`, WEEK(MONDAY)) + INTERVAL 6 DAY AS TIMESTAMP) @@ -22,5 +29,48 @@ SELECT 604800000000 ) ) AS INT64) + 1 - END AS `non_fixed_freq_weekly` + END AS `non_fixed_freq_weekly`, + CASE + WHEN ( + EXTRACT(YEAR FROM `datetime_col`) * 12 + EXTRACT(MONTH FROM `datetime_col`) - 1 + ) = ( + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1 + ) + THEN 0 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + ( + EXTRACT(YEAR FROM `datetime_col`) * 12 + EXTRACT(MONTH FROM `datetime_col`) - 1 + ) - ( + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1 + ) - 1, + 1 + ) + ) AS INT64) + 1 + END AS `non_fixed_freq_monthly`, + CASE + WHEN ( + EXTRACT(YEAR FROM `datetime_col`) * 4 + EXTRACT(QUARTER FROM `datetime_col`) - 1 + ) = ( + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1 + ) + THEN 0 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + ( + EXTRACT(YEAR FROM `datetime_col`) * 4 + EXTRACT(QUARTER FROM `datetime_col`) - 1 + ) - ( + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1 + ) - 1, + 1 + ) + ) AS INT64) + 1 + END AS `non_fixed_freq_quarterly`, + CASE + WHEN EXTRACT(YEAR FROM `datetime_col`) = EXTRACT(YEAR FROM `timestamp_col`) + THEN 0 + ELSE CAST(FLOOR( + IEEE_DIVIDE(EXTRACT(YEAR FROM `datetime_col`) - EXTRACT(YEAR FROM `timestamp_col`) - 1, 1) + ) AS INT64) + 1 + END AS `non_fixed_freq_yearly` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql index 3d0b8213b6..5d98e445cc 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql @@ -2,5 +2,6 @@ SELECT CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS DATETIME) AS `int64_col`, SAFE_CAST(`string_col` AS DATETIME), CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`, - SAFE_CAST(`timestamp_col` AS DATETIME) + SAFE_CAST(`timestamp_col` AS DATETIME), + CAST(PARSE_TIMESTAMP('%Y-%m-%d', `string_col`, 'UTC') AS DATETIME) AS `string_col_fmt` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql index 1e8910fad7..e0fb530cc6 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql @@ -5,5 +5,6 @@ SELECT CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000) AS INT64)) AS TIMESTAMP) AS `int64_col_ms`, CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col`) AS INT64)) AS TIMESTAMP) AS `int64_col_us`, CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`, - TIMESTAMP(`datetime_col`) AS `datetime_col` + TIMESTAMP(`datetime_col`) AS `datetime_col`, + PARSE_TIMESTAMP('%Y-%m-%d', `string_col`, 'UTC') AS `string_col_fmt` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_value_array/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_value_array/out.sql new file mode 100644 index 0000000000..8250c02934 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_value_array/out.sql @@ -0,0 +1,3 @@ +SELECT + JSON_VALUE_ARRAY(`json_col`, '$') AS `json_col` +FROM `bigframes-dev`.`sqlglot_test`.`json_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_add_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_add_numeric/out.sql index 111684acd0..3aa06fe16e 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_add_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_add_numeric/out.sql @@ -4,6 +4,7 @@ SELECT `bool_col`, `int64_col` + `int64_col` AS `int_add_int`, `int64_col` + 1 AS `int_add_1`, + NULL AS `int_add_null`, `int64_col` + CAST(`bool_col` AS INT64) AS `int_add_bool`, CAST(`bool_col` AS INT64) + `int64_col` AS `bool_add_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_div_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_div_numeric/out.sql index 1b8166684c..3f5ff73326 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_div_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_div_numeric/out.sql @@ -6,6 +6,7 @@ SELECT IEEE_DIVIDE(`int64_col`, `int64_col`) AS `int_div_int`, IEEE_DIVIDE(`int64_col`, 1) AS `int_div_1`, IEEE_DIVIDE(`int64_col`, 0.0) AS `int_div_0`, + IEEE_DIVIDE(`int64_col`, NULL) AS `int_div_null`, IEEE_DIVIDE(`int64_col`, `float64_col`) AS `int_div_float`, IEEE_DIVIDE(`float64_col`, `int64_col`) AS `float_div_int`, IEEE_DIVIDE(`float64_col`, 0.0) AS `float_div_0`, diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_floordiv_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_floordiv_numeric/out.sql new file mode 100644 index 0000000000..c7fa74e48f --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_floordiv_numeric/out.sql @@ -0,0 +1,47 @@ +SELECT + `rowindex`, + `int64_col`, + `bool_col`, + `float64_col`, + CASE + WHEN `int64_col` = CAST(0 AS INT64) + THEN CAST(0 AS INT64) * `int64_col` + ELSE CAST(FLOOR(IEEE_DIVIDE(`int64_col`, `int64_col`)) AS INT64) + END AS `int_div_int`, + CASE + WHEN 1 = CAST(0 AS INT64) + THEN CAST(0 AS INT64) * `int64_col` + ELSE CAST(FLOOR(IEEE_DIVIDE(`int64_col`, 1)) AS INT64) + END AS `int_div_1`, + CASE + WHEN 0.0 = CAST(0 AS INT64) + THEN CAST('Infinity' AS FLOAT64) * `int64_col` + ELSE CAST(FLOOR(IEEE_DIVIDE(`int64_col`, 0.0)) AS INT64) + END AS `int_div_0`, + NULL AS `int_div_null`, + CASE + WHEN `float64_col` = CAST(0 AS INT64) + THEN CAST('Infinity' AS FLOAT64) * `int64_col` + ELSE CAST(FLOOR(IEEE_DIVIDE(`int64_col`, `float64_col`)) AS INT64) + END AS `int_div_float`, + CASE + WHEN `int64_col` = CAST(0 AS INT64) + THEN CAST('Infinity' AS FLOAT64) * `float64_col` + ELSE CAST(FLOOR(IEEE_DIVIDE(`float64_col`, `int64_col`)) AS INT64) + END AS `float_div_int`, + CASE + WHEN 0.0 = CAST(0 AS INT64) + THEN CAST('Infinity' AS FLOAT64) * `float64_col` + ELSE CAST(FLOOR(IEEE_DIVIDE(`float64_col`, 0.0)) AS INT64) + END AS `float_div_0`, + CASE + WHEN CAST(`bool_col` AS INT64) = CAST(0 AS INT64) + THEN CAST(0 AS INT64) * `int64_col` + ELSE CAST(FLOOR(IEEE_DIVIDE(`int64_col`, CAST(`bool_col` AS INT64))) AS INT64) + END AS `int_div_bool`, + CASE + WHEN `int64_col` = CAST(0 AS INT64) + THEN CAST(0 AS INT64) * CAST(`bool_col` AS INT64) + ELSE CAST(FLOOR(IEEE_DIVIDE(CAST(`bool_col` AS INT64), `int64_col`)) AS INT64) + END AS `bool_div_int` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_mul_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_mul_numeric/out.sql index 57aff08158..ebe8d571d6 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_mul_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_mul_numeric/out.sql @@ -4,6 +4,7 @@ SELECT `bool_col`, `int64_col` * `int64_col` AS `int_mul_int`, `int64_col` * 1 AS `int_mul_1`, + NULL AS `int_mul_null`, `int64_col` * CAST(`bool_col` AS INT64) AS `int_mul_bool`, CAST(`bool_col` AS INT64) * `int64_col` AS `bool_mul_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sub_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sub_numeric/out.sql index e1ca93d136..c1d0350a66 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sub_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sub_numeric/out.sql @@ -2,8 +2,9 @@ SELECT `rowindex`, `int64_col`, `bool_col`, - `int64_col` - `int64_col` AS `int_add_int`, - `int64_col` - 1 AS `int_add_1`, - `int64_col` - CAST(`bool_col` AS INT64) AS `int_add_bool`, - CAST(`bool_col` AS INT64) - `int64_col` AS `bool_add_int` + `int64_col` - `int64_col` AS `int_sub_int`, + `int64_col` - 1 AS `int_sub_1`, + NULL AS `int_sub_null`, + `int64_col` - CAST(`bool_col` AS INT64) AS `int_sub_bool`, + CAST(`bool_col` AS INT64) - `int64_col` AS `bool_sub_int` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_slice/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_slice/out.sql index df4dc689f7..b10f4b29e6 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_slice/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_slice/out.sql @@ -1,3 +1,18 @@ SELECT - SUBSTRING(`string_col`, 2, 2) AS `string_col` + SUBSTRING(`string_col`, 2, 2) AS `1_3`, + SUBSTRING(`string_col`, 1, 3) AS `none_3`, + SUBSTRING(`string_col`, 2) AS `1_none`, + SUBSTRING(`string_col`, -3) AS `m3_none`, + SUBSTRING(`string_col`, 1, GREATEST(0, LENGTH(`string_col`) + -3)) AS `none_m3`, + SUBSTRING( + `string_col`, + GREATEST(1, LENGTH(`string_col`) + -4), + GREATEST(0, LENGTH(`string_col`) + -3) - GREATEST(0, LENGTH(`string_col`) + -5) + ) AS `m5_m3`, + SUBSTRING(`string_col`, 2, GREATEST(0, LENGTH(`string_col`) + -4)) AS `1_m3`, + SUBSTRING( + `string_col`, + GREATEST(1, LENGTH(`string_col`) + -2), + 5 - GREATEST(0, LENGTH(`string_col`) + -3) + ) AS `m3_5` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py index 67c8bb0e5c..4075e1c278 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py @@ -1,13 +1,13 @@ # Copyright 2025 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the \"License\"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, +# distributed under the License is distributed on an \"AS IS\" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. @@ -16,7 +16,6 @@ from bigframes import operations as ops from bigframes.core import expression -from bigframes.operations._op_converters import convert_index, convert_slice import bigframes.operations.aggregations as agg_ops import bigframes.pandas as bpd from bigframes.testing import utils @@ -34,13 +33,18 @@ def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") -def test_array_index(repeated_types_df: bpd.DataFrame, snapshot): - col_name = "string_list_col" - bf_df = repeated_types_df[[col_name]] +def test_array_index(scalar_types_df: bpd.DataFrame, snapshot): + ops_map = { + "string_index": ops.ArrayIndexOp(index=1).as_expr("string_col"), + "array_index": expression.OpExpression( + ops.ArrayIndexOp(index=1), + (ops.ToArrayOp().as_expr("int64_col", "int64_too"),), + ), + } + sql = utils._apply_ops_to_sql( - bf_df, [convert_index(1).as_expr(col_name)], [col_name] + scalar_types_df, list(ops_map.values()), list(ops_map.keys()) ) - snapshot.assert_match(sql, "out.sql") @@ -50,6 +54,9 @@ def test_array_reduce_op(repeated_types_df: bpd.DataFrame, snapshot): "std_float": ops.ArrayReduceOp(agg_ops.StdOp()).as_expr("float_list_col"), "count_str": ops.ArrayReduceOp(agg_ops.CountOp()).as_expr("string_list_col"), "any_bool": ops.ArrayReduceOp(agg_ops.AnyOp()).as_expr("bool_list_col"), + "array_agg_str": ops.ArrayReduceOp(agg_ops.ArrayAggOp()).as_expr( + "string_list_col" + ), } sql = utils._apply_ops_to_sql( @@ -58,23 +65,23 @@ def test_array_reduce_op(repeated_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") -def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot): - col_name = "string_list_col" - bf_df = repeated_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - +def test_array_slice(scalar_types_df: bpd.DataFrame, snapshot): + array_expr = ops.ToArrayOp().as_expr("int64_col", "int64_too") + ops_map = { + "string_slice": ops.ArraySliceOp(start=1, stop=5).as_expr("string_col"), + "slice_only_start": expression.OpExpression( + ops.ArraySliceOp(start=1, stop=None), + (array_expr,), + ), + "slice_start_stop": expression.OpExpression( + ops.ArraySliceOp(start=1, stop=5), + (array_expr,), + ), + } -def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot): - col_name = "string_list_col" - bf_df = repeated_types_df[[col_name]] sql = utils._apply_ops_to_sql( - bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name] + scalar_types_df, list(ops_map.values()), list(ops_map.keys()) ) - snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 80aa22aaac..ac032f46e6 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -14,7 +14,9 @@ import pytest +from bigframes import operations as ops import bigframes.pandas as bpd +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -31,6 +33,28 @@ def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = utils._apply_ops_to_sql( + bf_df, + [ops.ObjGetAccessUrl(mode="READ", duration=3600).as_expr(col_name)], + [col_name], + ) + snapshot.assert_match(sql, "out.sql") + + def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): blob_df = scalar_types_df["string_col"].str.to_blob() snapshot.assert_match(blob_df.to_frame().sql, "out.sql") + + +def test_obj_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = utils._apply_ops_to_sql( + bf_df, + [ops.obj_make_ref_json_op.as_expr(col_name)], + [col_name], + ) + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py index 601fd86e4e..bd51ea905a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py @@ -26,6 +26,7 @@ def test_and_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] & bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] & bf_df["bool_col"] bf_df["bool_and_null"] = bf_df["bool_col"] & pd.NA # type: ignore + bf_df["null_and_bool"] = pd.NA & bf_df["bool_col"] # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") @@ -35,6 +36,7 @@ def test_or_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] | bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] | bf_df["bool_col"] bf_df["bool_and_null"] = bf_df["bool_col"] | pd.NA # type: ignore + bf_df["null_and_bool"] = pd.NA | bf_df["bool_col"] # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") @@ -44,4 +46,5 @@ def test_xor_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] ^ bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] ^ bf_df["bool_col"] bf_df["bool_and_null"] = bf_df["bool_col"] ^ pd.NA # type: ignore + bf_df["null_and_bool"] = pd.NA ^ bf_df["bool_col"] # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py index 3c13bc798b..05fa1b5434 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -44,6 +44,13 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): values=(None, 123456), match_nulls=False ).as_expr(int_col), "float_in_ints": ops.IsInOp(values=(1, 2, 3, None)).as_expr(float_col), + "mixed_with_null": ops.IsInOp( + values=("1.0", 2, None), match_nulls=True + ).as_expr(int_col), + "bool_in_mixed": ops.IsInOp(values=(1, 2.5)).as_expr(bool_col), + "only_null_match": ops.IsInOp(values=(None,), match_nulls=True).as_expr( + int_col + ), } sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) @@ -62,6 +69,7 @@ def test_eq_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_eq_int"] = bf_df["int64_col"] == bf_df["int64_col"] bf_df["int_eq_1"] = bf_df["int64_col"] == 1 bf_df["int_eq_null"] = bf_df["int64_col"] == pd.NA + bf_df["null_eq_int"] = pd.NA == bf_df["int64_col"] bf_df["int_eq_bool"] = bf_df["int64_col"] == bf_df["bool_col"] bf_df["bool_eq_int"] = bf_df["bool_col"] == bf_df["int64_col"] @@ -74,6 +82,7 @@ def test_gt_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_gt_int"] = bf_df["int64_col"] > bf_df["int64_col"] bf_df["int_gt_1"] = bf_df["int64_col"] > 1 + bf_df["null_gt_int"] = pd.NA > bf_df["int64_col"] bf_df["int_gt_bool"] = bf_df["int64_col"] > bf_df["bool_col"] bf_df["bool_gt_int"] = bf_df["bool_col"] > bf_df["int64_col"] @@ -86,6 +95,7 @@ def test_ge_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_ge_int"] = bf_df["int64_col"] >= bf_df["int64_col"] bf_df["int_ge_1"] = bf_df["int64_col"] >= 1 + bf_df["null_ge_int"] = pd.NA >= bf_df["int64_col"] bf_df["int_ge_bool"] = bf_df["int64_col"] >= bf_df["bool_col"] bf_df["bool_ge_int"] = bf_df["bool_col"] >= bf_df["int64_col"] @@ -98,6 +108,7 @@ def test_lt_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_lt_int"] = bf_df["int64_col"] < bf_df["int64_col"] bf_df["int_lt_1"] = bf_df["int64_col"] < 1 + bf_df["null_lt_int"] = pd.NA < bf_df["int64_col"] bf_df["int_lt_bool"] = bf_df["int64_col"] < bf_df["bool_col"] bf_df["bool_lt_int"] = bf_df["bool_col"] < bf_df["int64_col"] @@ -110,6 +121,7 @@ def test_le_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_le_int"] = bf_df["int64_col"] <= bf_df["int64_col"] bf_df["int_le_1"] = bf_df["int64_col"] <= 1 + bf_df["null_le_int"] = pd.NA <= bf_df["int64_col"] bf_df["int_le_bool"] = bf_df["int64_col"] <= bf_df["bool_col"] bf_df["bool_le_int"] = bf_df["bool_col"] <= bf_df["int64_col"] @@ -137,6 +149,7 @@ def test_ne_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_ne_int"] = bf_df["int64_col"] != bf_df["int64_col"] bf_df["int_ne_1"] = bf_df["int64_col"] != 1 bf_df["int_ne_null"] = bf_df["int64_col"] != pd.NA + bf_df["null_ne_int"] = pd.NA != bf_df["int64_col"] bf_df["int_ne_bool"] = bf_df["int64_col"] != bf_df["bool_col"] bf_df["bool_ne_int"] = bf_df["bool_col"] != bf_df["int64_col"] diff --git a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py index 76966d3c9b..1d6ea99d34 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py @@ -64,9 +64,24 @@ def test_datetime_to_integer_label(scalar_types_df: bpd.DataFrame, snapshot): "fixed_freq": ops.DatetimeToIntegerLabelOp( freq=pd.tseries.offsets.Day(), origin="start", closed="left" # type: ignore ).as_expr("datetime_col", "timestamp_col"), + "origin_epoch": ops.DatetimeToIntegerLabelOp( + freq=pd.tseries.offsets.Day(), origin="epoch", closed="left" # type: ignore + ).as_expr("datetime_col", "timestamp_col"), + "origin_start_day": ops.DatetimeToIntegerLabelOp( + freq=pd.tseries.offsets.Day(), origin="start_day", closed="left" # type: ignore + ).as_expr("datetime_col", "timestamp_col"), "non_fixed_freq_weekly": ops.DatetimeToIntegerLabelOp( freq=pd.tseries.offsets.Week(weekday=6), origin="start", closed="left" # type: ignore ).as_expr("datetime_col", "timestamp_col"), + "non_fixed_freq_monthly": ops.DatetimeToIntegerLabelOp( + freq=pd.tseries.offsets.MonthEnd(), origin="start", closed="left" # type: ignore + ).as_expr("datetime_col", "timestamp_col"), + "non_fixed_freq_quarterly": ops.DatetimeToIntegerLabelOp( + freq=pd.tseries.offsets.QuarterEnd(startingMonth=12), origin="start", closed="left" # type: ignore + ).as_expr("datetime_col", "timestamp_col"), + "non_fixed_freq_yearly": ops.DatetimeToIntegerLabelOp( + freq=pd.tseries.offsets.YearEnd(), origin="start", closed="left" # type: ignore + ).as_expr("datetime_col", "timestamp_col"), } sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) @@ -183,6 +198,9 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): col_names = ["int64_col", "string_col", "float64_col", "timestamp_col"] bf_df = scalar_types_df[col_names] ops_map = {col_name: ops.ToDatetimeOp().as_expr(col_name) for col_name in col_names} + ops_map["string_col_fmt"] = ops.ToDatetimeOp(format="%Y-%m-%d").as_expr( + "string_col" + ) sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") @@ -198,6 +216,7 @@ def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): "int64_col_us": ops.ToTimestampOp(unit="us").as_expr("int64_col"), "int64_col_ns": ops.ToTimestampOp(unit="ns").as_expr("int64_col"), "datetime_col": ops.ToTimestampOp().as_expr("datetime_col"), + "string_col_fmt": ops.ToTimestampOp(format="%Y-%m-%d").as_expr("string_col"), } sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) diff --git a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py index 1c5894fc96..fa6d6d546f 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py @@ -95,6 +95,16 @@ def test_json_value(json_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_json_value_array(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = utils._apply_ops_to_sql( + bf_df, [ops.JSONValueArray(json_path="$").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py index f0237159bc..17c2ff98bc 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py @@ -282,6 +282,7 @@ def test_add_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_add_int"] = bf_df["int64_col"] + bf_df["int64_col"] bf_df["int_add_1"] = bf_df["int64_col"] + 1 + bf_df["int_add_null"] = bf_df["int64_col"] + pd.NA bf_df["int_add_bool"] = bf_df["int64_col"] + bf_df["bool_col"] bf_df["bool_add_int"] = bf_df["bool_col"] + bf_df["int64_col"] @@ -323,6 +324,7 @@ def test_div_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_div_int"] = bf_df["int64_col"] / bf_df["int64_col"] bf_df["int_div_1"] = bf_df["int64_col"] / 1 bf_df["int_div_0"] = bf_df["int64_col"] / 0.0 + bf_df["int_div_null"] = bf_df["int64_col"] / pd.NA bf_df["int_div_float"] = bf_df["int64_col"] / bf_df["float64_col"] bf_df["float_div_int"] = bf_df["float64_col"] / bf_df["int64_col"] @@ -363,6 +365,7 @@ def test_floordiv_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_div_int"] = bf_df["int64_col"] // bf_df["int64_col"] bf_df["int_div_1"] = bf_df["int64_col"] // 1 bf_df["int_div_0"] = bf_df["int64_col"] // 0.0 + bf_df["int_div_null"] = bf_df["int64_col"] // pd.NA bf_df["int_div_float"] = bf_df["int64_col"] // bf_df["float64_col"] bf_df["float_div_int"] = bf_df["float64_col"] // bf_df["int64_col"] @@ -371,6 +374,8 @@ def test_floordiv_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_div_bool"] = bf_df["int64_col"] // bf_df["bool_col"] bf_df["bool_div_int"] = bf_df["bool_col"] // bf_df["int64_col"] + snapshot.assert_match(bf_df.sql, "out.sql") + def test_floordiv_timedelta(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["timestamp_col", "date_col"]] @@ -401,6 +406,7 @@ def test_mul_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_mul_int"] = bf_df["int64_col"] * bf_df["int64_col"] bf_df["int_mul_1"] = bf_df["int64_col"] * 1 + bf_df["int_mul_null"] = bf_df["int64_col"] * pd.NA bf_df["int_mul_bool"] = bf_df["int64_col"] * bf_df["bool_col"] bf_df["bool_mul_int"] = bf_df["bool_col"] * bf_df["int64_col"] @@ -437,11 +443,12 @@ def test_mod_numeric(scalar_types_df: bpd.DataFrame, snapshot): def test_sub_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]] - bf_df["int_add_int"] = bf_df["int64_col"] - bf_df["int64_col"] - bf_df["int_add_1"] = bf_df["int64_col"] - 1 + bf_df["int_sub_int"] = bf_df["int64_col"] - bf_df["int64_col"] + bf_df["int_sub_1"] = bf_df["int64_col"] - 1 + bf_df["int_sub_null"] = bf_df["int64_col"] - pd.NA - bf_df["int_add_bool"] = bf_df["int64_col"] - bf_df["bool_col"] - bf_df["bool_add_int"] = bf_df["bool_col"] - bf_df["int64_col"] + bf_df["int_sub_bool"] = bf_df["int64_col"] - bf_df["bool_col"] + bf_df["bool_sub_int"] = bf_df["bool_col"] - bf_df["int64_col"] snapshot.assert_match(bf_df.sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py index fff2cc06df..bb0e413486 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py @@ -219,9 +219,17 @@ def test_str_pad(scalar_types_df: bpd.DataFrame, snapshot): def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [ops.StrSliceOp(1, 3).as_expr(col_name)], [col_name] - ) + ops_map = { + "1_3": ops.StrSliceOp(1, 3).as_expr(col_name), + "none_3": ops.StrSliceOp(None, 3).as_expr(col_name), + "1_none": ops.StrSliceOp(1, None).as_expr(col_name), + "m3_none": ops.StrSliceOp(-3, None).as_expr(col_name), + "none_m3": ops.StrSliceOp(None, -3).as_expr(col_name), + "m5_m3": ops.StrSliceOp(-5, -3).as_expr(col_name), + "1_m3": ops.StrSliceOp(1, -3).as_expr(col_name), + "m3_5": ops.StrSliceOp(-3, 5).as_expr(col_name), + } + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql")