Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 9e4da68

Browse files
test: Fix prerelease and pandas 3.0 test compat (#2457)
1 parent 867951b commit 9e4da68

File tree

41 files changed

+1304
-1011
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1304
-1011
lines changed

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,8 @@ def arctanh_op_impl(x: ibis_types.Value):
169169
@scalar_op_compiler.register_unary_op(ops.floor_op)
170170
def floor_op_impl(x: ibis_types.Value):
171171
x_numeric = typing.cast(ibis_types.NumericValue, x)
172+
if x_numeric.type().is_boolean():
173+
return x_numeric.cast(ibis_dtypes.Int64()).cast(ibis_dtypes.Float64())
172174
if x_numeric.type().is_integer():
173175
return x_numeric.cast(ibis_dtypes.Float64())
174176
if x_numeric.type().is_floating():
@@ -181,6 +183,8 @@ def floor_op_impl(x: ibis_types.Value):
181183
@scalar_op_compiler.register_unary_op(ops.ceil_op)
182184
def ceil_op_impl(x: ibis_types.Value):
183185
x_numeric = typing.cast(ibis_types.NumericValue, x)
186+
if x_numeric.type().is_boolean():
187+
return x_numeric.cast(ibis_dtypes.Int64()).cast(ibis_dtypes.Float64())
184188
if x_numeric.type().is_integer():
185189
return x_numeric.cast(ibis_dtypes.Float64())
186190
if x_numeric.type().is_floating():
@@ -1026,7 +1030,7 @@ def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp):
10261030

10271031
@scalar_op_compiler.register_unary_op(ops.timedelta_floor_op)
10281032
def timedelta_floor_op_impl(x: ibis_types.NumericValue):
1029-
return x.floor()
1033+
return ibis_api.case().when(x > ibis.literal(0), x.floor()).else_(x.ceil()).end()
10301034

10311035

10321036
@scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)

bigframes/core/compile/polars/lowering.py

Lines changed: 43 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -174,12 +174,10 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
174174
divisor.output_type
175175
):
176176
# exact same as floordiv impl for timedelta
177-
numeric_result = ops.floordiv_op.as_expr(
177+
numeric_result = ops.div_op.as_expr(
178178
ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(dividend), divisor
179179
)
180-
int_result = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(numeric_result)
181-
return ops.AsTypeOp(to_type=dtypes.TIMEDELTA_DTYPE).as_expr(int_result)
182-
180+
return _numeric_to_timedelta(numeric_result)
183181
if (
184182
dividend.output_type == dtypes.BOOL_DTYPE
185183
and divisor.output_type == dtypes.BOOL_DTYPE
@@ -226,11 +224,10 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
226224
divisor.output_type
227225
):
228226
# this is pretty fragile as zero will break it, and must fit back into int
229-
numeric_result = expr.op.as_expr(
227+
numeric_result = ops.div_op.as_expr(
230228
ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(dividend), divisor
231229
)
232-
int_result = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(numeric_result)
233-
return ops.AsTypeOp(to_type=dtypes.TIMEDELTA_DTYPE).as_expr(int_result)
230+
return _numeric_to_timedelta(numeric_result)
234231

235232
if dividend.output_type == dtypes.BOOL_DTYPE:
236233
dividend = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(dividend)
@@ -319,6 +316,32 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
319316
return expr
320317

321318

319+
class LowerCeilOp(op_lowering.OpLoweringRule):
320+
@property
321+
def op(self) -> type[ops.ScalarOp]:
322+
return numeric_ops.CeilOp
323+
324+
def lower(self, expr: expression.OpExpression) -> expression.Expression:
325+
assert isinstance(expr.op, numeric_ops.CeilOp)
326+
arg = expr.children[0]
327+
if arg.output_type in (dtypes.INT_DTYPE, dtypes.BOOL_DTYPE):
328+
return expr.op.as_expr(ops.AsTypeOp(dtypes.FLOAT_DTYPE).as_expr(arg))
329+
return expr
330+
331+
332+
class LowerFloorOp(op_lowering.OpLoweringRule):
333+
@property
334+
def op(self) -> type[ops.ScalarOp]:
335+
return numeric_ops.FloorOp
336+
337+
def lower(self, expr: expression.OpExpression) -> expression.Expression:
338+
assert isinstance(expr.op, numeric_ops.FloorOp)
339+
arg = expr.children[0]
340+
if arg.output_type in (dtypes.INT_DTYPE, dtypes.BOOL_DTYPE):
341+
return expr.op.as_expr(ops.AsTypeOp(dtypes.FLOAT_DTYPE).as_expr(arg))
342+
return expr
343+
344+
322345
class LowerIsinOp(op_lowering.OpLoweringRule):
323346
@property
324347
def op(self) -> type[ops.ScalarOp]:
@@ -465,8 +488,21 @@ def _lower_cast(cast_op: ops.AsTypeOp, arg: expression.Expression):
465488
LowerInvertOp(),
466489
LowerIsinOp(),
467490
LowerLenOp(),
491+
LowerCeilOp(),
492+
LowerFloorOp(),
468493
)
469494

470495

471496
def lower_ops_to_polars(root: bigframe_node.BigFrameNode) -> bigframe_node.BigFrameNode:
472497
return op_lowering.lower_ops(root, rules=POLARS_LOWERING_RULES)
498+
499+
500+
def _numeric_to_timedelta(expr: expression.Expression) -> expression.Expression:
501+
"""rounding logic used for emulating timedelta ops"""
502+
rounded_value = ops.where_op.as_expr(
503+
ops.floor_op.as_expr(expr),
504+
ops.gt_op.as_expr(expr, expression.const(0)),
505+
ops.ceil_op.as_expr(expr),
506+
)
507+
int_value = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(rounded_value)
508+
return ops.AsTypeOp(to_type=dtypes.TIMEDELTA_DTYPE).as_expr(int_value)
Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import bigframes_vendored.sqlglot.expressions as sge
18+
19+
20+
def round_towards_zero(expr: sge.Expression):
21+
"""
22+
Round a float value to an integer, always rounding towards zero.
23+
24+
This is mostly used to handle duration/timedelta emulation.
25+
"""
26+
return sge.Cast(
27+
this=sge.If(
28+
this=sge.GT(this=expr, expression=sge.convert(0)),
29+
true=sge.Floor(this=expr),
30+
false=sge.Ceil(this=expr),
31+
),
32+
to="INT64",
33+
)

bigframes/core/compile/sqlglot/expressions/numeric_ops.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
from bigframes import dtypes
2121
from bigframes import operations as ops
2222
import bigframes.core.compile.sqlglot.expression_compiler as expression_compiler
23+
from bigframes.core.compile.sqlglot.expressions.common import round_towards_zero
2324
import bigframes.core.compile.sqlglot.expressions.constants as constants
2425
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
2526
from bigframes.operations import numeric_ops
@@ -467,7 +468,7 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
467468

468469
result = sge.func("IEEE_DIVIDE", left_expr, right_expr)
469470
if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
470-
return sge.Cast(this=sge.Floor(this=result), to="INT64")
471+
return round_towards_zero(result)
471472
else:
472473
return result
473474

@@ -510,7 +511,7 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
510511
)
511512

512513
if dtypes.is_numeric(right.dtype) and left.dtype == dtypes.TIMEDELTA_DTYPE:
513-
result = sge.Cast(this=sge.Floor(this=result), to="INT64")
514+
result = round_towards_zero(sge.func("IEEE_DIVIDE", left_expr, right_expr))
514515

515516
return result
516517

@@ -578,7 +579,7 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
578579
if (dtypes.is_numeric(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE) or (
579580
left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype)
580581
):
581-
return sge.Cast(this=sge.Floor(this=result), to="INT64")
582+
return round_towards_zero(result)
582583
else:
583584
return result
584585

bigframes/core/rewrite/timedeltas.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -206,12 +206,12 @@ def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
206206

207207

208208
def _rewrite_floordiv_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
209-
result = _TypedExpr.create_op_expr(ops.floordiv_op, left, right)
210-
211209
if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
212-
return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)
210+
return _TypedExpr.create_op_expr(
211+
ops.timedelta_floor_op, _TypedExpr.create_op_expr(ops.div_op, left, right)
212+
)
213213

214-
return result
214+
return _TypedExpr.create_op_expr(ops.floordiv_op, left, right)
215215

216216

217217
def _rewrite_to_timedelta_op(op: ops.ToTimedeltaOp, arg: _TypedExpr):

bigframes/core/utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -113,13 +113,13 @@ def get_standardized_ids(
113113
"""
114114
col_ids = [
115115
UNNAMED_COLUMN_ID
116-
if col_label is None
116+
if pd.isna(col_label) # type: ignore
117117
else label_to_identifier(col_label, strict=strict)
118118
for col_label in col_labels
119119
]
120120
idx_ids = [
121121
UNNAMED_INDEX_ID
122-
if idx_label is None
122+
if pd.isna(idx_label) # type: ignore
123123
else label_to_identifier(idx_label, strict=strict)
124124
for idx_label in idx_labels
125125
]

bigframes/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4398,7 +4398,7 @@ def to_excel(
43984398
**kwargs,
43994399
) -> None:
44004400
return self.to_pandas(allow_large_results=allow_large_results).to_excel(
4401-
excel_writer, sheet_name, **kwargs
4401+
excel_writer, sheet_name=sheet_name, **kwargs
44024402
)
44034403

44044404
def to_latex(

bigframes/ml/metrics/_metrics.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ def confusion_matrix(
214214
y_true = row["y_true"]
215215
y_pred = row["y_pred"]
216216
count = row["dummy"]
217-
confusion_matrix[y_pred][y_true] = count
217+
confusion_matrix.at[y_true, y_pred] = count
218218

219219
return confusion_matrix
220220

@@ -251,7 +251,7 @@ def recall_score(
251251
/ is_accurate.groupby(y_true_series).count()
252252
).to_pandas()
253253

254-
recall_score = pd.Series(0, index=index)
254+
recall_score = pd.Series(0.0, index=index)
255255
for i in recall_score.index:
256256
recall_score.loc[i] = recall.loc[i]
257257

@@ -321,7 +321,7 @@ def _precision_score_per_label(y_true: bpd.Series, y_pred: bpd.Series) -> pd.Ser
321321
is_accurate.groupby(y_pred).sum() / is_accurate.groupby(y_pred).count()
322322
).to_pandas()
323323

324-
precision_score = pd.Series(0, index=index)
324+
precision_score = pd.Series(0.0, index=index)
325325
for i in precision.index:
326326
precision_score.loc[i] = precision.loc[i]
327327

@@ -366,7 +366,7 @@ def f1_score(
366366
recall = recall_score(y_true_series, y_pred_series, average=None)
367367
precision = precision_score(y_true_series, y_pred_series, average=None)
368368

369-
f1_score = pd.Series(0, index=recall.index)
369+
f1_score = pd.Series(0.0, index=recall.index)
370370
for index in recall.index:
371371
if precision[index] + recall[index] != 0:
372372
f1_score[index] = (

bigframes/series.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2292,7 +2292,7 @@ def to_excel(
22922292
self, excel_writer, sheet_name="Sheet1", *, allow_large_results=None, **kwargs
22932293
) -> None:
22942294
return self.to_pandas(allow_large_results=allow_large_results).to_excel(
2295-
excel_writer, sheet_name, **kwargs
2295+
excel_writer, sheet_name=sheet_name, **kwargs
22962296
)
22972297

22982298
def to_json(

bigframes/session/polars_executor.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,8 @@
7979
numeric_ops.SubOp,
8080
numeric_ops.MulOp,
8181
numeric_ops.DivOp,
82+
numeric_ops.CeilOp,
83+
numeric_ops.FloorOp,
8284
numeric_ops.FloorDivOp,
8385
numeric_ops.ModOp,
8486
generic_ops.AsTypeOp,

0 commit comments

Comments
 (0)