Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 3a21a00

Browse files
fix more tests
1 parent 3d4abc7 commit 3a21a00

File tree

3 files changed

+67
-39
lines changed

3 files changed

+67
-39
lines changed

bigframes/testing/utils.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,6 @@ def _normalize_all_nulls(col: pd.Series) -> pd.Series:
9494
# This probably over-normalizes; make it more conservative later.
9595
if col.hasnans and (pd_types.is_float_dtype(col.dtype)):
9696
col = col.astype("float64").astype("Float64")
97-
if pd_types.is_object_dtype(col):
98-
col = col.fillna(pd.NA)
9997
return col
10098

10199

@@ -106,10 +104,8 @@ def _normalize_index_nulls(idx: pd.Index) -> pd.Index:
106104
]
107105
return pd.MultiIndex.from_arrays(new_levels, names=idx.names)
108106
if idx.hasnans:
109-
if pd_types.is_float_dtype(idx.dtype) or pd_types.is_integer_dtype(idx.dtype):
107+
if pd_types.is_float_dtype(idx.dtype):
110108
idx = idx.astype("float64").astype("Float64")
111-
if pd_types.is_object_dtype(idx.dtype):
112-
idx = idx.fillna(pd.NA)
113109
return idx
114110

115111

@@ -119,6 +115,7 @@ def assert_frame_equal(
119115
*,
120116
ignore_order: bool = False,
121117
nulls_are_nan: bool = True,
118+
downcast_object: bool = True,
122119
**kwargs,
123120
):
124121
if ignore_order:
@@ -134,6 +131,12 @@ def assert_frame_equal(
134131
left = left.sort_index()
135132
right = right.sort_index()
136133

134+
# pandas sometimes produces object-dtype columns.
135+
# However, inconsistent null representations (NaN/None/NA) make object-dtype comparisons unreliable, so convert to a typed column first.
136+
if downcast_object:
137+
left = left.apply(lambda x: x.infer_objects())
138+
right = right.apply(lambda x: x.infer_objects())
139+
137140
if nulls_are_nan:
138141
left = left.apply(_normalize_all_nulls)
139142
right = right.apply(_normalize_all_nulls)

tests/system/small/operations/test_timedeltas.py

Lines changed: 56 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -97,48 +97,63 @@ def _assert_series_equal(actual: pd.Series, expected: pd.Series):
9797

9898

9999
@pytest.mark.parametrize(
100-
("op", "col_1", "col_2"),
100+
("op", "col_1", "col_2", "arrow_supported"),
101101
[
102-
(operator.add, "timedelta_col_1", "timedelta_col_2"),
103-
(operator.sub, "timedelta_col_1", "timedelta_col_2"),
104-
(operator.truediv, "timedelta_col_1", "timedelta_col_2"),
105-
(operator.floordiv, "timedelta_col_1", "timedelta_col_2"),
106-
(operator.truediv, "timedelta_col_1", "float_col"),
107-
(operator.floordiv, "timedelta_col_1", "float_col"),
108-
(operator.mul, "timedelta_col_1", "float_col"),
109-
(operator.mul, "float_col", "timedelta_col_1"),
110-
(operator.mod, "timedelta_col_1", "timedelta_col_2"),
102+
(operator.add, "timedelta_col_1", "timedelta_col_2", True),
103+
(operator.sub, "timedelta_col_1", "timedelta_col_2", True),
104+
(operator.truediv, "timedelta_col_1", "timedelta_col_2", True),
105+
(operator.floordiv, "timedelta_col_1", "timedelta_col_2", True),
106+
(operator.truediv, "timedelta_col_1", "float_col", False),
107+
(operator.floordiv, "timedelta_col_1", "float_col", False),
108+
(operator.mul, "timedelta_col_1", "float_col", False),
109+
(operator.mul, "float_col", "timedelta_col_1", False),
110+
(operator.mod, "timedelta_col_1", "timedelta_col_2", False),
111111
],
112112
)
113-
def test_timedelta_binary_ops_between_series(temporal_dfs, op, col_1, col_2):
113+
def test_timedelta_binary_ops_between_series(
114+
temporal_dfs, op, col_1, col_2, arrow_supported
115+
):
114116
bf_df, pd_df = temporal_dfs
115117

116118
actual_result = op(bf_df[col_1], bf_df[col_2]).to_pandas()
117119

118-
expected_result = op(pd_df[col_1], pd_df[col_2])
120+
if not arrow_supported:
121+
expected_result = pd_df.apply(lambda x: op(x[col_1], x[col_2]), axis=1)
122+
else:
123+
expected_result = op(pd_df[col_1], pd_df[col_2])
119124
_assert_series_equal(actual_result, expected_result)
120125

121126

122127
@pytest.mark.parametrize(
123-
("op", "col", "literal"),
128+
("op", "col", "literal", "arrow_supported"),
124129
[
125-
(operator.add, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
126-
(operator.sub, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
127-
(operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
128-
(operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
129-
(operator.truediv, "timedelta_col_1", 3),
130-
(operator.floordiv, "timedelta_col_1", 3),
131-
(operator.mul, "timedelta_col_1", 3),
132-
(operator.mul, "float_col", pd.Timedelta(1, "s").as_unit("us")),
133-
(operator.mod, "timedelta_col_1", pd.Timedelta(7, "s").as_unit("us")),
130+
(operator.add, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us"), True),
131+
(operator.sub, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us"), True),
132+
(operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us"), True),
133+
(
134+
operator.floordiv,
135+
"timedelta_col_1",
136+
pd.Timedelta(2, "s").as_unit("us"),
137+
False,
138+
),
139+
(operator.truediv, "timedelta_col_1", 3, True),
140+
(operator.floordiv, "timedelta_col_1", 3, False),
141+
(operator.mul, "timedelta_col_1", 3, True),
142+
(operator.mul, "float_col", pd.Timedelta(1, "s").as_unit("us"), True),
143+
(operator.mod, "timedelta_col_1", pd.Timedelta(7, "s").as_unit("us"), False),
134144
],
135145
)
136-
def test_timedelta_binary_ops_series_and_literal(temporal_dfs, op, col, literal):
146+
def test_timedelta_binary_ops_series_and_literal(
147+
temporal_dfs, op, col, literal, arrow_supported
148+
):
137149
bf_df, pd_df = temporal_dfs
138150

139151
actual_result = op(bf_df[col], literal).to_pandas()
140152

141-
expected_result = op(pd_df[col], literal)
153+
if not arrow_supported:
154+
expected_result = pd_df[col].map(lambda x: op(x, literal))
155+
else:
156+
expected_result = op(pd_df[col], literal)
142157
_assert_series_equal(actual_result, expected_result)
143158

144159

@@ -298,13 +313,16 @@ def test_timestamp_add_dataframes(temporal_dfs):
298313

299314

300315
@pytest.mark.parametrize(
301-
("column", "pd_dtype"),
316+
("column",),
302317
[
303-
("datetime_col", "<M8[ns]"),
304-
("timestamp_col", "datetime64[ns, UTC]"),
318+
("datetime_col",),
319+
("timestamp_col",),
305320
],
306321
)
307-
def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype):
322+
def test_timestamp_sub__ts_series_minus_td_series(
323+
temporal_dfs,
324+
column,
325+
):
308326
bf_df, pd_df = temporal_dfs
309327

310328
actual_result = (bf_df[column] - bf_df["timedelta_col_1"]).to_pandas()
@@ -316,19 +334,23 @@ def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype
316334

317335

318336
@pytest.mark.parametrize(
319-
("column", "pd_dtype"),
337+
("column",),
320338
[
321-
("datetime_col", "<M8[ns]"),
322-
("timestamp_col", "datetime64[ns, UTC]"),
339+
("datetime_col",),
340+
("timestamp_col",),
323341
],
324342
)
325-
def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtype):
343+
def test_timestamp_sub__ts_series_minus_td_literal(
344+
temporal_dfs,
345+
column,
346+
):
326347
bf_df, pd_df = temporal_dfs
327348
literal = pd.Timedelta(1, "h")
328349

329350
actual_result = (bf_df[column] - literal).to_pandas()
330351

331-
expected_result = pd_df[column] - literal
352+
# The result dtype of this subtraction varies across pandas versions, so cast the expected result to the actual result's dtype.
353+
expected_result = (pd_df[column] - literal).astype(actual_result.dtype)
332354
bigframes.testing.assert_series_equal(
333355
actual_result, expected_result, check_index_type=False
334356
)
@@ -584,7 +606,7 @@ def test_timedelta_agg__timedelta_result(temporal_dfs, agg_func):
584606

585607
actual_result = agg_func(bf_df["timedelta_col_1"])
586608

587-
expected_result = agg_func(pd_df["timedelta_col_1"]).floor("us")
609+
expected_result = agg_func(pd_df["timedelta_col_1"])
588610
assert actual_result == expected_result
589611

590612

tests/system/small/test_dataframe_io.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def test_sql_executes(scalars_df_default_index, bigquery_client):
7070
.sort_values("rowindex")
7171
.reset_index(drop=True)
7272
)
73+
bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)
7374
bigframes.testing.assert_frame_equal(bf_result, bq_result, check_dtype=False)
7475

7576

@@ -101,6 +102,7 @@ def test_sql_executes_and_includes_named_index(
101102
.set_index("string_col")
102103
.sort_values("rowindex")
103104
)
105+
bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)
104106
bigframes.testing.assert_frame_equal(
105107
bf_result, bq_result, check_dtype=False, check_index_type=False
106108
)
@@ -134,6 +136,7 @@ def test_sql_executes_and_includes_named_multiindex(
134136
.set_index(["string_col", "bool_col"])
135137
.sort_values("rowindex")
136138
)
139+
bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)
137140
bigframes.testing.assert_frame_equal(
138141
bf_result, bq_result, check_dtype=False, check_index_type=False
139142
)

0 commit comments

Comments
 (0)