fix more tests

TrevorBergeron · TrevorBergeron · commit 3a21a00cf3ae · 2026-02-20T22:10:08.000Z
diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py
@@ -94,8 +94,6 @@ def _normalize_all_nulls(col: pd.Series) -> pd.Series:
     # This over-normalizes probably, make more conservative later
     if col.hasnans and (pd_types.is_float_dtype(col.dtype)):
         col = col.astype("float64").astype("Float64")
-    if pd_types.is_object_dtype(col):
-        col = col.fillna(pd.NA)
     return col
 
 
@@ -106,10 +104,8 @@ def _normalize_index_nulls(idx: pd.Index) -> pd.Index:
         ]
         return pd.MultiIndex.from_arrays(new_levels, names=idx.names)
     if idx.hasnans:
-        if pd_types.is_float_dtype(idx.dtype) or pd_types.is_integer_dtype(idx.dtype):
+        if pd_types.is_float_dtype(idx.dtype):
             idx = idx.astype("float64").astype("Float64")
-        if pd_types.is_object_dtype(idx.dtype):
-            idx = idx.fillna(pd.NA)
     return idx
 
 
@@ -119,6 +115,7 @@ def assert_frame_equal(
     *,
     ignore_order: bool = False,
     nulls_are_nan: bool = True,
+    downcast_object: bool = True,
     **kwargs,
 ):
     if ignore_order:
@@ -134,6 +131,12 @@ def assert_frame_equal(
             left = left.sort_index()
             right = right.sort_index()
 
+    # Pandas sometimes produces object-dtype columns with a mix of NaN/None/NA nulls.
+    # That inconsistency makes comparison unreliable, so infer a concrete dtype first.
+    if downcast_object:
+        left = left.apply(lambda x: x.infer_objects())
+        right = right.apply(lambda x: x.infer_objects())
+
     if nulls_are_nan:
         left = left.apply(_normalize_all_nulls)
         right = right.apply(_normalize_all_nulls)
diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py
@@ -97,48 +97,63 @@ def _assert_series_equal(actual: pd.Series, expected: pd.Series):
 
 
 @pytest.mark.parametrize(
-    ("op", "col_1", "col_2"),
+    ("op", "col_1", "col_2", "arrow_supported"),
     [
-        (operator.add, "timedelta_col_1", "timedelta_col_2"),
-        (operator.sub, "timedelta_col_1", "timedelta_col_2"),
-        (operator.truediv, "timedelta_col_1", "timedelta_col_2"),
-        (operator.floordiv, "timedelta_col_1", "timedelta_col_2"),
-        (operator.truediv, "timedelta_col_1", "float_col"),
-        (operator.floordiv, "timedelta_col_1", "float_col"),
-        (operator.mul, "timedelta_col_1", "float_col"),
-        (operator.mul, "float_col", "timedelta_col_1"),
-        (operator.mod, "timedelta_col_1", "timedelta_col_2"),
+        (operator.add, "timedelta_col_1", "timedelta_col_2", True),
+        (operator.sub, "timedelta_col_1", "timedelta_col_2", True),
+        (operator.truediv, "timedelta_col_1", "timedelta_col_2", True),
+        (operator.floordiv, "timedelta_col_1", "timedelta_col_2", True),
+        (operator.truediv, "timedelta_col_1", "float_col", False),
+        (operator.floordiv, "timedelta_col_1", "float_col", False),
+        (operator.mul, "timedelta_col_1", "float_col", False),
+        (operator.mul, "float_col", "timedelta_col_1", False),
+        (operator.mod, "timedelta_col_1", "timedelta_col_2", False),
     ],
 )
-def test_timedelta_binary_ops_between_series(temporal_dfs, op, col_1, col_2):
+def test_timedelta_binary_ops_between_series(
+    temporal_dfs, op, col_1, col_2, arrow_supported
+):
     bf_df, pd_df = temporal_dfs
 
     actual_result = op(bf_df[col_1], bf_df[col_2]).to_pandas()
 
-    expected_result = op(pd_df[col_1], pd_df[col_2])
+    if not arrow_supported:
+        expected_result = pd_df.apply(lambda x: op(x[col_1], x[col_2]), axis=1)
+    else:
+        expected_result = op(pd_df[col_1], pd_df[col_2])
     _assert_series_equal(actual_result, expected_result)
 
 
 @pytest.mark.parametrize(
-    ("op", "col", "literal"),
+    ("op", "col", "literal", "arrow_supported"),
     [
-        (operator.add, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
-        (operator.sub, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
-        (operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
-        (operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
-        (operator.truediv, "timedelta_col_1", 3),
-        (operator.floordiv, "timedelta_col_1", 3),
-        (operator.mul, "timedelta_col_1", 3),
-        (operator.mul, "float_col", pd.Timedelta(1, "s").as_unit("us")),
-        (operator.mod, "timedelta_col_1", pd.Timedelta(7, "s").as_unit("us")),
+        (operator.add, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us"), True),
+        (operator.sub, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us"), True),
+        (operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us"), True),
+        (
+            operator.floordiv,
+            "timedelta_col_1",
+            pd.Timedelta(2, "s").as_unit("us"),
+            False,
+        ),
+        (operator.truediv, "timedelta_col_1", 3, True),
+        (operator.floordiv, "timedelta_col_1", 3, False),
+        (operator.mul, "timedelta_col_1", 3, True),
+        (operator.mul, "float_col", pd.Timedelta(1, "s").as_unit("us"), True),
+        (operator.mod, "timedelta_col_1", pd.Timedelta(7, "s").as_unit("us"), False),
     ],
 )
-def test_timedelta_binary_ops_series_and_literal(temporal_dfs, op, col, literal):
+def test_timedelta_binary_ops_series_and_literal(
+    temporal_dfs, op, col, literal, arrow_supported
+):
     bf_df, pd_df = temporal_dfs
 
     actual_result = op(bf_df[col], literal).to_pandas()
 
-    expected_result = op(pd_df[col], literal)
+    if not arrow_supported:
+        expected_result = pd_df[col].map(lambda x: op(x, literal))
+    else:
+        expected_result = op(pd_df[col], literal)
     _assert_series_equal(actual_result, expected_result)
 
 
@@ -298,13 +313,16 @@ def test_timestamp_add_dataframes(temporal_dfs):
 
 
 @pytest.mark.parametrize(
-    ("column", "pd_dtype"),
+    ("column",),
     [
-        ("datetime_col", "<M8[ns]"),
-        ("timestamp_col", "datetime64[ns, UTC]"),
+        ("datetime_col",),
+        ("timestamp_col",),
     ],
 )
-def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype):
+def test_timestamp_sub__ts_series_minus_td_series(
+    temporal_dfs,
+    column,
+):
     bf_df, pd_df = temporal_dfs
 
     actual_result = (bf_df[column] - bf_df["timedelta_col_1"]).to_pandas()
@@ -316,19 +334,23 @@ def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype
 
 
 @pytest.mark.parametrize(
-    ("column", "pd_dtype"),
+    ("column",),
     [
-        ("datetime_col", "<M8[ns]"),
-        ("timestamp_col", "datetime64[ns, UTC]"),
+        ("datetime_col",),
+        ("timestamp_col",),
     ],
 )
-def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtype):
+def test_timestamp_sub__ts_series_minus_td_literal(
+    temporal_dfs,
+    column,
+):
     bf_df, pd_df = temporal_dfs
     literal = pd.Timedelta(1, "h")
 
     actual_result = (bf_df[column] - literal).to_pandas()
 
-    expected_result = pd_df[column] - literal
+    # Result dtype varies across pandas versions, so cast expected to the actual dtype
+    expected_result = (pd_df[column] - literal).astype(actual_result.dtype)
     bigframes.testing.assert_series_equal(
         actual_result, expected_result, check_index_type=False
     )
@@ -584,7 +606,7 @@ def test_timedelta_agg__timedelta_result(temporal_dfs, agg_func):
 
     actual_result = agg_func(bf_df["timedelta_col_1"])
 
-    expected_result = agg_func(pd_df["timedelta_col_1"]).floor("us")
+    expected_result = agg_func(pd_df["timedelta_col_1"])
     assert actual_result == expected_result
 
 
diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
@@ -70,6 +70,7 @@ def test_sql_executes(scalars_df_default_index, bigquery_client):
         .sort_values("rowindex")
         .reset_index(drop=True)
     )
+    bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)
     bigframes.testing.assert_frame_equal(bf_result, bq_result, check_dtype=False)
 
 
@@ -101,6 +102,7 @@ def test_sql_executes_and_includes_named_index(
         .set_index("string_col")
         .sort_values("rowindex")
     )
+    bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)
     bigframes.testing.assert_frame_equal(
         bf_result, bq_result, check_dtype=False, check_index_type=False
     )
@@ -134,6 +136,7 @@ def test_sql_executes_and_includes_named_multiindex(
         .set_index(["string_col", "bool_col"])
         .sort_values("rowindex")
     )
+    bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)
     bigframes.testing.assert_frame_equal(
         bf_result, bq_result, check_dtype=False, check_index_type=False
     )

Original file line number	Diff line number	Diff line change
`@@ -70,6 +70,7 @@ def test_sql_executes(scalars_df_default_index, bigquery_client):`
`70`	`70`	`.sort_values("rowindex")`
`71`	`71`	`.reset_index(drop=True)`
`72`	`72`	`)`
	`73`	`+ bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)`
`73`	`74`	`bigframes.testing.assert_frame_equal(bf_result, bq_result, check_dtype=False)`
`74`	`75`
`75`	`76`
`@@ -101,6 +102,7 @@ def test_sql_executes_and_includes_named_index(`
`101`	`102`	`.set_index("string_col")`
`102`	`103`	`.sort_values("rowindex")`
`103`	`104`	`)`
	`105`	`+ bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)`
`104`	`106`	`bigframes.testing.assert_frame_equal(`
`105`	`107`	`bf_result, bq_result, check_dtype=False, check_index_type=False`
`106`	`108`	`)`
`@@ -134,6 +136,7 @@ def test_sql_executes_and_includes_named_multiindex(`
`134`	`136`	`.set_index(["string_col", "bool_col"])`
`135`	`137`	`.sort_values("rowindex")`
`136`	`138`	`)`
	`139`	`+ bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE)`
`137`	`140`	`bigframes.testing.assert_frame_equal(`
`138`	`141`	`bf_result, bq_result, check_dtype=False, check_index_type=False`
`139`	`142`	`)`