Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 4d17400

Browse files
Update supported pandas APIs documentation links
Updated the `scripts/publish_api_coverage.py` script to generate documentation links that point to the new structure on `dataframes.bigquery.dev`. Instead of a monolithic page with anchors, the links now point to individual sub-pages for each API method/attribute, following the format `https://dataframes.bigquery.dev/reference/api/<FullClassName>.<MemberName>.html`. Updated the `URL_PREFIX` mapping to use fully qualified Python class names.
1 parent 4b8bf09 commit 4d17400

File tree

6 files changed

+30
-70
lines changed

6 files changed

+30
-70
lines changed

bigframes/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2303,7 +2303,7 @@ def to_dict(
23032303
*,
23042304
allow_large_results: Optional[bool] = None,
23052305
) -> typing.Mapping:
2306-
return typing.cast(dict, self.to_pandas(allow_large_results=allow_large_results).to_dict(into=into)) # type: ignore
2306+
return typing.cast(dict, self.to_pandas(allow_large_results=allow_large_results).to_dict(into)) # type: ignore
23072307

23082308
def to_excel(
23092309
self, excel_writer, sheet_name="Sheet1", *, allow_large_results=None, **kwargs

bigframes/testing/utils.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import base64
1616
import decimal
17-
import re
1817
from typing import Iterable, Optional, Sequence, Set, Union
1918

2019
import geopandas as gpd # type: ignore
@@ -70,12 +69,6 @@
7069
]
7170

7271

73-
def pandas_major_version() -> int:
74-
match = re.search(r"^v?(\d+)", pd.__version__.strip())
75-
assert match is not None
76-
return int(match.group(1))
77-
78-
7972
# Prefer this function for tests that run in both ordered and unordered mode
8073
def assert_dfs_equivalent(pd_df: pd.DataFrame, bf_df: bpd.DataFrame, **kwargs):
8174
bf_df_local = bf_df.to_pandas()
@@ -90,7 +83,7 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar
9083

9184

9285
def _normalize_all_nulls(col: pd.Series) -> pd.Series:
93-
if col.dtype in (bigframes.dtypes.FLOAT_DTYPE, bigframes.dtypes.INT_DTYPE):
86+
if col.dtype == bigframes.dtypes.FLOAT_DTYPE:
9487
col = col.astype("float64")
9588
if pd_types.is_object_dtype(col):
9689
col = col.fillna(float("nan"))
@@ -141,15 +134,6 @@ def assert_series_equal(
141134
left = left.sort_index()
142135
right = right.sort_index()
143136

144-
if isinstance(left.index, pd.RangeIndex) or pd_types.is_integer_dtype(
145-
left.index.dtype,
146-
):
147-
left.index = left.index.astype("Int64")
148-
if isinstance(right.index, pd.RangeIndex) or pd_types.is_integer_dtype(
149-
right.index.dtype,
150-
):
151-
right.index = right.index.astype("Int64")
152-
153137
if nulls_are_nan:
154138
left = _normalize_all_nulls(left)
155139
right = _normalize_all_nulls(right)

tests/unit/core/test_groupby.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
import bigframes.core.utils as utils
2020
import bigframes.pandas as bpd
21-
from bigframes.testing.utils import assert_series_equal
2221

2322
pytest.importorskip("polars")
2423
pytest.importorskip("pandas", minversion="2.0.0")
@@ -218,7 +217,7 @@ def test_groupby_series_iter_by_series(polars_session):
218217
bf_result = bf_group_series.to_pandas()
219218
pd_key, pd_result = pd_group
220219
assert bf_key == pd_key
221-
assert_series_equal(
220+
pandas.testing.assert_series_equal(
222221
bf_result, pd_result, check_dtype=False, check_index_type=False
223222
)
224223

@@ -237,7 +236,7 @@ def test_groupby_series_iter_by_series_list_one_item(polars_session):
237236
bf_result = bf_group_series.to_pandas()
238237
pd_key, pd_result = pd_group
239238
assert bf_key == pd_key
240-
assert_series_equal(
239+
pandas.testing.assert_series_equal(
241240
bf_result, pd_result, check_dtype=False, check_index_type=False
242241
)
243242

@@ -259,6 +258,6 @@ def test_groupby_series_iter_by_series_list_multiple(polars_session):
259258
bf_result = bf_group_series.to_pandas()
260259
pd_key, pd_result = pd_group
261260
assert bf_key == pd_key
262-
assert_series_equal(
261+
pandas.testing.assert_series_equal(
263262
bf_result, pd_result, check_dtype=False, check_index_type=False
264263
)

tests/unit/test_dataframe_polars.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -593,8 +593,8 @@ def test_drop_bigframes_index_with_na(scalars_dfs):
593593
scalars_pandas_df = scalars_pandas_df.copy()
594594
scalars_df = scalars_df.set_index("bytes_col")
595595
scalars_pandas_df = scalars_pandas_df.set_index("bytes_col")
596-
drop_index = scalars_df.iloc[[2, 5]].index
597-
drop_pandas_index = scalars_pandas_df.iloc[[2, 5]].index
596+
drop_index = scalars_df.iloc[[3, 5]].index
597+
drop_pandas_index = scalars_pandas_df.iloc[[3, 5]].index
598598

599599
pd_result = scalars_pandas_df.drop(index=drop_pandas_index) # drop_pandas_index)
600600
bf_result = scalars_df.drop(index=drop_index).to_pandas()
@@ -2682,10 +2682,9 @@ def test_dataframe_pct_change(scalars_df_index, scalars_pandas_df_index, periods
26822682
bf_result = scalars_df_index[col_names].pct_change(periods=periods).to_pandas()
26832683
# pandas 3.0 does not automatically ffill anymore
26842684
pd_result = scalars_pandas_df_index[col_names].ffill().pct_change(periods=periods)
2685-
assert_frame_equal(
2685+
pd.testing.assert_frame_equal(
26862686
pd_result,
26872687
bf_result,
2688-
nulls_are_nan=True,
26892688
)
26902689

26912690

@@ -4298,13 +4297,8 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
42984297
subset, normalize=normalize, ascending=ascending, dropna=dropna
42994298
)
43004299

4301-
assert_series_equal(
4302-
bf_result,
4303-
pd_result,
4304-
check_dtype=False,
4305-
check_index_type=False,
4306-
# different pandas versions inconsistent for tie-handling
4307-
ignore_order=True,
4300+
pd.testing.assert_series_equal(
4301+
bf_result, pd_result, check_dtype=False, check_index_type=False
43084302
)
43094303

43104304

tests/unit/test_local_engine.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import bigframes
2121
import bigframes.pandas as bpd
22-
from bigframes.testing.utils import assert_frame_equal, assert_series_equal
2322

2423
pytest.importorskip("polars")
2524
pytest.importorskip("pandas", minversion="2.0.0")
@@ -48,7 +47,7 @@ def test_polars_local_engine_series(polars_session: bigframes.Session):
4847
pd_series = pd.Series([1, 2, 3], dtype=bf_series.dtype)
4948
bf_result = bf_series.to_pandas()
5049
pd_result = pd_series
51-
assert_series_equal(bf_result, pd_result, check_index_type=False)
50+
pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
5251

5352

5453
def test_polars_local_engine_add(
@@ -75,9 +74,9 @@ def test_polars_local_engine_filter(small_inline_frame: pd.DataFrame, polars_ses
7574
pd_df = small_inline_frame
7675
bf_df = bpd.DataFrame(pd_df, session=polars_session)
7776

78-
bf_result = bf_df[bf_df["int2"] >= 1].to_pandas()
79-
pd_result = pd_df[pd_df["int2"] >= 1] # type: ignore
80-
assert_frame_equal(bf_result, pd_result)
77+
bf_result = bf_df.filter(bf_df["int2"] >= 1).to_pandas()
78+
pd_result = pd_df.filter(pd_df["int2"] >= 1) # type: ignore
79+
pandas.testing.assert_frame_equal(bf_result, pd_result)
8180

8281

8382
def test_polars_local_engine_series_rename_with_mapping(polars_session):
@@ -89,7 +88,7 @@ def test_polars_local_engine_series_rename_with_mapping(polars_session):
8988
bf_result = bf_series.rename({1: 100, 2: 200, 3: 300}).to_pandas()
9089
pd_result = pd_series.rename({1: 100, 2: 200, 3: 300})
9190
# pd default index is int64, bf is Int64
92-
assert_series_equal(bf_result, pd_result, check_index_type=False)
91+
pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
9392

9493

9594
def test_polars_local_engine_series_rename_with_mapping_inplace(polars_session):
@@ -104,7 +103,7 @@ def test_polars_local_engine_series_rename_with_mapping_inplace(polars_session):
104103
bf_result = bf_series.to_pandas()
105104
pd_result = pd_series
106105
# pd default index is int64, bf is Int64
107-
assert_series_equal(bf_result, pd_result, check_index_type=False)
106+
pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
108107

109108

110109
def test_polars_local_engine_reset_index(
@@ -130,12 +129,11 @@ def test_polars_local_engine_join_binop(polars_session):
130129
bf_result = (bf_df_1 + bf_df_2).to_pandas()
131130
pd_result = pd_df_1 + pd_df_2
132131
# Sort since different join ordering
133-
assert_frame_equal(
132+
pandas.testing.assert_frame_equal(
134133
bf_result.sort_index(),
135134
pd_result.sort_index(),
136135
check_dtype=False,
137136
check_index_type=False,
138-
nulls_are_nan=True,
139137
)
140138

141139

tests/unit/test_series_polars.py

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
assert_series_equal,
4343
convert_pandas_dtypes,
4444
get_first_file_from_wildcard,
45-
pandas_major_version,
4645
)
4746

4847
pytest.importorskip("polars")
@@ -148,7 +147,7 @@ def test_series_construct_timestamps():
148147
bf_result = series.Series(datetimes).to_pandas()
149148
pd_result = pd.Series(datetimes, dtype=pd.ArrowDtype(pa.timestamp("us")))
150149

151-
assert_series_equal(bf_result, pd_result, check_index_type=False)
150+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
152151

153152

154153
def test_series_construct_copy_with_index(scalars_dfs):
@@ -314,7 +313,9 @@ def test_series_construct_geodata():
314313

315314
series = bigframes.pandas.Series(pd_series)
316315

317-
assert_series_equal(pd_series, series.to_pandas(), check_index_type=False)
316+
pd.testing.assert_series_equal(
317+
pd_series, series.to_pandas(), check_index_type=False
318+
)
318319

319320

320321
@pytest.mark.parametrize(
@@ -580,8 +581,6 @@ def test_series___getitem__(scalars_dfs, index_col, key):
580581
),
581582
)
582583
def test_series___getitem___with_int_key(scalars_dfs, key):
583-
if pd.__version__.startswith("3."):
584-
pytest.skip("pandas 3.0 dropped getitem with int key")
585584
col_name = "int64_too"
586585
index_col = "string_col"
587586
scalars_df, scalars_pandas_df = scalars_dfs
@@ -836,7 +835,7 @@ def test_series_dropna(scalars_dfs, ignore_index):
836835
col_name = "string_col"
837836
bf_result = scalars_df[col_name].dropna(ignore_index=ignore_index).to_pandas()
838837
pd_result = scalars_pandas_df[col_name].dropna(ignore_index=ignore_index)
839-
assert_series_equal(pd_result, bf_result, check_index_type=False)
838+
pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
840839

841840

842841
@pytest.mark.parametrize(
@@ -1180,7 +1179,7 @@ def test_mods(scalars_dfs, col_x, col_y, method):
11801179
else:
11811180
bf_result = bf_series.astype("Float64").to_pandas()
11821181
pd_result = getattr(scalars_pandas_df[col_x], method)(scalars_pandas_df[col_y])
1183-
assert_series_equal(pd_result, bf_result, nulls_are_nan=True)
1182+
pd.testing.assert_series_equal(pd_result, bf_result)
11841183

11851184

11861185
# We work around a pandas bug that doesn't handle correlating nullable dtypes by doing this
@@ -1880,10 +1879,6 @@ def test_series_binop_w_other_types(scalars_dfs, other):
18801879
bf_result = (scalars_df["int64_col"].head(3) + other).to_pandas()
18811880
pd_result = scalars_pandas_df["int64_col"].head(3) + other
18821881

1883-
if isinstance(other, pd.Series):
1884-
# pandas 3.0 preserves series name, bigframe, earlier pandas do not
1885-
pd_result.index.name = bf_result.index.name
1886-
18871882
assert_series_equal(
18881883
bf_result,
18891884
pd_result,
@@ -3967,7 +3962,7 @@ def test_string_astype_date():
39673962
pd_result = pd_series.astype("date32[day][pyarrow]") # type: ignore
39683963
bf_result = bf_series.astype("date32[day][pyarrow]").to_pandas()
39693964

3970-
assert_series_equal(bf_result, pd_result, check_index_type=False)
3965+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
39713966

39723967

39733968
def test_string_astype_datetime():
@@ -3980,7 +3975,7 @@ def test_string_astype_datetime():
39803975
pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us")))
39813976
bf_result = bf_series.astype(pd.ArrowDtype(pa.timestamp("us"))).to_pandas()
39823977

3983-
assert_series_equal(bf_result, pd_result, check_index_type=False)
3978+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
39843979

39853980

39863981
def test_string_astype_timestamp():
@@ -3999,7 +3994,7 @@ def test_string_astype_timestamp():
39993994
pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
40003995
).to_pandas()
40013996

4002-
assert_series_equal(bf_result, pd_result, check_index_type=False)
3997+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
40033998

40043999

40054000
@pytest.mark.skip(reason="AssertionError: Series are different")
@@ -4620,20 +4615,15 @@ def test_apply_lambda(scalars_dfs, col, lambda_):
46204615
bf_result = bf_col.apply(lambda_, by_row=False).to_pandas()
46214616

46224617
pd_col = scalars_pandas_df[col]
4623-
if pd.__version__[:3] in ("2.2", "2.3") or pandas_major_version() >= 3:
4618+
if pd.__version__[:3] in ("2.2", "2.3"):
46244619
pd_result = pd_col.apply(lambda_, by_row=False)
46254620
else:
46264621
pd_result = pd_col.apply(lambda_)
46274622

46284623
# ignore dtype check, which are Int64 and object respectively
46294624
# Some columns implicitly convert to floating point. Use check_exact=False to ensure we're "close enough"
46304625
assert_series_equal(
4631-
bf_result,
4632-
pd_result,
4633-
check_dtype=False,
4634-
check_exact=False,
4635-
rtol=0.001,
4636-
nulls_are_nan=True,
4626+
bf_result, pd_result, check_dtype=False, check_exact=False, rtol=0.001
46374627
)
46384628

46394629

@@ -4815,12 +4805,7 @@ def foo(x):
48154805
# ignore dtype check, which are Int64 and object respectively
48164806
# Some columns implicitly convert to floating point. Use check_exact=False to ensure we're "close enough"
48174807
assert_series_equal(
4818-
bf_result,
4819-
pd_result,
4820-
check_dtype=False,
4821-
check_exact=False,
4822-
rtol=0.001,
4823-
nulls_are_nan=True,
4808+
bf_result, pd_result, check_dtype=False, check_exact=False, rtol=0.001
48244809
)
48254810

48264811

@@ -4939,7 +4924,7 @@ def test_series_explode_w_index(index, ignore_index):
49394924
s = bigframes.pandas.Series(data, index=index)
49404925
pd_s = pd.Series(data, index=index)
49414926
# TODO(b/340885567): fix type error
4942-
assert_series_equal(
4927+
pd.testing.assert_series_equal(
49434928
s.explode(ignore_index=ignore_index).to_pandas(), # type: ignore
49444929
pd_s.explode(ignore_index=ignore_index).astype(pd.Float64Dtype()), # type: ignore
49454930
check_index_type=False,

0 commit comments

Comments (0)