Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 4d17400

Browse files
Update supported pandas APIs documentation links
Updated the `scripts/publish_api_coverage.py` script to generate documentation links that point to the new structure on `dataframes.bigquery.dev`. Instead of a monolithic page with anchors, the links now point to individual sub-pages for each API method/attribute, following the format `https://dataframes.bigquery.dev/reference/api/<FullClassName>.<MemberName>.html`. Updated the `URL_PREFIX` mapping to use fully qualified Python class names.
1 parent 4b8bf09 commit 4d17400

File tree

6 files changed

+30
-70
lines changed

6 files changed

+30
-70
lines changed

bigframes/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2303,7 +2303,7 @@ def to_dict(
23032303
*,
23042304
allow_large_results: Optional[bool] = None,
23052305
) -> typing.Mapping:
2306-
return typing.cast(dict, self.to_pandas(allow_large_results=allow_large_results).to_dict(into=into)) # type: ignore
2306+
return typing.cast(dict, self.to_pandas(allow_large_results=allow_large_results).to_dict(into)) # type: ignore
23072307

23082308
def to_excel(
23092309
self, excel_writer, sheet_name="Sheet1", *, allow_large_results=None, **kwargs

bigframes/testing/utils.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import base64
1616
import decimal
17-
import re
1817
from typing import Iterable, Optional, Sequence, Set, Union
1918

2019
import geopandas as gpd # type: ignore
@@ -70,12 +69,6 @@
7069
]
7170

7271

73-
def pandas_major_version() -> int:
74-
match = re.search(r"^v?(\d+)", pd.__version__.strip())
75-
assert match is not None
76-
return int(match.group(1))
77-
78-
7972
# Prefer this function for tests that run in both ordered and unordered mode
8073
def assert_dfs_equivalent(pd_df: pd.DataFrame, bf_df: bpd.DataFrame, **kwargs):
8174
bf_df_local = bf_df.to_pandas()
@@ -90,7 +83,7 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar
9083

9184

9285
def _normalize_all_nulls(col: pd.Series) -> pd.Series:
93-
if col.dtype in (bigframes.dtypes.FLOAT_DTYPE, bigframes.dtypes.INT_DTYPE):
86+
if col.dtype == bigframes.dtypes.FLOAT_DTYPE:
9487
col = col.astype("float64")
9588
if pd_types.is_object_dtype(col):
9689
col = col.fillna(float("nan"))
@@ -141,15 +134,6 @@ def assert_series_equal(
141134
left = left.sort_index()
142135
right = right.sort_index()
143136

144-
if isinstance(left.index, pd.RangeIndex) or pd_types.is_integer_dtype(
145-
left.index.dtype,
146-
):
147-
left.index = left.index.astype("Int64")
148-
if isinstance(right.index, pd.RangeIndex) or pd_types.is_integer_dtype(
149-
right.index.dtype,
150-
):
151-
right.index = right.index.astype("Int64")
152-
153137
if nulls_are_nan:
154138
left = _normalize_all_nulls(left)
155139
right = _normalize_all_nulls(right)

tests/unit/core/test_groupby.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
import bigframes.core.utils as utils
2020
import bigframes.pandas as bpd
21-
from bigframes.testing.utils import assert_series_equal
2221

2322
pytest.importorskip("polars")
2423
pytest.importorskip("pandas", minversion="2.0.0")
@@ -218,7 +217,7 @@ def test_groupby_series_iter_by_series(polars_session):
218217
bf_result = bf_group_series.to_pandas()
219218
pd_key, pd_result = pd_group
220219
assert bf_key == pd_key
221-
assert_series_equal(
220+
pandas.testing.assert_series_equal(
222221
bf_result, pd_result, check_dtype=False, check_index_type=False
223222
)
224223

@@ -237,7 +236,7 @@ def test_groupby_series_iter_by_series_list_one_item(polars_session):
237236
bf_result = bf_group_series.to_pandas()
238237
pd_key, pd_result = pd_group
239238
assert bf_key == pd_key
240-
assert_series_equal(
239+
pandas.testing.assert_series_equal(
241240
bf_result, pd_result, check_dtype=False, check_index_type=False
242241
)
243242

@@ -259,6 +258,6 @@ def test_groupby_series_iter_by_series_list_multiple(polars_session):
259258
bf_result = bf_group_series.to_pandas()
260259
pd_key, pd_result = pd_group
261260
assert bf_key == pd_key
262-
assert_series_equal(
261+
pandas.testing.assert_series_equal(
263262
bf_result, pd_result, check_dtype=False, check_index_type=False
264263
)

tests/unit/test_dataframe_polars.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -593,8 +593,8 @@ def test_drop_bigframes_index_with_na(scalars_dfs):
593593
scalars_pandas_df = scalars_pandas_df.copy()
594594
scalars_df = scalars_df.set_index("bytes_col")
595595
scalars_pandas_df = scalars_pandas_df.set_index("bytes_col")
596-
drop_index = scalars_df.iloc[[2, 5]].index
597-
drop_pandas_index = scalars_pandas_df.iloc[[2, 5]].index
596+
drop_index = scalars_df.iloc[[3, 5]].index
597+
drop_pandas_index = scalars_pandas_df.iloc[[3, 5]].index
598598

599599
pd_result = scalars_pandas_df.drop(index=drop_pandas_index) # drop_pandas_index)
600600
bf_result = scalars_df.drop(index=drop_index).to_pandas()
@@ -2682,10 +2682,9 @@ def test_dataframe_pct_change(scalars_df_index, scalars_pandas_df_index, periods
26822682
bf_result = scalars_df_index[col_names].pct_change(periods=periods).to_pandas()
26832683
# pandas 3.0 does not automatically ffill anymore
26842684
pd_result = scalars_pandas_df_index[col_names].ffill().pct_change(periods=periods)
2685-
assert_frame_equal(
2685+
pd.testing.assert_frame_equal(
26862686
pd_result,
26872687
bf_result,
2688-
nulls_are_nan=True,
26892688
)
26902689

26912690

@@ -4298,13 +4297,8 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
42984297
subset, normalize=normalize, ascending=ascending, dropna=dropna
42994298
)
43004299

4301-
assert_series_equal(
4302-
bf_result,
4303-
pd_result,
4304-
check_dtype=False,
4305-
check_index_type=False,
4306-
# different pandas versions inconsistent for tie-handling
4307-
ignore_order=True,
4300+
pd.testing.assert_series_equal(
4301+
bf_result, pd_result, check_dtype=False, check_index_type=False
43084302
)
43094303

43104304

tests/unit/test_local_engine.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import bigframes
2121
import bigframes.pandas as bpd
22-
from bigframes.testing.utils import assert_frame_equal, assert_series_equal
2322

2423
pytest.importorskip("polars")
2524
pytest.importorskip("pandas", minversion="2.0.0")
@@ -48,7 +47,7 @@ def test_polars_local_engine_series(polars_session: bigframes.Session):
4847
pd_series = pd.Series([1, 2, 3], dtype=bf_series.dtype)
4948
bf_result = bf_series.to_pandas()
5049
pd_result = pd_series
51-
assert_series_equal(bf_result, pd_result, check_index_type=False)
50+
pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
5251

5352

5453
def test_polars_local_engine_add(
@@ -75,9 +74,9 @@ def test_polars_local_engine_filter(small_inline_frame: pd.DataFrame, polars_ses
7574
pd_df = small_inline_frame
7675
bf_df = bpd.DataFrame(pd_df, session=polars_session)
7776

78-
bf_result = bf_df[bf_df["int2"] >= 1].to_pandas()
79-
pd_result = pd_df[pd_df["int2"] >= 1] # type: ignore
80-
assert_frame_equal(bf_result, pd_result)
77+
bf_result = bf_df.filter(bf_df["int2"] >= 1).to_pandas()
78+
pd_result = pd_df.filter(pd_df["int2"] >= 1) # type: ignore
79+
pandas.testing.assert_frame_equal(bf_result, pd_result)
8180

8281

8382
def test_polars_local_engine_series_rename_with_mapping(polars_session):
@@ -89,7 +88,7 @@ def test_polars_local_engine_series_rename_with_mapping(polars_session):
8988
bf_result = bf_series.rename({1: 100, 2: 200, 3: 300}).to_pandas()
9089
pd_result = pd_series.rename({1: 100, 2: 200, 3: 300})
9190
# pd default index is int64, bf is Int64
92-
assert_series_equal(bf_result, pd_result, check_index_type=False)
91+
pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
9392

9493

9594
def test_polars_local_engine_series_rename_with_mapping_inplace(polars_session):
@@ -104,7 +103,7 @@ def test_polars_local_engine_series_rename_with_mapping_inplace(polars_session):
104103
bf_result = bf_series.to_pandas()
105104
pd_result = pd_series
106105
# pd default index is int64, bf is Int64
107-
assert_series_equal(bf_result, pd_result, check_index_type=False)
106+
pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
108107

109108

110109
def test_polars_local_engine_reset_index(
@@ -130,12 +129,11 @@ def test_polars_local_engine_join_binop(polars_session):
130129
bf_result = (bf_df_1 + bf_df_2).to_pandas()
131130
pd_result = pd_df_1 + pd_df_2
132131
# Sort since different join ordering
133-
assert_frame_equal(
132+
pandas.testing.assert_frame_equal(
134133
bf_result.sort_index(),
135134
pd_result.sort_index(),
136135
check_dtype=False,
137136
check_index_type=False,
138-
nulls_are_nan=True,
139137
)
140138

141139

tests/unit/test_series_polars.py

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
assert_series_equal,
4343
convert_pandas_dtypes,
4444
get_first_file_from_wildcard,
45-
pandas_major_version,
4645
)
4746

4847
pytest.importorskip("polars")
@@ -148,7 +147,7 @@ def test_series_construct_timestamps():
148147
bf_result = series.Series(datetimes).to_pandas()
149148
pd_result = pd.Series(datetimes, dtype=pd.ArrowDtype(pa.timestamp("us")))
150149

151-
assert_series_equal(bf_result, pd_result, check_index_type=False)
150+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
152151

153152

154153
def test_series_construct_copy_with_index(scalars_dfs):
@@ -314,7 +313,9 @@ def test_series_construct_geodata():
314313

315314
series = bigframes.pandas.Series(pd_series)
316315

317-
assert_series_equal(pd_series, series.to_pandas(), check_index_type=False)
316+
pd.testing.assert_series_equal(
317+
pd_series, series.to_pandas(), check_index_type=False
318+
)
318319

319320

320321
@pytest.mark.parametrize(
@@ -580,8 +581,6 @@ def test_series___getitem__(scalars_dfs, index_col, key):
580581
),
581582
)
582583
def test_series___getitem___with_int_key(scalars_dfs, key):
583-
if pd.__version__.startswith("3."):
584-
pytest.skip("pandas 3.0 dropped getitem with int key")
585584
col_name = "int64_too"
586585
index_col = "string_col"
587586
scalars_df, scalars_pandas_df = scalars_dfs
@@ -836,7 +835,7 @@ def test_series_dropna(scalars_dfs, ignore_index):
836835
col_name = "string_col"
837836
bf_result = scalars_df[col_name].dropna(ignore_index=ignore_index).to_pandas()
838837
pd_result = scalars_pandas_df[col_name].dropna(ignore_index=ignore_index)
839-
assert_series_equal(pd_result, bf_result, check_index_type=False)
838+
pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
840839

841840

842841
@pytest.mark.parametrize(
@@ -1180,7 +1179,7 @@ def test_mods(scalars_dfs, col_x, col_y, method):
11801179
else:
11811180
bf_result = bf_series.astype("Float64").to_pandas()
11821181
pd_result = getattr(scalars_pandas_df[col_x], method)(scalars_pandas_df[col_y])
1183-
assert_series_equal(pd_result, bf_result, nulls_are_nan=True)
1182+
pd.testing.assert_series_equal(pd_result, bf_result)
11841183

11851184

11861185
# We work around a pandas bug that doesn't handle correlating nullable dtypes by doing this
@@ -1880,10 +1879,6 @@ def test_series_binop_w_other_types(scalars_dfs, other):
18801879
bf_result = (scalars_df["int64_col"].head(3) + other).to_pandas()
18811880
pd_result = scalars_pandas_df["int64_col"].head(3) + other
18821881

1883-
if isinstance(other, pd.Series):
1884-
# pandas 3.0 preserves series name, bigframe, earlier pandas do not
1885-
pd_result.index.name = bf_result.index.name
1886-
18871882
assert_series_equal(
18881883
bf_result,
18891884
pd_result,
@@ -3967,7 +3962,7 @@ def test_string_astype_date():
39673962
pd_result = pd_series.astype("date32[day][pyarrow]") # type: ignore
39683963
bf_result = bf_series.astype("date32[day][pyarrow]").to_pandas()
39693964

3970-
assert_series_equal(bf_result, pd_result, check_index_type=False)
3965+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
39713966

39723967

39733968
def test_string_astype_datetime():
@@ -3980,7 +3975,7 @@ def test_string_astype_datetime():
39803975
pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us")))
39813976
bf_result = bf_series.astype(pd.ArrowDtype(pa.timestamp("us"))).to_pandas()
39823977

3983-
assert_series_equal(bf_result, pd_result, check_index_type=False)
3978+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
39843979

39853980

39863981
def test_string_astype_timestamp():
@@ -3999,7 +3994,7 @@ def test_string_astype_timestamp():
39993994
pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
40003995
).to_pandas()
40013996

4002-
assert_series_equal(bf_result, pd_result, check_index_type=False)
3997+
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
40033998

40043999

40054000
@pytest.mark.skip(reason="AssertionError: Series are different")
@@ -4620,20 +4615,15 @@ def test_apply_lambda(scalars_dfs, col, lambda_):
46204615
bf_result = bf_col.apply(lambda_, by_row=False).to_pandas()
46214616

46224617
pd_col = scalars_pandas_df[col]
4623-
if pd.__version__[:3] in ("2.2", "2.3") or pandas_major_version() >= 3:
4618+
if pd.__version__[:3] in ("2.2", "2.3"):
46244619
pd_result = pd_col.apply(lambda_, by_row=False)
46254620
else:
46264621
pd_result = pd_col.apply(lambda_)
46274622

46284623
# ignore dtype check, which are Int64 and object respectively
46294624
# Some columns implicitly convert to floating point. Use check_exact=False to ensure we're "close enough"
46304625
assert_series_equal(
4631-
bf_result,
4632-
pd_result,
4633-
check_dtype=False,
4634-
check_exact=False,
4635-
rtol=0.001,
4636-
nulls_are_nan=True,
4626+
bf_result, pd_result, check_dtype=False, check_exact=False, rtol=0.001
46374627
)
46384628

46394629

@@ -4815,12 +4805,7 @@ def foo(x):
48154805
# ignore dtype check, which are Int64 and object respectively
48164806
# Some columns implicitly convert to floating point. Use check_exact=False to ensure we're "close enough"
48174807
assert_series_equal(
4818-
bf_result,
4819-
pd_result,
4820-
check_dtype=False,
4821-
check_exact=False,
4822-
rtol=0.001,
4823-
nulls_are_nan=True,
4808+
bf_result, pd_result, check_dtype=False, check_exact=False, rtol=0.001
48244809
)
48254810

48264811

@@ -4939,7 +4924,7 @@ def test_series_explode_w_index(index, ignore_index):
49394924
s = bigframes.pandas.Series(data, index=index)
49404925
pd_s = pd.Series(data, index=index)
49414926
# TODO(b/340885567): fix type error
4942-
assert_series_equal(
4927+
pd.testing.assert_series_equal(
49434928
s.explode(ignore_index=ignore_index).to_pandas(), # type: ignore
49444929
pd_s.explode(ignore_index=ignore_index).astype(pd.Float64Dtype()), # type: ignore
49454930
check_index_type=False,

0 commit comments

Comments (0)