Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 6353d6e

Browse files
authored
feat: make all and any compatible with integer columns on Polars session (#2154)
* docs: remove import bigframes.pandas as bpd boilerplate from many samples
  Also, fixes several constructors that didn't take a session for compatibility with multi-session applications.
* fix docs
* fix unit tests
* skip sklearn test
* fix snapshot
* plumb through session for from_tuples and from_arrays
* add from_frame
* make sure polars session isn't skipped on Kokoro
* fix apply doctest
* make doctest conftest available everywhere
* add python version flexibility for to_dict
* disambiguate explicit names
* disambiguate explicit name none versus no name
* fix for column name comparison in pandas bin op
* avoid setting column labels in special case of Series(block)
* revert doctest changes
* revert doctest changes
* revert df docstrings
* add polars series unit tests
* restore a test
* Revert "restore a test"
  This reverts commit 765b678.
* skip null
* skip unsupported tests
* revert more docs changes
* revert more docs
* revert more docs
* fix unit tests python 3.13
* add test to reproduce name error
* revert new session methods
* fix TestSession read_pandas for Series
* revert more unnecessary changes
* even more
* add unit_noextras to improve code coverage
* run system tests on latest fully supported
* system-3.12 not found
* cap polars version
* hide progress bar
* relax polars upper pin
1 parent ddb4df0 commit 6353d6e

File tree

7 files changed

+5058
-11
lines changed

7 files changed

+5058
-11
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -493,9 +493,9 @@ def compile_agg_op(
493493
if isinstance(op, agg_ops.MedianOp):
494494
return pl.median(*inputs)
495495
if isinstance(op, agg_ops.AllOp):
496-
return pl.all(*inputs)
496+
return pl.col(inputs).cast(pl.Boolean).all()
497497
if isinstance(op, agg_ops.AnyOp):
498-
return pl.any(*inputs) # type: ignore
498+
return pl.col(inputs).cast(pl.Boolean).any()
499499
if isinstance(op, agg_ops.NuniqueOp):
500500
return pl.col(*inputs).drop_nulls().n_unique()
501501
if isinstance(op, agg_ops.MinOp):

bigframes/testing/polars_session.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,24 @@ def __init__(self):
9494
self._loader = None # type: ignore
9595

9696
def read_pandas(self, pandas_dataframe, write_engine="default"):
97+
original_input = pandas_dataframe
98+
9799
# override read_pandas to always keep data local-only
98-
if isinstance(pandas_dataframe, pandas.Series):
100+
if isinstance(pandas_dataframe, (pandas.Series, pandas.Index)):
99101
pandas_dataframe = pandas_dataframe.to_frame()
102+
100103
local_block = bigframes.core.blocks.Block.from_local(pandas_dataframe, self)
101-
return bigframes.dataframe.DataFrame(local_block)
104+
bf_df = bigframes.dataframe.DataFrame(local_block)
105+
106+
if isinstance(original_input, pandas.Series):
107+
series = bf_df[bf_df.columns[0]]
108+
series.name = original_input.name
109+
return series
110+
111+
if isinstance(original_input, pandas.Index):
112+
return bf_df.index
113+
114+
return bf_df
102115

103116
@property
104117
def bqclient(self):

noxfile.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@
4646
"3.11",
4747
]
4848

49-
# pytest-retry is not yet compatible with pytest 8.x.
50-
# https://github.com/str0zzapreti/pytest-retry/issues/32
51-
PYTEST_VERSION = "pytest<8.0.0dev"
49+
PYTEST_VERSION = "pytest==8.4.2"
5250
SPHINX_VERSION = "sphinx==4.5.0"
5351
LINT_PATHS = [
5452
"docs",
@@ -91,7 +89,7 @@
9189
# 3.10 is needed for Windows tests as it is the only version installed in the
9290
# bigframes-windows container image. For more information, search
9391
# bigframes/windows-docker, internally.
94-
SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.13"]
92+
SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]
9593
SYSTEM_TEST_STANDARD_DEPENDENCIES = [
9694
"jinja2",
9795
"mock",
@@ -115,7 +113,7 @@
115113
# Make sure we leave some versions without "extras" so we know those
116114
# dependencies are actually optional.
117115
"3.10": ["tests", "scikit-learn", "anywidget"],
118-
"3.11": ["tests", "scikit-learn", "polars", "anywidget"],
116+
LATEST_FULLY_SUPPORTED_PYTHON: ["tests", "scikit-learn", "polars", "anywidget"],
119117
"3.13": ["tests", "polars", "anywidget"],
120118
}
121119

@@ -132,7 +130,7 @@
132130
# from GitHub actions.
133131
"unit_noextras",
134132
"system-3.9", # No extras.
135-
"system-3.11",
133+
f"system-{LATEST_FULLY_SUPPORTED_PYTHON}", # All extras.
136134
"cover",
137135
# TODO(b/401609005): remove
138136
"cleanup",

tests/system/small/engines/test_generic_ops.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from bigframes.session import polars_executor
2323
from bigframes.testing.engine_utils import assert_equivalence_execution
2424

25-
pytest.importorskip("polars")
25+
polars = pytest.importorskip("polars")
2626

2727
# Polars is used as the reference because it's fast and local. Generally, though, prefer the gbq engine where they disagree.
2828
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
@@ -54,6 +54,12 @@ def apply_op(
5454

5555
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
5656
def test_engines_astype_int(scalars_array_value: array_value.ArrayValue, engine):
57+
polars_version = tuple([int(part) for part in polars.__version__.split(".")])
58+
if polars_version >= (1, 34, 0):
59+
# TODO(https://github.com/pola-rs/polars/issues/24841): Remove this when
60+
# polars fixes Decimal to Int cast.
61+
scalars_array_value = scalars_array_value.drop_columns(["numeric_col"])
62+
5763
arr = apply_op(
5864
scalars_array_value,
5965
ops.AsTypeOp(to_type=bigframes.dtypes.INT_DTYPE),

tests/unit/test_local_engine.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@ def small_inline_frame() -> pd.DataFrame:
4242
return df
4343

4444

45+
def test_polars_local_engine_series(polars_session: bigframes.Session):
46+
bf_series = bpd.Series([1, 2, 3], session=polars_session)
47+
pd_series = pd.Series([1, 2, 3], dtype=bf_series.dtype)
48+
bf_result = bf_series.to_pandas()
49+
pd_result = pd_series
50+
pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
51+
52+
4553
def test_polars_local_engine_add(
4654
small_inline_frame: pd.DataFrame, polars_session: bigframes.Session
4755
):

0 commit comments

Comments
 (0)