Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit c1193d1

Browse files
committed
feat: add df.bigquery pandas accessor
1 parent 5cf3788 commit c1193d1

File tree

11 files changed

+308
-0
lines changed

11 files changed

+308
-0
lines changed

bigframes/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
)
3333
import bigframes.enums as enums # noqa: E402
3434
import bigframes.exceptions as exceptions # noqa: E402
35+
36+
# Register pandas extensions
37+
import bigframes.extensions.pandas.dataframe_accessor # noqa: F401, E402
3538
from bigframes.session import connect, Session # noqa: E402
3639
from bigframes.version import __version__ # noqa: E402
3740

bigframes/extensions/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from typing import cast
16+
17+
import pandas
18+
import pandas.api.extensions
19+
20+
import bigframes.core.global_session as bf_session
21+
import bigframes.pandas as bpd
22+
23+
24+
@pandas.api.extensions.register_dataframe_accessor("bigquery")
class BigQueryDataFrameAccessor:
    """
    Expose BigQuery DataFrames helpers on plain pandas DataFrame objects.

    The class is registered with pandas under the ``bigquery`` name, so its
    methods are reached as ``df.bigquery.<method>(...)``.
    """

    def __init__(self, pandas_obj: pandas.DataFrame):
        # Keep the DataFrame the accessor was reached through.
        self._obj = pandas_obj

    def sql_scalar(self, sql_template: str, session=None):
        """
        Evaluate a SQL scalar expression over the columns of the DataFrame.

        The wrapped pandas DataFrame is loaded through the session's
        ``read_pandas``, each column of the loaded frame is passed to
        ``bigframes.bigquery.sql_scalar`` together with ``sql_template``, and
        the outcome is brought back locally with ``to_pandas``.

        Args:
            sql_template (str):
                A SQL format string with Python-style {0}, {1}, etc.
                placeholders, one per column of the DataFrame (in the order
                they appear in ``df.columns``).
            session (bigframes.session.Session, optional):
                The BigFrames session used to load the data. When omitted,
                the default global session is used.

        Returns:
            pandas.Series:
                The values produced by the SQL scalar expression.
        """
        active_session = (
            bf_session.get_global_session() if session is None else session
        )
        remote_frame = cast(bpd.DataFrame, active_session.read_pandas(self._obj))

        # Deferred import: loading bigframes.bigquery at module import time
        # would create a circular import.
        import bigframes.bigquery

        series_args = []
        for label in remote_frame.columns:
            series_args.append(cast(bpd.Series, remote_frame[label]))

        remote_result = bigframes.bigquery.sql_scalar(sql_template, series_args)
        return remote_result.to_pandas()

docs/reference/index.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,16 @@ packages.
1919
bigframes.pandas.api.typing
2020
bigframes.streaming
2121

22+
Pandas Extensions
23+
~~~~~~~~~~~~~~~~~
24+
25+
BigQuery DataFrames provides extensions to pandas DataFrame objects.
26+
27+
.. autosummary::
28+
:toctree: api
29+
30+
bigframes.extensions.pandas.dataframe_accessor.BigQueryDataFrameAccessor
31+
2232
ML APIs
2333
~~~~~~~
2434

docs/user_guide/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ User Guide
1818
Getting Started <../notebooks/getting_started/getting_started_bq_dataframes.ipynb>
1919
Magics <../notebooks/getting_started/magics.ipynb>
2020
ML Fundamentals <../notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb>
21+
Pandas Extensions <../notebooks/getting_started/pandas_extensions.ipynb>
2122

2223
.. toctree::
2324
:caption: DataFrames
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Pandas Extension for BigQuery DataFrames\n",
8+
"\n",
9+
"BigQuery DataFrames provides a pandas extension to execute BigQuery SQL scalar functions directly on pandas DataFrames."
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import pandas as pd\n",
19+
"import bigframes.pandas as bpd\n",
20+
"import bigframes"
21+
]
22+
},
23+
{
24+
"cell_type": "markdown",
25+
"metadata": {},
26+
"source": [
27+
"## Using `sql_scalar`\n",
28+
"\n",
29+
"The `bigquery.sql_scalar` method allows you to apply a SQL scalar function to a pandas DataFrame by converting it to BigFrames, executing the SQL in BigQuery, and returning the result as a pandas Series."
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": [
38+
"df = pd.DataFrame({\"a\": [1.5, 2.5, 3.5]})\n",
39+
"result = df.bigquery.sql_scalar(\"ROUND({0}, 0)\")\n",
40+
"result"
41+
]
42+
},
43+
{
44+
"cell_type": "markdown",
45+
"metadata": {},
46+
"source": [
47+
"You can also use multiple columns."
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"df = pd.DataFrame({\"a\": [1, 2, 3], \"b\": [10, 20, 30]})\n",
57+
"result = df.bigquery.sql_scalar(\"{0} + {1}\")\n",
58+
"result"
59+
]
60+
}
61+
],
62+
"metadata": {
63+
"kernelspec": {
64+
"display_name": "Python 3",
65+
"language": "python",
66+
"name": "python3"
67+
},
68+
"language_info": {
69+
"codemirror_mode": {
70+
"name": "ipython",
71+
"version": 3
72+
},
73+
"file_extension": ".py",
74+
"mimetype": "text/x-python",
75+
"name": "python",
76+
"nbconvert_exporter": "python",
77+
"pygments_lexer": "ipython3",
78+
"version": "3.12.9"
79+
}
80+
},
81+
"nbformat": 4,
82+
"nbformat_minor": 4
83+
}

tests/unit/extensions/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest.mock as mock
16+
17+
import pandas as pd
18+
19+
# Importing bigframes registers the accessor.
20+
import bigframes # noqa: F401
21+
22+
23+
def test_dataframe_accessor_sql_scalar():
    """Without an explicit session, the accessor loads data via the global session.

    The accessor calls ``bigframes.core.global_session.get_global_session()``
    and then ``read_pandas`` on the returned session, so those are the seams
    that are mocked here. Patching the module-level
    ``bigframes.pandas.io.api.read_pandas`` would never be observed by the
    accessor.
    """
    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    # Stand-in for the BigFrames DataFrame returned by ``read_pandas``.
    mock_bf_df = mock.MagicMock()
    mock_bf_df.columns = ["a", "b"]
    mock_bf_df.__getitem__.side_effect = lambda x: f"series_{x}"

    mock_session = mock.MagicMock()
    mock_session.read_pandas.return_value = mock_bf_df

    mock_result_series = mock.MagicMock()
    mock_result_series.to_pandas.return_value = pd.Series([4, 6])

    with mock.patch(
        "bigframes.core.global_session.get_global_session",
        return_value=mock_session,
    ) as mock_get_global_session:
        with mock.patch(
            "bigframes.bigquery.sql_scalar", return_value=mock_result_series
        ) as mock_sql_scalar:
            # This should trigger the accessor.
            result = df.bigquery.sql_scalar("ROUND({0} + {1})")

    mock_get_global_session.assert_called_once_with()

    mock_session.read_pandas.assert_called_once()
    # Identity check: DataFrame equality is element-wise, so compare with ``is``.
    assert mock_session.read_pandas.call_args[0][0] is df

    mock_sql_scalar.assert_called_once_with(
        "ROUND({0} + {1})", ["series_a", "series_b"]
    )

    pd.testing.assert_series_equal(result, pd.Series([4, 6]))
49+
50+
51+
def test_dataframe_accessor_sql_scalar_with_session():
    """An explicitly supplied session is used instead of the global session.

    The accessor calls ``session.read_pandas(df)`` on the object it is given,
    so the assertions target ``mock_session.read_pandas`` rather than the
    module-level ``bigframes.pandas.io.api.read_pandas``.
    """
    df = pd.DataFrame({"a": [1]})

    # Stand-in for the BigFrames DataFrame returned by ``read_pandas``.
    mock_bf_df = mock.MagicMock()
    mock_bf_df.columns = ["a"]
    mock_bf_df.__getitem__.side_effect = lambda x: f"series_{x}"

    mock_session = mock.MagicMock()
    mock_session.read_pandas.return_value = mock_bf_df

    with mock.patch(
        "bigframes.core.global_session.get_global_session"
    ) as mock_get_global_session:
        with mock.patch("bigframes.bigquery.sql_scalar") as mock_sql_scalar:
            df.bigquery.sql_scalar("template", session=mock_session)

    # The global session must not be consulted when one is passed in.
    mock_get_global_session.assert_not_called()

    mock_session.read_pandas.assert_called_once()
    # Identity check: DataFrame equality is element-wise, so compare with ``is``.
    assert mock_session.read_pandas.call_args[0][0] is df

    mock_sql_scalar.assert_called_once_with("template", ["series_a"])

0 commit comments

Comments
 (0)