Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bigframes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
)
import bigframes.enums as enums # noqa: E402
import bigframes.exceptions as exceptions # noqa: E402

# Register pandas extensions
import bigframes.extensions.pandas.dataframe_accessor # noqa: F401, E402
from bigframes.session import connect, Session # noqa: E402
from bigframes.version import __version__ # noqa: E402

Expand Down
73 changes: 57 additions & 16 deletions bigframes/bigquery/_operations/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,31 @@

from __future__ import annotations

from typing import Sequence
from typing import cast, Optional, Sequence, Union

import google.cloud.bigquery

from bigframes.core.compile.sqlglot import sql
import bigframes.dataframe
import bigframes.dtypes
import bigframes.operations
import bigframes.series


def _format_names(sql_template: str, dataframe: bigframes.dataframe.DataFrame):
    """Turn sql_template from a template that uses names to one that uses
    numbers.

    Placeholders that reference a column by name (``{a}``) are rewritten to
    the positional placeholder for that column (``{0}``). Positional
    placeholders are kept as they are. Escaped literal braces (``{{`` and
    ``}}``) stay escaped in the result, because the returned string is itself
    used as a format template afterwards.

    Args:
        sql_template (str):
            A Python format string whose fields refer to the columns of
            ``dataframe`` by position or by name.
        dataframe:
            Object whose ``columns`` attribute lists the column labels in
            positional order.

    Returns:
        str: The template with every column reference in positional form.

    Raises:
        KeyError: If the template names a field that is not a string column
            label of ``dataframe``.
        IndexError: If the template uses a position past the last column.
    """
    # Function-scope import keeps this helper self-contained.
    import string

    column_labels = list(dataframe.columns)
    positional = [f"{{{i}}}" for i in range(len(column_labels))]
    # Only string labels can be passed as keyword arguments to ``str.format``;
    # other labels (such as integers) remain reachable by position. When a
    # label is repeated, the last occurrence wins.
    named = {
        label: f"{{{i}}}"
        for i, label in enumerate(column_labels)
        if isinstance(label, str)
    }

    # Rebuild the template with replacement fields reproduced verbatim and
    # literal braces escaped twice, so that the single ``format`` call below
    # leaves them escaped once in the output.
    pieces = []
    for literal, field, spec, conversion in string.Formatter().parse(sql_template):
        pieces.append(literal.replace("{", "{{{{").replace("}", "}}}}"))
        if field is None:
            continue
        replacement = "{" + field
        if conversion:
            replacement += "!" + conversion
        if spec:
            replacement += ":" + spec
        pieces.append(replacement + "}")

    return "".join(pieces).format(*positional, **named)


def sql_scalar(
sql_template: str,
columns: Sequence[bigframes.series.Series],
columns: Union[bigframes.dataframe.DataFrame, Sequence[bigframes.series.Series]],
*,
output_dtype: Optional[bigframes.dtypes.Dtype] = None,
) -> bigframes.series.Series:
"""Create a Series from a SQL template.

Expand All @@ -37,6 +49,9 @@ def sql_scalar(
>>> import bigframes.pandas as bpd
>>> import bigframes.bigquery as bbq

Either pass in a sequence of series, in which case use integers in the
format strings.

>>> s = bpd.Series(["1.5", "2.5", "3.5"])
>>> s = s.astype(pd.ArrowDtype(pa.decimal128(38, 9)))
>>> bbq.sql_scalar("ROUND({0}, 0, 'ROUND_HALF_EVEN')", [s])
Expand All @@ -45,13 +60,29 @@ def sql_scalar(
2 4.000000000
dtype: decimal128(38, 9)[pyarrow]

Or pass in a DataFrame, in which case use the column names in the format
strings.

>>> df = bpd.DataFrame({"a": ["1.5", "2.5", "3.5"]})
>>> df = df.astype({"a": pd.ArrowDtype(pa.decimal128(38, 9))})
>>> bbq.sql_scalar("ROUND({a}, 0, 'ROUND_HALF_EVEN')", df)
0 2.000000000
1 2.000000000
2 4.000000000
dtype: decimal128(38, 9)[pyarrow]

Args:
sql_template (str):
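A SQL format string with Python-style placeholders. Use positional
placeholders such as {0} to refer to the inputs in ``columns`` by
position or, when ``columns`` is a DataFrame, column-name placeholders
such as {a}.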
columns (Sequence[bigframes.pandas.Series]):
columns (
Sequence[bigframes.pandas.Series] | bigframes.pandas.DataFrame
):
Series objects representing the column inputs to the
``sql_template``. Must contain at least one Series.
output_dtype (a BigQuery DataFrames compatible dtype, optional):
If provided, BigQuery DataFrames uses this as the dtype of the
returned Series. This avoids a dry run query.

Returns:
bigframes.pandas.Series:
Expand All @@ -60,28 +91,38 @@ def sql_scalar(
Raises:
ValueError: If ``columns`` is empty.
"""
if isinstance(columns, bigframes.dataframe.DataFrame):
sql_template = _format_names(sql_template, columns)
columns = [
cast(bigframes.series.Series, columns[column]) for column in columns.columns
]

if len(columns) == 0:
raise ValueError("Must provide at least one column in columns")

base_series = columns[0]

# To integrate this into our expression trees, we need to get the output
# type, so we do some manual compilation and a dry run query to get that.
# Another benefit of this is that if there is a syntax error in the SQL
# template, then this will fail with an error earlier in the process,
# aiding users in debugging.
literals_sql = [sql.to_sql(sql.literal(None, column.dtype)) for column in columns]
select_sql = sql_template.format(*literals_sql)
dry_run_sql = f"SELECT {select_sql}"

# Use the executor directly, because we want the original column IDs, not
# the user-friendly column names that block.to_sql_query() would produce.
base_series = columns[0]
bqclient = base_series._session.bqclient
job = bqclient.query(
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
)
_, output_type = bigframes.dtypes.convert_schema_field(job.schema[0])
if output_dtype is None:
literals_sql = [
sql.to_sql(sql.literal(None, column.dtype)) for column in columns
]
select_sql = sql_template.format(*literals_sql)
dry_run_sql = f"SELECT {select_sql}"

# Use the executor directly, because we want the original column IDs, not
# the user-friendly column names that block.to_sql_query() would produce.
bqclient = base_series._session.bqclient
job = bqclient.query(
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
)
_, output_dtype = bigframes.dtypes.convert_schema_field(job.schema[0])

op = bigframes.operations.SqlScalarOp(
_output_type=output_type, sql_template=sql_template
_output_type=output_dtype, sql_template=sql_template
)
return base_series._apply_nary_op(op, columns[1:])
13 changes: 13 additions & 0 deletions bigframes/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions bigframes/extensions/pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
67 changes: 67 additions & 0 deletions bigframes/extensions/pandas/dataframe_accessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import cast

import pandas
import pandas.api.extensions

import bigframes.core.global_session as bf_session
import bigframes.pandas as bpd


@pandas.api.extensions.register_dataframe_accessor("bigquery")
class BigQueryDataFrameAccessor:
    """
    Expose BigQuery DataFrames functionality on pandas DataFrame objects.

    The accessor is registered with pandas under the ``bigquery`` name, so it
    is reached as ``df.bigquery`` on a pandas DataFrame.
    """

    def __init__(self, pandas_obj: pandas.DataFrame):
        # Keep a reference to the wrapped pandas DataFrame.
        self._obj = pandas_obj

    def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
        """
        Apply a SQL scalar expression to the DataFrame and return a pandas Series.

        The wrapped DataFrame is uploaded with ``session.read_pandas``, the
        template is evaluated through ``bigframes.bigquery.sql_scalar``, and
        the result is downloaded again with ``to_pandas(ordered=True)``.

        Args:
            sql_template (str):
                A SQL format string with Python-style placeholders for the
                columns of the DataFrame: positional ({0}, {1}, ... in the
                order of ``df.columns``) or, because a DataFrame is passed to
                ``bigframes.bigquery.sql_scalar``, the column names.
            output_dtype (a BigQuery DataFrames compatible dtype, optional):
                If provided, it is forwarded to
                ``bigframes.bigquery.sql_scalar`` as the dtype of the
                resulting Series, which avoids a dry run query.
            session (bigframes.session.Session, optional):
                The BigFrames session to use. When omitted, the default
                global session is used.

        Returns:
            pandas.Series:
                The result of the SQL scalar expression as a pandas Series.
        """
        # Deferred import: importing bigframes.bigquery at module load time
        # would create a circular import.
        import bigframes.bigquery

        active_session = (
            bf_session.get_global_session() if session is None else session
        )
        remote_frame = cast(bpd.DataFrame, active_session.read_pandas(self._obj))
        scalar_series = bigframes.bigquery.sql_scalar(
            sql_template,
            remote_frame,
            output_dtype=output_dtype,
        )
        return scalar_series.to_pandas(ordered=True)
5 changes: 4 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@
# See https://github.com/sphinx-doc/sphinx/blob
# /2a65ffeef5c107c19084fabdd706cdff3f52d93c/sphinx/domains/python.py#L843
"ref.python",
# Allow external websites to be down occasionally.
"intersphinx.external",
]

# -- Options for LaTeX output ---------------------------------------------
Expand Down Expand Up @@ -388,7 +390,8 @@
"grpc": ("https://grpc.github.io/grpc/python/", None),
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
# TODO(tswast): re-enable if we can get temporary failures to be ignored.
# "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"pydata-google-auth": (
"https://pydata-google-auth.readthedocs.io/en/latest/",
None,
Expand Down
10 changes: 10 additions & 0 deletions docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ packages.
bigframes.pandas.api.typing
bigframes.streaming

Pandas Extensions
~~~~~~~~~~~~~~~~~

BigQuery DataFrames provides extensions to pandas DataFrame objects.

.. autosummary::
:toctree: api

bigframes.extensions.pandas.dataframe_accessor.BigQueryDataFrameAccessor

ML APIs
~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions docs/user_guide/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ User Guide
Getting Started <../notebooks/getting_started/getting_started_bq_dataframes.ipynb>
Magics <../notebooks/getting_started/magics.ipynb>
ML Fundamentals <../notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb>
Pandas Extensions <../notebooks/getting_started/pandas_extensions.ipynb>

.. toctree::
:caption: DataFrames
Expand Down
Loading
Loading