Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bigframes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
)
import bigframes.enums as enums # noqa: E402
import bigframes.exceptions as exceptions # noqa: E402

# Register pandas extensions
import bigframes.extensions.pandas.dataframe_accessor # noqa: F401, E402
from bigframes.session import connect, Session # noqa: E402
from bigframes.version import __version__ # noqa: E402

Expand Down
73 changes: 57 additions & 16 deletions bigframes/bigquery/_operations/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,31 @@

from __future__ import annotations

from typing import Sequence
from typing import cast, Optional, Sequence, Union

import google.cloud.bigquery

from bigframes.core.compile.sqlglot import sql
import bigframes.dataframe
import bigframes.dtypes
import bigframes.operations
import bigframes.series


def _format_names(sql_template: str, dataframe: bigframes.dataframe.DataFrame):
    """Rewrite ``sql_template`` so that it only uses positional placeholders.

    Placeholders that reference a column of ``dataframe`` by name (``{a}``)
    are replaced with the position of that column (``{0}``). Positional
    placeholders are kept as they are. Escaped literal braces (``{{`` and
    ``}}``) stay escaped in the result, because the returned template is
    formatted a second time with the actual column expressions.

    Args:
        sql_template (str):
            A SQL format string that refers to columns by name or position.
        dataframe (bigframes.dataframe.DataFrame):
            The frame whose ``columns`` define the name-to-position mapping.
            If a label occurs more than once, the last position is used.

    Returns:
        str: The template with every column name replaced by its position.

    Raises:
        KeyError: If a named placeholder does not match a string column label.
        IndexError: If a positional placeholder is out of range.
    """
    # Function-scope import so the helper does not depend on a module-level
    # import being added elsewhere in the file.
    import string

    numbers = [f"{{{i}}}" for i in range(len(dataframe.columns))]
    # Only string labels can be passed as keyword arguments to ``str.format``;
    # other labels (for example integers) remain reachable by position.
    names_to_numbers = {
        name: numbers[i]
        for i, name in enumerate(dataframe.columns)
        if isinstance(name, str)
    }

    pieces = []
    for literal, field_name, format_spec, conversion in string.Formatter().parse(
        sql_template
    ):
        # ``parse`` has already collapsed ``{{``/``}}`` into single braces.
        # Escape them twice: one level is consumed by the ``format`` call
        # below, the other must survive for the later positional pass.
        pieces.append(literal.replace("{", "{{{{").replace("}", "}}}}"))
        if field_name is None:
            continue

        # Rebuild the replacement field unchanged so that ``str.format``
        # keeps handling numbering, conversions and format specs.
        field = "{" + field_name
        if conversion:
            field += "!" + conversion
        if format_spec:
            field += ":" + format_spec
        pieces.append(field + "}")

    return "".join(pieces).format(*numbers, **names_to_numbers)


def sql_scalar(
sql_template: str,
columns: Sequence[bigframes.series.Series],
columns: Union[bigframes.dataframe.DataFrame, Sequence[bigframes.series.Series]],
*,
output_dtype: Optional[bigframes.dtypes.Dtype] = None,
) -> bigframes.series.Series:
"""Create a Series from a SQL template.

Expand All @@ -37,6 +49,9 @@ def sql_scalar(
>>> import bigframes.pandas as bpd
>>> import bigframes.bigquery as bbq

Either pass in a sequence of series, in which case use integers in the
format strings.

>>> s = bpd.Series(["1.5", "2.5", "3.5"])
>>> s = s.astype(pd.ArrowDtype(pa.decimal128(38, 9)))
>>> bbq.sql_scalar("ROUND({0}, 0, 'ROUND_HALF_EVEN')", [s])
Expand All @@ -45,13 +60,29 @@ def sql_scalar(
2 4.000000000
dtype: decimal128(38, 9)[pyarrow]

Or pass in a DataFrame, in which case use the column names in the format
strings.

>>> df = bpd.DataFrame({"a": ["1.5", "2.5", "3.5"]})
>>> df = df.astype({"a": pd.ArrowDtype(pa.decimal128(38, 9))})
>>> bbq.sql_scalar("ROUND({a}, 0, 'ROUND_HALF_EVEN')", df)
0 2.000000000
1 2.000000000
2 4.000000000
dtype: decimal128(38, 9)[pyarrow]

Args:
sql_template (str):
A SQL format string with Python-style placeholders: positional
({0}, {1}, ...) for each of the Series objects in ``columns``, or
column names (such as {a}) when ``columns`` is a DataFrame.
columns (Sequence[bigframes.pandas.Series]):
columns (
Sequence[bigframes.pandas.Series] | bigframes.pandas.DataFrame
):
Series objects, or a DataFrame whose columns are used, representing
the column inputs to the ``sql_template``. Must contain at least one
Series (or column).
output_dtype (a BigQuery DataFrames compatible dtype, optional):
If provided, BigQuery DataFrames uses this as the dtype of the
returned Series instead of inferring it, which avoids a dry run query.

Returns:
bigframes.pandas.Series:
Expand All @@ -60,28 +91,38 @@ def sql_scalar(
Raises:
ValueError: If ``columns`` is empty.
"""
if isinstance(columns, bigframes.dataframe.DataFrame):
sql_template = _format_names(sql_template, columns)
columns = [
cast(bigframes.series.Series, columns[column]) for column in columns.columns
]

if len(columns) == 0:
raise ValueError("Must provide at least one column in columns")

base_series = columns[0]

# To integrate this into our expression trees, we need to get the output
# type, so we do some manual compilation and a dry run query to get that.
# Another benefit of this is that if there is a syntax error in the SQL
# template, then this will fail with an error earlier in the process,
# aiding users in debugging.
literals_sql = [sql.to_sql(sql.literal(None, column.dtype)) for column in columns]
select_sql = sql_template.format(*literals_sql)
dry_run_sql = f"SELECT {select_sql}"

# Use the executor directly, because we want the original column IDs, not
# the user-friendly column names that block.to_sql_query() would produce.
base_series = columns[0]
bqclient = base_series._session.bqclient
job = bqclient.query(
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
)
_, output_type = bigframes.dtypes.convert_schema_field(job.schema[0])
if output_dtype is None:
literals_sql = [
sql.to_sql(sql.literal(None, column.dtype)) for column in columns
]
select_sql = sql_template.format(*literals_sql)
dry_run_sql = f"SELECT {select_sql}"

# Use the executor directly, because we want the original column IDs, not
# the user-friendly column names that block.to_sql_query() would produce.
bqclient = base_series._session.bqclient
job = bqclient.query(
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
)
_, output_dtype = bigframes.dtypes.convert_schema_field(job.schema[0])

op = bigframes.operations.SqlScalarOp(
_output_type=output_type, sql_template=sql_template
_output_type=output_dtype, sql_template=sql_template
)
return base_series._apply_nary_op(op, columns[1:])
13 changes: 13 additions & 0 deletions bigframes/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions bigframes/extensions/pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
67 changes: 67 additions & 0 deletions bigframes/extensions/pandas/dataframe_accessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import cast

import pandas
import pandas.api.extensions

import bigframes.core.global_session as bf_session
import bigframes.pandas as bpd


@pandas.api.extensions.register_dataframe_accessor("bigquery")
class BigQueryDataFrameAccessor:
    """
    Expose BigQuery DataFrames functionality on pandas DataFrame objects.

    The accessor is registered with pandas under the ``bigquery`` namespace,
    so its methods are reached through ``df.bigquery``.
    """

    def __init__(self, pandas_obj: pandas.DataFrame):
        # The pandas DataFrame this accessor instance is attached to.
        self._obj = pandas_obj

    def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
        """
        Apply a SQL scalar expression to the DataFrame and return a pandas Series.

        The DataFrame is loaded into BigQuery DataFrames with ``read_pandas``,
        the template is evaluated with ``bigframes.bigquery.sql_scalar``, and the
        result is brought back with ``to_pandas(ordered=True)``.

        Args:
            sql_template (str):
                A SQL format string with Python-style placeholders. Use {0}, {1},
                etc. to refer to columns by their position in ``df.columns``, or
                the column names, such as {a}.
            output_dtype (a BigQuery DataFrames compatible dtype, optional):
                If provided, it is forwarded to ``bigframes.bigquery.sql_scalar``
                as the dtype of the result, which avoids a dry run query.
            session (bigframes.session.Session, optional):
                The BigFrames session to use. If not provided, the default
                global session is used.

        Returns:
            pandas.Series:
                The result of the SQL scalar expression as a pandas Series.
        """
        # Deferred to call time to avoid a circular import with bigframes.bigquery.
        import bigframes.bigquery

        active_session = (
            bf_session.get_global_session() if session is None else session
        )
        bigframes_frame = cast(bpd.DataFrame, active_session.read_pandas(self._obj))
        scalar_series = bigframes.bigquery.sql_scalar(
            sql_template, bigframes_frame, output_dtype=output_dtype
        )
        return scalar_series.to_pandas(ordered=True)
10 changes: 10 additions & 0 deletions docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ packages.
bigframes.pandas.api.typing
bigframes.streaming

Pandas Extensions
~~~~~~~~~~~~~~~~~

BigQuery DataFrames provides extensions to pandas DataFrame objects.

.. autosummary::
:toctree: api

bigframes.extensions.pandas.dataframe_accessor.BigQueryDataFrameAccessor

ML APIs
~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions docs/user_guide/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ User Guide
Getting Started <../notebooks/getting_started/getting_started_bq_dataframes.ipynb>
Magics <../notebooks/getting_started/magics.ipynb>
ML Fundamentals <../notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb>
Pandas Extensions <../notebooks/getting_started/pandas_extensions.ipynb>

.. toctree::
:caption: DataFrames
Expand Down
Loading
Loading