Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit fbadd19

Browse files
committed
Merge branch 'main' into shuowei-fix-compiler-syntax-guards
2 parents d4ac766 + 96597f0 commit fbadd19

File tree

140 files changed

+11409
-10862
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+11409
-10862
lines changed

.github/workflows/docs.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,3 @@ jobs:
3636
run: |
3737
python -m pip install --upgrade setuptools pip wheel
3838
python -m pip install nox
39-
- name: Run docfx
40-
run: |
41-
nox -s docfx

.librarian/state.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:160860d189ff1c2f7515638478823712fa5b243e27ccc33a2728669fa1e2ed0c
22
libraries:
33
- id: bigframes
4-
version: 2.37.0
4+
version: 2.38.0
55
last_generated_commit: ""
66
apis: []
77
source_roots:

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.38.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.37.0...v2.38.0) (2026-03-16)
8+
9+
10+
### Documentation
11+
12+
* add notebooks to user guide page (#2505) ([5cf37888bc0b4b1b0993dadd1e0fe5ee08341ef4](https://github.com/googleapis/python-bigquery-dataframes/commit/5cf37888bc0b4b1b0993dadd1e0fe5ee08341ef4))
13+
* Fix typo in ExperimentOptions class docstring (#2498) ([077cb2ebe515fc5e07bcbb5dc663edd28d3eaf00](https://github.com/googleapis/python-bigquery-dataframes/commit/077cb2ebe515fc5e07bcbb5dc663edd28d3eaf00))
14+
15+
16+
### Features
17+
18+
* add `df.bigquery` pandas accessor (#2513) ([91b6c245521218bb78b543885e1b9424278ce2ab](https://github.com/googleapis/python-bigquery-dataframes/commit/91b6c245521218bb78b543885e1b9424278ce2ab))
19+
* use EUC for AI IF, CLASSIFY, and SCORE when connection is not provided (#2507) ([fe94910abff28e244dd79e1540a6c2184a12eb44](https://github.com/googleapis/python-bigquery-dataframes/commit/fe94910abff28e244dd79e1540a6c2184a12eb44))
20+
* Add `bigframes.bigquery.rand()` function (#2501) ([5c43efb745118f506ecc30196da68e9d6f4346dc](https://github.com/googleapis/python-bigquery-dataframes/commit/5c43efb745118f506ecc30196da68e9d6f4346dc))
21+
* add bigquery.ml.get_insights function (#2493) ([d29a60953ac989bb2c95e6eec3010620ac776a3c](https://github.com/googleapis/python-bigquery-dataframes/commit/d29a60953ac989bb2c95e6eec3010620ac776a3c))
22+
* Add str, dt accessors to pd.col Expression objects (#2488) ([ce5de57019449ca77d308946df72f04289343b51](https://github.com/googleapis/python-bigquery-dataframes/commit/ce5de57019449ca77d308946df72f04289343b51))
23+
24+
25+
### Bug Fixes
26+
27+
* handle unsupported types and empty results in describe (#2506) ([2326ad6aec15c20a66756eff093b50be484b3ba8](https://github.com/googleapis/python-bigquery-dataframes/commit/2326ad6aec15c20a66756eff093b50be484b3ba8))
28+
* no longer automatically use anywidget in the `%%bqsql` magics (#2504) ([43353e2bc9ffbc38b7383c24ecaac80d3b8bab32](https://github.com/googleapis/python-bigquery-dataframes/commit/43353e2bc9ffbc38b7383c24ecaac80d3b8bab32))
29+
730
## [2.37.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.36.0...v2.37.0) (2026-03-03)
831

932

bigframes/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
)
3333
import bigframes.enums as enums # noqa: E402
3434
import bigframes.exceptions as exceptions # noqa: E402
35+
36+
# Register pandas extensions
37+
import bigframes.extensions.pandas.dataframe_accessor # noqa: F401, E402
3538
from bigframes.session import connect, Session # noqa: E402
3639
from bigframes.version import __version__ # noqa: E402
3740

bigframes/bigquery/__init__.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,38 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""This module integrates BigQuery built-in functions for use with DataFrame objects,
16-
such as array functions:
17-
https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. """
15+
"""
16+
Access BigQuery-specific operations and namespaces within BigQuery DataFrames.
17+
18+
This module provides specialized functions and sub-modules that expose BigQuery's
19+
advanced capabilities to DataFrames and Series. It acts as a bridge between the
20+
pandas-compatible API and the full power of BigQuery SQL.
21+
22+
Key sub-modules include:
23+
24+
* :mod:`bigframes.bigquery.ai`: Generative and predictive AI functions (Gemini, BQML).
25+
* :mod:`bigframes.bigquery.ml`: Direct access to BigQuery ML model operations.
26+
* :mod:`bigframes.bigquery.obj`: Support for BigQuery object tables.
27+
28+
This module also provides direct access to optimized BigQuery functions for:
29+
30+
* **JSON Processing:** High-performance functions like ``json_extract``, ``json_value``,
31+
and ``parse_json`` for handling semi-structured data.
32+
* **Geospatial Analysis:** Comprehensive geographic functions such as ``st_area``,
33+
``st_distance``, and ``st_centroid`` (``ST_`` prefixed functions).
34+
* **Array Operations:** Tools for working with BigQuery arrays, including ``array_agg``
35+
and ``array_length``.
36+
* **Vector Search:** Integration with BigQuery's vector search and indexing
37+
capabilities for high-dimensional data.
38+
* **Custom SQL:** The ``sql_scalar`` function allows embedding raw SQL snippets for
39+
advanced operations not yet directly mapped in the API.
40+
41+
By using these functions, you can leverage BigQuery's high-performance engine for
42+
domain-specific tasks while maintaining a Python-centric development experience.
43+
44+
For the full list of BigQuery standard SQL functions, see:
45+
https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference
46+
"""
1847

1948
import sys
2049

bigframes/bigquery/_operations/ai.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,7 @@ def if_(
745745
or pandas Series.
746746
connection_id (str, optional):
747747
Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
748-
If not provided, the connection from the current session will be used.
748+
If not provided, the query uses your end-user credential.
749749
750750
Returns:
751751
bigframes.series.Series: A new series of bools.
@@ -756,7 +756,7 @@ def if_(
756756

757757
operator = ai_ops.AIIf(
758758
prompt_context=tuple(prompt_context),
759-
connection_id=_resolve_connection_id(series_list[0], connection_id),
759+
connection_id=connection_id,
760760
)
761761

762762
return series_list[0]._apply_nary_op(operator, series_list[1:])
@@ -800,7 +800,7 @@ def classify(
800800
Categories to classify the input into.
801801
connection_id (str, optional):
802802
Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
803-
If not provided, the connection from the current session will be used.
803+
If not provided, the query uses your end-user credential.
804804
805805
Returns:
806806
bigframes.series.Series: A new series of strings.
@@ -812,7 +812,7 @@ def classify(
812812
operator = ai_ops.AIClassify(
813813
prompt_context=tuple(prompt_context),
814814
categories=tuple(categories),
815-
connection_id=_resolve_connection_id(series_list[0], connection_id),
815+
connection_id=connection_id,
816816
)
817817

818818
return series_list[0]._apply_nary_op(operator, series_list[1:])
@@ -853,7 +853,7 @@ def score(
853853
or pandas Series.
854854
connection_id (str, optional):
855855
Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
856-
If not provided, the connection from the current session will be used.
856+
If not provided, the query uses your end-user credential.
857857
858858
Returns:
859859
bigframes.series.Series: A new series of double (float) values.
@@ -864,7 +864,7 @@ def score(
864864

865865
operator = ai_ops.AIScore(
866866
prompt_context=tuple(prompt_context),
867-
connection_id=_resolve_connection_id(series_list[0], connection_id),
867+
connection_id=connection_id,
868868
)
869869

870870
return series_list[0]._apply_nary_op(operator, series_list[1:])
@@ -880,6 +880,7 @@ def forecast(
880880
id_cols: Iterable[str] | None = None,
881881
horizon: int = 10,
882882
confidence_level: float = 0.95,
883+
output_historical_time_series: bool = False,
883884
context_window: int | None = None,
884885
) -> dataframe.DataFrame:
885886
"""
@@ -914,6 +915,15 @@ def forecast(
914915
confidence_level (float, default 0.95):
915916
A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
916917
The default value is 0.95. The valid input range is [0, 1).
918+
output_historical_time_series (bool, default False):
919+
A BOOL value that determines whether the input data is returned
920+
along with the forecasted data. Set this argument to TRUE to return
921+
input data. The default value is FALSE.
922+
923+
Returning the input data along with the forecasted data lets you
924+
compare the historical value of the data column with the forecasted
925+
value of the data column, or chart the change in the data column
926+
values over time.
917927
context_window (int, optional):
918928
An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
919929
The context window length determines how many of the most recent data points from the input time series are used by the model.
@@ -945,6 +955,7 @@ def forecast(
945955
"timestamp_col": timestamp_col,
946956
"model": model,
947957
"horizon": horizon,
958+
"output_historical_time_series": output_historical_time_series,
948959
"confidence_level": confidence_level,
949960
}
950961
if id_cols:

bigframes/bigquery/_operations/io.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
import pandas as pd
2020

2121
from bigframes.bigquery._operations.table import _get_table_metadata
22+
import bigframes.core.compile.sqlglot.sql as sql
2223
import bigframes.core.logging.log_adapter as log_adapter
23-
import bigframes.core.sql.io
2424
import bigframes.session
2525

2626

@@ -73,7 +73,7 @@ def load_data(
7373
"""
7474
import bigframes.pandas as bpd
7575

76-
sql = bigframes.core.sql.io.load_data_ddl(
76+
load_data_expr = sql.load_data(
7777
table_name=table_name,
7878
write_disposition=write_disposition,
7979
columns=columns,
@@ -84,11 +84,12 @@ def load_data(
8484
with_partition_columns=with_partition_columns,
8585
connection_name=connection_name,
8686
)
87+
sql_text = sql.to_sql(load_data_expr)
8788

8889
if session is None:
89-
bpd.read_gbq_query(sql)
90+
bpd.read_gbq_query(sql_text)
9091
session = bpd.get_global_session()
9192
else:
92-
session.read_gbq_query(sql)
93+
session.read_gbq_query(sql_text)
9394

9495
return _get_table_metadata(bqclient=session.bqclient, table_name=table_name)

bigframes/bigquery/_operations/sql.py

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,31 @@
1616

1717
from __future__ import annotations
1818

19-
from typing import Sequence
19+
from typing import cast, Optional, Sequence, Union
2020

2121
import google.cloud.bigquery
2222

2323
from bigframes.core.compile.sqlglot import sql
24+
import bigframes.dataframe
2425
import bigframes.dtypes
2526
import bigframes.operations
2627
import bigframes.series
2728

2829

30+
def _format_names(sql_template: str, dataframe: bigframes.dataframe.DataFrame):
31+
"""Turn sql_template from a template that uses names to one that uses
32+
numbers.
33+
"""
34+
names_to_numbers = {name: f"{{{i}}}" for i, name in enumerate(dataframe.columns)}
35+
numbers = [f"{{{i}}}" for i in range(len(dataframe.columns))]
36+
return sql_template.format(*numbers, **names_to_numbers)
37+
38+
2939
def sql_scalar(
3040
sql_template: str,
31-
columns: Sequence[bigframes.series.Series],
41+
columns: Union[bigframes.dataframe.DataFrame, Sequence[bigframes.series.Series]],
42+
*,
43+
output_dtype: Optional[bigframes.dtypes.Dtype] = None,
3244
) -> bigframes.series.Series:
3345
"""Create a Series from a SQL template.
3446
@@ -37,6 +49,9 @@ def sql_scalar(
3749
>>> import bigframes.pandas as bpd
3850
>>> import bigframes.bigquery as bbq
3951
52+
Either pass in a sequence of series, in which case use integers in the
53+
format strings.
54+
4055
>>> s = bpd.Series(["1.5", "2.5", "3.5"])
4156
>>> s = s.astype(pd.ArrowDtype(pa.decimal128(38, 9)))
4257
>>> bbq.sql_scalar("ROUND({0}, 0, 'ROUND_HALF_EVEN')", [s])
@@ -45,13 +60,29 @@ def sql_scalar(
4560
2 4.000000000
4661
dtype: decimal128(38, 9)[pyarrow]
4762
63+
Or pass in a DataFrame, in which case use the column names in the format
64+
strings.
65+
66+
>>> df = bpd.DataFrame({"a": ["1.5", "2.5", "3.5"]})
67+
>>> df = df.astype({"a": pd.ArrowDtype(pa.decimal128(38, 9))})
68+
>>> bbq.sql_scalar("ROUND({a}, 0, 'ROUND_HALF_EVEN')", df)
69+
0 2.000000000
70+
1 2.000000000
71+
2 4.000000000
72+
dtype: decimal128(38, 9)[pyarrow]
73+
4874
Args:
4975
sql_template (str):
5076
A SQL format string with Python-style {0} placeholders for each of
5177
the Series objects in ``columns``, or ``{name}`` placeholders for each column name when ``columns`` is a DataFrame.
52-
columns (Sequence[bigframes.pandas.Series]):
78+
columns (
79+
Sequence[bigframes.pandas.Series] | bigframes.pandas.DataFrame
80+
):
5381
Series objects representing the column inputs to the
5482
``sql_template``. Must contain at least one Series.
83+
output_dtype (a BigQuery DataFrames compatible dtype, optional):
84+
If provided, BigQuery DataFrames uses this as the output dtype
85+
of the returned Series. This avoids a dry run query.
5586
5687
Returns:
5788
bigframes.pandas.Series:
@@ -60,28 +91,38 @@ def sql_scalar(
6091
Raises:
6192
ValueError: If ``columns`` is empty.
6293
"""
94+
if isinstance(columns, bigframes.dataframe.DataFrame):
95+
sql_template = _format_names(sql_template, columns)
96+
columns = [
97+
cast(bigframes.series.Series, columns[column]) for column in columns.columns
98+
]
99+
63100
if len(columns) == 0:
64101
raise ValueError("Must provide at least one column in columns")
65102

103+
base_series = columns[0]
104+
66105
# To integrate this into our expression trees, we need to get the output
67106
# type, so we do some manual compilation and a dry run query to get that.
68107
# Another benefit of this is that if there is a syntax error in the SQL
69108
# template, then this will fail with an error earlier in the process,
70109
# aiding users in debugging.
71-
literals_sql = [sql.to_sql(sql.literal(None, column.dtype)) for column in columns]
72-
select_sql = sql_template.format(*literals_sql)
73-
dry_run_sql = f"SELECT {select_sql}"
74-
75-
# Use the executor directly, because we want the original column IDs, not
76-
# the user-friendly column names that block.to_sql_query() would produce.
77-
base_series = columns[0]
78-
bqclient = base_series._session.bqclient
79-
job = bqclient.query(
80-
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
81-
)
82-
_, output_type = bigframes.dtypes.convert_schema_field(job.schema[0])
110+
if output_dtype is None:
111+
literals_sql = [
112+
sql.to_sql(sql.literal(None, column.dtype)) for column in columns
113+
]
114+
select_sql = sql_template.format(*literals_sql)
115+
dry_run_sql = f"SELECT {select_sql}"
116+
117+
# Use the executor directly, because we want the original column IDs, not
118+
# the user-friendly column names that block.to_sql_query() would produce.
119+
bqclient = base_series._session.bqclient
120+
job = bqclient.query(
121+
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)
122+
)
123+
_, output_dtype = bigframes.dtypes.convert_schema_field(job.schema[0])
83124

84125
op = bigframes.operations.SqlScalarOp(
85-
_output_type=output_type, sql_template=sql_template
126+
_output_type=output_dtype, sql_template=sql_template
86127
)
87128
return base_series._apply_nary_op(op, columns[1:])

bigframes/bigquery/_operations/table.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
import google.cloud.bigquery
2020
import pandas as pd
2121

22+
import bigframes.core.compile.sqlglot.sql as sg_sql
2223
import bigframes.core.logging.log_adapter as log_adapter
23-
import bigframes.core.sql.table
2424
import bigframes.session
2525

2626

@@ -80,14 +80,16 @@ def create_external_table(
8080
"""
8181
import bigframes.pandas as bpd
8282

83-
sql = bigframes.core.sql.table.create_external_table_ddl(
84-
table_name=table_name,
85-
replace=replace,
86-
if_not_exists=if_not_exists,
87-
columns=columns,
88-
partition_columns=partition_columns,
89-
connection_name=connection_name,
90-
options=options,
83+
sql = sg_sql.to_sql(
84+
sg_sql.create_external_table(
85+
table_name=table_name,
86+
replace=replace,
87+
if_not_exists=if_not_exists,
88+
columns=columns,
89+
partition_columns=partition_columns,
90+
connection_name=connection_name,
91+
options=options,
92+
)
9193
)
9294

9395
if session is None:

0 commit comments

Comments
 (0)