Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit bae1fdb

Browse files
Merge remote-tracking branch 'github/main' into agg_callables
2 parents 90c8e45 + 913de1b commit bae1fdb

File tree

20 files changed

+383
-27
lines changed

20 files changed

+383
-27
lines changed

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,21 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.19.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.18.0...v2.19.0) (2025-09-09)
8+
9+
10+
### Features
11+
12+
* Add str.join method ([#2054](https://github.com/googleapis/python-bigquery-dataframes/issues/2054)) ([8804ada](https://github.com/googleapis/python-bigquery-dataframes/commit/8804adaf8ba23fdcad6e42a7bf034bd0a11c890f))
13+
* Support display.max_colwidth option ([#2053](https://github.com/googleapis/python-bigquery-dataframes/issues/2053)) ([5229e07](https://github.com/googleapis/python-bigquery-dataframes/commit/5229e07b4535c01b0cdbd731455ff225a373b5c8))
14+
* Support VPC egress setting in remote function ([#2059](https://github.com/googleapis/python-bigquery-dataframes/issues/2059)) ([5df779d](https://github.com/googleapis/python-bigquery-dataframes/commit/5df779d4f421d3ba777cfd928d99ca2e8a3f79ad))
15+
16+
17+
### Bug Fixes
18+
19+
* Fix issue mishandling chunked array while loading data ([#2051](https://github.com/googleapis/python-bigquery-dataframes/issues/2051)) ([873d0ee](https://github.com/googleapis/python-bigquery-dataframes/commit/873d0eee474ed34f1d5164c37383f2737dbec4db))
20+
* Remove warning for slot_millis_sum ([#2047](https://github.com/googleapis/python-bigquery-dataframes/issues/2047)) ([425a691](https://github.com/googleapis/python-bigquery-dataframes/commit/425a6917d5442eeb4df486c6eed1fd136bbcedfb))
21+
722
## [2.18.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.17.0...v2.18.0) (2025-09-03)
823

924

bigframes/_config/auth.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import threading
18+
from typing import Optional
19+
20+
import google.auth.credentials
21+
import google.auth.transport.requests
22+
import pydata_google_auth
23+
24+
_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
25+
26+
# Put the lock here rather than in BigQueryOptions so that BigQueryOptions
27+
# remains deepcopy-able.
28+
_AUTH_LOCK = threading.Lock()
29+
_cached_credentials: Optional[google.auth.credentials.Credentials] = None
30+
_cached_project_default: Optional[str] = None
31+
32+
33+
def get_default_credentials_with_project() -> tuple[
34+
google.auth.credentials.Credentials, Optional[str]
35+
]:
36+
global _AUTH_LOCK, _cached_credentials, _cached_project_default
37+
38+
with _AUTH_LOCK:
39+
if _cached_credentials is not None:
40+
return _cached_credentials, _cached_project_default
41+
42+
_cached_credentials, _cached_project_default = pydata_google_auth.default(
43+
scopes=_SCOPES, use_local_webserver=False
44+
)
45+
46+
# Ensure an access token is available.
47+
_cached_credentials.refresh(google.auth.transport.requests.Request())
48+
49+
return _cached_credentials, _cached_project_default
50+
51+
52+
def reset_default_credentials_and_project():
53+
global _AUTH_LOCK, _cached_credentials, _cached_project_default
54+
55+
with _AUTH_LOCK:
56+
_cached_credentials = None
57+
_cached_project_default = None

bigframes/_config/bigquery_options.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import google.auth.credentials
2323
import requests.adapters
2424

25+
import bigframes._config.auth
2526
import bigframes._importing
2627
import bigframes.enums
2728
import bigframes.exceptions as bfe
@@ -37,6 +38,7 @@
3738

3839
def _get_validated_location(value: Optional[str]) -> Optional[str]:
3940
import bigframes._tools.strings
41+
import bigframes.constants
4042

4143
if value is None or value in bigframes.constants.ALL_BIGQUERY_LOCATIONS:
4244
return value
@@ -141,20 +143,52 @@ def application_name(self, value: Optional[str]):
141143
)
142144
self._application_name = value
143145

146+
def _try_set_default_credentials_and_project(
147+
self,
148+
) -> tuple[google.auth.credentials.Credentials, Optional[str]]:
149+
# Don't fetch credentials or project if credentials is already set.
150+
# If it's set, we've already authenticated, so if the user wants to
151+
# re-auth, they should explicitly reset the credentials.
152+
if self._credentials is not None:
153+
return self._credentials, self._project
154+
155+
(
156+
credentials,
157+
credentials_project,
158+
) = bigframes._config.auth.get_default_credentials_with_project()
159+
self._credentials = credentials
160+
161+
# Avoid overriding an explicitly set project with a default value.
162+
if self._project is None:
163+
self._project = credentials_project
164+
165+
return credentials, self._project
166+
144167
@property
145-
def credentials(self) -> Optional[google.auth.credentials.Credentials]:
168+
def credentials(self) -> google.auth.credentials.Credentials:
146169
"""The OAuth2 credentials to use for this client.
147170
171+
Set to None to force re-authentication.
172+
148173
Returns:
149174
google.auth.credentials.Credentials:
150175
The explicitly set credentials if any; otherwise the default credentials, which are fetched and cached on first access.
151176
"""
152-
return self._credentials
177+
if self._credentials:
178+
return self._credentials
179+
180+
credentials, _ = self._try_set_default_credentials_and_project()
181+
return credentials
153182

154183
@credentials.setter
155184
def credentials(self, value: Optional[google.auth.credentials.Credentials]):
156185
if self._session_started and self._credentials is not value:
157186
raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="credentials"))
187+
188+
if value is None:
189+
# The user has _explicitly_ asked that we re-authenticate.
190+
bigframes._config.auth.reset_default_credentials_and_project()
191+
158192
self._credentials = value
159193

160194
@property
@@ -183,7 +217,11 @@ def project(self) -> Optional[str]:
183217
None or str:
184218
Google Cloud project ID as a string; otherwise None.
185219
"""
186-
return self._project
220+
if self._project:
221+
return self._project
222+
223+
_, project = self._try_set_default_credentials_and_project()
224+
return project
187225

188226
@project.setter
189227
def project(self, value: Optional[str]):

bigframes/core/compile/ibis_compiler/aggregate_compiler.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,29 @@ def _(
676676
).to_expr()
677677

678678

679+
@compile_ordered_unary_agg.register
680+
def _(
681+
op: agg_ops.StringAggOp,
682+
column: ibis_types.Column,
683+
window=None,
684+
order_by: typing.Sequence[ibis_types.Value] = [],
685+
) -> ibis_types.ArrayValue:
686+
if window is not None:
687+
raise NotImplementedError(
688+
f"StringAgg with windowing is not supported. {constants.FEEDBACK_LINK}"
689+
)
690+
691+
return (
692+
ibis_ops.StringAgg(
693+
column, # type: ignore
694+
sep=op.sep, # type: ignore
695+
order_by=order_by, # type: ignore
696+
)
697+
.to_expr()
698+
.fill_null(ibis_types.literal(""))
699+
)
700+
701+
679702
@compile_binary_agg.register
680703
def _(
681704
op: agg_ops.CorrOp, left: ibis_types.Column, right: ibis_types.Column, window=None

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,11 +1216,18 @@ def to_arry_op_impl(*values: ibis_types.Value):
12161216
def array_reduce_op_impl(x: ibis_types.Value, op: ops.ArrayReduceOp):
12171217
import bigframes.core.compile.ibis_compiler.aggregate_compiler as agg_compilers
12181218

1219-
return typing.cast(ibis_types.ArrayValue, x).reduce(
1220-
lambda arr_vals: agg_compilers.compile_unary_agg(
1221-
op.aggregation, typing.cast(ibis_types.Column, arr_vals)
1219+
if op.aggregation.order_independent:
1220+
return typing.cast(ibis_types.ArrayValue, x).reduce(
1221+
lambda arr_vals: agg_compilers.compile_unary_agg(
1222+
op.aggregation, typing.cast(ibis_types.Column, arr_vals)
1223+
)
1224+
)
1225+
else:
1226+
return typing.cast(ibis_types.ArrayValue, x).reduce(
1227+
lambda arr_vals: agg_compilers.compile_ordered_unary_agg(
1228+
op.aggregation, typing.cast(ibis_types.Column, arr_vals)
1229+
)
12221230
)
1223-
)
12241231

12251232

12261233
# JSON Ops

bigframes/functions/_function_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@
5151
}
5252
)
5353

54+
# https://cloud.google.com/functions/docs/reference/rest/v2/projects.locations.functions#vpconnectoregresssettings
55+
_VPC_EGRESS_SETTINGS_MAP = types.MappingProxyType(
56+
{
57+
"all": functions_v2.ServiceConfig.VpcConnectorEgressSettings.ALL_TRAFFIC,
58+
"private-ranges-only": functions_v2.ServiceConfig.VpcConnectorEgressSettings.PRIVATE_RANGES_ONLY,
59+
"unspecified": functions_v2.ServiceConfig.VpcConnectorEgressSettings.VPC_CONNECTOR_EGRESS_SETTINGS_UNSPECIFIED,
60+
}
61+
)
62+
5463
# BQ managed functions (@udf) currently only support Python 3.11.
5564
_MANAGED_FUNC_PYTHON_VERSION = "python-3.11"
5665

@@ -375,6 +384,7 @@ def create_cloud_function(
375384
max_instance_count=None,
376385
is_row_processor=False,
377386
vpc_connector=None,
387+
vpc_connector_egress_settings="private-ranges-only",
378388
memory_mib=1024,
379389
ingress_settings="internal-only",
380390
):
@@ -472,6 +482,15 @@ def create_cloud_function(
472482
function.service_config.max_instance_count = max_instance_count
473483
if vpc_connector is not None:
474484
function.service_config.vpc_connector = vpc_connector
485+
if vpc_connector_egress_settings not in _VPC_EGRESS_SETTINGS_MAP:
486+
raise bf_formatting.create_exception_with_feedback_link(
487+
ValueError,
488+
f"'{vpc_connector_egress_settings}' not one of the supported vpc egress settings values: {list(_VPC_EGRESS_SETTINGS_MAP)}",
489+
)
490+
function.service_config.vpc_connector_egress_settings = cast(
491+
functions_v2.ServiceConfig.VpcConnectorEgressSettings,
492+
_VPC_EGRESS_SETTINGS_MAP[vpc_connector_egress_settings],
493+
)
475494
function.service_config.service_account_email = (
476495
self._cloud_function_service_account
477496
)
@@ -532,6 +551,7 @@ def provision_bq_remote_function(
532551
cloud_function_max_instance_count,
533552
is_row_processor,
534553
cloud_function_vpc_connector,
554+
cloud_function_vpc_connector_egress_settings,
535555
cloud_function_memory_mib,
536556
cloud_function_ingress_settings,
537557
bq_metadata,
@@ -580,6 +600,7 @@ def provision_bq_remote_function(
580600
max_instance_count=cloud_function_max_instance_count,
581601
is_row_processor=is_row_processor,
582602
vpc_connector=cloud_function_vpc_connector,
603+
vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings,
583604
memory_mib=cloud_function_memory_mib,
584605
ingress_settings=cloud_function_ingress_settings,
585606
)

bigframes/functions/_function_session.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ def remote_function(
245245
cloud_function_timeout: Optional[int] = 600,
246246
cloud_function_max_instances: Optional[int] = None,
247247
cloud_function_vpc_connector: Optional[str] = None,
248+
cloud_function_vpc_connector_egress_settings: Literal[
249+
"all", "private-ranges-only", "unspecified"
250+
] = "private-ranges-only",
248251
cloud_function_memory_mib: Optional[int] = 1024,
249252
cloud_function_ingress_settings: Literal[
250253
"all", "internal-only", "internal-and-gclb"
@@ -425,6 +428,13 @@ def remote_function(
425428
function. This is useful if your code needs access to data or
426429
service(s) that are on a VPC network. For more details, see
427430
https://cloud.google.com/functions/docs/networking/connecting-vpc.
431+
cloud_function_vpc_connector_egress_settings (str, Optional):
432+
Egress settings for the VPC connector, controlling what outbound
433+
traffic is routed through the VPC connector.
434+
Options are: `all`, `private-ranges-only`, or `unspecified`.
435+
If not specified, `private-ranges-only` is used by default.
436+
For more details, see
437+
https://cloud.google.com/run/docs/configuring/vpc-connectors#egress-job.
428438
cloud_function_memory_mib (int, Optional):
429439
The amount of memory (in mebibytes) to allocate for the cloud
430440
function (2nd gen) created. This also dictates a corresponding
@@ -616,6 +626,7 @@ def wrapper(func):
616626
cloud_function_max_instance_count=cloud_function_max_instances,
617627
is_row_processor=is_row_processor,
618628
cloud_function_vpc_connector=cloud_function_vpc_connector,
629+
cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings,
619630
cloud_function_memory_mib=cloud_function_memory_mib,
620631
cloud_function_ingress_settings=cloud_function_ingress_settings,
621632
bq_metadata=bqrf_metadata,

bigframes/operations/aggregations.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -380,9 +380,26 @@ def skips_nulls(self):
380380
return True
381381

382382
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
383-
return pd.ArrowDtype(
384-
pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(input_types[0]))
385-
)
383+
return dtypes.list_type(input_types[0])
384+
385+
386+
@dataclasses.dataclass(frozen=True)
387+
class StringAggOp(UnaryAggregateOp):
388+
name: ClassVar[str] = "string_agg"
389+
sep: str = ","
390+
391+
@property
392+
def order_independent(self):
393+
return False
394+
395+
@property
396+
def skips_nulls(self):
397+
return True
398+
399+
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
400+
if input_types[0] != dtypes.STRING_DTYPE:
401+
raise TypeError(f"Type {input_types[0]} is not string-like")
402+
return dtypes.STRING_DTYPE
386403

387404

388405
@dataclasses.dataclass(frozen=True)

bigframes/operations/strings.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import bigframes.dataframe as df
2525
import bigframes.operations as ops
2626
from bigframes.operations._op_converters import convert_index, convert_slice
27+
import bigframes.operations.aggregations as agg_ops
2728
import bigframes.operations.base
2829
import bigframes.series as series
2930

@@ -295,6 +296,11 @@ def cat(
295296
) -> series.Series:
296297
return self._apply_binary_op(others, ops.strconcat_op, alignment=join)
297298

299+
def join(self, sep: str) -> series.Series:
300+
return self._apply_unary_op(
301+
ops.ArrayReduceOp(aggregation=agg_ops.StringAggOp(sep=sep))
302+
)
303+
298304
def to_blob(self, connection: Optional[str] = None) -> series.Series:
299305
"""Create a BigFrames Blob series from a series of URIs.
300306

bigframes/pandas/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ def remote_function(
8787
cloud_function_timeout: Optional[int] = 600,
8888
cloud_function_max_instances: Optional[int] = None,
8989
cloud_function_vpc_connector: Optional[str] = None,
90+
cloud_function_vpc_connector_egress_settings: Literal[
91+
"all", "private-ranges-only", "unspecified"
92+
] = "private-ranges-only",
9093
cloud_function_memory_mib: Optional[int] = 1024,
9194
cloud_function_ingress_settings: Literal[
9295
"all", "internal-only", "internal-and-gclb"
@@ -109,6 +112,7 @@ def remote_function(
109112
cloud_function_timeout=cloud_function_timeout,
110113
cloud_function_max_instances=cloud_function_max_instances,
111114
cloud_function_vpc_connector=cloud_function_vpc_connector,
115+
cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings,
112116
cloud_function_memory_mib=cloud_function_memory_mib,
113117
cloud_function_ingress_settings=cloud_function_ingress_settings,
114118
cloud_build_service_account=cloud_build_service_account,

0 commit comments

Comments
 (0)