Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 01d6152

Browse files
Merge remote-tracking branch 'github/main' into groupby_describe
2 parents bbfbd3e + bbd95e5 commit 01d6152

File tree

153 files changed

+2651
-2142
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

153 files changed

+2651
-2142
lines changed

bigframes/_config/bigquery_options.py

Lines changed: 3 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import google.auth.credentials
2323
import requests.adapters
2424

25-
import bigframes._config.auth
2625
import bigframes._importing
2726
import bigframes.enums
2827
import bigframes.exceptions as bfe
@@ -38,7 +37,6 @@
3837

3938
def _get_validated_location(value: Optional[str]) -> Optional[str]:
4039
import bigframes._tools.strings
41-
import bigframes.constants
4240

4341
if value is None or value in bigframes.constants.ALL_BIGQUERY_LOCATIONS:
4442
return value
@@ -143,52 +141,20 @@ def application_name(self, value: Optional[str]):
143141
)
144142
self._application_name = value
145143

146-
def _try_set_default_credentials_and_project(
147-
self,
148-
) -> tuple[google.auth.credentials.Credentials, Optional[str]]:
149-
# Don't fetch credentials or project if credentials is already set.
150-
# If it's set, we've already authenticated, so if the user wants to
151-
# re-auth, they should explicitly reset the credentials.
152-
if self._credentials is not None:
153-
return self._credentials, self._project
154-
155-
(
156-
credentials,
157-
credentials_project,
158-
) = bigframes._config.auth.get_default_credentials_with_project()
159-
self._credentials = credentials
160-
161-
# Avoid overriding an explicitly set project with a default value.
162-
if self._project is None:
163-
self._project = credentials_project
164-
165-
return credentials, self._project
166-
167144
@property
168-
def credentials(self) -> google.auth.credentials.Credentials:
145+
def credentials(self) -> Optional[google.auth.credentials.Credentials]:
169146
"""The OAuth2 credentials to use for this client.
170147
171-
Set to None to force re-authentication.
172-
173148
Returns:
174149
None or google.auth.credentials.Credentials:
175150
google.auth.credentials.Credentials if exists; otherwise None.
176151
"""
177-
if self._credentials:
178-
return self._credentials
179-
180-
credentials, _ = self._try_set_default_credentials_and_project()
181-
return credentials
152+
return self._credentials
182153

183154
@credentials.setter
184155
def credentials(self, value: Optional[google.auth.credentials.Credentials]):
185156
if self._session_started and self._credentials is not value:
186157
raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="credentials"))
187-
188-
if value is None:
189-
# The user has _explicitly_ asked that we re-authenticate.
190-
bigframes._config.auth.reset_default_credentials_and_project()
191-
192158
self._credentials = value
193159

194160
@property
@@ -217,11 +183,7 @@ def project(self) -> Optional[str]:
217183
None or str:
218184
Google Cloud project ID as a string; otherwise None.
219185
"""
220-
if self._project:
221-
return self._project
222-
223-
_, project = self._try_set_default_credentials_and_project()
224-
return project
186+
return self._project
225187

226188
@project.setter
227189
def project(self, value: Optional[str]):

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
json_value,
5252
json_value_array,
5353
parse_json,
54+
to_json,
5455
to_json_string,
5556
)
5657
from bigframes.bigquery._operations.search import create_vector_index, vector_search
@@ -89,6 +90,7 @@
8990
json_value,
9091
json_value_array,
9192
parse_json,
93+
to_json,
9294
to_json_string,
9395
# search ops
9496
create_vector_index,

bigframes/bigquery/_operations/json.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,40 @@ def json_value_array(
430430
return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))
431431

432432

433+
def to_json(
434+
input: series.Series,
435+
) -> series.Series:
436+
"""Converts a series of values to a series of JSON values.
437+
438+
**Examples:**
439+
440+
>>> import bigframes.pandas as bpd
441+
>>> import bigframes.bigquery as bbq
442+
>>> bpd.options.display.progress_bar = None
443+
444+
>>> s = bpd.Series([1, 2, 3])
445+
>>> bbq.to_json(s)
446+
0 1
447+
1 2
448+
2 3
449+
dtype: extension<dbjson<JSONArrowType>>[pyarrow]
450+
451+
>>> s = bpd.Series([{"int": 1, "str": "pandas"}, {"int": 2, "str": "numpy"}])
452+
>>> bbq.to_json(s)
453+
0 {"int":1,"str":"pandas"}
454+
1 {"int":2,"str":"numpy"}
455+
dtype: extension<dbjson<JSONArrowType>>[pyarrow]
456+
457+
Args:
458+
input (bigframes.series.Series):
459+
The Series containing the values to convert to JSON.
460+
461+
Returns:
462+
bigframes.series.Series: A new Series with the JSON value.
463+
"""
464+
return input._apply_unary_op(ops.ToJSON())
465+
466+
433467
def to_json_string(
434468
input: series.Series,
435469
) -> series.Series:

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,11 @@ def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
13021302
return parse_json(json_str=x)
13031303

13041304

1305+
@scalar_op_compiler.register_unary_op(ops.ToJSON)
1306+
def to_json_op_impl(json_obj: ibis_types.Value):
1307+
return to_json(json_obj=json_obj)
1308+
1309+
13051310
@scalar_op_compiler.register_unary_op(ops.ToJSONString)
13061311
def to_json_string_op_impl(x: ibis_types.Value):
13071312
return to_json_string(value=x)
@@ -2093,6 +2098,11 @@ def json_extract_string_array( # type: ignore[empty-body]
20932098
"""Extracts a JSON array and converts it to a SQL ARRAY of STRINGs."""
20942099

20952100

2101+
@ibis_udf.scalar.builtin(name="to_json")
2102+
def to_json(json_obj) -> ibis_dtypes.JSON: # type: ignore[empty-body]
2103+
"""Convert to JSON."""
2104+
2105+
20962106
@ibis_udf.scalar.builtin(name="to_json_string")
20972107
def to_json_string(value) -> ibis_dtypes.String: # type: ignore[empty-body]
20982108
"""Convert value to JSON-formatted string."""

bigframes/core/compile/sqlglot/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,18 @@
1414
from __future__ import annotations
1515

1616
from bigframes.core.compile.sqlglot.compiler import SQLGlotCompiler
17+
import bigframes.core.compile.sqlglot.expressions.array_ops # noqa: F401
1718
import bigframes.core.compile.sqlglot.expressions.binary_compiler # noqa: F401
18-
import bigframes.core.compile.sqlglot.expressions.unary_compiler # noqa: F401
19+
import bigframes.core.compile.sqlglot.expressions.blob_ops # noqa: F401
20+
import bigframes.core.compile.sqlglot.expressions.comparison_ops # noqa: F401
21+
import bigframes.core.compile.sqlglot.expressions.date_ops # noqa: F401
22+
import bigframes.core.compile.sqlglot.expressions.datetime_ops # noqa: F401
23+
import bigframes.core.compile.sqlglot.expressions.generic_ops # noqa: F401
24+
import bigframes.core.compile.sqlglot.expressions.geo_ops # noqa: F401
25+
import bigframes.core.compile.sqlglot.expressions.json_ops # noqa: F401
26+
import bigframes.core.compile.sqlglot.expressions.numeric_ops # noqa: F401
27+
import bigframes.core.compile.sqlglot.expressions.string_ops # noqa: F401
28+
import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401
29+
import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401
1930

2031
__all__ = ["SQLGlotCompiler"]
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import typing
18+
19+
import sqlglot
20+
import sqlglot.expressions as sge
21+
22+
from bigframes import operations as ops
23+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
24+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
25+
26+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
27+
28+
29+
@register_unary_op(ops.ArrayToStringOp, pass_op=True)
30+
def _(expr: TypedExpr, op: ops.ArrayToStringOp) -> sge.Expression:
31+
return sge.ArrayToString(this=expr.expr, expression=f"'{op.delimiter}'")
32+
33+
34+
@register_unary_op(ops.ArrayIndexOp, pass_op=True)
35+
def _(expr: TypedExpr, op: ops.ArrayIndexOp) -> sge.Expression:
36+
return sge.Bracket(
37+
this=expr.expr,
38+
expressions=[sge.Literal.number(op.index)],
39+
safe=True,
40+
offset=False,
41+
)
42+
43+
44+
@register_unary_op(ops.ArraySliceOp, pass_op=True)
45+
def _(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression:
46+
slice_idx = sqlglot.to_identifier("slice_idx")
47+
48+
conditions: typing.List[sge.Predicate] = [slice_idx >= op.start]
49+
50+
if op.stop is not None:
51+
conditions.append(slice_idx < op.stop)
52+
53+
# local name for each element in the array
54+
el = sqlglot.to_identifier("el")
55+
56+
selected_elements = (
57+
sge.select(el)
58+
.from_(
59+
sge.Unnest(
60+
expressions=[expr.expr],
61+
alias=sge.TableAlias(columns=[el]),
62+
offset=slice_idx,
63+
)
64+
)
65+
.where(*conditions)
66+
)
67+
68+
return sge.array(selected_elements)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import operations as ops
20+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
21+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
22+
23+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
24+
25+
26+
@register_unary_op(ops.obj_fetch_metadata_op)
27+
def _(expr: TypedExpr) -> sge.Expression:
28+
return sge.func("OBJ.FETCH_METADATA", expr.expr)
29+
30+
31+
@register_unary_op(ops.ObjGetAccessUrl)
32+
def _(expr: TypedExpr) -> sge.Expression:
33+
return sge.func("OBJ.GET_ACCESS_URL", expr.expr)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import typing
18+
19+
import pandas as pd
20+
import sqlglot.expressions as sge
21+
22+
from bigframes import operations as ops
23+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
24+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
25+
import bigframes.dtypes as dtypes
26+
27+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
28+
29+
30+
@register_unary_op(ops.IsInOp, pass_op=True)
31+
def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression:
32+
values = []
33+
is_numeric_expr = dtypes.is_numeric(expr.dtype)
34+
for value in op.values:
35+
if value is None:
36+
continue
37+
dtype = dtypes.bigframes_type(type(value))
38+
if expr.dtype == dtype or is_numeric_expr and dtypes.is_numeric(dtype):
39+
values.append(sge.convert(value))
40+
41+
if op.match_nulls:
42+
contains_nulls = any(_is_null(value) for value in op.values)
43+
if contains_nulls:
44+
return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In(
45+
this=expr.expr, expressions=values
46+
)
47+
48+
if len(values) == 0:
49+
return sge.convert(False)
50+
51+
return sge.func(
52+
"COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False)
53+
)
54+
55+
56+
# Helpers
57+
def _is_null(value) -> bool:
58+
# float NaN/inf should be treated as distinct from 'true' null values
59+
return typing.cast(bool, pd.isna(value)) and not isinstance(value, float)

0 commit comments

Comments
 (0)