Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 45570a3

Browse files
committed
Merge branch 'main' into shuowei-fix-compiler-syntax-guards
2 parents 7e551b8 + 915cce5 commit 45570a3

File tree

52 files changed

+576
-221
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+576
-221
lines changed

bigframes/core/blocks.py

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,9 +1822,9 @@ def melt(
18221822
Arguments correspond to pandas.melt arguments.
18231823
"""
18241824
# TODO: Implement col_level and ignore_index
1825-
value_labels: pd.Index = pd.Index(
1826-
[self.col_id_to_label[col_id] for col_id in value_vars]
1827-
)
1825+
value_labels: pd.Index = self.column_labels[
1826+
[self.value_columns.index(col_id) for col_id in value_vars]
1827+
]
18281828
id_labels = [self.col_id_to_label[col_id] for col_id in id_vars]
18291829

18301830
unpivot_expr, (var_col_ids, unpivot_out, passthrough_cols) = unpivot(
@@ -3417,6 +3417,7 @@ def unpivot(
34173417
joined_array, (labels_mapping, column_mapping) = labels_array.relational_join(
34183418
array_value, type="cross"
34193419
)
3420+
34203421
new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
34213422
# Last column is offsets
34223423
index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
@@ -3426,20 +3427,24 @@ def unpivot(
34263427
unpivot_exprs: List[ex.Expression] = []
34273428
# Supports producing multiple stacked output columns for stacking only part of hierarchical index
34283429
for input_ids in unpivot_columns:
3429-
# row explode offset used to choose the input column
3430-
# we use offset instead of label as labels are not necessarily unique
3431-
cases = itertools.chain(
3432-
*(
3433-
(
3434-
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3435-
ex.deref(column_mapping[id_or_null])
3436-
if (id_or_null is not None)
3437-
else ex.const(None),
3430+
col_expr: ex.Expression
3431+
if not input_ids:
3432+
col_expr = ex.const(None, dtype=bigframes.dtypes.INT_DTYPE)
3433+
else:
3434+
# row explode offset used to choose the input column
3435+
# we use offset instead of label as labels are not necessarily unique
3436+
cases = itertools.chain(
3437+
*(
3438+
(
3439+
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3440+
ex.deref(column_mapping[id_or_null])
3441+
if (id_or_null is not None)
3442+
else ex.const(None),
3443+
)
3444+
for i, id_or_null in enumerate(input_ids)
34383445
)
3439-
for i, id_or_null in enumerate(input_ids)
34403446
)
3441-
)
3442-
col_expr = ops.case_when_op.as_expr(*cases)
3447+
col_expr = ops.case_when_op.as_expr(*cases)
34433448
unpivot_exprs.append(col_expr)
34443449

34453450
joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)
@@ -3457,19 +3462,43 @@ def _pd_index_to_array_value(
34573462
Create an ArrayValue from a list of label tuples.
34583463
The last column will be row offsets.
34593464
"""
3465+
id_gen = bigframes.core.identifiers.standard_id_strings()
3466+
col_ids = [next(id_gen) for _ in range(index.nlevels)]
3467+
offset_id = next(id_gen)
3468+
34603469
rows = []
34613470
labels_as_tuples = utils.index_as_tuples(index)
34623471
for row_offset in range(len(index)):
3463-
id_gen = bigframes.core.identifiers.standard_id_strings()
34643472
row_label = labels_as_tuples[row_offset]
34653473
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
34663474
row = {}
3467-
for label_part, id in zip(row_label, id_gen):
3468-
row[id] = label_part if pd.notnull(label_part) else None
3469-
row[next(id_gen)] = row_offset
3475+
for label_part, col_id in zip(row_label, col_ids):
3476+
row[col_id] = label_part if pd.notnull(label_part) else None
3477+
row[offset_id] = row_offset
34703478
rows.append(row)
34713479

3472-
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
3480+
if not rows:
3481+
dtypes_list = getattr(index, "dtypes", None)
3482+
if dtypes_list is None:
3483+
dtypes_list = (
3484+
[index.dtype] if hasattr(index, "dtype") else [pd.Float64Dtype()]
3485+
)
3486+
3487+
fields = []
3488+
for col_id, dtype in zip(col_ids, dtypes_list):
3489+
try:
3490+
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype)
3491+
except Exception:
3492+
pa_type = pa.string()
3493+
fields.append(pa.field(col_id, pa_type))
3494+
fields.append(pa.field(offset_id, pa.int64()))
3495+
schema = pa.schema(fields)
3496+
pt = pa.Table.from_pylist([], schema=schema)
3497+
else:
3498+
pt = pa.Table.from_pylist(rows)
3499+
pt = pt.rename_columns([*col_ids, offset_id])
3500+
3501+
return core.ArrayValue.from_pyarrow(pt, session=session)
34733502

34743503

34753504
def _resolve_index_col(

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ def datetime_to_integer_label_non_fixed_frequency(
663663
.else_((x_int - first - 1) // us + 1) # type: ignore
664664
.end()
665665
)
666-
elif rule_code == "ME": # Monthly
666+
elif rule_code in ("M", "ME"): # Monthly
667667
x_int = x.year() * 12 + x.month() - 1 # type: ignore
668668
first = y.year() * 12 + y.month() - 1 # type: ignore
669669
x_int_label = (
@@ -672,7 +672,7 @@ def datetime_to_integer_label_non_fixed_frequency(
672672
.else_((x_int - first - 1) // n + 1) # type: ignore
673673
.end()
674674
)
675-
elif rule_code == "QE-DEC": # Quarterly
675+
elif rule_code in ("Q-DEC", "QE-DEC"): # Quarterly
676676
x_int = x.year() * 4 + x.quarter() - 1 # type: ignore
677677
first = y.year() * 4 + y.quarter() - 1 # type: ignore
678678
x_int_label = (
@@ -681,7 +681,7 @@ def datetime_to_integer_label_non_fixed_frequency(
681681
.else_((x_int - first - 1) // n + 1) # type: ignore
682682
.end()
683683
)
684-
elif rule_code == "YE-DEC": # Yearly
684+
elif rule_code in ("A-DEC", "Y-DEC", "YE-DEC"): # Yearly
685685
x_int = x.year() # type: ignore
686686
first = y.year() # type: ignore
687687
x_int_label = (
@@ -749,7 +749,7 @@ def integer_label_to_datetime_op_non_fixed_frequency(
749749
.cast(ibis_dtypes.Timestamp(timezone="UTC"))
750750
.cast(y.type())
751751
)
752-
elif rule_code == "ME": # Monthly
752+
elif rule_code in ("M", "ME"): # Monthly
753753
one = ibis_types.literal(1)
754754
twelve = ibis_types.literal(12)
755755
first = y.year() * twelve + y.month() - one # type: ignore
@@ -769,7 +769,7 @@ def integer_label_to_datetime_op_non_fixed_frequency(
769769
0,
770770
)
771771
x_label = next_month_date - ibis_api.interval(days=1)
772-
elif rule_code == "QE-DEC": # Quarterly
772+
elif rule_code in ("Q-DEC", "QE-DEC"): # Quarterly
773773
one = ibis_types.literal(1)
774774
three = ibis_types.literal(3)
775775
four = ibis_types.literal(4)
@@ -792,7 +792,7 @@ def integer_label_to_datetime_op_non_fixed_frequency(
792792
)
793793

794794
x_label = next_month_date - ibis_api.interval(days=1)
795-
elif rule_code == "YE-DEC": # Yearly
795+
elif rule_code in ("A-DEC", "Y-DEC", "YE-DEC"): # Yearly
796796
one = ibis_types.literal(1)
797797
first = y.year() # type: ignore
798798
x = x * n + first # type: ignore

bigframes/core/compile/sqlglot/aggregate_compiler.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,5 @@ def compile_analytic(
7070
aggregate.arg.output_type,
7171
)
7272
return unary_compiler.compile(aggregate.op, column, window)
73-
elif isinstance(aggregate, agg_expressions.BinaryAggregation):
74-
raise NotImplementedError("binary analytic operations not yet supported")
7573
else:
7674
raise ValueError(f"Unexpected analytic operation: {aggregate}")

bigframes/core/compile/sqlglot/expressions/array_ops.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -105,31 +105,6 @@ def _coerce_bool_to_int(typed_expr: TypedExpr) -> sge.Expression:
105105
return typed_expr.expr
106106

107107

108-
def _string_slice(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression:
109-
# local name for each element in the array
110-
el = sg.to_identifier("el")
111-
# local name for the index in the array
112-
slice_idx = sg.to_identifier("slice_idx")
113-
114-
conditions: typing.List[sge.Predicate] = [slice_idx >= op.start]
115-
if op.stop is not None:
116-
conditions.append(slice_idx < op.stop)
117-
118-
selected_elements = (
119-
sge.select(el)
120-
.from_(
121-
sge.Unnest(
122-
expressions=[expr.expr],
123-
alias=sge.TableAlias(columns=[el]),
124-
offset=slice_idx,
125-
)
126-
)
127-
.where(*conditions)
128-
)
129-
130-
return sge.array(selected_elements)
131-
132-
133108
def _array_slice(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression:
134109
# local name for each element in the array
135110
el = sg.to_identifier("el")

bigframes/core/compile/sqlglot/expressions/datetime_ops.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _datetime_to_integer_label_non_fixed_frequency(
125125
expression=sge.convert(1),
126126
),
127127
)
128-
elif rule_code == "ME": # Monthly
128+
elif rule_code in ("M", "ME"): # Monthly
129129
x_int = sge.Paren( # type: ignore
130130
this=sge.Add(
131131
this=sge.Mul(
@@ -182,7 +182,7 @@ def _datetime_to_integer_label_non_fixed_frequency(
182182
expression=sge.convert(1),
183183
),
184184
)
185-
elif rule_code == "QE-DEC": # Quarterly
185+
elif rule_code in ("Q-DEC", "QE-DEC"): # Quarterly
186186
x_int = sge.Paren( # type: ignore
187187
this=sge.Add(
188188
this=sge.Mul(
@@ -239,7 +239,7 @@ def _datetime_to_integer_label_non_fixed_frequency(
239239
expression=sge.convert(1),
240240
),
241241
)
242-
elif rule_code == "YE-DEC": # Yearly
242+
elif rule_code in ("A-DEC", "Y-DEC", "YE-DEC"): # Yearly
243243
x_int = sge.Extract(this=sge.Identifier(this="YEAR"), expression=x.expr)
244244
first = sge.Extract(this=sge.Identifier(this="YEAR"), expression=y.expr)
245245
return sge.Case(

bigframes/core/tree_properties.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515

1616
import functools
1717
import itertools
18-
from typing import Callable, Dict, Optional, Sequence
18+
from typing import Callable, Dict, Optional, Sequence, TYPE_CHECKING
1919

2020
import bigframes.core.nodes as nodes
2121

22+
if TYPE_CHECKING:
23+
import bigframes.session.execution_cache as execution_cache
24+
2225

2326
def is_trivially_executable(node: nodes.BigFrameNode) -> bool:
2427
if local_only(node):
@@ -65,7 +68,7 @@ def select_cache_target(
6568
root: nodes.BigFrameNode,
6669
min_complexity: float,
6770
max_complexity: float,
68-
cache: dict[nodes.BigFrameNode, nodes.BigFrameNode],
71+
cache: execution_cache.ExecutionCache,
6972
heuristic: Callable[[int, int], float],
7073
) -> Optional[nodes.BigFrameNode]:
7174
"""Take tree, and return candidate nodes with (# of occurrences, post-caching planning complexity).
@@ -75,7 +78,7 @@ def select_cache_target(
7578

7679
@functools.cache
7780
def _with_caching(subtree: nodes.BigFrameNode) -> nodes.BigFrameNode:
78-
return nodes.top_down(subtree, lambda x: cache.get(x, x))
81+
return cache.subsitute_cached_subplans(subtree)
7982

8083
def _combine_counts(
8184
left: Dict[nodes.BigFrameNode, int], right: Dict[nodes.BigFrameNode, int]
@@ -106,6 +109,7 @@ def _node_counts_inner(
106109
if len(node_counts) == 0:
107110
raise ValueError("node counts should be non-zero")
108111

112+
# for each considered node, calculate heuristic value, and return node with max value
109113
return max(
110114
node_counts.keys(),
111115
key=lambda node: heuristic(

bigframes/extensions/pandas/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,15 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
"""
16+
BigQuery DataFrames automatically registers a pandas extension when imported.
17+
This allows you to use the power of the BigQuery engine with pandas objects
18+
directly.
19+
"""
20+
21+
from bigframes.extensions.pandas.dataframe_accessor import (
22+
PandasBigQueryDataFrameAccessor,
23+
)
24+
25+
__all__ = ["PandasBigQueryDataFrameAccessor"]

bigframes/extensions/pandas/dataframe_accessor.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
import pandas.api.extensions
1919

2020
import bigframes.core.global_session as bf_session
21+
from bigframes.core.logging import log_adapter
2122
import bigframes.pandas as bpd
2223

2324

24-
class AIAccessor:
25+
@log_adapter.class_logger
26+
class PandasAIAccessor:
2527
"""
2628
Pandas DataFrame accessor for BigQuery AI functions.
2729
"""
@@ -101,7 +103,8 @@ def forecast(
101103

102104

103105
@pandas.api.extensions.register_dataframe_accessor("bigquery")
104-
class BigQueryDataFrameAccessor:
106+
@log_adapter.class_logger
107+
class PandasBigQueryDataFrameAccessor:
105108
"""
106109
Pandas DataFrame accessor for BigQuery DataFrames functionality.
107110
@@ -112,11 +115,11 @@ def __init__(self, pandas_obj: pandas.DataFrame):
112115
self._obj = pandas_obj
113116

114117
@property
115-
def ai(self) -> "AIAccessor":
118+
def ai(self) -> "PandasAIAccessor":
116119
"""
117120
Accessor for BigQuery AI functions.
118121
"""
119-
return AIAccessor(self._obj)
122+
return PandasAIAccessor(self._obj)
120123

121124
def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
122125
"""

bigframes/session/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,15 +265,20 @@ def __init__(
265265
metrics=self._metrics,
266266
publisher=self._publisher,
267267
)
268+
269+
labels = {}
270+
if not self._strictly_ordered:
271+
labels["bigframes-mode"] = "unordered"
272+
268273
self._executor: executor.Executor = bq_caching_executor.BigQueryCachingExecutor(
269274
bqclient=self._clients_provider.bqclient,
270275
bqstoragereadclient=self._clients_provider.bqstoragereadclient,
271276
loader=self._loader,
272277
storage_manager=self._temp_storage_manager,
273-
strictly_ordered=self._strictly_ordered,
274278
metrics=self._metrics,
275279
enable_polars_execution=context.enable_polars_execution,
276280
publisher=self._publisher,
281+
labels=labels,
277282
)
278283

279284
def __del__(self):

0 commit comments

Comments
 (0)