Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit f38799c

Browse files
handle post-processing virtualized array type
1 parent 79986dd commit f38799c

File tree

6 files changed

+56
-16
lines changed

6 files changed

+56
-16
lines changed

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,13 @@ def array_reduce_op_impl(x: ibis_types.Value, op: ops.ArrayReduceOp):
11561156
)
11571157

11581158

1159+
@scalar_op_compiler.register_unary_op(ops.ArrayMapOp, pass_op=True)
def array_map_op_impl(x: ibis_types.Value, op: ops.ArrayMapOp):
    """Compile ``ArrayMapOp`` by applying ``op.map_op`` to each array element.

    Args:
        x: Value to map over; cast (for typing purposes only) to
            ``ibis_types.ArrayValue``.
        op: The array-map operation whose ``map_op`` is compiled against the
            per-element value.

    Returns:
        The result of calling ``map`` on the array value with the compiled
        per-element expression.
    """

    # NOTE(review): the parameter name ``arr_vals`` is kept exactly as in the
    # original lambda in case the ibis backend derives a SQL alias from the
    # callable's signature - confirm before renaming.
    def compile_element(arr_vals):
        return scalar_op_compiler.compile_row_op(op.map_op, (arr_vals,))

    array_value = typing.cast(ibis_types.ArrayValue, x)
    return array_value.map(compile_element)
1164+
1165+
11591166
# JSON Ops
11601167
@scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True)
11611168
def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):

bigframes/core/compile/sqlglot/expressions/array_ops.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,28 @@ def _(expr: TypedExpr, op: ops.ArrayReduceOp) -> sge.Expression:
7373
)
7474

7575

76+
@register_unary_op(ops.ArrayMapOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ArrayMapOp) -> sge.Expression:
    """Compile ``ArrayMapOp`` into an ``ARRAY(SELECT ... FROM UNNEST(...))``.

    The input array is unnested under a fixed element alias, ``op.map_op`` is
    compiled against that alias, and the mapped rows are collected back into
    an array.

    Args:
        expr: Typed array expression to map over.
        op: The array-map operation; ``op.map_op`` is applied per element.

    Returns:
        A sqlglot array expression wrapping the mapping subquery.
    """
    sub_expr = sg.to_identifier("bf_arr_map_uid")
    offset_expr = sg.to_identifier("bf_arr_map_offset")
    sub_type = dtypes.get_array_inner_type(expr.dtype)

    # TODO: Expression should be provided instead of invoking compiler manually
    map_expr = expression_compiler.expression_compiler.compile_row_op(
        op.map_op, (TypedExpr(sub_expr, sub_type),)
    )

    # Row order coming out of UNNEST is not guaranteed, so expose the element
    # offset and order by it to keep the mapped array aligned with the input.
    unnested = sge.Unnest(
        expressions=[expr.expr],
        alias=sge.TableAlias(columns=[sub_expr]),
        offset=offset_expr,
    )
    return sge.array(
        sge.select(map_expr)
        .from_(unnested)
        # Use a copy so the offset identifier node is not shared between the
        # UNNEST clause and the ORDER BY clause.
        .order_by(sge.column(offset_expr.copy()))
        .subquery()
    )
96+
97+
7698
@register_unary_op(ops.ArraySliceOp, pass_op=True)
7799
def _(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression:
78100
if expr.dtype == dtypes.STRING_DTYPE:

bigframes/functions/_utils.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -292,20 +292,6 @@ def get_python_version(is_compat: bool = False) -> str:
292292
return f"python{major}{minor}" if is_compat else f"python-{major}.{minor}"
293293

294294

295-
def build_unnest_post_routine(py_list_type: type[list]):
    """Build a callable that extracts a typed array from a JSON-encoded input.

    Args:
        py_list_type: Python list type used to look up the SDK array output
            type and, from it, the element type.

    Returns:
        A one-argument function that calls
        ``bigframes.bigquery.json_extract_string_array`` on its input with
        ``value_dtype`` set to the element dtype resolved here.
    """
    element_sdk_type = function_typing.sdk_array_output_type_from_python_type(
        py_list_type
    ).array_element_type
    assert element_sdk_type is not None
    element_dtype = function_typing.sdk_type_to_bf_type(element_sdk_type)

    def post_process(input):
        # Imported at call time, exactly as in the original.
        import bigframes.bigquery as bbq

        return bbq.json_extract_string_array(input, value_dtype=element_dtype)

    return post_process
307-
308-
309295
def has_conflict_input_type(
310296
signature: inspect.Signature,
311297
input_types: Sequence[Any],

bigframes/functions/udf_def.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,19 @@ def emulating_type(self) -> DirectScalarType:
158158
def out_expr(
    self, expr: bigframes.core.expression.Expression
) -> bigframes.core.expression.Expression:
    """Wrap ``expr`` so its JSON-array string is decoded to a typed array.

    The JSON array is first read as an array of strings; unless the inner
    Python type is ``str``, each element is then converted with an
    ``ArrayMapOp``.

    Args:
        expr: Expression producing the JSON-encoded array.

    Returns:
        An expression yielding the decoded array.
    """
    # essentially we are undoing json.dumps in sql
    import bigframes.operations as ops

    string_items = ops.JSONValueArray(json_path="$").as_expr(expr)
    element_py_type = self.inner_dtype.py_type

    # Strings need no per-element conversion.
    if element_py_type is str:
        return string_items

    if element_py_type is bool:
        # TODO: hack so we don't need to make ArrayMap support general expressions yet
        element_op = ops.IsInOp(values=("true",))
    else:
        element_op = ops.AsTypeOp(self.inner_dtype.bf_type)
    return ops.ArrayMapOp(element_op).as_expr(string_items)
165174

166175
@property
167176
def sql_type(self) -> str:

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
)
2626
from bigframes.operations.array_ops import (
2727
ArrayIndexOp,
28+
ArrayMapOp,
2829
ArrayReduceOp,
2930
ArraySliceOp,
3031
ArrayToStringOp,
@@ -440,4 +441,5 @@
440441
"NUMPY_TO_OP",
441442
"ToArrayOp",
442443
"ArrayReduceOp",
444+
"ArrayMapOp",
443445
]

bigframes/operations/array_ops.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,17 @@ def output_type(self, *input_types):
8888
assert dtypes.is_array_like(input_type)
8989
inner_type = dtypes.get_array_inner_type(input_type)
9090
return self.aggregation.output_type(inner_type)
91+
92+
93+
@dataclasses.dataclass(frozen=True)
class ArrayMapOp(base_ops.UnaryOp):
    """Unary op that applies ``map_op`` to each element of an array input."""

    name: typing.ClassVar[str] = "array_map"
    # TODO: Generalize to chained expressions
    # Per-element operation; its output type becomes the result's inner type.
    map_op: base_ops.UnaryOp

    def output_type(self, *input_types):
        """Return the list type whose element type is ``map_op``'s output type.

        Only the first entry of ``input_types`` is used, and it is asserted
        to be array-like.
        """
        array_type = input_types[0]
        assert dtypes.is_array_like(array_type)
        element_type = dtypes.get_array_inner_type(array_type)
        return dtypes.list_type(self.map_op.output_type(element_type))

0 commit comments

Comments
 (0)