Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit e80e8f8

Browse files
feat: Add numpy ufunc support to col expressions
1 parent be33279 commit e80e8f8

File tree

2 files changed

+55
-6
lines changed

2 files changed

+55
-6
lines changed

bigframes/core/col.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from typing import Any, Hashable, Literal, TYPE_CHECKING
1818

1919
import bigframes_vendored.pandas.core.col as pd_col
20+
import numpy
2021

2122
from bigframes.core import agg_expressions, window_spec
2223
import bigframes.core.expression as bf_expression
@@ -56,14 +57,10 @@ def _apply_binary_op(
5657
alignment: Literal["outer", "left"] = "outer",
5758
reverse: bool = False,
5859
):
59-
if isinstance(other, Expression):
60-
other_value = other._value
61-
else:
62-
other_value = bf_expression.const(other)
6360
if reverse:
64-
return Expression(op.as_expr(other_value, self._value))
61+
return Expression(op.as_expr(_as_bf_expr(other), self._value))
6562
else:
66-
return Expression(op.as_expr(self._value, other_value))
63+
return Expression(op.as_expr(self._value, _as_bf_expr(other)))
6764

6865
def __add__(self, other: Any) -> Expression:
    """Return the expression for ``self + other``.

    Delegates to ``_apply_binary_op`` with ``bf_ops.add_op``.
    """
    added = self._apply_binary_op(other, bf_ops.add_op)
    return added
@@ -170,6 +167,34 @@ def str(self) -> strings.StringMethods:
170167

171168
return strings.StringMethods(self)
172169

170+
def __array_ufunc__(
    self, ufunc: numpy.ufunc, method: __builtins__.str, *inputs, **kwargs
) -> Expression:
    """Translate a NumPy ufunc call on this expression into an ``Expression``.

    See: https://numpy.org/doc/stable/reference/ufuncs.html

    Only a plain call (``method == "__call__"``) with one or two positional
    inputs and no keyword arguments is handled. Unary ufuncs are looked up
    in ``bf_ops.NUMPY_TO_OP`` and binary ufuncs in ``bf_ops.NUMPY_TO_BINOP``.

    Returns:
        The wrapped expression, or ``NotImplemented`` when the call shape or
        the ufunc is not supported.
    """
    # NOTE(review): ``__builtins__.str`` is presumably spelled this way
    # because ``str`` is also the name of an accessor on this class. It only
    # works as a lazily evaluated annotation -- confirm nothing resolves the
    # hints at runtime.
    arity = len(inputs)

    # Only ``__call__`` with at most two inputs and no keyword arguments.
    if method != "__call__" or arity > 2 or kwargs:
        return NotImplemented

    if arity == 1 and ufunc in bf_ops.NUMPY_TO_OP:
        unary_op = bf_ops.NUMPY_TO_OP[ufunc]
        return Expression(unary_op.as_expr(self._value))

    if arity == 2 and ufunc in bf_ops.NUMPY_TO_BINOP:
        binary_op = bf_ops.NUMPY_TO_BINOP[ufunc]
        first, second = inputs
        # Keep the operand order of the ufunc call: whichever operand is not
        # ``self`` is converted with ``_as_bf_expr``.
        if first is self:
            operands = (self._value, _as_bf_expr(second))
        else:
            operands = (_as_bf_expr(first), self._value)
        return Expression(binary_op.as_expr(*operands))

    return NotImplemented
191+
192+
193+
def _as_bf_expr(arg: Any) -> bf_expression.Expression:
    """Return the ``bf_expression`` form of ``arg``.

    An ``Expression`` contributes its wrapped ``_value``; any other object is
    passed to ``bf_expression.const``.
    """
    return (
        arg._value if isinstance(arg, Expression) else bf_expression.const(arg)
    )
197+
173198

174199
def col(col_name: Hashable) -> Expression:
    """Build an ``Expression`` wrapping ``bf_expression.free_var(col_name)``."""
    column_ref = bf_expression.free_var(col_name)
    return Expression(column_ref)

tests/unit/test_col.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import pathlib
1717
from typing import Generator
1818

19+
import numpy as np
1920
import pandas as pd
2021
import pytest
2122

@@ -246,3 +247,26 @@ def test_col_dt_accessor(scalars_dfs):
246247

247248
# int64[pyarrow] vs Int64
248249
assert_frame_equal(bf_result, pd_result, check_dtype=False)
250+
251+
252+
def test_col_numpy_ufunc(scalars_dfs):
    """Compare NumPy ufuncs applied to ``bpd.col`` against ``pd.col``."""
    scalars_df, scalars_pandas_df = scalars_dfs

    def ufunc_exprs(col):
        # The same four ufunc calls, built with whichever ``col`` factory is
        # passed in, so both libraries are exercised identically.
        return {
            "sqrt": np.sqrt(col("float64_col")),
            "add_const": np.add(col("float64_col"), 2.4),
            "radd_const": np.add(2.4, col("float64_col")),
            "add_cols": np.add(col("float64_col"), col("int64_col")),
        }

    bf_kwargs = ufunc_exprs(bpd.col)
    pd_kwargs = ufunc_exprs(pd.col)  # type: ignore

    bf_result = scalars_df.assign(**bf_kwargs).to_pandas()
    pd_result = scalars_pandas_df.assign(**pd_kwargs)  # type: ignore

    # int64[pyarrow] vs Int64
    assert_frame_equal(bf_result, pd_result, check_dtype=False)

0 commit comments

Comments
 (0)