googleapis
diff --git a/‎CHANGELOG.md‎
Lines changed: 9 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎bigframes/core/compile/sqlglot/expressions/unary_compiler.py‎
Lines changed: 97 additions & 8 deletions b/‎bigframes/core/compile/sqlglot/expressions/unary_compiler.py‎
Lines changed: 97 additions & 8 deletions
diff --git a/‎bigframes/series.py‎
Lines changed: 29 additions & 12 deletions b/‎bigframes/series.py‎
Lines changed: 29 additions & 12 deletions
diff --git a/‎bigframes/version.py‎
Lines changed: 2 additions & 2 deletions b/‎bigframes/version.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/system/large/functions/test_managed_function.py‎
Lines changed: 41 additions & 3 deletions b/‎tests/system/large/functions/test_managed_function.py‎
Lines changed: 41 additions & 3 deletions
diff --git a/‎tests/system/large/functions/test_remote_function.py‎
Lines changed: 52 additions & 3 deletions b/‎tests/system/large/functions/test_remote_function.py‎
Lines changed: 52 additions & 3 deletions
diff --git a/‎tests/system/small/test_series.py‎
Lines changed: 20 additions & 0 deletions b/‎tests/system/small/test_series.py‎
Lines changed: 20 additions & 0 deletions
@@ -4,6 +4,15 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [2.17.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.16.0...v2.17.0) (2025-08-22)
+
+
+### Features
+
+* Add isin local execution impl ([#1993](https://github.com/googleapis/python-bigquery-dataframes/issues/1993)) ([26df6e6](https://github.com/googleapis/python-bigquery-dataframes/commit/26df6e691bb27ed09322a81214faedbf3639b32e))
+* Add reset_index names, col_level, col_fill, allow_duplicates args ([#2017](https://github.com/googleapis/python-bigquery-dataframes/issues/2017)) ([c02a1b6](https://github.com/googleapis/python-bigquery-dataframes/commit/c02a1b67d27758815430bb8006ac3a72cea55a89))
+* Support callable for series mask method ([#2014](https://github.com/googleapis/python-bigquery-dataframes/issues/2014)) ([5ac32eb](https://github.com/googleapis/python-bigquery-dataframes/commit/5ac32ebe17cfda447870859f5dd344b082b4d3d0))
+
 ## [2.16.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.15.0...v2.16.0) (2025-08-20)
 
 
 
@@ -177,14 +177,96 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
+def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
+    """Compile a literal (non-regex) substring containment test.
+
+    Emits ``STRPOS(value, pat) > 0`` rather than ``value LIKE '%pat%'``.
+    With LIKE, any ``%``, ``_`` or backslash inside ``op.pat`` would be
+    interpreted as a wildcard/escape instead of being matched literally,
+    so patterns such as ``"100%"`` or ``"a_b"`` produced false positives.
+
+    Args:
+        op: The contains op; ``op.pat`` is the literal substring to look for.
+        expr: The compiled string expression to search in.
+
+    Returns:
+        A boolean expression that is true when ``op.pat`` occurs in ``expr``.
+    """
+    # STRPOS is 1-based and yields 0 when the substring is absent.
+    position = sge.func("STRPOS", expr.expr, sge.convert(op.pat))
+    return sge.GT(this=position, expression=sge.convert(0))
+
+
 @UNARY_OP_REGISTRATION.register(ops.StrContainsRegexOp)
 def _(op: ops.StrContainsRegexOp, expr: TypedExpr) -> sge.Expression:
+    # Build a RegexpLike node testing the input expression against `op.pat`.
     return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat))
 
 
-@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
-def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
-    return sge.Like(this=expr.expr, expression=sge.convert(f"%{op.pat}%"))
+@UNARY_OP_REGISTRATION.register(ops.StrExtractOp)
+def _(op: ops.StrExtractOp, expr: TypedExpr) -> sge.Expression:
+    """Build a ``RegexpExtract`` node from the op's pattern and group number.
+
+    ``op.pat`` is passed as the regular expression and ``op.n`` as the
+    ``group`` argument; both are converted to SQL literals.
+    """
+    pattern = sge.convert(op.pat)
+    group_index = sge.convert(op.n)
+    return sge.RegexpExtract(this=expr.expr, expression=pattern, group=group_index)
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrFindOp)
+def _(op: ops.StrFindOp, expr: TypedExpr) -> sge.Expression:
+    """Compile a substring search to ``INSTR``, yielding a 0-based index or -1.
+
+    Args:
+        op: The find op. ``op.substr`` is the text to locate; ``op.start`` and
+            ``op.end`` (either may be None) bound the searched region.
+        expr: The compiled string expression to search in.
+
+    Returns:
+        An expression giving the 0-based position of the first match relative
+        to the whole input string, or -1 when there is no match.
+
+    NOTE(review): negative ``start``/``end`` values are passed through
+    untranslated; confirm that callers normalise them beforehand.
+    """
+    # INSTR is 1-based, so we need to adjust the start position.
+    offset = op.start or 0
+    start = sge.convert(offset + 1)
+
+    if op.end is None:
+        # INSTR's position argument keeps the result relative to the full
+        # string, so only the 1-based -> 0-based shift is required.
+        return sge.func(
+            "INSTR",
+            expr.expr,
+            sge.convert(op.substr),
+            start,
+        ) - sge.convert(1)
+
+    # BigQuery's INSTR doesn't support `end`, so we search a SUBSTR window.
+    position = sge.func(
+        "INSTR",
+        sge.Substring(
+            this=expr.expr,
+            start=start,
+            length=sge.convert(op.end - offset),
+        ),
+        sge.convert(op.substr),
+    )
+    if offset == 0:
+        # The window starts at the beginning, so positions already line up.
+        return position - sge.convert(1)
+
+    # The match position is relative to the window: add the window offset back
+    # for hits, but keep "not found" (INSTR == 0) mapped to -1 rather than
+    # shifting it into a bogus index. A NULL input still yields NULL.
+    return sge.If(
+        this=sge.EQ(this=position.copy(), expression=sge.convert(0)),
+        true=sge.convert(-1),
+        false=position + sge.convert(offset - 1),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
+def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
+    """Build a left-sided ``Trim`` node that strips ``op.to_strip``."""
+    strip_chars = sge.convert(op.to_strip)
+    return sge.Trim(this=expr.expr, expression=strip_chars, side="LEFT")
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrPadOp)
+def _(op: ops.StrPadOp, expr: TypedExpr) -> sge.Expression:
+    """Compile string padding to ``LPAD`` / ``RPAD`` calls.
+
+    The target width is ``GREATEST(LENGTH(value), op.length)``. ``op.side``
+    selects the strategy: ``"left"`` uses LPAD, ``"right"`` uses RPAD, and
+    any other value is treated as "both" (LPAD followed by RPAD).
+    """
+    target_length = sge.func(
+        "GREATEST", sge.Length(this=expr.expr), sge.convert(op.length)
+    )
+
+    if op.side in ("left", "right"):
+        pad_function = "LPAD" if op.side == "left" else "RPAD"
+        return sge.func(
+            pad_function,
+            expr.expr,
+            target_length,
+            sge.convert(op.fillchar),
+        )
+
+    # side == both: left-pad up to (current length + half of the shortfall),
+    # then right-pad the result up to the full target width.
+    shortfall = sge.Sub(this=target_length, expression=sge.Length(this=expr.expr))
+    half_shortfall = sge.Cast(
+        this=sge.func("SAFE_DIVIDE", shortfall, sge.convert(2)),
+        to="INT64",
+    )
+    left_width = half_shortfall + sge.Length(this=expr.expr)
+    left_padded = sge.func(
+        "LPAD",
+        expr.expr,
+        left_width,
+        sge.convert(op.fillchar),
+    )
+    return sge.func(
+        "RPAD",
+        left_padded,
+        target_length,
+        sge.convert(op.fillchar),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrRepeatOp)
+def _(op: ops.StrRepeatOp, expr: TypedExpr) -> sge.Expression:
+    """Build a ``Repeat`` node repeating the input ``op.repeats`` times."""
+    repeat_count = sge.convert(op.repeats)
+    return sge.Repeat(this=expr.expr, times=repeat_count)
 
 
 @UNARY_OP_REGISTRATION.register(ops.date_op)
@@ -444,11 +526,6 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="MONTH"), expression=expr.expr)
 
 
-@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
-def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
-    return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT")
-
-
 @UNARY_OP_REGISTRATION.register(ops.neg_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Neg(this=expr.expr)
@@ -484,6 +561,18 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="QUARTER"), expression=expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.ReplaceStrOp)
+def _(op: ops.ReplaceStrOp, expr: TypedExpr) -> sge.Expression:
+    """Emit ``REPLACE(value, op.pat, op.repl)`` with both arguments as literals."""
+    search_text = sge.convert(op.pat)
+    replacement = sge.convert(op.repl)
+    return sge.func("REPLACE", expr.expr, search_text, replacement)
+
+
+@UNARY_OP_REGISTRATION.register(ops.RegexReplaceStrOp)
+def _(op: ops.RegexReplaceStrOp, expr: TypedExpr) -> sge.Expression:
+    """Emit ``REGEXP_REPLACE(value, op.pat, op.repl)`` with literal arguments."""
+    pattern = sge.convert(op.pat)
+    replacement = sge.convert(op.repl)
+    return sge.func("REGEXP_REPLACE", expr.expr, pattern, replacement)
+
+
 @UNARY_OP_REGISTRATION.register(ops.reverse_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.func("REVERSE", expr.expr)
 
@@ -1904,9 +1904,22 @@ def _groupby_values(
         )
 
     def apply(
-        self, func, by_row: typing.Union[typing.Literal["compat"], bool] = "compat"
+        self,
+        func,
+        by_row: typing.Union[typing.Literal["compat"], bool] = "compat",
+        *,
+        args: typing.Tuple = (),
     ) -> Series:
-        # TODO(shobs, b/274645634): Support convert_dtype, args, **kwargs
+        # Note: This signature differs from pandas.Series.apply. Specifically,
+        # `args` is keyword-only and `by_row` is a custom parameter here. Full
+        # alignment would involve breaking changes. However, given that by_row
+        # is not frequently used, we defer any such changes until there is a
+        # clear need based on user feedback.
+        #
+        # See pandas docs for reference:
+        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.apply.html
+
+        # TODO(shobs, b/274645634): Support convert_dtype, **kwargs
         # is actually a ternary op
 
         if by_row not in ["compat", False]:
@@ -1950,10 +1963,19 @@ def apply(
                 raise
 
         # We are working with bigquery function at this point
-        result_series = self._apply_unary_op(
-            ops.RemoteFunctionOp(function_def=func.udf_def, apply_on_null=True)
-        )
+        if args:
+            result_series = self._apply_nary_op(
+                ops.NaryRemoteFunctionOp(function_def=func.udf_def), args
+            )
+            # TODO(jialuo): Investigate why `_apply_nary_op` drops the series
+            # `name`. Manually reassigning it here as a temporary fix.
+            result_series.name = self.name
+        else:
+            result_series = self._apply_unary_op(
+                ops.RemoteFunctionOp(function_def=func.udf_def, apply_on_null=True)
+            )
         result_series = func._post_process_series(result_series)
+
         return result_series
 
     def combine(
@@ -2113,13 +2135,8 @@ def duplicated(self, keep: str = "first") -> Series:
         )
 
     def mask(self, cond, other=None) -> Series:
-        if callable(cond):
-            if hasattr(cond, "bigframes_bigquery_function"):
-                cond = self.apply(cond)
-            else:
-                # For non-BigQuery function assume that it is applicable on Series
-                cond = self.apply(cond, by_row=False)
-
+        cond = self._apply_callable(cond)
+        other = self._apply_callable(other)
         if not isinstance(cond, Series):
             raise TypeError(
                 f"Only bigframes series condition is supported, received {type(cond).__name__}. "
 
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2.16.0"
+__version__ = "2.17.0"
 
 # {x-release-please-start-date}
-__release_date__ = "2025-08-20"
+__release_date__ = "2025-08-22"
 # {x-release-please-end}
@@ -1103,7 +1103,7 @@ def func_for_other(x):
         )
 
 
-def test_managed_function_series_where(session, dataset_id, scalars_dfs):
+def test_managed_function_series_where_mask(session, dataset_id, scalars_dfs):
     try:
 
         # The return type has to be bool type for callable where condition.
@@ -1124,8 +1124,8 @@ def _is_positive(s):
         pd_int64 = scalars_pandas["int64_col"]
         pd_int64_filtered = pd_int64.dropna()
 
-        # The cond is a callable (managed function) and the other is not a
-        # callable in series.where method.
+        # Test series.where method: the cond is a callable (managed function)
+        # and the other is not a callable.
         bf_result = bf_int64_filtered.where(
             cond=is_positive_mf, other=-bf_int64_filtered
         ).to_pandas()
@@ -1134,6 +1134,44 @@ def _is_positive(s):
         # Ignore any dtype difference.
         pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
 
+        # Test series.mask method: the cond is a callable (managed function)
+        # and the other is not a callable.
+        bf_result = bf_int64_filtered.mask(
+            cond=is_positive_mf, other=-bf_int64_filtered
+        ).to_pandas()
+        pd_result = pd_int64_filtered.mask(cond=_is_positive, other=-pd_int64_filtered)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+
     finally:
         # Clean up the gcp assets created for the managed function.
         cleanup_function_assets(is_positive_mf, session.bqclient, ignore_failures=False)
+
+
+def test_managed_function_series_apply_args(session, dataset_id, scalars_dfs):
+    """Series.apply with ``args`` on a managed function should match pandas.
+
+    Creates a udf taking one series value plus three extra arguments, applies
+    it through ``Series.apply(..., args=...)`` and compares the result with
+    applying the same function via pandas.
+    """
+    try:
+
+        with pytest.warns(bfe.PreviewWarning, match="udf is in preview."):
+
+            @session.udf(dataset=dataset_id, name=prefixer.create_prefix())
+            def foo_list(x: int, y0: float, y1: bytes, y2: bool) -> list[str]:
+                return [str(x), str(y0), str(y1), str(y2)]
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+
+        # The tuple supplies y0, y1 and y2; x comes from each series element.
+        bf_result = (
+            scalars_df["int64_too"]
+            .apply(foo_list, args=(12.34, b"hello world", False))
+            .to_pandas()
+        )
+        pd_result = scalars_pandas_df["int64_too"].apply(
+            foo_list, args=(12.34, b"hello world", False)
+        )
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+    finally:
+        # Clean up the gcp assets created for the managed function.
+        cleanup_function_assets(foo_list, session.bqclient, ignore_failures=False)
@@ -2950,7 +2950,7 @@ def func_for_other(x):
 
 
 @pytest.mark.flaky(retries=2, delay=120)
-def test_remote_function_series_where(session, dataset_id, scalars_dfs):
+def test_remote_function_series_where_mask(session, dataset_id, scalars_dfs):
     try:
 
         def _ten_times(x):
@@ -2971,8 +2971,8 @@ def _ten_times(x):
         pd_int64 = scalars_pandas["float64_col"]
         pd_int64_filtered = pd_int64.dropna()
 
-        # The cond is not a callable and the other is a callable (remote
-        # function) in series.where method.
+        # Test series.where method: the cond is not a callable and the other is
+        # a callable (remote function).
         bf_result = bf_int64_filtered.where(
             cond=bf_int64_filtered < 0, other=ten_times_mf
         ).to_pandas()
@@ -2983,6 +2983,55 @@ def _ten_times(x):
         # Ignore any dtype difference.
         pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
 
+        # Test series.mask method: the cond is not a callable and the other is
+        # a callable (remote function).
+        bf_result = bf_int64_filtered.mask(
+            cond=bf_int64_filtered < 0, other=ten_times_mf
+        ).to_pandas()
+        pd_result = pd_int64_filtered.mask(cond=pd_int64_filtered < 0, other=_ten_times)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+
     finally:
         # Clean up the gcp assets created for the remote function.
         cleanup_function_assets(ten_times_mf, session.bqclient, ignore_failures=False)
+
+
+@pytest.mark.flaky(retries=2, delay=120)
+def test_remote_function_series_apply_args(session, dataset_id, scalars_dfs):
+    """Series.apply with ``args`` on a remote function should match pandas.
+
+    Exercises two call paths: the freshly decorated remote function, and a
+    handle to the same function obtained through ``session.read_gbq_function``.
+    """
+    try:
+
+        @session.remote_function(
+            dataset=dataset_id,
+            reuse=False,
+            cloud_function_service_account="default",
+        )
+        def foo(x: int, y: bool, z: float) -> str:
+            if y:
+                return f"{x}: y is True."
+            if z > 0.0:
+                return f"{x}: y is False and z is positive."
+            return f"{x}: y is False and z is non-positive."
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+
+        # First path: call the decorated function directly (y=True branch).
+        args1 = (True, 10.0)
+        bf_result = scalars_df["int64_too"].apply(foo, args=args1).to_pandas()
+        pd_result = scalars_pandas_df["int64_too"].apply(foo, args=args1)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+        # Second path: re-read the deployed function and hit the
+        # "y is False and z is non-positive" branch.
+        args2 = (False, -10.0)
+        foo_ref = session.read_gbq_function(foo.bigframes_bigquery_function)
+
+        bf_result = scalars_df["int64_too"].apply(foo_ref, args=args2).to_pandas()
+        pd_result = scalars_pandas_df["int64_too"].apply(foo, args=args2)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+    finally:
+        # Clean up the gcp assets created for the remote function.
+        cleanup_function_assets(foo, session.bqclient, ignore_failures=False)
@@ -3603,6 +3603,26 @@ def test_mask_custom_value(scalars_dfs):
     assert_pandas_df_equal(bf_result, pd_result)
 
 
+def test_mask_with_callable(scalars_df_index, scalars_pandas_df_index):
+    """Series.mask should accept plain callables for both ``cond`` and ``other``."""
+
+    def _ten_times(x):
+        return x * 10
+
+    # Both cond and other are callable.
+    bf_result = (
+        scalars_df_index["int64_col"]
+        .mask(cond=lambda x: x > 0, other=_ten_times)
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df_index["int64_col"].mask(
+        cond=lambda x: x > 0, other=_ten_times
+    )
+
+    # The bigframes result must equal what pandas produces for the same call.
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 @pytest.mark.parametrize(
     ("lambda_",),
     [