Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
76336f4
Added support for conditional expression functions
sfc-gh-aherreraaguilar Sep 11, 2025
7936089
Fix tests
sfc-gh-aherreraaguilar Sep 11, 2025
749ba79
Update src/snowflake/snowpark/_functions/scalar_functions.py
sfc-gh-aherreraaguilar Sep 11, 2025
2580870
Update src/snowflake/snowpark/_functions/scalar_functions.py
sfc-gh-aherreraaguilar Sep 11, 2025
dbdd8ad
Update src/snowflake/snowpark/_functions/scalar_functions.py
sfc-gh-aherreraaguilar Sep 11, 2025
3b4e62c
Merge branch 'refs/heads/main' into feature/aherrera/SNOW-2314365-con…
sfc-gh-aherreraaguilar Sep 11, 2025
81a5155
Merge remote-tracking branch 'origin/feature/aherrera/SNOW-2314365-co…
sfc-gh-aherreraaguilar Sep 11, 2025
77478ac
Merge branch 'refs/heads/main' into feature/aherrera/SNOW-2314365-con…
sfc-gh-aherreraaguilar Sep 11, 2025
528b7e5
Fix boolor documentation.
sfc-gh-aherreraaguilar Sep 11, 2025
96a0288
Update boolxor indentation.
sfc-gh-aherreraaguilar Sep 11, 2025
c677166
Merge branch 'refs/heads/main' into feature/aherrera/SNOW-2314365-con…
sfc-gh-aherreraaguilar Sep 11, 2025
aab6521
Update comment
sfc-gh-aherreraaguilar Sep 11, 2025
8475da4
Fix indentation docstring
sfc-gh-aherreraaguilar Sep 12, 2025
ba654bf
Update boolor docstring
sfc-gh-aherreraaguilar Sep 12, 2025
cb698d3
Merge branch 'main' into feature/aherrera/SNOW-2314365-conditional-ex…
sfc-gh-aherreraaguilar Sep 12, 2025
a9b8dba
Merge branch 'main' into feature/aherrera/SNOW-2314365-conditional-ex…
sfc-gh-aherreraaguilar Sep 16, 2025
be97a7e
Update docstrings
sfc-gh-aherreraaguilar Sep 16, 2025
2912f30
Update changelog
sfc-gh-aherreraaguilar Sep 16, 2025
63e86e4
Merge branch 'main' into feature/aherrera/SNOW-2314365-conditional-ex…
sfc-gh-aherreraaguilar Sep 16, 2025
9ba61d1
Update docstrings
sfc-gh-aherreraaguilar Sep 16, 2025
efe000e
Fix local test
sfc-gh-aherreraaguilar Sep 18, 2025
de2603e
Merge branch 'main' into feature/aherrera/SNOW-2314365-conditional-ex…
sfc-gh-aherreraaguilar Sep 18, 2025
bd5c5e1
update changelog
sfc-gh-aherreraaguilar Sep 18, 2025
6c9a693
update changelog
sfc-gh-aherreraaguilar Sep 18, 2025
ff33709
update changelog
sfc-gh-aherreraaguilar Sep 18, 2025
5e8212d
update docstring
sfc-gh-aherreraaguilar Sep 18, 2025
25f1403
Merge branch 'main' into feature/aherrera/SNOW-2314365-conditional-ex…
sfc-gh-aherreraaguilar Sep 18, 2025
d6f6cd3
Merge branch 'main' into feature/aherrera/SNOW-2314365-conditional-ex…
sfc-gh-aherreraaguilar Sep 19, 2025
bc1b3ce
Update changelog
sfc-gh-aherreraaguilar Sep 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@
- Added support for the following scalar functions in `functions.py`:
- `array_remove_at`
- `as_boolean`
- `booland`
- `boolnot`
- `boolor`
- `boolor_agg`
- `boolxor`
- `chr`
- `decode`
- `div0null`
- `dp_interval_high`
- `dp_interval_low`
- `greatest_ignore_nulls`
- `h3_cell_to_boundary`
- `h3_cell_to_parent`
- `h3_cell_to_point`
Expand All @@ -38,6 +44,11 @@
- `hex_decode_binary`
- `last_query_id`
- `last_transaction`
- `least_ignore_nulls`
- `nullif`
- `nvl2`
- `regr_valx`


### Snowpark pandas API Updates

Expand Down
10 changes: 10 additions & 0 deletions docs/source/snowpark/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ Functions
bitxor
bitxor_agg
boolor_agg
booland
boolnot
boolor
boolxor
build_stage_file_url
builtin
bround
Expand Down Expand Up @@ -184,6 +188,7 @@ Functions
dayofmonth
dayofweek
dayofyear
decode
degrees
dense_rank
desc
Expand Down Expand Up @@ -229,6 +234,7 @@ Functions
getdate
getvariable
greatest
greatest_ignore_nulls
grouping
grouping_id
hash
Expand Down Expand Up @@ -287,6 +293,7 @@ Functions
last_value
lead
least
least_ignore_nulls
left
length
listagg
Expand Down Expand Up @@ -327,8 +334,10 @@ Functions
not_
nth_value
ntile
nullif
nullifzero
nvl
nvl2
object_agg
object_construct
object_construct_keep_null
Expand Down Expand Up @@ -365,6 +374,7 @@ Functions
regr_sxx
regr_sxy
regr_syy
regr_valx
repeat
replace
right
Expand Down
273 changes: 273 additions & 0 deletions src/snowflake/snowpark/_functions/scalar_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,3 +1024,276 @@ def last_transaction(_emit_ast: bool = True) -> Column:
>>> assert result[0]['LAST_TRANSACTION()'] is None or isinstance(result[0]['LAST_TRANSACTION()'], str)
"""
return builtin("last_transaction", _emit_ast=_emit_ast)()


@publicapi
def booland(expr1: ColumnOrName, expr2: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the Boolean AND of two numeric expressions. In accordance with Boolean semantics:

        - Non-zero values (including negative numbers) are regarded as True.
        - Zero values are regarded as False.

    Args:
        expr1 (ColumnOrName): The first numeric expression, as a column or a column name.
        expr2 (ColumnOrName): The second numeric expression, as a column or a column name.

    Returns:
        Column: A column that evaluates to

        - True if both expressions are non-zero.
        - False if both expressions are zero or one expression is zero and the other expression is non-zero or NULL.
        - NULL if both expressions are NULL or one expression is NULL and the other expression is non-zero.

    Example::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([[1, -2], [0, 2], [0, 0], [5, 3]], schema=["a", "b"])
        >>> df.select(booland(col("a"), col("b")).alias("result")).collect()
        [Row(RESULT=True), Row(RESULT=False), Row(RESULT=False), Row(RESULT=True)]
    """
    # Convert both operands (first, then second) before building the call.
    operands = [_to_col_if_str(operand, "booland") for operand in (expr1, expr2)]
    booland_fn = builtin("booland", _emit_ast=_emit_ast)
    return booland_fn(*operands)


@publicapi
def boolnot(e: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the Boolean NOT of a single numeric expression. In accordance with Boolean semantics:

        - Non-zero values (including negative numbers) are regarded as True.
        - Zero values are regarded as False.

    Args:
        e (ColumnOrName): The numeric expression to negate, as a column or a column name.

    Returns:
        Column: A column that evaluates to

        - True if the expression is zero.
        - False if the expression is non-zero.
        - NULL if the expression is NULL.

    Example::

        >>> df = session.create_dataframe([0, 10, -5], schema=["a"])
        >>> df.select(boolnot("a")).collect()
        [Row(BOOLNOT("A")=True), Row(BOOLNOT("A")=False), Row(BOOLNOT("A")=False)]
    """
    operand = _to_col_if_str(e, "boolnot")
    boolnot_fn = builtin("boolnot", _emit_ast=_emit_ast)
    return boolnot_fn(operand)


@publicapi
def boolor(expr1: ColumnOrName, expr2: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the Boolean OR of two numeric expressions. In accordance with Boolean semantics:

        - Non-zero values (including negative numbers) are regarded as True.
        - Zero values are regarded as False.

    The operation is symmetric: swapping ``expr1`` and ``expr2`` does not change the result.

    Args:
        expr1 (ColumnOrName): The first numeric expression, as a column or a column name.
        expr2 (ColumnOrName): The second numeric expression, as a column or a column name.

    Returns:
        Column: A column that evaluates to

        - True if both expressions are non-zero, or one expression is non-zero and the other expression is zero or None.
        - False if both expressions are zero.
        - None if both expressions are None, or one expression is None and the other expression is zero.

    Example::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([
        ...     [1, 2],
        ...     [-1, 0],
        ...     [3, None],
        ...     [0, 0],
        ...     [None, 0],
        ...     [None, None]
        ... ], schema=["expr1", "expr2"])
        >>> df.select(boolor(col("expr1"), col("expr2")).alias("result")).collect()
        [Row(RESULT=True), Row(RESULT=True), Row(RESULT=True), Row(RESULT=False), Row(RESULT=None), Row(RESULT=None)]
    """
    c1 = _to_col_if_str(expr1, "boolor")
    c2 = _to_col_if_str(expr2, "boolor")
    return builtin("boolor", _emit_ast=_emit_ast)(c1, c2)


@publicapi
def boolxor(expr1: ColumnOrName, expr2: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the Boolean XOR of two numeric expressions (i.e. one of the expressions, but not
    both expressions, is True). In accordance with Boolean semantics:

        - Non-zero values (including negative numbers) are regarded as True.
        - Zero values are regarded as False.

    Args:
        expr1 (ColumnOrName): First numeric expression or a string name of the column.
        expr2 (ColumnOrName): Second numeric expression or a string name of the column.

    Returns:
        Column: A column that evaluates to

        - True if one expression is non-zero and the other expression is zero.
        - False if both expressions are zero or both expressions are non-zero.
        - None if one or both expressions are None (see the last row of the example, where
          a None operand paired with a non-zero value yields None).

    Example::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([[2, 0], [1, -1], [0, 0], [None, 3]], schema=["a", "b"])
        >>> df.select(boolxor(col("a"), col("b")).alias("result")).collect()
        [Row(RESULT=True), Row(RESULT=False), Row(RESULT=False), Row(RESULT=None)]
    """
    c1 = _to_col_if_str(expr1, "boolxor")
    c2 = _to_col_if_str(expr2, "boolxor")
    return builtin("boolxor", _emit_ast=_emit_ast)(c1, c2)


@publicapi
def decode(expr: ColumnOrName, *args: ColumnOrName, _emit_ast: bool = True) -> Column:
    """Compares an expression against search values and returns the matching result value.

    Works like a CASE statement: ``expr`` is compared to each search value in turn and the
    result paired with the first matching search value is returned.

    Args:
        expr (ColumnOrName): The expression to decode.
        *args (ColumnOrName): Pairs of search value and result value, optionally followed by a
            single trailing default value.

    Returns:
        Column: The decoded result.

    Example::

        >>> from snowflake.snowpark.functions import col, lit
        >>> df = session.create_dataframe([[1, 1], [2, 4], [16, 24]], schema=["a", "b"])
        >>> df.select(decode(col("a"), lit(1), lit("one"), lit(2), lit("two"), lit("default")).alias("RESULT")).collect()
        [Row(RESULT='one'), Row(RESULT='two'), Row(RESULT='default')]
    """
    # Convert the subject expression first, then each search/result/default argument in order.
    call_args = [_to_col_if_str(item, "decode") for item in (expr, *args)]
    decode_fn = builtin("decode", _emit_ast=_emit_ast)
    return decode_fn(*call_args)


@publicapi
def greatest_ignore_nulls(*columns: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Returns the largest value from a list of expressions, ignoring None values.
    If all argument values are None, the result is None.

    Args:
        columns (ColumnOrName): The columns or column names to compare.

    Returns:
        Column: The greatest value, ignoring None values.

    Example::

        >>> df = session.create_dataframe([[1, 2, 3, 4.25], [2, 4, -1, None], [3, 6, None, -2.75]], schema=["a", "b", "c", "d"])
        >>> df.select(greatest_ignore_nulls(df["a"], df["b"], df["c"], df["d"]).alias("greatest_ignore_nulls")).collect()
        [Row(GREATEST_IGNORE_NULLS=4.25), Row(GREATEST_IGNORE_NULLS=4.0), Row(GREATEST_IGNORE_NULLS=6.0)]
    """
    candidates = [
        _to_col_if_str(column, "greatest_ignore_nulls") for column in columns
    ]
    greatest_fn = builtin("greatest_ignore_nulls", _emit_ast=_emit_ast)
    return greatest_fn(*candidates)


@publicapi
def least_ignore_nulls(*columns: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Returns the smallest value from a list of expressions, ignoring None values.
    If all argument values are None, the result is None.

    Args:
        columns (ColumnOrName): The columns or column names to compare.

    Returns:
        Column: The smallest value from the list of expressions, ignoring None values.

    Example::

        >>> df = session.create_dataframe([[1, 2, 3], [2, 4, -1], [3, 6, None]], schema=["a", "b", "c"])
        >>> df.select(least_ignore_nulls(df["a"], df["b"], df["c"]).alias("least_ignore_nulls")).collect()
        [Row(LEAST_IGNORE_NULLS=1), Row(LEAST_IGNORE_NULLS=-1), Row(LEAST_IGNORE_NULLS=3)]
    """
    candidates = [
        _to_col_if_str(column, "least_ignore_nulls") for column in columns
    ]
    least_fn = builtin("least_ignore_nulls", _emit_ast=_emit_ast)
    return least_fn(*candidates)


@publicapi
def nullif(expr1: ColumnOrName, expr2: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Returns None if expr1 is equal to expr2, otherwise returns expr1.

    Args:
        expr1 (ColumnOrName): The expression that is returned when the two expressions differ.
        expr2 (ColumnOrName): The expression that expr1 is compared against.

    Returns:
        Column: None if expr1 is equal to expr2, otherwise expr1.

    Example::

        >>> df = session.create_dataframe([[0, 0], [0, 1], [1, 0], [1, 1], [None, 0]], schema=["a", "b"])
        >>> df.select(nullif(df["a"], df["b"]).alias("result")).collect()
        [Row(RESULT=None), Row(RESULT=0), Row(RESULT=1), Row(RESULT=None), Row(RESULT=None)]
    """
    # Convert both operands (first, then second) before building the call.
    operands = [_to_col_if_str(operand, "nullif") for operand in (expr1, expr2)]
    nullif_fn = builtin("nullif", _emit_ast=_emit_ast)
    return nullif_fn(*operands)


@publicapi
def nvl2(
    expr1: ColumnOrName,
    expr2: ColumnOrName,
    expr3: ColumnOrName,
    _emit_ast: bool = True,
) -> Column:
    """
    Returns expr2 if expr1 is not None, otherwise returns expr3.

    Args:
        expr1 (ColumnOrName): The expression to test for None.
        expr2 (ColumnOrName): The value to return if expr1 is not None.
        expr3 (ColumnOrName): The value to return if expr1 is None.

    Returns:
        Column: expr2 where expr1 is not None, expr3 elsewhere.

    Example::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([
        ...     [0, 5, 3],
        ...     [0, 5, None],
        ...     [0, None, 3],
        ...     [None, 5, 3],
        ...     [None, None, 3]
        ... ], schema=["a", "b", "c"])
        >>> df.select(nvl2(col("a"), col("b"), col("c")).alias("nvl2_result")).collect()
        [Row(NVL2_RESULT=5), Row(NVL2_RESULT=5), Row(NVL2_RESULT=None), Row(NVL2_RESULT=3), Row(NVL2_RESULT=3)]
    """
    # Convert the tested expression and both branch values, in positional order.
    operands = [
        _to_col_if_str(operand, "nvl2") for operand in (expr1, expr2, expr3)
    ]
    nvl2_fn = builtin("nvl2", _emit_ast=_emit_ast)
    return nvl2_fn(*operands)


@publicapi
def regr_valx(y: ColumnOrName, x: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Returns None if either argument is None; otherwise, returns the second argument.
    Note that REGR_VALX is a None-preserving function, while the more commonly-used NVL is a
    None-replacing function.

    Important: Note the order of the arguments; y precedes x.

    Args:
        y (ColumnOrName): The dependent variable column.
        x (ColumnOrName): The independent variable column.

    Returns:
        Column: The result of the regr_valx function.

    Example::

        >>> from snowflake.snowpark import Row
        >>> df = session.create_dataframe([[2.0, 1.0], [None, 3.0], [6.0, None]], schema=["col_y", "col_x"])
        >>> result = df.select(regr_valx(df["col_y"], df["col_x"]).alias("result")).collect()
        >>> assert result == [Row(RESULT=1.0), Row(RESULT=None), Row(RESULT=None)]
    """
    # Convert y first, then x, and pass them on in that same (y, x) order.
    operands = [_to_col_if_str(operand, "regr_valx") for operand in (y, x)]
    regr_valx_fn = builtin("regr_valx", _emit_ast=_emit_ast)
    return regr_valx_fn(*operands)
12 changes: 6 additions & 6 deletions tests/mock/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,30 +340,30 @@ def test_patch_unsupported_function(session):
df = session.create_dataframe([[3, 1], [3, 2], [4, 3]], schema=["a", "b"])
with pytest.raises(NotImplementedError):
df.select(
call_function("greatest_ignore_nulls", df["a"], df["b"]).alias("greatest")
call_function("my_function", df["a"], df["b"]).alias("greatest")
).collect()

@patch("greatest_ignore_nulls")
def mock_greatest_ignore_nulls(
@patch("my_mocked_function")
def mock_my_mocked_function(
*columns: Iterable[ColumnEmulator],
) -> ColumnEmulator:
return ColumnEmulator(
[1] * len(columns[0]), sf_type=ColumnType(IntegerType(), False)
)

assert df.select(
call_function("greatest_ignore_nulls", df["a"], df["b"]).alias("greatest")
call_function("my_mocked_function", df["a"], df["b"]).alias("greatest")
).collect() == [Row(1), Row(1), Row(1)]

@patch("greatest_ignore_nulls")
@patch("my_mocked_function_2")
def mock_wrong_patch(columns: Iterable[ColumnEmulator]) -> ColumnEmulator:
return ColumnEmulator(
[1] * len(columns[0]), sf_type=ColumnType(IntegerType(), False)
)

with pytest.raises(SnowparkLocalTestingException) as exc:
df.select(
call_function("greatest_ignore_nulls", df["a"], df["b"]).alias("greatest")
call_function("my_mocked_function_2", df["a"], df["b"]).alias("greatest")
).collect()
assert "Please ensure the implementation follows specifications" in str(exc.value)

Expand Down