Skip to content

Commit 4bbe1fb

Browse files
Merge branch 'main' into yuwang-fix-try-cast
2 parents 3c45a7b + fbf000c commit 4bbe1fb

7 files changed

Lines changed: 82 additions & 47 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@
77
#### Bug Fixes
88

99
- Fixed a bug where CTE optimization incorrectly deduplicated subtrees containing non-deterministic data generation functions (e.g. `uuid_string()`).
10+
- Fixed a bug where vectorized UDFs using non-anaconda package repositories did not specify the pandas package by default.
11+
12+
### Snowpark pandas API Updates
13+
14+
#### Dependency Updates
15+
16+
- Updated the supported `pandas` versions to <=2.4 (was previously <=2.3.1).
1017

1118
## 1.49.0 (TBD)
1219

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
MODIN_REQUIREMENTS = [
4444
*PANDAS_REQUIREMENTS,
4545
f"modin{MODIN_DEPENDENCY_VERSION}",
46-
"pandas<=2.3.1",
46+
"pandas<=2.4",
4747
"tqdm", # For progress bars during backend switching
4848
"ipywidgets", # For enhanced progress bars in Jupyter notebooks
4949
]

src/snowflake/snowpark/_internal/udf_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,7 @@ def resolve_imports_and_packages(
12531253
[],
12541254
artifact_repository=artifact_repository,
12551255
existing_packages_dict=existing_packages_dict,
1256+
include_pandas=is_pandas_udf,
12561257
)
12571258
)
12581259
elif packages:

src/snowflake/snowpark/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11945,7 +11945,7 @@ def regr_sxy(y: ColumnOrName, x: ColumnOrName, _emit_ast: bool = True) -> Column
1194511945

1194611946
>>> df = session.create_dataframe([[10, 11], [20, 22], [25, None], [30, 35]], schema=["v", "v2"])
1194711947
>>> df = df.filter(df["v2"].is_not_null())
11948-
>>> df.group_by("v").agg(regr_sxy(df["v"], df["v2"]).alias("regr_sxy")).collect()
11948+
>>> df.group_by("v").agg(regr_sxy(df["v"], df["v2"]).alias("regr_sxy")).sort("v").collect()
1194911949
[Row(V=10, REGR_SXY=0.0), Row(V=20, REGR_SXY=0.0), Row(V=30, REGR_SXY=0.0)]
1195011950
"""
1195111951
y_col = _to_col_if_str(y, "regr_sxy")

tests/integ/modin/frame/test_select_dtypes.py

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -137,36 +137,47 @@ def test_select_dtypes_duplicate_col_names(include, exclude):
137137
)
138138

139139

140-
@pytest.mark.parametrize(
141-
"include, exclude, exc, exc_match",
142-
[
143-
([], [], ValueError, "at least one of include or exclude must be nonempty"),
144-
(None, None, ValueError, "at least one of include or exclude must be nonempty"),
145-
# python `int` is equivalent to any np.int dtype, but it is fine for an type in
146-
# `include` to be a strict subtype of a type in `exclude` or vice versa
147-
(int, int, ValueError, "include and exclude overlap"),
148-
([int], ["O", int], ValueError, "include and exclude overlap"),
149-
(["O", int], [int], ValueError, "include and exclude overlap"),
150-
(int, np.int32, ValueError, "include and exclude overlap"),
151-
(int, np.int64, ValueError, "include and exclude overlap"),
152-
("datetime", np.datetime64, ValueError, "include and exclude overlap"),
153-
("O", object, ValueError, "include and exclude overlap"),
154-
# string dtypes are prohibited by pandas
155-
(str, None, TypeError, "string dtypes are not allowed, use 'object' instead"),
156-
(None, str, TypeError, "string dtypes are not allowed, use 'object' instead"),
157-
(
158-
"timedelta64[s]",
159-
None,
160-
ValueError,
161-
"'timedelta64[s]' is too specific of a frequency, try passing 'timedelta64'",
140+
# Error message differs slightly across pandas patch versions
141+
STR_DTYPE_ERROR = r"string dtypes are not allowed, use ('str' or )?'object' instead"
142+
143+
SELECT_DTYPES_INVALID_PARAMS = [
144+
([], [], ValueError, "at least one of include or exclude must be nonempty"),
145+
(None, None, ValueError, "at least one of include or exclude must be nonempty"),
146+
# python `int` is equivalent to any np.int dtype, but it is fine for a type in
147+
# `include` to be a strict subtype of a type in `exclude` or vice versa
148+
(int, int, ValueError, "include and exclude overlap"),
149+
([int], ["O", int], ValueError, "include and exclude overlap"),
150+
(["O", int], [int], ValueError, "include and exclude overlap"),
151+
(int, np.int32, ValueError, "include and exclude overlap"),
152+
(int, np.int64, ValueError, "include and exclude overlap"),
153+
("datetime", np.datetime64, ValueError, "include and exclude overlap"),
154+
("O", object, ValueError, "include and exclude overlap"),
155+
# string dtypes are prohibited by pandas
156+
(str, None, TypeError, STR_DTYPE_ERROR),
157+
(None, str, TypeError, STR_DTYPE_ERROR),
158+
(
159+
"timedelta64[s]",
160+
None,
161+
ValueError,
162+
re.escape(
163+
"'timedelta64[s]' is too specific of a frequency, try passing 'timedelta64'"
162164
),
163-
(
164-
None,
165-
"timedelta64[s]",
166-
ValueError,
167-
"'timedelta64[s]' is too specific of a frequency, try passing 'timedelta64'",
165+
),
166+
(
167+
None,
168+
"timedelta64[s]",
169+
ValueError,
170+
re.escape(
171+
"'timedelta64[s]' is too specific of a frequency, try passing 'timedelta64'"
168172
),
169-
],
173+
),
174+
]
175+
176+
177+
@pytest.mark.parametrize(
178+
"include, exclude, exc, exc_match",
179+
SELECT_DTYPES_INVALID_PARAMS,
180+
ids=[f"include_{a[0]}-exclude_{a[1]}" for a in SELECT_DTYPES_INVALID_PARAMS],
170181
)
171182
@sql_count_checker(query_count=0)
172183
def test_select_dtypes_invalid_args(include, exclude, exc, exc_match):
@@ -178,6 +189,6 @@ def test_select_dtypes_invalid_args(include, exclude, exc, exc_match):
178189
lambda df: df.select_dtypes(include, exclude),
179190
expect_exception=True,
180191
expect_exception_type=exc,
181-
expect_exception_match=re.escape(exc_match),
192+
expect_exception_match=exc_match,
182193
assert_exception_equal=True,
183194
)

tests/integ/test_packaging.py

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from snowflake.snowpark.functions import call_udf, col, count_distinct, sproc, udf
2323
from snowflake.snowpark.context import _ANACONDA_SHARED_REPOSITORY
2424
from snowflake.snowpark.types import DateType, StringType
25-
from tests.utils import IS_IN_STORED_PROC, TempObjectType, TestFiles, Utils
25+
from tests.utils import IS_IN_STORED_PROC, TempObjectType, TestFiles, Utils, IS_PY314
2626

2727
pytestmark = pytest.mark.xfail(
2828
"config.getoption('local_testing_mode', default=False)",
@@ -347,21 +347,23 @@ def test_add_packages_negative(session, caplog):
347347
assert "InvalidRequirement" in str(ex_info)
348348

349349
session.custom_package_usage_config = {"enabled": True}
350-
with patch.object(session, "_is_anaconda_terms_acknowledged", lambda: True):
351-
with pytest.raises(RuntimeError, match="Pip failed with return code 1"):
352-
session.add_packages("dateutil")
353-
354-
with patch.object(session, "_is_anaconda_terms_acknowledged", lambda: False):
355-
with pytest.raises(RuntimeError, match="Cannot add package dateutil"):
356-
session.add_packages("dateutil")
357-
358-
# Verify multiple errors can be raised at once
359-
with patch.object(session, "_is_anaconda_terms_acknowledged", lambda: False):
360-
with pytest.raises(
361-
RuntimeError,
362-
match="Cannot add package dateutil.*Cannot add package functools",
363-
):
364-
session.add_packages("dateutil", "functools")
350+
# These errors are not raised with Python 3.14 since it uses the PyPI repository by default.
351+
if not IS_PY314:
352+
with patch.object(session, "_is_anaconda_terms_acknowledged", lambda: True):
353+
with pytest.raises(RuntimeError, match="Pip failed with return code 1"):
354+
session.add_packages("dateutil")
355+
356+
with patch.object(session, "_is_anaconda_terms_acknowledged", lambda: False):
357+
with pytest.raises(RuntimeError, match="Cannot add package dateutil"):
358+
session.add_packages("dateutil")
359+
360+
# Verify multiple errors can be raised at once
361+
with patch.object(session, "_is_anaconda_terms_acknowledged", lambda: False):
362+
with pytest.raises(
363+
RuntimeError,
364+
match="Cannot add package dateutil.*Cannot add package functools",
365+
):
366+
session.add_packages("dateutil", "functools")
365367

366368
with pytest.raises(ValueError, match="is already added"):
367369
with caplog.at_level(logging.WARNING):
@@ -464,6 +466,10 @@ def test_add_requirements_twice_should_fail_if_packages_are_different(
464466
IS_IN_STORED_PROC,
465467
reason="Subprocess calls are not allowed within stored procedures.",
466468
)
469+
@pytest.mark.skipif(
470+
IS_PY314,
471+
reason="Python 3.14 uses pypi repository",
472+
)
467473
def test_add_unsupported_requirements_should_fail_if_custom_packages_upload_enabled_not_switched_on(
468474
session, resources_path
469475
):
@@ -519,6 +525,10 @@ def test_urllib() -> str:
519525
IS_IN_STORED_PROC,
520526
reason="Subprocess calls are not allowed within stored procedures.",
521527
)
528+
@pytest.mark.skipif(
529+
IS_PY314,
530+
reason="Python 3.14 uses pypi repository",
531+
)
522532
def test_add_unsupported_packages_should_fail_if_custom_packages_upload_enabled_not_switched_on(
523533
session,
524534
):
@@ -991,6 +1001,10 @@ def get_skfuzzy_version() -> str:
9911001
IS_IN_STORED_PROC,
9921002
reason="Subprocess calls are not allowed within stored procedures.",
9931003
)
1004+
@pytest.mark.skipif(
1005+
IS_PY314,
1006+
reason="Python 3.14 uses pypi repository",
1007+
)
9941008
def test_add_requirements_unsupported_with_cache_path_negative(
9951009
session, resources_path, temporary_stage
9961010
):

tests/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@
105105
"IGNORE_CLIENT_VESRION_IN_STRUCTURED_TYPES_RESPONSE",
106106
}
107107

108+
IS_PY314 = (sys.version_info.major, sys.version_info.minor) == (3, 14)
109+
108110

109111
def current_account(session):
110112
return session.sql("select CURRENT_ACCOUNT_NAME()").collect()[0][0].upper()

0 commit comments

Comments
 (0)