diff --git a/CHANGELOG.md b/CHANGELOG.md index fba8710a2f..a378aaaeeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Release History -## Unreleased +## 1.52.0 (TBD) ### Snowpark Python API Updates @@ -8,6 +8,12 @@ - Clarified that the JDBC driver JAR referenced via `udtf_configs.imports` in `DataFrameReader.jdbc()` must be downloaded from the database vendor and uploaded to a Snowflake stage. +### Snowpark pandas API Updates + +#### Dependency Updates + +- Relaxed the `pandas` dependency ceiling for Snowpark pandas extras to `<3.1`. + ## 1.51.0 (2026-05-18) ### Snowpark Python API Updates diff --git a/setup.py b/setup.py index a3e3699e8f..2a67aeecb6 100644 --- a/setup.py +++ b/setup.py @@ -14,8 +14,7 @@ THIS_DIR = os.path.dirname(os.path.realpath(__file__)) SRC_DIR = os.path.join(THIS_DIR, "src") SNOWPARK_SRC_DIR = os.path.join(SRC_DIR, "snowflake", "snowpark") -# Snowpark pandas requires modin 0.36.x or 0.37.x, which are compatible with pandas -# 2.2.x or 2.3.x +# Snowpark pandas requires modin 0.36.x or 0.37.x. MODIN_DEPENDENCY_VERSION = ">=0.36.0, <0.38.0" CONNECTOR_DEPENDENCY_VERSION = ">=3.17.0, <5.0.0" CONNECTOR_DEPENDENCY = f"snowflake-connector-python{CONNECTOR_DEPENDENCY_VERSION}" @@ -43,7 +42,7 @@ MODIN_REQUIREMENTS = [ *PANDAS_REQUIREMENTS, f"modin{MODIN_DEPENDENCY_VERSION}", - "pandas<=2.4", + "pandas<3.1", "tqdm", # For progress bars during backend switching "ipywidgets", # For enhanced progress bars in Jupyter notebooks ] diff --git a/tests/integ/test_packaging.py b/tests/integ/test_packaging.py index da7a0fbf0d..47a07f3a8e 100644 --- a/tests/integ/test_packaging.py +++ b/tests/integ/test_packaging.py @@ -11,6 +11,7 @@ from unittest.mock import patch import pytest +from packaging.version import Version from snowflake.snowpark import Row, Session from snowflake.snowpark._internal.packaging_utils import ( @@ -51,6 +52,14 @@ is_pandas_and_numpy_available = False +def assert_numpy_and_pandas_version_pair( + version_pair: str, expected_numpy_ver: str +) -> None: + numpy_ver, pandas_ver = version_pair.split("/", 1) + assert numpy_ver == expected_numpy_ver + assert Version(pandas_ver) < Version("3.1") + + @pytest.fixture(scope="module", autouse=True) def setup(session, resources_path, local_testing_mode): tmp_stage_name = Utils.random_stage_name() @@ -196,14 +205,14 @@ def test_add_packages(session, local_testing_mode): session.add_packages( [ numpy_version, - "pandas==2.3.3", + "pandas<3.1", "matplotlib", "pyyaml", ] ) assert session.get_packages() == { "numpy": numpy_version, - "pandas": "pandas==2.3.3", + "pandas": "pandas<3.1", "matplotlib": "matplotlib", "pyyaml": "pyyaml", } @@ -219,11 +228,12 @@ def get_numpy_pandas_dateutil_version() -> str: res = df.select(call_udf(udf_name)).collect()[0][0] # don't need to check the version of dateutil, as it can be changed on the server side expected_numpy_ver = "2.3.5" if sys.version_info >= (3, 13) else "1.26.3" - assert ( - res.startswith(f"{expected_numpy_ver}/2.3.3") - if not local_testing_mode - else res == get_numpy_pandas_dateutil_version() - ) + if local_testing_mode: + assert res == get_numpy_pandas_dateutil_version() + else: + numpy_ver, pandas_ver, _ = res.split("/", 2) + assert numpy_ver == expected_numpy_ver + assert Version(pandas_ver) < Version("3.1") # only add pyyaml, which will overwrite the previously added packages # so matplotlib will not be available on the server side @@ -447,7 +457,7 @@ def test_add_requirements(session, resources_path, local_testing_mode): session.add_requirements(test_files.test_requirements_file) assert session.get_packages() == { "numpy": "numpy==2.3.5" if sys.version_info >= (3, 13) else "numpy==1.26.3", - "pandas": "pandas==2.3.3", + "pandas": "pandas<3.1", } udf_name = Utils.random_name_for_temp_object(TempObjectType.FUNCTION) @@ -459,12 +469,11 @@ def get_numpy_pandas_version() -> str: df = session.create_dataframe([None]).to_df("a") res = df.select(call_udf(udf_name)) expected_numpy_ver = "2.3.5" if sys.version_info >= (3, 13) else "1.26.3" - Utils.check_answer( - res, - [Row(f"{expected_numpy_ver}/2.3.3")] - if not local_testing_mode - else [Row(f"{numpy.__version__}/{pandas.__version__}")], - ) + if local_testing_mode: + Utils.check_answer(res, [Row(f"{numpy.__version__}/{pandas.__version__}")]) + else: + version_pair = res.collect()[0][0] + assert_numpy_and_pandas_version_pair(version_pair, expected_numpy_ver) def test_add_requirements_twice_should_fail_if_packages_are_different( @@ -475,7 +484,7 @@ def test_add_requirements_twice_should_fail_if_packages_are_different( session.add_requirements(test_files.test_requirements_file) assert session.get_packages() == { "numpy": f"numpy=={expected_numpy_ver}", - "pandas": "pandas==2.3.3", + "pandas": "pandas<3.1", } with pytest.raises(ValueError, match="Cannot add package"): @@ -968,7 +977,7 @@ def test_add_requirements_with_empty_stage_as_cache_path( expected_numpy_ver = "2.3.5" if sys.version_info >= (3, 13) else "1.26.3" assert session.get_packages() == { "numpy": f"numpy=={expected_numpy_ver}", - "pandas": "pandas==2.3.3", + "pandas": "pandas<3.1", } udf_name = Utils.random_name_for_temp_object(TempObjectType.FUNCTION) diff --git a/tests/resources/test_requirements.txt b/tests/resources/test_requirements.txt index 2931b12135..13a2ffb844 100644 --- a/tests/resources/test_requirements.txt +++ b/tests/resources/test_requirements.txt @@ -1,2 +1,2 @@ numpy==1.26.3 -pandas==2.3.3 +pandas<3.1 diff --git a/tests/resources/test_requirements_py313.txt b/tests/resources/test_requirements_py313.txt index 7ff532ea89..c74db7b97f 100644 --- a/tests/resources/test_requirements_py313.txt +++ b/tests/resources/test_requirements_py313.txt @@ -1,2 +1,2 @@ numpy==2.3.5 -pandas==2.3.3 +pandas<3.1 diff --git a/tox.ini b/tox.ini index 9b4ab73404..d1ee296471 100644 --- a/tox.ini +++ b/tox.ini @@ -106,7 +106,7 @@ setenv = # Cap pyarrow<21 for Snowpark pandas tests on Jenkins due to SNOW-2266293 snowparkpandasjenkins: SNOWFLAKE_PYTEST_MODIN_JENKINS_PYARROW_CAP = pyarrow<21 pyarrowcap: SNOWFLAKE_PYTEST_MODIN_JENKINS_PYARROW_CAP = pyarrow<21 - pandascap: SNOWFLAKE_PYTEST_MODIN_JENKINS_PANDAS_CAP = pandas<=2.3.2 + pandascap: SNOWFLAKE_PYTEST_MODIN_JENKINS_PANDAS_CAP = pandas<3.1 passenv = AWS_ACCESS_KEY_ID