diff --git a/CHANGELOG.md b/CHANGELOG.md
index 70be259ee5..7b39612d40 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -41,6 +41,7 @@
 - Introduce faster pandas: Improved performance by deferring row position computation.
   - The following operations are currently supported and can benefit from the optimization: `read_snowflake`, `repr`, `loc`, `reset_index`, `merge`, and binary operations.
   - If a lazy object (e.g., DataFrame or Series) depends on a mix of supported and unsupported operations, the optimization will not be used.
+- Updated the error message for when Snowpark pandas is referenced within apply.
 
 #### Dependency Updates
 
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index fde971f271..90e4d1f186 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -8883,7 +8883,23 @@ def _apply_with_udtf_and_dynamic_pivot_along_axis_1(
         # materially slow down CI or individual groupby.apply() calls.
         # TODO(SNOW-1345395): Investigate why and to what extent the cache_result
         # is useful.
-        ordered_dataframe = cache_result(udtf_dataframe)
+        try:
+            ordered_dataframe = cache_result(udtf_dataframe)
+        except SnowparkSQLException as e:
+            # These failure messages are reported when the applied function
+            # references Snowpark pandas; translate them into an actionable
+            # error and keep the original exception as the explicit cause.
+            if "No module named 'snowflake'" in str(
+                e
+            ) or "Modin is not installed" in str(e):
+                raise SnowparkSQLException(
+                    "modin.pandas cannot be referenced within a Snowpark pandas apply() function. "
+                    "You can only use native pandas inside apply(). Please check developer guide for details "
+                    "https://docs.snowflake.com/developer-guide/snowpark/python/pandas-on-snowflake#limitations."
+                ) from e
+            # Any other SQL failure propagates unchanged instead of re-running
+            # the query that has just failed.
+            raise
 
         # After applying the udtf, the underlying Snowpark DataFrame becomes
         # -------------------------------------------------------------------------------------------
diff --git a/tests/integ/modin/frame/test_apply.py b/tests/integ/modin/frame/test_apply.py
index 0bf6cb266f..4938e695d9 100644
--- a/tests/integ/modin/frame/test_apply.py
+++ b/tests/integ/modin/frame/test_apply.py
@@ -1255,3 +1255,15 @@ def operation(col, arg):
     eval_snowpark_pandas_result(
         *create_test_dfs(test_data), lambda df: df.apply(operation, arg=arg2)
     )
+
+
+@sql_count_checker(query_count=3)
+def test_snowpandas_in_apply_negative():
+    df = pd.DataFrame({"date": ["2025-01-01"], "time": ["12:34:56"]})
+    with pytest.raises(
+        SnowparkSQLException,
+        match=re.escape(
+            "modin.pandas cannot be referenced within a Snowpark pandas apply() function"
+        ),
+    ):
+        df.apply(lambda row: pd.to_datetime(f"{row.date} {row.time}"), axis=1)