snowflakedb · sfc-gh-lmukhopadhyay · Sep 12, 2025 · Aug 19, 2025 · Aug 19, 2025 · Aug 19, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -41,6 +41,7 @@
 - Introduce faster pandas: Improved performance by deferring row position computation. 
   - The following operations are currently supported and can benefit from the optimization: `read_snowflake`, `repr`, `loc`, `reset_index`, `merge`, and binary operations.
   - If a lazy object (e.g., DataFrame or Series) depends on a mix of supported and unsupported operations, the optimization will not be used.
+- Updated the error message raised when Snowpark pandas is referenced within `apply`.
 
 #### Dependency Updates
 

@@ -8883,7 +8883,20 @@ def _apply_with_udtf_and_dynamic_pivot_along_axis_1(
         # materially slow down CI or individual groupby.apply() calls.
         # TODO(SNOW-1345395): Investigate why and to what extent the cache_result
         # is useful.
-        ordered_dataframe = cache_result(udtf_dataframe)
+        try:
+            ordered_dataframe = cache_result(udtf_dataframe)
+        except SnowparkSQLException as e:
+            # Translate only the modin-in-apply failure into a clearer message;
+            # unrelated failures propagate unchanged.
+            known = ("No module named 'snowflake'", "Modin is not installed")
+            if not any(marker in str(e) for marker in known):
+                raise
+            # Chain the original error so its details stay in the traceback.
+            raise SnowparkSQLException(
+                "modin.pandas cannot be referenced within a Snowpark pandas apply() function. "
+                "You can only use native pandas inside apply(). Please check developer guide for details "
+                "https://docs.snowflake.com/developer-guide/snowpark/python/pandas-on-snowflake#limitations."
+            ) from e
 
         # After applying the udtf, the underlying Snowpark DataFrame becomes
         # -------------------------------------------------------------------------------------------

@@ -1255,3 +1255,15 @@ def operation(col, arg):
         eval_snowpark_pandas_result(
             *create_test_dfs(test_data), lambda df: df.apply(operation, arg=arg2)
         )
+
+
+@sql_count_checker(query_count=3)
+def test_snowpandas_in_apply_negative():
+    # The applied lambda calls `pd.to_datetime`; apply() is expected to fail
+    # with the dedicated "modin.pandas cannot be referenced" error message.
+    expected = re.escape(
+        "modin.pandas cannot be referenced within a Snowpark pandas apply() function"
+    )
+    df = pd.DataFrame({"date": ["2025-01-01"], "time": ["12:34:56"]})
+    with pytest.raises(SnowparkSQLException, match=expected):
+        df.apply(lambda row: pd.to_datetime(f"{row.date} {row.time}"), axis=1)