snowflakedb
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/snowflake/snowpark/modin/config/envvars.py‎
Lines changed: 18 additions & 1 deletion b/‎src/snowflake/snowpark/modin/config/envvars.py‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎src/snowflake/snowpark/modin/plugin/_internal/utils.py‎
Lines changed: 81 additions & 0 deletions b/‎src/snowflake/snowpark/modin/plugin/_internal/utils.py‎
Lines changed: 81 additions & 0 deletions
diff --git a/‎src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py‎
Lines changed: 13 additions & 35 deletions b/‎src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py‎
Lines changed: 13 additions & 35 deletions
diff --git a/‎src/snowflake/snowpark/modin/plugin/extensions/dataframe_extensions.py‎
Lines changed: 127 additions & 0 deletions b/‎src/snowflake/snowpark/modin/plugin/extensions/dataframe_extensions.py‎
Lines changed: 127 additions & 0 deletions
@@ -126,6 +126,7 @@
 - Added a session parameter `pandas_hybrid_execution_enabled` to enable/disable hybrid execution as an alternative to using `AutoSwitchBackend`.
 - Removed an unnecessary `SHOW OBJECTS` query issued from `read_snowflake` under certain conditions.
 - When hybrid execution is enabled, `pd.merge`, `pd.concat`, `DataFrame.merge`, and `DataFrame.join` may now move arguments to backends other than those among the function arguments.
+- Improved performance of `DataFrame.to_snowflake` and `pd.to_snowflake(dataframe)` for large data by uploading data via a parquet file. You can control the dataset size at which Snowpark pandas switches to parquet with the variable `modin.config.PandasToSnowflakeParquetThresholdBytes`.
 
 ## 1.39.0 (2025-09-17)
 
 
@@ -116,8 +116,25 @@ class SnowflakePandasTransferThreshold(EnvironmentVariable, type=int):
     default = 100_000
 
 
-# have to monkey patch this into modin right now to use config contexts
+class PandasToSnowflakeParquetThresholdBytes(EnvironmentVariable, type=int):
+    """
+    Size threshold, in bytes, above which to_snowflake() uploads via parquet.
+
+    When a pandas-backend dataframe's shallow memory usage exceeds this
+    threshold, implement to_snowflake() by writing the dataframe to a parquet
+    file and loading the parquet file into Snowflake. A dataframe whose
+    shallow memory usage is at or below the threshold is instead moved to the
+    Snowflake backend and written from there.
+    """
+
+    # NOTE(review): the environment variable is named "MAX_TO_SNOWFLAKE_MEMORY"
+    # while the class is named "ParquetThreshold" -- confirm that the two names
+    # are meant to differ.
+    varname = "SNOWFLAKE_PANDAS_MAX_TO_SNOWFLAKE_MEMORY_BYTES"
+    # This default comes from experimentation on integer data. At about this
+    # point, insertion via parquet appears to be faster on a 3XL warehouse.
+    default = 3_000_000
+
+
+# have to monkey patch these variables into modin right now to use config
+# contexts. Each class is attached to modin_config under its own class name.
 modin_config.SnowflakePandasTransferThreshold = SnowflakePandasTransferThreshold
+modin_config.PandasToSnowflakeParquetThresholdBytes = (
+    PandasToSnowflakeParquetThresholdBytes
+)
 
 
 class EnvWithSibilings(
 
@@ -980,6 +980,38 @@ def extract_all_duplicates(elements: Sequence[Hashable]) -> Sequence[Hashable]:
     return unique_duplicated_elements
 
 
+def validate_column_labels_for_to_snowflake(
+    index_column_labels: Sequence[Hashable], data_column_labels: Sequence[Hashable]
+) -> None:
+    """
+    Validate column labels for to_snowflake.
+
+    Check that the column labels are not duplicated, and that the data column
+    labels are not None. The duplicate check runs first, over the index labels
+    and the data labels combined, so a frame that fails both checks reports
+    the duplication error.
+
+    Args:
+        index_column_labels: index column labels
+        data_column_labels: data column labels
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: if any label appears more than once across the index and
+            data column labels, or if is_all_label_components_none() is true
+            for any data column label.
+    """
+    # Duplicates are looked for across index and data labels together, not
+    # within each group separately.
+    duplicates = extract_all_duplicates((*index_column_labels, *data_column_labels))
+    if len(duplicates) > 0:
+        raise ValueError(
+            f"Duplicated labels {duplicates} found in index columns {index_column_labels} and data columns {data_column_labels}. "
+            f"Snowflake does not allow duplicated identifiers, please rename to make sure there is no duplication "
+            f"among both index and data columns."
+        )
+
+    # Only the data column labels are checked for None here; index labels are
+    # not inspected by this check.
+    if any(is_all_label_components_none(label) for label in data_column_labels):
+        raise ValueError(
+            f"Label None is found in the data columns {data_column_labels}, which is invalid in Snowflake. "
+            "Please give it a name by set the dataframe columns like df.columns=['A', 'B'],"
+            " or set the series name if it is a series like series.name='A'."
+        )
+
+
 def is_duplicate_free(names: Sequence[Hashable]) -> bool:
     """
     check whether names contains duplicates
@@ -2300,3 +2332,52 @@ def new_snow_df(*args: Any, **kwargs: Any) -> pd.DataFrame:
     """
     with config_context(AutoSwitchBackend=False):
         return pd.DataFrame(*args, **kwargs)
+
+
+def extract_and_validate_index_labels_for_to_snowflake(
+    index_label_param: Any, num_index_columns: int
+) -> list[Hashable]:
+    """
+    Extract and validate index labels for to_snowflake.
+
+    Args:
+        index_label_param: index_label parameter. A list is used as is; any
+            other value is treated as a single label and wrapped in a
+            one-element list.
+        num_index_columns: number of index columns
+    Returns:
+        list of index column labels
+    Raises:
+        ValueError: if the number of labels differs from num_index_columns.
+    """
+    # Only a list is treated as several labels; every other value (including
+    # a tuple) becomes one label.
+    index_column_labels = (
+        index_label_param
+        if isinstance(index_label_param, list)
+        else [index_label_param]
+    )
+    if len(index_column_labels) != num_index_columns:
+        raise ValueError(
+            f"Length of 'index_label' should match number of levels, which is {num_index_columns}"
+        )
+    return index_column_labels
+
+
+def handle_if_exists_for_to_snowflake(
+    if_exists: str, name: Union[str, Iterable[str]]
+) -> None:
+    """
+    Handle if_exists for to_snowflake.
+
+    Validate if_exists for to_snowflake and raise an error if the table
+    already exists and if_exists == "fail". The table lookup is only made
+    when if_exists == "fail"; "replace" and "append" return without querying
+    the session.
+
+    Args:
+        if_exists: if_exists parameter
+        name: name parameter. A string is split with parse_table_name()
+            before the lookup; any other value is passed to the lookup as is.
+    Returns:
+        None
+    Raises:
+        ValueError: if if_exists is not one of "fail", "replace" or "append",
+            or if if_exists is "fail" and the table already exists.
+    """
+    if if_exists not in ("fail", "replace", "append"):
+        raise ValueError(f"'{if_exists}' is not valid for if_exists")
+    if if_exists == "fail" and pd.session._table_exists(
+        parse_table_name(name) if isinstance(name, str) else name
+    ):
+        raise ValueError(
+            f"Table '{name}' already exists. Set 'if_exists' parameter as 'replace' to override existing table."
+        )
@@ -95,7 +95,6 @@
 from snowflake.snowpark._internal.type_utils import ColumnOrName
 from snowflake.snowpark._internal.utils import (
     generate_random_alphanumeric,
-    parse_table_name,
     random_name_for_temp_object,
 )
 from snowflake.snowpark.column import CaseExpr, Column as SnowparkColumn
@@ -350,7 +349,8 @@
     unpivot_empty_df,
 )
 from snowflake.snowpark.modin.plugin._internal.utils import (
-    MODIN_IS_AT_LEAST_0_37_0,
+    extract_and_validate_index_labels_for_to_snowflake,
+    handle_if_exists_for_to_snowflake,
     new_snow_series,
     INDEX_LABEL,
     ROW_COUNT_COLUMN_LABEL,
@@ -366,7 +366,6 @@
     create_frame_with_data_columns,
     create_ordered_dataframe_from_pandas,
     create_initial_ordered_dataframe,
-    extract_all_duplicates,
     extract_pandas_label_from_snowflake_quoted_identifier,
     fill_missing_levels_for_pandas_label,
     fill_none_in_index_labels,
@@ -383,6 +382,8 @@
     parse_object_construct_snowflake_quoted_identifier_and_extract_pandas_label,
     parse_snowflake_object_construct_identifier_to_map,
     unquote_name_if_quoted,
+    validate_column_labels_for_to_snowflake,
+    MODIN_IS_AT_LEAST_0_37_0,
 )
 from snowflake.snowpark.modin.plugin._internal.where_utils import (
     validate_expected_boolean_data_columns,
@@ -1896,12 +1897,11 @@ def _to_snowpark_dataframe_from_snowpark_pandas_dataframe(
             # Include index columns
             if index_label:
                 index_column_labels = (
-                    index_label if isinstance(index_label, list) else [index_label]
-                )
-                if len(index_column_labels) != self._modin_frame.num_index_columns:
-                    raise ValueError(
-                        f"Length of 'index_label' should match number of levels, which is {self._modin_frame.num_index_columns}"
+                    extract_and_validate_index_labels_for_to_snowflake(
+                        index_label_param=index_label,
+                        num_index_columns=self._modin_frame.num_index_columns,
                     )
+                )
             else:
                 index_column_labels = frame.index_column_pandas_labels
 
@@ -1920,23 +1920,10 @@ def _to_snowpark_dataframe_from_snowpark_pandas_dataframe(
             # label for the data column, set the label to be None
             data_column_labels = [None]
 
-        # check if there is any data column label is none
-        if any(is_all_label_components_none(label) for label in data_column_labels):
-            raise ValueError(
-                f"Label None is found in the data columns {data_column_labels}, which is invalid in Snowflake. "
-                "Please give it a name by set the dataframe columns like df.columns=['A', 'B'],"
-                " or set the series name if it is a series like series.name='A'."
-            )
-
-        # perform a column name duplication check
-        index_and_data_columns = data_column_labels + index_column_labels
-        duplicates = extract_all_duplicates(index_and_data_columns)
-        if duplicates:
-            raise ValueError(
-                f"Duplicated labels {duplicates} found in index columns {index_column_labels} and data columns {data_column_labels}. "
-                f"Snowflake does not allow duplicated identifiers, please rename to make sure there is no duplication "
-                f"among both index and data columns."
-            )
+        validate_column_labels_for_to_snowflake(
+            index_column_labels=index_column_labels,
+            data_column_labels=data_column_labels,
+        )
 
         # rename snowflake quoted identifiers for the retained index columns and data columns to
         # be the same as quoted pandas labels.
@@ -2038,24 +2025,15 @@ def to_snowflake(
         table_type: Literal["", "temp", "temporary", "transient"] = "",
     ) -> None:
         self._warn_lost_snowpark_pandas_type()
+        handle_if_exists_for_to_snowflake(if_exists=if_exists, name=name)
 
-        if if_exists not in ("fail", "replace", "append"):
-            # Same error message as native pandas.
-            raise ValueError(f"'{if_exists}' is not valid for if_exists")
         if if_exists == "fail":
             mode = "errorifexists"
         elif if_exists == "replace":
             mode = "overwrite"
         else:
             mode = "append"
 
-        if mode == "errorifexists" and pd.session._table_exists(
-            parse_table_name(name) if isinstance(name, str) else name
-        ):
-            raise ValueError(
-                f"Table '{name}' already exists. Set 'if_exists' parameter as 'replace' to override existing table."
-            )
-
         self._to_snowpark_dataframe_from_snowpark_pandas_dataframe(
             index, index_label
         ).write.save_as_table(
 
@@ -11,6 +11,11 @@
 import functools
 from typing import Any, List, Literal, Optional, Union
 
+from snowflake.snowpark.modin.config.envvars import (
+    PandasToSnowflakeParquetThresholdBytes,
+)
+from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage
+
 import modin.pandas as pd
 from modin.pandas.api.extensions import (
     register_dataframe_accessor as _register_dataframe_accessor,
@@ -21,6 +26,17 @@
 from snowflake.snowpark._internal.type_utils import ColumnOrName
 from snowflake.snowpark.async_job import AsyncJob
 from snowflake.snowpark.dataframe import DataFrame as SnowparkDataFrame
+from snowflake.snowpark.modin.plugin._internal.snowpark_pandas_types import (
+    SnowparkPandasType,
+)
+from snowflake.snowpark.modin.plugin._internal.utils import (
+    extract_and_validate_index_labels_for_to_snowflake,
+    handle_if_exists_for_to_snowflake,
+    is_all_label_components_none,
+    is_valid_snowflake_quoted_identifier,
+    unquote_name_if_quoted,
+    validate_column_labels_for_to_snowflake,
+)
 from snowflake.snowpark.modin.plugin.extensions.utils import (
     add_cache_result_docstring,
     register_non_snowflake_accessors,
@@ -37,9 +53,120 @@
 register_non_snowflake_accessors(_register_dataframe_accessor, "DataFrame")
 
 
+def _convert_to_snowflake_table_name_to_write_pandas_table_name(name: str) -> str:
+    """
+    Convert the user's to_snowflake() table name to the name we need for write_pandas().
+
+    We call write_pandas() with quote_identifiers=True, so we need to strip
+    quotes from quoted identifiers and convert unquoted identifiers to
+    uppercase.
+
+    NOTE(review): this treats `name` as one identifier held in a single
+    string. A non-string name has no upper() method, and a multi-part string
+    such as db.schema.table would presumably be upper-cased as a whole rather
+    than part by part -- confirm that callers only pass a single identifier.
+
+    Parameters:
+        name: The name that the user passed to to_snowflake().
+
+    Returns:
+        The name we will pass to write_pandas().
+    """
+    if is_valid_snowflake_quoted_identifier(name):
+        # quoted -> strip quotes. e.g. '"CUSTOMER"' -> 'CUSTOMER'
+        return unquote_name_if_quoted(name)
+    else:
+        # unquoted identifier -> convert to uppercase
+        # e.g. 'ab$ab' -> 'AB$AB', 'customer' -> 'CUSTOMER'
+        return name.upper()
+
+
 # Snowflake specific dataframe methods
 # We use extensions, as we want to make clear that a Snowpark pandas DataFrame is NOT a
 # pandas DataFrame.
+@_register_dataframe_accessor("to_snowflake", backend="Pandas")
+def pandas_to_snowflake(
+    self,
+    name: Union[str, Iterable[str]],
+    if_exists: Optional[Literal["fail", "replace", "append"]] = "fail",
+    index: bool = True,
+    index_label: Optional[IndexLabel] = None,
+    table_type: Literal["", "temp", "temporary", "transient"] = "",
+) -> None:
+    """
+    Save a pandas-backend DataFrame as a Snowflake table.
+
+    Small frames, and frames whose table name is not a plain string, are
+    moved to the Snowflake backend and written by that backend's
+    to_snowflake(). Larger frames are written with session.write_pandas().
+
+    Args:
+        name: Name of the table. Either a string or an iterable of name
+            parts; the write_pandas() path is only taken for a string.
+        if_exists: What to do when the table exists: "fail" raises,
+            "replace" overwrites, "append" adds rows.
+        index: Whether to write the index as column(s).
+        index_label: Label(s) to use for the index column(s). When falsy, the
+            index's own names are used.
+        table_type: Type of table to create, passed through to the writer.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: if if_exists is invalid, if the table exists and
+            if_exists is "fail", if the number of index labels does not match
+            the number of index levels, or if the resulting column labels are
+            duplicated or None.
+    """
+    if (
+        # The write_pandas() path below converts `name` with string
+        # operations into a single identifier, which fails for a name given
+        # as an iterable of parts. The Snowflake backend accepts such names,
+        # so send them there regardless of size.
+        not isinstance(name, str)
+        # Shallow memory usage may underestimate the memory usage of the
+        # dataframe, but deep memory usage can be expensive to compute.
+        # Since this threshold uses shallow memory usage, it may sometimes
+        # not use the parquet method in cases where the parquet method would be
+        # faster, especially if this frame contains deep data like Python
+        # strings.
+        or self.memory_usage(deep=False).sum()
+        <= PandasToSnowflakeParquetThresholdBytes.get()
+    ):
+        return self.set_backend("Snowflake").to_snowflake(
+            name=name,
+            if_exists=if_exists,
+            index=index,
+            index_label=index_label,
+            table_type=table_type,
+        )
+
+    handle_if_exists_for_to_snowflake(if_exists=if_exists, name=name)
+
+    pandas_frame = self._query_compiler._modin_frame
+    num_index_columns = pandas_frame.index.nlevels
+    if index:
+        if index_label:
+            index_column_labels = extract_and_validate_index_labels_for_to_snowflake(
+                index_label_param=index_label,
+                num_index_columns=num_index_columns,
+            )
+        else:
+            index_column_labels = list(pandas_frame.index.names)
+
+        if any(is_all_label_components_none(label) for label in index_column_labels):
+            # It's common to have index level named None, in which case we
+            # follow the naming convention that pandas would follow if we
+            # were to convert the index to a data column with reset_index()
+            # and then skip writing the new index.
+            index = False
+            pandas_frame = pandas_frame.reset_index(drop=False, names=None)
+            index_column_labels = []
+    else:
+        index_column_labels = []
+    validate_column_labels_for_to_snowflake(
+        index_column_labels=index_column_labels,
+        data_column_labels=list(pandas_frame.columns),
+    )
+    if index:
+        # write_pandas() will always drop the index, so we move the index into
+        # the data columns with reset_index().
+        pandas_frame = pandas_frame.reset_index(drop=False, names=index_column_labels)
+
+    # Look up each dtype's Snowpark pandas type once, then keep the ones that
+    # have such a type; those are the types reported in the warning below.
+    snowpark_pandas_types = (
+        SnowparkPandasType.get_snowpark_pandas_type_for_pandas_type(dtype)
+        for dtype in pandas_frame.dtypes
+    )
+    unsupported_types = [t for t in snowpark_pandas_types if t is not None]
+    if unsupported_types:
+        WarningMessage.lost_type_warning(
+            "to_snowflake", ", ".join(type(t).__name__ for t in unsupported_types)
+        )
+
+    pd.session.write_pandas(
+        pandas_frame.rename(str, axis=1),
+        # We want to pass table_name as is, but quote column names. This is
+        # undocumented behavior of to_snowflake() on the "Snowflake" backend, so
+        # we mimic it here.
+        # Note that if we try to use quote_identifiers=False and quote the
+        # column identifiers ourselves, we get the correct column names and we
+        # don't have to modify the table name, but the snowflake connector seems
+        # to incorrectly insert null data.
+        table_name=_convert_to_snowflake_table_name_to_write_pandas_table_name(name),
+        auto_create_table=True,
+        # "fail" also overwrites: the existence check above has already raised
+        # if the table was there.
+        overwrite=if_exists != "append",
+        table_type=table_type,
+    )
+
+    return None
+
+
 # Implementation note: Arguments names and types are kept consistent with pandas.DataFrame.to_sql
 @register_dataframe_accessor("to_snowflake")
 def to_snowflake(