snowflakedb
diff --git a/‎CHANGELOG.md‎
Lines changed: 11 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎docs/source/snowpark/functions.rst‎
Lines changed: 10 additions & 0 deletions b/‎docs/source/snowpark/functions.rst‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎scripts/jenkins_regress.sh‎
Lines changed: 1 addition & 1 deletion b/‎scripts/jenkins_regress.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/snowflake/snowpark/_functions/scalar_functions.py‎
Lines changed: 316 additions & 6 deletions b/‎src/snowflake/snowpark/_functions/scalar_functions.py‎
Lines changed: 316 additions & 6 deletions
diff --git a/‎src/snowflake/snowpark/modin/config/envvars.py‎
Lines changed: 18 additions & 1 deletion b/‎src/snowflake/snowpark/modin/config/envvars.py‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎src/snowflake/snowpark/modin/plugin/_internal/utils.py‎
Lines changed: 81 additions & 0 deletions b/‎src/snowflake/snowpark/modin/plugin/_internal/utils.py‎
Lines changed: 81 additions & 0 deletions
diff --git a/‎src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py‎
Lines changed: 13 additions & 35 deletions b/‎src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py‎
Lines changed: 13 additions & 35 deletions
@@ -93,6 +93,16 @@
       - `st_asewkt`
       - `st_asgeojson`
       - `st_aswkb`
+      - `st_aswkt`
+      - `st_azimuth`
+      - `st_buffer`
+      - `st_centroid`
+      - `st_collect`
+      - `st_contains`
+      - `st_coveredby`
+      - `st_covers`
+      - `st_difference`
+      - `st_dimension`
 
 #### Bug Fixes
 
@@ -126,6 +136,7 @@
 - Added a session parameter `pandas_hybrid_execution_enabled` to enable/disable hybrid execution as an alternative to using `AutoSwitchBackend`.
 - Removed an unnecessary `SHOW OBJECTS` query issued from `read_snowflake` under certain conditions.
 - When hybrid execution is enabled, `pd.merge`, `pd.concat`, `DataFrame.merge`, and `DataFrame.join` may now move arguments to backends other than those among the function arguments.
+- Improved performance of `DataFrame.to_snowflake` and `pd.to_snowflake(dataframe)` for large data by uploading data via a parquet file. You can control the shallow memory usage (in bytes) above which Snowpark pandas switches to parquet with the configuration variable `modin.config.PandasToSnowflakeParquetThresholdBytes`.
 
 ## 1.39.0 (2025-09-17)
 
 
@@ -440,6 +440,16 @@ Functions
     st_asewkt
     st_asgeojson
     st_aswkb
+    st_aswkt
+    st_azimuth
+    st_buffer
+    st_centroid
+    st_collect
+    st_contains
+    st_coveredby
+    st_covers
+    st_difference
+    st_dimension
     substr
     substring
     sum
 
@@ -21,4 +21,4 @@ gpg --quiet --batch --yes --decrypt --passphrase="$GPG_KEY" --output "tests/para
 pip install protoc-wheel-0==21.1 mypy-protobuf
 
 # Run linter, Python test and code coverage jobs
-exit_code_decorator "python -m tox -c $WORKING_DIR" -e notdoctest-pyarrowcap
+exit_code_decorator "python -m tox -c $WORKING_DIR" -e notdoctest-pandascap-pyarrowcap
@@ -116,8 +116,25 @@ class SnowflakePandasTransferThreshold(EnvironmentVariable, type=int):
     default = 100_000
 
 
-# have to monkey patch this into modin right now to use config contexts
+class PandasToSnowflakeParquetThresholdBytes(EnvironmentVariable, type=int):
+    """
+    When a pandas-backend dataframe's shallow memory usage (in bytes) exceeds
+    this threshold, implement to_snowflake() by writing the dataframe to a
+    parquet file and loading the parquet file into Snowflake.
+    """
+
+    varname = "SNOWFLAKE_PANDAS_MAX_TO_SNOWFLAKE_MEMORY_BYTES"
+    # This default comes from experimentation on integer data. At about this
+    # point, insertion via parquet appears to be faster on a 3XL warehouse.
+    default = 3_000_000
+
+
+# have to monkey patch these variables into modin right now to use config
+# contexts
 modin_config.SnowflakePandasTransferThreshold = SnowflakePandasTransferThreshold
+modin_config.PandasToSnowflakeParquetThresholdBytes = (
+    PandasToSnowflakeParquetThresholdBytes
+)
 
 
 class EnvWithSibilings(
 
@@ -980,6 +980,38 @@ def extract_all_duplicates(elements: Sequence[Hashable]) -> Sequence[Hashable]:
     return unique_duplicated_elements
 
 
+def validate_column_labels_for_to_snowflake(
+    index_column_labels: Sequence[Hashable], data_column_labels: Sequence[Hashable]
+) -> None:
+    """
+    Validate column labels for to_snowflake.
+
+    Check first that no label is duplicated across the index and data columns,
+    and then that the data column labels are not None.
+
+    Args:
+        index_column_labels: index column labels
+        data_column_labels: data column labels
+
+    Raises:
+        ValueError: if either of the two checks above fails
+    """
+    duplicates = extract_all_duplicates((*index_column_labels, *data_column_labels))
+    if len(duplicates) > 0:
+        raise ValueError(
+            f"Duplicated labels {duplicates} found in index columns {index_column_labels} and data columns {data_column_labels}. "
+            f"Snowflake does not allow duplicated identifiers, please rename to make sure there is no duplication "
+            f"among both index and data columns."
+        )
+
+    if any(is_all_label_components_none(label) for label in data_column_labels):
+        raise ValueError(
+            f"Label None is found in the data columns {data_column_labels}, which is invalid in Snowflake. "
+            "Please give it a name by set the dataframe columns like df.columns=['A', 'B'],"
+            " or set the series name if it is a series like series.name='A'."
+        )
+
+
 def is_duplicate_free(names: Sequence[Hashable]) -> bool:
     """
     check whether names contains duplicates
@@ -2300,3 +2332,52 @@ def new_snow_df(*args: Any, **kwargs: Any) -> pd.DataFrame:
     """
     with config_context(AutoSwitchBackend=False):
         return pd.DataFrame(*args, **kwargs)
+
+
+def extract_and_validate_index_labels_for_to_snowflake(
+    index_label_param: Any, num_index_columns: int
+) -> list[Hashable]:
+    """
+    Extract and validate index labels for to_snowflake.
+
+    Args:
+        index_label_param: index_label parameter; a non-list value is wrapped in a one-element list
+        num_index_columns: number of index columns; a ValueError is raised if the label count differs
+    Returns:
+        list of index column labels
+    """
+    index_column_labels = (
+        index_label_param
+        if isinstance(index_label_param, list)
+        else [index_label_param]
+    )
+    if len(index_column_labels) != num_index_columns:
+        raise ValueError(
+            f"Length of 'index_label' should match number of levels, which is {num_index_columns}"
+        )
+    return index_column_labels
+
+
+def handle_if_exists_for_to_snowflake(
+    if_exists: str, name: Union[str, Iterable[str]]
+) -> None:
+    """
+    Handle if_exists for to_snowflake.
+
+    Validate if_exists for to_snowflake and raise an error if the table
+    already exists and if_exists == "fail".
+
+    Args:
+        if_exists: if_exists parameter; must be "fail", "replace", or "append"
+        name: name parameter; a str is passed through parse_table_name before the existence check
+    Raises:
+        ValueError: if if_exists is not valid, or if it is "fail" and the table already exists
+    """
+    if if_exists not in ("fail", "replace", "append"):
+        raise ValueError(f"'{if_exists}' is not valid for if_exists")
+    if if_exists == "fail" and pd.session._table_exists(
+        parse_table_name(name) if isinstance(name, str) else name
+    ):
+        raise ValueError(
+            f"Table '{name}' already exists. Set 'if_exists' parameter as 'replace' to override existing table."
+        )
@@ -95,7 +95,6 @@
 from snowflake.snowpark._internal.type_utils import ColumnOrName
 from snowflake.snowpark._internal.utils import (
     generate_random_alphanumeric,
-    parse_table_name,
     random_name_for_temp_object,
 )
 from snowflake.snowpark.column import CaseExpr, Column as SnowparkColumn
@@ -350,7 +349,8 @@
     unpivot_empty_df,
 )
 from snowflake.snowpark.modin.plugin._internal.utils import (
-    MODIN_IS_AT_LEAST_0_37_0,
+    extract_and_validate_index_labels_for_to_snowflake,
+    handle_if_exists_for_to_snowflake,
     new_snow_series,
     INDEX_LABEL,
     ROW_COUNT_COLUMN_LABEL,
@@ -366,7 +366,6 @@
     create_frame_with_data_columns,
     create_ordered_dataframe_from_pandas,
     create_initial_ordered_dataframe,
-    extract_all_duplicates,
     extract_pandas_label_from_snowflake_quoted_identifier,
     fill_missing_levels_for_pandas_label,
     fill_none_in_index_labels,
@@ -383,6 +382,8 @@
     parse_object_construct_snowflake_quoted_identifier_and_extract_pandas_label,
     parse_snowflake_object_construct_identifier_to_map,
     unquote_name_if_quoted,
+    validate_column_labels_for_to_snowflake,
+    MODIN_IS_AT_LEAST_0_37_0,
 )
 from snowflake.snowpark.modin.plugin._internal.where_utils import (
     validate_expected_boolean_data_columns,
@@ -1896,12 +1897,11 @@ def _to_snowpark_dataframe_from_snowpark_pandas_dataframe(
             # Include index columns
             if index_label:
                 index_column_labels = (
-                    index_label if isinstance(index_label, list) else [index_label]
-                )
-                if len(index_column_labels) != self._modin_frame.num_index_columns:
-                    raise ValueError(
-                        f"Length of 'index_label' should match number of levels, which is {self._modin_frame.num_index_columns}"
+                    extract_and_validate_index_labels_for_to_snowflake(
+                        index_label_param=index_label,
+                        num_index_columns=self._modin_frame.num_index_columns,
                     )
+                )
             else:
                 index_column_labels = frame.index_column_pandas_labels
 
@@ -1920,23 +1920,10 @@ def _to_snowpark_dataframe_from_snowpark_pandas_dataframe(
             # label for the data column, set the label to be None
             data_column_labels = [None]
 
-        # check if there is any data column label is none
-        if any(is_all_label_components_none(label) for label in data_column_labels):
-            raise ValueError(
-                f"Label None is found in the data columns {data_column_labels}, which is invalid in Snowflake. "
-                "Please give it a name by set the dataframe columns like df.columns=['A', 'B'],"
-                " or set the series name if it is a series like series.name='A'."
-            )
-
-        # perform a column name duplication check
-        index_and_data_columns = data_column_labels + index_column_labels
-        duplicates = extract_all_duplicates(index_and_data_columns)
-        if duplicates:
-            raise ValueError(
-                f"Duplicated labels {duplicates} found in index columns {index_column_labels} and data columns {data_column_labels}. "
-                f"Snowflake does not allow duplicated identifiers, please rename to make sure there is no duplication "
-                f"among both index and data columns."
-            )
+        validate_column_labels_for_to_snowflake(
+            index_column_labels=index_column_labels,
+            data_column_labels=data_column_labels,
+        )
 
         # rename snowflake quoted identifiers for the retained index columns and data columns to
         # be the same as quoted pandas labels.
@@ -2038,24 +2025,15 @@ def to_snowflake(
         table_type: Literal["", "temp", "temporary", "transient"] = "",
     ) -> None:
         self._warn_lost_snowpark_pandas_type()
+        handle_if_exists_for_to_snowflake(if_exists=if_exists, name=name)
 
-        if if_exists not in ("fail", "replace", "append"):
-            # Same error message as native pandas.
-            raise ValueError(f"'{if_exists}' is not valid for if_exists")
         if if_exists == "fail":
             mode = "errorifexists"
         elif if_exists == "replace":
             mode = "overwrite"
         else:
             mode = "append"
 
-        if mode == "errorifexists" and pd.session._table_exists(
-            parse_table_name(name) if isinstance(name, str) else name
-        ):
-            raise ValueError(
-                f"Table '{name}' already exists. Set 'if_exists' parameter as 'replace' to override existing table."
-            )
-
         self._to_snowpark_dataframe_from_snowpark_pandas_dataframe(
             index, index_label
         ).write.save_as_table(