snowflakedb · sfc-gh-jdu · Sep 12, 2025 · Sep 12, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,29 @@
 # Release History
 
+## 1.40.0 (YYYY-MM-DD)
+
+### Snowpark Python API Updates
+
+#### New Features
+
+- Added support for unstructured data engineering in Snowpark, powered by Snowflake AISQL and Cortex functions:
+  - `DataFrame.ai.complete`: Generate per-row LLM completions from prompts built over columns and files.
+  - `DataFrame.ai.filter`: Keep rows where an AI classifier returns TRUE for the given predicate.
+  - `DataFrame.ai.agg`: Reduce a text column into one result using a natural-language task description.
+  - `RelationalGroupedDataFrame.ai_agg`: Perform the same natural-language aggregation as `DataFrame.ai.agg`, once per group.
+  - `DataFrame.ai.classify`: Assign single or multiple labels from given categories to text or images.
+  - `DataFrame.ai.similarity`: Compute cosine-based similarity scores between two columns via embeddings.
+  - `DataFrame.ai.sentiment`: Extract overall and aspect-level sentiment from text into JSON.
+  - `DataFrame.ai.embed`: Generate VECTOR embeddings for text or images using configurable models.
+  - `DataFrame.ai.summarize_agg`: Aggregate and produce a single comprehensive summary over many rows.
+  - `DataFrame.ai.transcribe`: Transcribe audio files to text with optional timestamps and speaker labels.
+  - `DataFrame.ai.parse_document`: Parse documents or images (OCR or layout-aware extraction) into structured JSON.
+  - `DataFrame.ai.extract`: Pull structured fields from text or files using a response schema.
+  - `DataFrame.ai.count_tokens`: Estimate token usage for a given model and input text per row.
+  - `DataFrame.ai.split_text_markdown_header`: Split Markdown into hierarchical header-aware chunks.
+  - `DataFrame.ai.split_text_recursive_character`: Split text into size-bounded chunks using recursive separators.
+  - `DataFrameReader.file`: Create a DataFrame containing all files from a stage, represented as the FILE data type, for downstream unstructured data processing.
+
 ## 1.39.0 (YYYY-MM-DD)
 
 ### Snowpark Python API Updates

diff --git a/src/snowflake/snowpark/dataframe_ai_functions.py b/src/snowflake/snowpark/dataframe_ai_functions.py
@@ -43,7 +43,7 @@ class DataFrameAIFunctions:
     def __init__(self, dataframe: "snowflake.snowpark.DataFrame") -> None:
         self._dataframe = dataframe
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def complete(
         self,
@@ -181,7 +181,7 @@ def complete(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def filter(
         self,
@@ -285,7 +285,7 @@ def filter(
             filtered_df._ast_id = stmt.uid
         return filtered_df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def agg(
         self,
@@ -396,7 +396,7 @@ def agg(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def classify(
         self,
@@ -557,7 +557,7 @@ def classify(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def similarity(
         self,
@@ -712,7 +712,7 @@ def similarity(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def sentiment(
         self,
@@ -831,7 +831,7 @@ def sentiment(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def embed(
         self,
@@ -959,7 +959,7 @@ def embed(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def summarize_agg(
         self,
@@ -1061,7 +1061,7 @@ def summarize_agg(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def transcribe(
         self,
@@ -1166,7 +1166,7 @@ def transcribe(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def parse_document(
         self,
@@ -1259,7 +1259,7 @@ def parse_document(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def extract(
         self,
@@ -1433,7 +1433,7 @@ def extract(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def count_tokens(
         self,
@@ -1530,7 +1530,7 @@ def count_tokens(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def split_text_markdown_header(
         self,
@@ -1660,7 +1660,7 @@ def split_text_markdown_header(
             df._ast_id = stmt.uid
         return df
 
-    @experimental(version="1.37.0")
+    @experimental(version="1.40.0")
     @publicapi
     def split_text_recursive_character(
         self,

diff --git a/src/snowflake/snowpark/relational_grouped_dataframe.py b/src/snowflake/snowpark/relational_grouped_dataframe.py
@@ -41,6 +41,7 @@
 from snowflake.snowpark._internal.type_utils import ColumnOrName, LiteralType
 from snowflake.snowpark._internal.utils import (
     check_agg_exprs,
+    experimental,
     is_valid_tuple_for_agg,
     parse_positional_args_to_list,
     parse_positional_args_to_list_variadic,
@@ -832,6 +833,7 @@ def _function(
         return df
 
     @relational_group_df_api_usage
+    @experimental(version="1.40.0")
     @publicapi
     def ai_agg(
         self,