diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f67c53015..10064b36b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ # Release History +## 1.40.0 (YYYY-MM-DD) + +### Snowpark Python API Updates + +#### New Features + +- Added support for unstructured data engineering in Snowpark, powered by Snowflake AISQL and Cortex functions: + - `DataFrame.ai.complete`: Generate per-row LLM completions from prompts built over columns and files. + - `DataFrame.ai.filter`: Keep rows where an AI classifier returns TRUE for the given predicate. + - `DataFrame.ai.agg`: Reduce a text column into one result using a natural-language task description. + - `RelationalGroupedDataFrame.ai_agg`: Perform the same natural-language aggregation per group. + - `DataFrame.ai.classify`: Assign single or multiple labels from given categories to text or images. + - `DataFrame.ai.similarity`: Compute cosine-based similarity scores between two columns via embeddings. + - `DataFrame.ai.sentiment`: Extract overall and aspect-level sentiment from text into JSON. + - `DataFrame.ai.embed`: Generate VECTOR embeddings for text or images using configurable models. + - `DataFrame.ai.summarize_agg`: Aggregate and produce a single comprehensive summary over many rows. + - `DataFrame.ai.transcribe`: Transcribe audio files to text with optional timestamps and speaker labels. + - `DataFrame.ai.parse_document`: OCR/layout-parse documents or images into structured JSON. + - `DataFrame.ai.extract`: Pull structured fields from text or files using a response schema. + - `DataFrame.ai.count_tokens`: Estimate token usage for a given model and input text per row. + - `DataFrame.ai.split_text_markdown_header`: Split Markdown into hierarchical header-aware chunks. + - `DataFrame.ai.split_text_recursive_character`: Split text into size-bounded chunks using recursive separators. + - `DataFrameReader.file`: Create a DataFrame containing all files from a stage as FILE data type for downstream unstructured data processing. + ## 1.39.0 (YYYY-MM-DD) ### Snowpark Python API Updates diff --git a/src/snowflake/snowpark/dataframe_ai_functions.py b/src/snowflake/snowpark/dataframe_ai_functions.py index 872cf9225c..256522827c 100644 --- a/src/snowflake/snowpark/dataframe_ai_functions.py +++ b/src/snowflake/snowpark/dataframe_ai_functions.py @@ -43,7 +43,7 @@ class DataFrameAIFunctions: def __init__(self, dataframe: "snowflake.snowpark.DataFrame") -> None: self._dataframe = dataframe - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def complete( self, @@ -181,7 +181,7 @@ def complete( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def filter( self, @@ -285,7 +285,7 @@ def filter( filtered_df._ast_id = stmt.uid return filtered_df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def agg( self, @@ -396,7 +396,7 @@ def agg( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def classify( self, @@ -557,7 +557,7 @@ def classify( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def similarity( self, @@ -712,7 +712,7 @@ def similarity( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def sentiment( self, @@ -831,7 +831,7 @@ def sentiment( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def embed( self, @@ -959,7 +959,7 @@ def embed( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def summarize_agg( self, @@ -1061,7 +1061,7 @@ def summarize_agg( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def transcribe( self, @@ -1166,7 +1166,7 @@ def transcribe( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def parse_document( self, @@ -1259,7 +1259,7 @@ def parse_document( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def extract( self, @@ -1433,7 +1433,7 @@ def extract( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def count_tokens( self, @@ -1530,7 +1530,7 @@ def count_tokens( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def split_text_markdown_header( self, @@ -1660,7 +1660,7 @@ def split_text_markdown_header( df._ast_id = stmt.uid return df - @experimental(version="1.37.0") + @experimental(version="1.40.0") @publicapi def split_text_recursive_character( self, diff --git a/src/snowflake/snowpark/relational_grouped_dataframe.py b/src/snowflake/snowpark/relational_grouped_dataframe.py index d58d4be6ce..ff659ec338 100644 --- a/src/snowflake/snowpark/relational_grouped_dataframe.py +++ b/src/snowflake/snowpark/relational_grouped_dataframe.py @@ -41,6 +41,7 @@ from snowflake.snowpark._internal.type_utils import ColumnOrName, LiteralType from snowflake.snowpark._internal.utils import ( check_agg_exprs, + experimental, is_valid_tuple_for_agg, parse_positional_args_to_list, parse_positional_args_to_list_variadic, @@ -832,6 +833,7 @@ def _function( return df @relational_group_df_api_usage + @experimental(version="1.40.0") @publicapi def ai_agg( self,