diff --git a/CHANGELOG.md b/CHANGELOG.md index d6574ee051..d10fa85890 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,63 +19,80 @@ - `get_cloud_provider_token` - Added support for the following scalar functions in `functions.py`: - - `array_remove_at` - - `as_boolean` - - `booland` - - `boolnot` - - `boolor` - - `boolor_agg` - - `boolxor` - - `chr` - - `decode` - - `div0null` - - `dp_interval_high` - - `dp_interval_low` - - `greatest_ignore_nulls` - - `h3_cell_to_boundary` - - `h3_cell_to_children` - - `h3_cell_to_children_string` - - `h3_cell_to_parent` - - `h3_cell_to_point` - - `h3_compact_cells` - - `h3_compact_cells_strings` - - `h3_coverage` - - `h3_coverage_strings` - - `h3_get_resolution` - - `h3_grid_disk` - - `h3_grid_distance` - - `h3_int_to_string` - - `h3_polygon_to_cells` - - `h3_polygon_to_cells_strings` - - `h3_string_to_int` - - `h3_try_grid_path` - - `h3_try_polygon_to_cells` - - `h3_try_polygon_to_cells_strings` - - `h3_uncompact_cells` - - `h3_uncompact_cells_strings` - - `haversine` - - `h3_grid_path` - - `h3_is_pentagon` - - `h3_is_valid_cell` - - `h3_latlng_to_cell` - - `h3_latlng_to_cell_string` - - `h3_point_to_cell` - - `h3_point_to_cell_string` - - `h3_try_coverage` - - `h3_try_coverage_strings` - - `h3_try_grid_distance` - - `hex_decode_binary` - - `last_query_id` - - `last_transaction` - - `least_ignore_nulls` - - `nullif` - - `nvl2` - - `regr_valx` - - `st_area` - - `st_asewkb` - - `st_asewkt` - - `st_asgeojson` - - `st_aswkb` + - Conditional expression functions: + - `booland` + - `boolnot` + - `boolor` + - `boolxor` + - `boolor_agg` + - `decode` + - `greatest_ignore_nulls` + - `least_ignore_nulls` + - `nullif` + - `nvl2` + - `regr_valx` + + - Semi-structured and structured data functions: + - `array_remove_at` + - `as_boolean` + - `map_delete` + - `map_insert` + - `map_pick` + - `map_size` + + - String & binary functions: + - `chr` + - `hex_decode_binary` + + - Numeric functions: + - `div0null` + + - Differential privacy 
functions: + - `dp_interval_high` + - `dp_interval_low` + + - Context functions: + - `last_query_id` + - `last_transaction` + + - Geospatial functions: + - `h3_cell_to_boundary` + - `h3_cell_to_children` + - `h3_cell_to_children_string` + - `h3_cell_to_parent` + - `h3_cell_to_point` + - `h3_compact_cells` + - `h3_compact_cells_strings` + - `h3_coverage` + - `h3_coverage_strings` + - `h3_get_resolution` + - `h3_grid_disk` + - `h3_grid_distance` + - `h3_int_to_string` + - `h3_polygon_to_cells` + - `h3_polygon_to_cells_strings` + - `h3_string_to_int` + - `h3_try_grid_path` + - `h3_try_polygon_to_cells` + - `h3_try_polygon_to_cells_strings` + - `h3_uncompact_cells` + - `h3_uncompact_cells_strings` + - `haversine` + - `h3_grid_path` + - `h3_is_pentagon` + - `h3_is_valid_cell` + - `h3_latlng_to_cell` + - `h3_latlng_to_cell_string` + - `h3_point_to_cell` + - `h3_point_to_cell_string` + - `h3_try_coverage` + - `h3_try_coverage_strings` + - `h3_try_grid_distance` + - `st_area` + - `st_asewkb` + - `st_asewkt` + - `st_asgeojson` + - `st_aswkb` #### Bug Fixes diff --git a/docs/source/snowpark/functions.rst b/docs/source/snowpark/functions.rst index f57e439cfc..c09c1304eb 100644 --- a/docs/source/snowpark/functions.rst +++ b/docs/source/snowpark/functions.rst @@ -336,6 +336,10 @@ Functions map_concat map_contains_key map_keys + map_delete + map_insert + map_pick + map_size max max_by md5 diff --git a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index bc17f4a1c2..89aa2874d0 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -2064,3 +2064,131 @@ def h3_try_grid_distance( cell_id_1 = _to_col_if_str(cell_id_1, "h3_try_grid_distance") cell_id_2 = _to_col_if_str(cell_id_2, "h3_try_grid_distance") return builtin("h3_try_grid_distance", _emit_ast=_emit_ast)(cell_id_1, cell_id_2) + + +@publicapi +def map_delete( + map_col: ColumnOrName, *keys: 
ColumnOrName, _emit_ast: bool = True +) -> Column: + """Returns a map consisting of the input map with one or more keys removed. + + Args: + map_col (ColumnOrName): The map used to remove keys. + *keys (ColumnOrName): Keys to remove. + + Returns: + Column: A map with the specified keys removed. + + Example:: + + >>> from snowflake.snowpark.functions import col, lit, to_variant + >>> df = session.sql(\""" + ... SELECT {'a':1,'b':2,'c':3}::MAP(VARCHAR,NUMBER) as map_col + ... union all + ... SELECT {'c':3,'d':4,'e':5}::MAP(VARCHAR,NUMBER) as map_col + ... \""") + >>> df.select(to_variant(map_delete(col("map_col"), lit("c"), lit("d"))).alias("result")).collect() + [Row(RESULT='{\\n "a": 1,\\n "b": 2\\n}'), Row(RESULT='{\\n "e": 5\\n}')] + + """ + m = _to_col_if_str(map_col, "map_delete") + ks = [_to_col_if_str(k, "map_delete") for k in keys] + return builtin("map_delete", _emit_ast=_emit_ast)(m, *ks) + + +@publicapi +def map_insert( + map_col: ColumnOrName, + key: ColumnOrName, + value: ColumnOrName, + update_flag: Optional[ColumnOrName] = None, + _emit_ast: bool = True, +) -> Column: + """ + Returns a map containing all key-value pairs from the source map as well as the new key-value pair. + If the key already exists in the map, the value is updated with the new value unless update_flag is False. + + Args: + map_col (ColumnOrName): The source map + key (ColumnOrName): The key to insert or update + value (ColumnOrName): The value to associate with the key + update_flag (Optional[ColumnOrName]): A boolean flag indicating whether to update existing keys. If None or True, existing keys are updated. If False, existing keys are not updated. 
+ + Returns: + Column: A new map with the key-value pair inserted or updated + + Examples: + >>> from snowflake.snowpark.functions import lit, to_variant, col + >>> df = session.sql("SELECT {'a': 1, 'b': 2}::MAP(VARCHAR, NUMBER) as MAP_COL") + >>> df.select(to_variant(map_insert(col("MAP_COL"), lit("c"), lit(3))).alias("RESULT")).collect() + [Row(RESULT='{\\n "a": 1,\\n "b": 2,\\n "c": 3\\n}')] + + # Example using update flag + >>> from snowflake.snowpark.functions import lit, to_variant, col + >>> df = session.sql("SELECT {'a': 1, 'b': 2}::MAP(VARCHAR, NUMBER) as MAP_COL") + >>> df.select(to_variant(map_insert(col("MAP_COL"), lit("a"), lit(20), lit(True))).alias("RESULT")).collect() + [Row(RESULT='{\\n "a": 20,\\n "b": 2\\n}')] + """ + m = _to_col_if_str(map_col, "map_insert") + k = _to_col_if_str(key, "map_insert") + v = _to_col_if_str(value, "map_insert") + uf = _to_col_if_str(update_flag, "map_insert") if update_flag is not None else None + if uf is not None: + return builtin("map_insert", _emit_ast=_emit_ast)(m, k, v, uf) + else: + return builtin("map_insert", _emit_ast=_emit_ast)(m, k, v) + + +@publicapi +def map_pick( + map_col: ColumnOrName, *keys: ColumnOrName, _emit_ast: bool = True +) -> Column: + """ + Returns a new map containing some of the key-value pairs from an existing map. + + To identify the key-value pairs to include in the new map, pass in the keys as arguments. + If a specified key is not present in the input map, the key is ignored. 
+ + Args: + map_col (ColumnOrName): The map column to pick from + *keys (ColumnOrName): The keys to pick + + Returns: + Column: A new map containing the selected key-value pairs + + Examples: + >>> from snowflake.snowpark.functions import lit, to_variant, col + >>> df = session.sql("SELECT {'a':1,'b':2,'c':3}::MAP(VARCHAR,NUMBER) as map_col") + >>> df.select(to_variant(map_pick(df["map_col"], lit("a"), lit("b"))).alias("result")).collect() + [Row(RESULT='{\\n "a": 1,\\n "b": 2\\n}')] + + # Example sending an array of keys + >>> from snowflake.snowpark.functions import map_pick, to_variant, col + >>> df = session.sql("SELECT {'a':1,'b':2,'c':3}::MAP(VARCHAR,NUMBER) as map_col, ARRAY_CONSTRUCT('a','b') as keys_arr") + >>> df.select(to_variant(map_pick(col("map_col"), col("keys_arr"))).alias("RESULT")).collect() + [Row(RESULT='{\\n "a": 1,\\n "b": 2\\n}')] + """ + m = _to_col_if_str(map_col, "map_pick") + ks = [_to_col_if_str(k, "map_pick") for k in keys] + return builtin("map_pick", _emit_ast=_emit_ast)(m, *ks) + + +@publicapi +def map_size(map_col: ColumnOrName, _emit_ast: bool = True) -> Column: + """ + Returns the size of the input MAP. Returns None if the input column is not a MAP type. + + Args: + map_col (ColumnOrName): The map values. + + Returns: + Column: The size of the map. + + Examples: + >>> from snowflake.snowpark.functions import col + >>> df = session.sql("SELECT {'a': 1, 'b': 2}::MAP(VARCHAR, NUMBER) as MAP_COL") + >>> df.select(map_size(col("MAP_COL")).alias("MAP_SIZE")).collect() + [Row(MAP_SIZE=2)] + """ + c = _to_col_if_str(map_col, "map_size") + return builtin("map_size", _emit_ast=_emit_ast)(c)