diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7762d8bdc0..a0843fabc8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
 - Added a new datatype `YearMonthIntervalType` that allows users to create intervals for datetime operations.
 - Added a new function `interval_year_month_from_parts` that allows users to easily create `YearMonthIntervalType` without using SQL.
 - Added a new datatype `DayTimeIntervalType` that allows users to create intervals for datetime operations.
+- Added a new function `interval_day_time_from_parts` that allows users to easily create `DayTimeIntervalType` without using SQL.
 - Added support for `FileOperation.list` to list files in a stage with metadata.
 - Added support for `FileOperation.remove` to remove files in a stage.
 - Added a new function `snowflake.snowpark.functions.vectorized` that allows users to mark a function as vectorized UDF.
diff --git a/docs/source/snowpark/functions.rst b/docs/source/snowpark/functions.rst
index 9a7bd59279..21da5f49db 100644
--- a/docs/source/snowpark/functions.rst
+++ b/docs/source/snowpark/functions.rst
@@ -235,6 +235,7 @@ Functions
     initcap
     insert
     instr
+    interval_day_time_from_parts
     interval_year_month_from_parts
     invoker_role
     invoker_share
diff --git a/src/snowflake/snowpark/functions.py b/src/snowflake/snowpark/functions.py
index 069db97980..b5a0f33ba4 100644
--- a/src/snowflake/snowpark/functions.py
+++ b/src/snowflake/snowpark/functions.py
@@ -11000,11 +11000,10 @@ def interval_year_month_from_parts(
     """
     ast = None
     if _emit_ast:
+        # Always include both parameters to match the actual function execution
         args = []
-        if years is not None:
-            args.append(years)
-        if months is not None:
-            args.append(months)
+        args.append(years if years is not None else lit(0))
+        args.append(months if months is not None else lit(0))
         ast = build_function_expr("interval_year_month_from_parts", args)
 
     years_col = (
@@ -11042,6 +11041,129 @@ def get_col_name(col):
     return res
 
 
+@private_preview(
+    version="1.38.0",
+    extra_doc_string="Type DayTimeIntervalType is currently in private preview and needs to be enabled by setting parameter `FEATURE_INTERVAL_TYPES` to `ENABLED`.",
+)
+@publicapi
+def interval_day_time_from_parts(
+    days: Optional[ColumnOrName] = None,
+    hours: Optional[ColumnOrName] = None,
+    mins: Optional[ColumnOrName] = None,
+    secs: Optional[ColumnOrName] = None,
+    _emit_ast: bool = True,
+) -> Column:
+    """
+    Creates a day-time interval expression from the specified days, hours, mins and secs.
+
+    Omitted parts default to 0. All parts are summed before the interval is built, so
+    overflowing or mixed-sign parts are normalized (25 hours becomes 1 day and 1 hour).
+    The result is rounded to whole milliseconds.
+
+    This DayTime interval is not to be confused with the interval created by make_interval.
+    You can define a table column to be of data type DayTimeIntervalType.
+
+    Args:
+        days: The number of days, positive or negative
+        hours: The number of hours, positive or negative
+        mins: The number of minutes, positive or negative
+        secs: The number of seconds, positive or negative
+
+    Returns:
+        A Column representing a day-time interval
+
+    Example::
+
+        >>> from snowflake.snowpark.functions import interval_day_time_from_parts
+        >>>
+        >>> _ = session.sql("ALTER SESSION SET FEATURE_INTERVAL_TYPES=ENABLED;").collect()
+        >>> df = session.create_dataframe([[1, 12, 30, 01.001001]], ['day', 'hour', 'min', 'sec'])
+        >>> df.select(interval_day_time_from_parts(col("day"), col("hour"), col("min"), col("sec")).alias("interval")).show()
+        --------------------------
+        |"INTERVAL"              |
+        --------------------------
+        |1 day, 12:30:01.001000  |
+        --------------------------
+
+
+    """
+    # Omitted parts are recorded as lit(0) so the AST always carries all four
+    # positional arguments, matching the expression that is actually evaluated.
+    ast = None
+    if _emit_ast:
+        args = [
+            part if part is not None else lit(0)
+            for part in (days, hours, mins, secs)
+        ]
+        ast = build_function_expr("interval_day_time_from_parts", args)
+
+    days_col, hours_col, mins_col, secs_col = (
+        lit(0)
+        if part is None
+        else _to_col_if_str(part, "interval_day_time_from_parts")
+        for part in (days, hours, mins, secs)
+    )
+
+    total_seconds = (
+        days_col * lit(86400) + hours_col * lit(3600) + mins_col * lit(60) + secs_col
+    )
+
+    # Round to whole milliseconds *before* splitting into fields. Rounding only the
+    # leftover fraction lets e.g. 59.9996 seconds yield "1000" milliseconds, which
+    # lpad(..., 3, ...) truncates to "100" instead of carrying into the minutes.
+    total_millis = cast(round(total_seconds * lit(1000), 0), "int")
+    is_negative = total_millis < lit(0)
+    abs_millis = abs(total_millis)
+
+    days_part = cast(floor(abs_millis / lit(86400000)), "int")
+    millis_after_days = abs_millis % lit(86400000)
+
+    hours_part = cast(floor(millis_after_days / lit(3600000)), "int")
+    millis_after_hours = millis_after_days % lit(3600000)
+
+    mins_part = cast(floor(millis_after_hours / lit(60000)), "int")
+    millis_after_mins = millis_after_hours % lit(60000)
+
+    secs_part = cast(floor(millis_after_mins / lit(1000)), "int")
+    millis_part = millis_after_mins % lit(1000)
+
+    # Each of these is below 100, so lpad to width 2 only ever pads.
+    hours_str = lpad(cast(hours_part, "str"), 2, lit("0"))
+    mins_str = lpad(cast(mins_part, "str"), 2, lit("0"))
+    secs_str = lpad(cast(secs_part, "str"), 2, lit("0"))
+
+    # Emit a fractional component only when there is one, e.g. ".050".
+    fraction_str = iff(
+        millis_part > lit(0),
+        concat(lit("."), lpad(cast(millis_part, "str"), 3, lit("0"))),
+        lit(""),
+    )
+
+    sign_prefix = iff(is_negative, lit("-"), lit(""))
+    interval_value = concat(
+        sign_prefix,
+        cast(days_part, "str"),
+        lit(" "),
+        hours_str,
+        lit(":"),
+        mins_str,
+        lit(":"),
+        secs_str,
+        fraction_str,
+    )
+
+    def get_col_name(column):
+        if isinstance(column._expr1, Literal):
+            return str(column._expr1.value)
+        return str(column._expr1)
+
+    alias_name = f"interval_day_time_from_parts({get_col_name(days_col)}, {get_col_name(hours_col)}, {get_col_name(mins_col)}, {get_col_name(secs_col)})"
+
+    res = cast(interval_value, "INTERVAL DAY TO SECOND").alias(alias_name)
+    res._ast = ast
+    return res
+
+
 @publicapi
 @deprecated(
     version="1.28.0",
diff --git a/tests/ast/data/functions2.test b/tests/ast/data/functions2.test
index 77152ae03d..81ad4eeca2 100644
--- a/tests/ast/data/functions2.test
+++ b/tests/ast/data/functions2.test
@@ -450,6 +450,8 @@ df351 = df.select(function("avg")("B"))
 
 df352 = df.select(interval_year_month_from_parts("A", "B"))
 
+df353 = df.select(interval_day_time_from_parts("A", "B", "C", "D"))
+
 ## EXPECTED UNPARSER OUTPUT
 
 df = session.table("table1")
@@ -900,6 +902,8 @@ df351 = df.select(call_function("avg", "B"))
 
 df352 = df.select(interval_year_month_from_parts("A", "B"))
 
+df353 = df.select(interval_day_time_from_parts("A", "B", "C", "D"))
+
 ## EXPECTED ENCODED AST
 
 interned_value_table {
@@ -29253,6 +29257,104 @@ body {
     uid: 224
   }
 }
+body {
+  bind {
+    expr {
+      dataframe_select {
+        cols {
+          args {
+            apply_expr {
+              fn {
+                builtin_fn {
+                  name {
+                    name {
+                      name_flat {
+                        name: "interval_day_time_from_parts"
+                      }
+                    }
+                  }
+                }
+              }
+              pos_args {
+                string_val {
+                  src {
+                    end_column: 74
+                    end_line: 475
+                    file: 2
+                    start_column: 26
+                    start_line: 475
+                  }
+                  v: "A"
+                }
+              }
+              pos_args {
+                string_val {
+                  src {
+                    end_column: 74
+                    end_line: 475
+                    file: 2
+                    start_column: 26
+                    start_line: 475
+                  }
+                  v: "B"
+                }
+              }
+              pos_args {
+                string_val {
+                  src {
+                    end_column: 74
+                    end_line: 475
+                    file: 2
+                    start_column: 26
+                    start_line: 475
+                  }
+                  v: "C"
+                }
+              }
+              pos_args {
+                string_val {
+                  src {
+                    end_column: 74
+                    end_line: 475
+                    file: 2
+                    start_column: 26
+                    start_line: 475
+                  }
+                  v: "D"
+                }
+              }
+              src {
+                end_column: 74
+                end_line: 475
+                file: 2
+                start_column: 26
+                start_line: 475
+              }
+            }
+          }
+          variadic: true
+        }
+        df {
+          dataframe_ref {
+            id: 1
+          }
+        }
+        src {
+          end_column: 75
+          end_line: 475
+          file: 2
+          start_column: 16
+          start_line: 475
+        }
+      }
+    }
+    first_request_id: "\003U\"\366q\366P\346\260\261?\234\303\254\316\353"
+    symbol {
+      value: "df353"
+    }
+    uid: 225
+  }
+}
 client_ast_version: 1
 client_language {
   python_language {
diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py
index 6f1a088b62..2ff90e2494 100644
--- a/tests/integ/conftest.py
+++ b/tests/integ/conftest.py
@@ -274,7 +274,7 @@ def session(
         .config("local_testing", local_testing_mode)
         .config(
             "session_parameters",
-            {"feature_interval_types": "ENABLED", "enable_interval_subtypes": "true"},
+            {"feature_interval_types": "ENABLED"},
         )
         .create()
     )
diff --git a/tests/integ/scala/test_function_suite.py b/tests/integ/scala/test_function_suite.py
index 7504db2ad3..f1bee49123 100644
--- a/tests/integ/scala/test_function_suite.py
+++ b/tests/integ/scala/test_function_suite.py
@@ -5,7 +5,7 @@
 
 import json
 from contextlib import contextmanager
-from datetime import date, datetime, time
+from datetime import date, datetime, time, timedelta
 from decimal import Decimal
 from functools import partial
 
@@ -136,6 +136,7 @@
     lpad,
     ltrim,
     interval_year_month_from_parts,
+    interval_day_time_from_parts,
     max,
     md5,
     mean,
@@ -215,6 +216,7 @@
 from snowflake.snowpark.mock._functions import LocalTimezone
 from snowflake.snowpark.types import (
     DateType,
+    DayTimeIntervalType,
     StructField,
     StructType,
     TimestampTimeZone,
@@ -5724,3 +5726,154 @@ def test_interval_year_month_from_parts(session):
     assert result_nulls[0]['interval_year_month_from_parts("YEARS", "MONTHS")'] is None
     assert result_nulls[1]['interval_year_month_from_parts("YEARS", "MONTHS")'] is None
     assert result_nulls[2]['interval_year_month_from_parts("YEARS", "MONTHS")'] is None
+
+
+@pytest.mark.skipif(
+    "config.getoption('local_testing_mode', default=False)",
+    reason="FEAT: Alter Session not supported in local testing",
+)
+@pytest.mark.skipif(
+    IS_IN_STORED_PROC, reason="Alter Session not supported in stored procedure."
+)
+def test_interval_day_time_from_parts(session):
+    """Check interval_day_time_from_parts with column, literal, omitted and NULL parts."""
+    interval_type = DayTimeIntervalType(0, 3)
+    part_names = ["days", "hours", "mins", "secs"]
+
+    # Each case is ((days, hours, mins, secs), expected timedelta).
+    all_parts_cases = [
+        ((0, 0, 0, 0.0), timedelta(0)),
+        ((1, 0, 0, 0.0), timedelta(days=1)),
+        ((0, 1, 0, 0.0), timedelta(hours=1)),
+        ((0, 0, 1, 0.0), timedelta(minutes=1)),
+        ((0, 0, 0, 1.0), timedelta(seconds=1.0)),
+        ((1, 2, 3, 4.5), timedelta(days=1, hours=2, minutes=3, seconds=4.5)),
+        ((5, 10, 30, 45.123), timedelta(days=5, hours=10, minutes=30, seconds=45.123)),
+        ((0, 25, 90, 120.999), timedelta(hours=25, minutes=90, seconds=120.999)),
+        ((-1, 0, 0, 0.0), timedelta(days=-1)),
+        ((0, -1, 0, 0.0), timedelta(hours=-1)),
+        ((0, 0, -1, 0.0), timedelta(minutes=-1)),
+        ((0, 0, 0, -1.0), timedelta(seconds=-1.0)),
+        ((-1, -2, -3, -4.5), timedelta(days=-1, hours=-2, minutes=-3, seconds=-4.5)),
+        ((2, -1, 30, -15.0), timedelta(days=2, hours=-1, minutes=30, seconds=-15.0)),
+        ((365, 24, 60, 3600.0), timedelta(days=365, hours=24, minutes=60, seconds=3600.0)),
+    ]
+
+    all_parts_rows = (
+        session.create_dataframe(
+            [parts for parts, _ in all_parts_cases], schema=part_names
+        )
+        .select(
+            interval_day_time_from_parts(
+                col("days"), col("hours"), col("mins"), col("secs")
+            ).alias("interval_result")
+        )
+        .collect()
+    )
+    assert len(all_parts_rows) == len(all_parts_cases)
+    for row, (_, expected) in zip(all_parts_rows, all_parts_cases):
+        assert row["INTERVAL_RESULT"] == expected
+
+    # A single keyword argument per query; the result column name shows the
+    # omitted parts as 0. Each entry: (part, result column name, [(input, expected)]).
+    single_part_cases = [
+        (
+            "days",
+            "interval_day_time_from_parts(days, 0, 0, 0)",
+            [(10, timedelta(days=10)), (-5, timedelta(days=-5)), (0, timedelta(0))],
+        ),
+        (
+            "hours",
+            "interval_day_time_from_parts(0, hours, 0, 0)",
+            [(25, timedelta(hours=25)), (-12, timedelta(hours=-12)), (0, timedelta(0))],
+        ),
+        (
+            "mins",
+            "interval_day_time_from_parts(0, 0, mins, 0)",
+            [(90, timedelta(minutes=90)), (-45, timedelta(minutes=-45)), (0, timedelta(0))],
+        ),
+        (
+            "secs",
+            "interval_day_time_from_parts(0, 0, 0, secs)",
+            [
+                (3661.5, timedelta(seconds=3661.5)),
+                (-1800.25, timedelta(seconds=-1800.25)),
+                (0.0, timedelta(0)),
+            ],
+        ),
+    ]
+    for part, result_name, cases in single_part_cases:
+        single_part_df = session.create_dataframe(
+            [(value,) for value, _ in cases], schema=[part]
+        ).select(interval_day_time_from_parts(**{part: col(part)}))
+        assert single_part_df.schema.fields[0].datatype == interval_type
+
+        single_part_rows = single_part_df.collect()
+        for index, (_, expected) in enumerate(cases):
+            assert single_part_rows[index][result_name] == expected
+
+    literal_df = session.create_dataframe([(1,)], schema=["dummy"]).select(
+        interval_day_time_from_parts(lit(1), lit(2), lit(3), lit(4.5)).alias(
+            "all_literal"
+        ),
+        interval_day_time_from_parts(days=lit(7)).alias("days_only"),
+        interval_day_time_from_parts(hours=lit(12)).alias("hours_only"),
+        interval_day_time_from_parts(mins=lit(30)).alias("mins_only"),
+        interval_day_time_from_parts(secs=lit(45.5)).alias("secs_only"),
+    )
+    for field in literal_df.schema.fields:
+        assert field.datatype == interval_type
+
+    literal_row = literal_df.collect()[0]
+    expected_literals = {
+        "ALL_LITERAL": timedelta(days=1, hours=2, minutes=3, seconds=4.5),
+        "DAYS_ONLY": timedelta(days=7),
+        "HOURS_ONLY": timedelta(hours=12),
+        "MINS_ONLY": timedelta(minutes=30),
+        "SECS_ONLY": timedelta(seconds=45.5),
+    }
+    for column_name, expected in expected_literals.items():
+        assert literal_row[column_name] == expected
+
+    mixed_name = "interval_day_time_from_parts(days, 0, mins, 0)"
+    mixed_cases = [
+        ((2, 30), timedelta(days=2, minutes=30)),
+        ((1, 90), timedelta(days=1, minutes=90)),
+        ((0, 0), timedelta(0)),
+    ]
+    mixed_df = session.create_dataframe(
+        [parts for parts, _ in mixed_cases], schema=["days", "mins"]
+    ).select(interval_day_time_from_parts(days=col("days"), mins=col("mins")))
+    assert mixed_df.schema.fields[0].datatype == interval_type
+
+    mixed_rows = mixed_df.collect()
+    for index, (_, expected) in enumerate(mixed_cases):
+        assert mixed_rows[index][mixed_name] == expected
+
+    schema_fields = (
+        session.create_dataframe([(1, 2, 3, 4.0)], schema=["d", "h", "m", "s"])
+        .select(interval_day_time_from_parts(col("d"), col("h"), col("m"), col("s")))
+        .schema.fields
+    )
+    assert len(schema_fields) == 1
+    assert schema_fields[0].datatype == interval_type
+
+    # A NULL in any part makes the whole interval NULL.
+    null_inputs = [
+        (None, 1, 2, 3.0),
+        (1, None, 2, 3.0),
+        (1, 2, None, 3.0),
+        (1, 2, 3, None),
+        (None, None, None, None),
+    ]
+    null_rows = (
+        session.create_dataframe(null_inputs, schema=part_names)
+        .select(
+            interval_day_time_from_parts(
+                col("days"), col("hours"), col("mins"), col("secs")
+            ).alias("interval_result")
+        )
+        .collect()
+    )
+    for index, _ in enumerate(null_inputs):
+        assert null_rows[index]["INTERVAL_RESULT"] is None