From 89ab0c36dc90b75c20273a83048bd05c2a918cc7 Mon Sep 17 00:00:00 2001 From: Felix He Date: Wed, 17 Sep 2025 17:59:49 -0700 Subject: [PATCH 01/12] SNOW-2346552: IntervalType spark show string support --- src/snowflake/snowpark/dataframe.py | 205 ++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index fc31cb0120..8c38c47e8d 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -206,11 +206,13 @@ from snowflake.snowpark.types import ( ArrayType, DataType, + DayTimeIntervalType, MapType, PandasDataFrameType, StringType, StructField, StructType, + YearMonthIntervalType, _NumericType, _FractionalType, TimestampType, @@ -5115,6 +5117,209 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: res = "-Infinity" else: res = str(cell).replace("e+", "E").replace("e-", "E-") + elif isinstance(cell, str) and isinstance(datatype, YearMonthIntervalType): + # Determine the appropriate range based on datatype fields + start_field = getattr( + datatype, "start_field", YearMonthIntervalType.YEAR + ) + end_field = getattr(datatype, "end_field", YearMonthIntervalType.MONTH) + + # Handle different input formats + # Check for compound format (year-month) vs simple number + has_internal_dash = ( + cell.startswith("+") or cell.startswith("-") + ) and "-" in cell[1:] + + if has_internal_dash: + # Format like "+1-03" or "-1-03" or "-1-6" (compound year-month) + is_negative = cell.startswith("-") + # Handle the case where there might be multiple dashes + if is_negative: + # For negative: "-1-6" -> split on the second dash + remaining = cell[1:] # "1-6" + if "-" in remaining: + parts = remaining.split( + "-", 1 + ) # Split only on first dash: ["1", "6"] + years = str(int(parts[0])) + months = str(int(parts[1])) + else: + # Single number case, handle in the elif below + years = "0" + months = remaining + else: + # For positive: "+1-6" -> split on the 
second dash + remaining = cell[1:] # "1-6" + if "-" in remaining: + parts = remaining.split( + "-", 1 + ) # Split only on first dash: ["1", "6"] + years = str(int(parts[0])) + months = str(int(parts[1])) + else: + # Single number case + years = "0" + months = remaining + elif cell.startswith("-") or cell.startswith("+") or cell.isdigit(): + # Format like "-8" or "15" (single number for months-only or years-only) + is_negative = cell.startswith("-") + if ( + start_field == YearMonthIntervalType.MONTH + and end_field == YearMonthIntervalType.MONTH + ): + # This is a month-only interval, treat the number as total months + total_months = int(cell) + res = f"INTERVAL '{total_months}' MONTH" + return res.replace("\n", "\\n") + elif ( + start_field == YearMonthIntervalType.YEAR + and end_field == YearMonthIntervalType.YEAR + ): + # This is a year-only interval + years_val = int(cell) + res = f"INTERVAL '{years_val}' YEAR" + return res.replace("\n", "\\n") + else: + # Shouldn't happen, but fallback + res = cell + return res.replace("\n", "\\n") + else: + res = cell + return res.replace("\n", "\\n") + + # Format based on start/end field + if ( + start_field == YearMonthIntervalType.YEAR + and end_field == YearMonthIntervalType.MONTH + ): + # Full range: YEAR TO MONTH + sign_prefix = "-" if is_negative else "" + res = f"INTERVAL '{sign_prefix}{years}-{months}' YEAR TO MONTH" + elif ( + start_field == YearMonthIntervalType.YEAR + and end_field == YearMonthIntervalType.YEAR + ): + # Years only: YEAR + sign_prefix = "-" if is_negative else "" + res = f"INTERVAL '{sign_prefix}{years}' YEAR" + elif ( + start_field == YearMonthIntervalType.MONTH + and end_field == YearMonthIntervalType.MONTH + ): + # Months only: MONTH - calculate total months + total_months = int(years) * 12 + int(months) + if is_negative: + total_months = -total_months + res = f"INTERVAL '{total_months}' MONTH" + else: + # Fallback to full format + sign_prefix = "-" if is_negative else "" + res = f"INTERVAL 
'{sign_prefix}{years}-{months}' YEAR TO MONTH" + elif isinstance(cell, (str, datetime.timedelta)) and isinstance( + datatype, DayTimeIntervalType + ): + start_field = getattr(datatype, "start_field", DayTimeIntervalType.DAY) + end_field = getattr(datatype, "end_field", DayTimeIntervalType.SECOND) + + def format_day_time_interval(total_seconds_float: float) -> str: + is_negative = total_seconds_float < 0 + abs_total_seconds = abs(total_seconds_float) + + days = int(abs_total_seconds) // 86400 + remaining_seconds = abs_total_seconds - (days * 86400) + hours = int(remaining_seconds) // 3600 + remaining_after_hours = remaining_seconds - (hours * 3600) + minutes = int(remaining_after_hours) // 60 + seconds = remaining_after_hours - (minutes * 60) + + sign = "-" if is_negative else "" + + # For single field intervals, extract just that component + if start_field == end_field: + if start_field == DayTimeIntervalType.DAY: + return f"{sign}{days}" + elif start_field == DayTimeIntervalType.HOUR: + total_hours = int(abs_total_seconds) // 3600 + return f"{sign}{total_hours:02d}" + elif start_field == DayTimeIntervalType.MINUTE: + total_minutes = int(abs_total_seconds) // 60 + return f"{sign}{total_minutes}" + elif start_field == DayTimeIntervalType.SECOND: + # Handle fractional seconds - use total seconds, not just remainder + if abs_total_seconds == int(abs_total_seconds): + return f"{sign}{int(abs_total_seconds)}" + else: + return f"{sign}{abs_total_seconds:g}" + + # For multi-field intervals, format based on start/end fields + if start_field == DayTimeIntervalType.DAY: + # DAY TO X format: "D HH:MM:SS" + if seconds == int(seconds): + return f"{sign}{days} {hours:02d}:{minutes:02d}:{int(seconds):02d}" + else: + return f"{sign}{days} {hours:02d}:{minutes:02d}:{seconds:06.3f}" + elif start_field == DayTimeIntervalType.HOUR: + # HOUR TO X format: "HH:MM:SS" (no days) + total_hours = int(abs_total_seconds) // 3600 + remaining_after_hours = abs_total_seconds - (total_hours * 
3600) + mins = int(remaining_after_hours) // 60 + secs = remaining_after_hours - (mins * 60) + + if end_field == DayTimeIntervalType.HOUR: + return f"{sign}{total_hours:02d}" + elif end_field == DayTimeIntervalType.MINUTE: + return f"{sign}{total_hours:02d}:{mins:02d}" + else: # TO SECOND + if secs == int(secs): + return f"{sign}{total_hours:02d}:{mins:02d}:{int(secs):02d}" + else: + return ( + f"{sign}{total_hours:02d}:{mins:02d}:{secs:06.3f}" + ) + elif start_field == DayTimeIntervalType.MINUTE: + # MINUTE TO X format: "MM:SS" (no days or hours) + total_minutes = int(abs_total_seconds) // 60 + remaining_secs = abs_total_seconds - (total_minutes * 60) + + if end_field == DayTimeIntervalType.MINUTE: + return f"{sign}{total_minutes}" + else: # TO SECOND + if remaining_secs == int(remaining_secs): + return f"{sign}{total_minutes:02d}:{int(remaining_secs):02d}" + else: + return ( + f"{sign}{total_minutes:02d}:{remaining_secs:06.3f}" + ) + else: + # Fallback to basic format + if seconds == int(seconds): + return f"{sign}{days} {hours:02d}:{minutes:02d}:{int(seconds):02d}" + else: + return f"{sign}{days} {hours:02d}:{minutes:02d}:{seconds:06.3f}" + + if isinstance(cell, datetime.timedelta): + total_seconds_float = cell.total_seconds() + interval_str = format_day_time_interval(total_seconds_float) + elif isinstance(cell, str) and "INTERVAL" not in cell: + interval_str = cell + else: + res = cell + return res.replace("\n", "\\n") + + field_names = { + DayTimeIntervalType.DAY: "DAY", + DayTimeIntervalType.HOUR: "HOUR", + DayTimeIntervalType.MINUTE: "MINUTE", + DayTimeIntervalType.SECOND: "SECOND", + } + + start_name = field_names.get(start_field, "DAY") + end_name = field_names.get(end_field, "SECOND") + + if start_field == end_field: + res = f"INTERVAL '{interval_str}' {start_name}" + else: + res = f"INTERVAL '{interval_str}' {start_name} TO {end_name}" else: res = str(cell) return res.replace("\n", "\\n") From 577d9c8193b439eeb662c81ee2c26d68ec988a0a Mon Sep 17 00:00:00 
2001 From: Felix He Date: Thu, 18 Sep 2025 10:32:23 -0700 Subject: [PATCH 02/12] SNOW-2346552: added testing --- src/snowflake/snowpark/dataframe.py | 59 ++++++++-- tests/integ/test_dataframe.py | 173 ++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+), 9 deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index 8c38c47e8d..5b46d8389e 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -212,11 +212,11 @@ StringType, StructField, StructType, - YearMonthIntervalType, _NumericType, _FractionalType, TimestampType, TimestampTimeZone, + YearMonthIntervalType, ) # Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable @@ -5240,24 +5240,65 @@ def format_day_time_interval(total_seconds_float: float) -> str: return f"{sign}{days}" elif start_field == DayTimeIntervalType.HOUR: total_hours = int(abs_total_seconds) // 3600 - return f"{sign}{total_hours:02d}" + return ( + f"{sign}{total_hours:02d}" + if total_hours < 10 + else f"{sign}{total_hours}" + ) elif start_field == DayTimeIntervalType.MINUTE: total_minutes = int(abs_total_seconds) // 60 - return f"{sign}{total_minutes}" + return ( + f"{sign}{total_minutes:02d}" + if total_minutes < 10 + else f"{sign}{total_minutes}" + ) elif start_field == DayTimeIntervalType.SECOND: # Handle fractional seconds - use total seconds, not just remainder if abs_total_seconds == int(abs_total_seconds): - return f"{sign}{int(abs_total_seconds)}" + total_secs_int = int(abs_total_seconds) + return ( + f"{sign}{total_secs_int:02d}" + if total_secs_int < 10 + else f"{sign}{total_secs_int}" + ) else: - return f"{sign}{abs_total_seconds:g}" + # For fractional seconds, format with leading zero if < 10 + if abs_total_seconds < 10: + # Format with leading zero: split into integer and fractional parts + integer_part = int(abs_total_seconds) + fractional_part = abs_total_seconds - integer_part + if fractional_part 
== 0: + return f"{sign}{integer_part:02d}" + else: + # Format fractional part and remove leading '0.' + frac_str = f"{fractional_part:.6f}"[2:].rstrip( + "0" + ) + return f"{sign}{integer_part:02d}.{frac_str}" + else: + return f"{sign}{abs_total_seconds:g}" # For multi-field intervals, format based on start/end fields if start_field == DayTimeIntervalType.DAY: - # DAY TO X format: "D HH:MM:SS" - if seconds == int(seconds): - return f"{sign}{days} {hours:02d}:{minutes:02d}:{int(seconds):02d}" + # DAY TO X format: truncate based on end_field + if end_field == DayTimeIntervalType.HOUR: + # DAY TO HOUR: "D HH" + return ( + f"{sign}{days} {hours:02d}" + if hours < 10 + else f"{sign}{days} {hours}" + ) + elif end_field == DayTimeIntervalType.MINUTE: + # DAY TO MINUTE: "D HH:MM" + hours_str = f"{hours:02d}" if hours < 10 else f"{hours}" + return f"{sign}{days} {hours_str}:{minutes:02d}" else: - return f"{sign}{days} {hours:02d}:{minutes:02d}:{seconds:06.3f}" + # DAY TO SECOND: "D HH:MM:SS" + hours_str = f"{hours:02d}" if hours < 10 else f"{hours}" + if seconds == int(seconds): + return f"{sign}{days} {hours_str}:{minutes:02d}:{int(seconds):02d}" + else: + return f"{sign}{days} {hours_str}:{minutes:02d}:{seconds:06.3f}" elif start_field == DayTimeIntervalType.HOUR: # HOUR TO X format: "HH:MM:SS" (no days) total_hours = int(abs_total_seconds) // 3600 diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index ee418e525c..0094e53020 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -2536,6 +2536,179 @@ def assert_show_string_equals(actual: str, expected: str): ) +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="FEAT: Interval types not fully supported in local testing", +) +def test_show_interval_formatting(session): + df = session.sql("SELECT INTERVAL '1' HOUR as hour_single") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"HOUR_SINGLE" | 
+ +------------------+ + |INTERVAL '01' HOUR| + +------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '5' MINUTE as minute_single") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"MINUTE_SINGLE" | + +--------------------+ + |INTERVAL '05' MINUTE| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '5' SECOND as second_integer") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"SECOND_INTEGER" | + +--------------------+ + |INTERVAL '05' SECOND| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '1.000001' SECOND as second_microseconds") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + |"SECOND_MICROSECONDS" | + +---------------------------+ + |INTERVAL '01.000001' SECOND| + +---------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '24' HOUR as hour_full_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"HOUR_FULL_DAY" | + +------------------+ + |INTERVAL '24' HOUR| + +------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '90' MINUTE as minute_over_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"MINUTE_OVER_HOUR" | + +--------------------+ + |INTERVAL '90' MINUTE| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '0' SECOND as zero_second") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"ZERO_SECOND" | + +--------------------+ + |INTERVAL '00' SECOND| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '0.000001' SECOND as microsecond") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + |"MICROSECOND" | + +---------------------------+ + |INTERVAL '00.000001' SECOND| + +---------------------------+ + """ + 
) + + df = session.sql("SELECT INTERVAL '2 12' DAY TO HOUR as day_to_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + |"DAY_TO_HOUR" | + +---------------------------+ + |INTERVAL '2 12' DAY TO HOUR| + +---------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '1 08:30' DAY TO MINUTE as day_to_minute") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------+ + |"DAY_TO_MINUTE" | + +--------------------------------+ + |INTERVAL '1 08:30' DAY TO MINUTE| + +--------------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '08:30' HOUR TO MINUTE as hour_to_minute") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------------------+ + |"HOUR_TO_MINUTE" | + +-------------------------------+ + |INTERVAL '08:30' HOUR TO MINUTE| + +-------------------------------+ + """ + ) + + df = session.sql( + "SELECT INTERVAL '01:00:00.456' HOUR TO SECOND as hour_to_second_fractional" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------------+ + |"HOUR_TO_SECOND_FRACTIONAL" | + +--------------------------------------+ + |INTERVAL '01:00:00.456' HOUR TO SECOND| + +--------------------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-2' HOUR as negative_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------+ + |"NEGATIVE_HOUR" | + +-------------------+ + |INTERVAL '-02' HOUR| + +-------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-15.5' SECOND as negative_second") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------+ + |"NEGATIVE_SECOND" | + +-----------------------+ + |INTERVAL '-15.5' SECOND| + +-----------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '999999' SECOND as large_second") + assert df._show_string_spark(truncate=False) == 
dedent( + """\ + +------------------------+ + |"LARGE_SECOND" | + +------------------------+ + |INTERVAL '999999' SECOND| + +------------------------+ + """ + ) + + @pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data): expected_names = ["_1"] From 7812eb441a33ffccc840b6b43a3cc7fc305d3ca1 Mon Sep 17 00:00:00 2001 From: Felix He Date: Fri, 19 Sep 2025 11:05:47 -0700 Subject: [PATCH 03/12] SNOW-2346552: added more coverage --- src/snowflake/snowpark/dataframe.py | 14 +- tests/integ/test_dataframe.py | 292 ++++++++++++++++++++++++++++ 2 files changed, 303 insertions(+), 3 deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index 5b46d8389e..c8098254f7 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -5326,11 +5326,19 @@ def format_day_time_interval(total_seconds_float: float) -> str: return f"{sign}{total_minutes}" else: # TO SECOND if remaining_secs == int(remaining_secs): - return f"{sign}{total_minutes:02d}:{int(remaining_secs):02d}" + minutes_str = ( + f"{total_minutes:02d}" + if total_minutes < 10 + else f"{total_minutes}" + ) + return f"{sign}{minutes_str}:{int(remaining_secs):02d}" else: - return ( - f"{sign}{total_minutes:02d}:{remaining_secs:06.3f}" + minutes_str = ( + f"{total_minutes:02d}" + if total_minutes < 10 + else f"{total_minutes}" ) + return f"{sign}{minutes_str}:{remaining_secs:06.3f}" else: # Fallback to basic format if seconds == int(seconds): diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 0094e53020..126f411a24 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -2708,6 +2708,298 @@ def test_show_interval_formatting(session): """ ) + # Year-month intervals with dash format + df = session.sql("SELECT INTERVAL '1-6' YEAR TO MONTH as year_to_month") + assert df._show_string_spark(truncate=False) == dedent( + 
"""\ + +----------------------------+ + |"YEAR_TO_MONTH" | + +----------------------------+ + |INTERVAL '1-6' YEAR TO MONTH| + +----------------------------+ + """ + ) + + # Negative year-month intervals + df = session.sql("SELECT INTERVAL '-2-3' YEAR TO MONTH as negative_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------+ + |"NEGATIVE_YEAR_MONTH" | + +-----------------------------+ + |INTERVAL '-2-3' YEAR TO MONTH| + +-----------------------------+ + """ + ) + + # Single year intervals (not YEAR TO MONTH) + df = session.sql("SELECT INTERVAL '5' YEAR as single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------+ + |"SINGLE_YEAR" | + +-----------------+ + |INTERVAL '5' YEAR| + +-----------------+ + """ + ) + + # Single month intervals (not YEAR TO MONTH) + df = session.sql("SELECT INTERVAL '8' MONTH as single_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"SINGLE_MONTH" | + +------------------+ + |INTERVAL '8' MONTH| + +------------------+ + """ + ) + + # Zero year-month intervals + df = session.sql("SELECT INTERVAL '0-0' YEAR TO MONTH as zero_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"ZERO_YEAR_MONTH" | + +----------------------------+ + |INTERVAL '0-0' YEAR TO MONTH| + +----------------------------+ + """ + ) + + # Very large day intervals + df = session.sql("SELECT INTERVAL '999' DAY as large_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"LARGE_DAY" | + +------------------+ + |INTERVAL '999' DAY| + +------------------+ + """ + ) + + # Minute to second with large minutes + df = session.sql( + "SELECT INTERVAL '150:30' MINUTE TO SECOND as large_minute_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + 
|"LARGE_MINUTE_TO_SECOND" | + +----------------------------------+ + |INTERVAL '150:30' MINUTE TO SECOND| + +----------------------------------+ + """ + ) + + # Day to second with fractional seconds + df = session.sql( + "SELECT INTERVAL '5 10:20:30.123' DAY TO SECOND as day_to_second_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------------------+ + |"DAY_TO_SECOND_FRAC" | + +---------------------------------------+ + |INTERVAL '5 10:20:30.123' DAY TO SECOND| + +---------------------------------------+ + """ + ) + + # Hour to second with zero padding in multi-field + df = session.sql("SELECT INTERVAL '05:00:00' HOUR TO SECOND as hour_zero_padded") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"HOUR_ZERO_PADDED" | + +----------------------------------+ + |INTERVAL '05:00:00' HOUR TO SECOND| + +----------------------------------+ + """ + ) + + # Negative day-time intervals + df = session.sql("SELECT INTERVAL '-3 05:30:45' DAY TO SECOND as negative_complex") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------+ + |"NEGATIVE_COMPLEX" | + +------------------------------------+ + |INTERVAL '-3 05:30:45' DAY TO SECOND| + +------------------------------------+ + """ + ) + + # Additional edge cases for complete coverage based on actual Snowflake output + + # Year-month compound intervals + df = session.sql("SELECT INTERVAL '1-6' YEAR TO MONTH as year_to_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"YEAR_TO_MONTH" | + +----------------------------+ + |INTERVAL '1-6' YEAR TO MONTH| + +----------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-2-3' YEAR TO MONTH as negative_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------+ + |"NEGATIVE_YEAR_MONTH" | + 
+-----------------------------+ + |INTERVAL '-2-3' YEAR TO MONTH| + +-----------------------------+ + """ + ) + + # Single field intervals + df = session.sql("SELECT INTERVAL '5' YEAR as single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------+ + |"SINGLE_YEAR" | + +-----------------+ + |INTERVAL '5' YEAR| + +-----------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '8' MONTH as single_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"SINGLE_MONTH" | + +------------------+ + |INTERVAL '8' MONTH| + +------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '0-0' YEAR TO MONTH as zero_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"ZERO_YEAR_MONTH" | + +----------------------------+ + |INTERVAL '0-0' YEAR TO MONTH| + +----------------------------+ + """ + ) + + # Large day interval + df = session.sql("SELECT INTERVAL '999' DAY as large_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"LARGE_DAY" | + +------------------+ + |INTERVAL '999' DAY| + +------------------+ + """ + ) + + # Large minute to second interval (tests the bug we just fixed) + df = session.sql( + "SELECT INTERVAL '150:30' MINUTE TO SECOND as large_minute_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"LARGE_MINUTE_TO_SECOND" | + +----------------------------------+ + |INTERVAL '150:30' MINUTE TO SECOND| + +----------------------------------+ + """ + ) + + # Day to second with fractional seconds + df = session.sql( + "SELECT INTERVAL '5 10:20:30.123' DAY TO SECOND as day_to_second_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------------------+ + |"DAY_TO_SECOND_FRAC" | + +---------------------------------------+ + |INTERVAL '5 10:20:30.123' DAY TO 
SECOND| + +---------------------------------------+ + """ + ) + + # Hour to second with zero padding + df = session.sql("SELECT INTERVAL '05:00:00' HOUR TO SECOND as hour_zero_padded") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"HOUR_ZERO_PADDED" | + +----------------------------------+ + |INTERVAL '05:00:00' HOUR TO SECOND| + +----------------------------------+ + """ + ) + + # Negative complex interval + df = session.sql("SELECT INTERVAL '-3 05:30:45' DAY TO SECOND as negative_complex") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------+ + |"NEGATIVE_COMPLEX" | + +------------------------------------+ + |INTERVAL '-3 05:30:45' DAY TO SECOND| + +------------------------------------+ + """ + ) + + # Positive prefix intervals + df = session.sql("SELECT INTERVAL '+2-5' YEAR TO MONTH as positive_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"POSITIVE_YEAR_MONTH" | + +----------------------------+ + |INTERVAL '2-5' YEAR TO MONTH| + +----------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '+3' YEAR as positive_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------+ + |"POSITIVE_YEAR" | + +-----------------+ + |INTERVAL '3' YEAR| + +-----------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '+15' MONTH as positive_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------+ + |"POSITIVE_MONTH" | + +-------------------+ + |INTERVAL '15' MONTH| + +-------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-5' YEAR as negative_single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------+ + |"NEGATIVE_SINGLE_YEAR"| + +----------------------+ + |INTERVAL '-5' YEAR | + +----------------------+ + """ + ) + @pytest.mark.parametrize("data", 
[[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data): From 3063df7d6bd50599cad202ce402e872b3a891c21 Mon Sep 17 00:00:00 2001 From: Felix He Date: Fri, 19 Sep 2025 16:20:27 -0700 Subject: [PATCH 04/12] SNOW-2346552: remove dead code and add more coverage --- src/snowflake/snowpark/dataframe.py | 15 --- tests/integ/test_dataframe.py | 152 ++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+), 15 deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index c8098254f7..1332b6202a 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -5143,10 +5143,6 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: ) # Split only on first dash: ["1", "6"] years = str(int(parts[0])) months = str(int(parts[1])) - else: - # Single number case, handle in the elif below - years = "0" - months = remaining else: # For positive: "+1-6" -> split on the second dash remaining = cell[1:] # "1-6" @@ -5156,10 +5152,6 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: ) # Split only on first dash: ["1", "6"] years = str(int(parts[0])) months = str(int(parts[1])) - else: - # Single number case - years = "0" - months = remaining elif cell.startswith("-") or cell.startswith("+") or cell.isdigit(): # Format like "-8" or "15" (single number for months-only or years-only) is_negative = cell.startswith("-") @@ -5179,13 +5171,6 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: years_val = int(cell) res = f"INTERVAL '{years_val}' YEAR" return res.replace("\n", "\\n") - else: - # Shouldn't happen, but fallback - res = cell - return res.replace("\n", "\\n") - else: - res = cell - return res.replace("\n", "\\n") # Format based on start/end field if ( diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 126f411a24..e3c11a16db 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ 
-3000,6 +3000,158 @@ def test_show_interval_formatting(session): """ ) + # Additional edge cases to hit missing lines + + # Positive number without dash for single month (lines 5161-5163, 5171-5173) + df = session.sql("SELECT INTERVAL '+7' MONTH as positive_single_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------+ + |"POSITIVE_SINGLE_MONTH"| + +-----------------------+ + |INTERVAL '7' MONTH | + +-----------------------+ + """ + ) + + # Negative number for single month (lines 5165-5166, 5171-5173) + df = session.sql("SELECT INTERVAL '-12' MONTH as negative_single_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------+ + |"NEGATIVE_SINGLE_MONTH"| + +-----------------------+ + |INTERVAL '-12' MONTH | + +-----------------------+ + """ + ) + + # Positive number without dash for single year (lines 5174-5181) + df = session.sql("SELECT INTERVAL '+4' YEAR as positive_single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------+ + |"POSITIVE_SINGLE_YEAR"| + +----------------------+ + |INTERVAL '4' YEAR | + +----------------------+ + """ + ) + + # Positive number with no sign, single number for fallback (lines 5184-5185) + df = session.sql("SELECT INTERVAL '42' MONTH as plain_number_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"PLAIN_NUMBER_MONTH"| + +--------------------+ + |INTERVAL '42' MONTH | + +--------------------+ + """ + ) + + # Edge case: positive single dash for months (lines 5148-5149) + df = session.sql("SELECT INTERVAL '+8' MONTH as plus_month_edge") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"PLUS_MONTH_EDGE" | + +------------------+ + |INTERVAL '8' MONTH| + +------------------+ + """ + ) + + # Day-time intervals for missing lines + + # Single minute-only interval to hit line 5326 + df = session.sql("SELECT INTERVAL '5' 
MINUTE as single_minute_only") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"SINGLE_MINUTE_ONLY"| + +--------------------+ + |INTERVAL '05' MINUTE| + +--------------------+ + """ + ) + + # Single hour-only interval to hit line 5310 + df = session.sql("SELECT INTERVAL '7' HOUR as single_hour_only") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"SINGLE_HOUR_ONLY"| + +------------------+ + |INTERVAL '07' HOUR| + +------------------+ + """ + ) + + # Single second-only interval to hit lines 5212, 5216-5217 + df = session.sql("SELECT INTERVAL '8' SECOND as single_second_only") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"SINGLE_SECOND_ONLY"| + +--------------------+ + |INTERVAL '08' SECOND| + +--------------------+ + """ + ) + + # Single second with fractional part to hit different branches (lines 5216-5217) + df = session.sql("SELECT INTERVAL '3.456' SECOND as fractional_second_only") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------+ + |"FRACTIONAL_SECOND_ONLY"| + +------------------------+ + |INTERVAL '03.456' SECOND| + +------------------------+ + """ + ) + + # Fractional seconds < 1 to hit line 5271 (single digit second with fractional) + df = session.sql("SELECT INTERVAL '0.789' SECOND as sub_second_frac") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------+ + |"SUB_SECOND_FRAC" | + +------------------------+ + |INTERVAL '00.789' SECOND| + +------------------------+ + """ + ) + + # Minute to second with fractional to hit lines 5336, 5341 + df = session.sql( + "SELECT INTERVAL '8:45.321' MINUTE TO SECOND as minute_to_second_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------------------------+ + |"MINUTE_TO_SECOND_FRAC" | + +-------------------------------------+ + |INTERVAL '08:45.321' MINUTE TO 
SECOND| + +-------------------------------------+ + """ + ) + + # Large minute to second with fractional to hit lines 5336, 5341 with large minutes + df = session.sql( + "SELECT INTERVAL '123:45.678' MINUTE TO SECOND as large_minute_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------------+ + |"LARGE_MINUTE_FRAC" | + +--------------------------------------+ + |INTERVAL '123:45.678' MINUTE TO SECOND| + +--------------------------------------+ + """ + ) + @pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data): From 62abdc7f7c20525b8126fbecec9b309e720be6bb Mon Sep 17 00:00:00 2001 From: Felix He Date: Fri, 19 Sep 2025 18:10:09 -0700 Subject: [PATCH 05/12] SNOW-2346552: removed more dead code --- src/snowflake/snowpark/dataframe.py | 23 --------- tests/integ/test_dataframe.py | 74 ++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 35 deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index 1332b6202a..d65af74110 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -5152,25 +5152,6 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: ) # Split only on first dash: ["1", "6"] years = str(int(parts[0])) months = str(int(parts[1])) - elif cell.startswith("-") or cell.startswith("+") or cell.isdigit(): - # Format like "-8" or "15" (single number for months-only or years-only) - is_negative = cell.startswith("-") - if ( - start_field == YearMonthIntervalType.MONTH - and end_field == YearMonthIntervalType.MONTH - ): - # This is a month-only interval, treat the number as total months - total_months = int(cell) - res = f"INTERVAL '{total_months}' MONTH" - return res.replace("\n", "\\n") - elif ( - start_field == YearMonthIntervalType.YEAR - and end_field == YearMonthIntervalType.YEAR - ): - # This is a year-only interval - 
years_val = int(cell) - res = f"INTERVAL '{years_val}' YEAR" - return res.replace("\n", "\\n") # Format based on start/end field if ( @@ -5196,10 +5177,6 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: if is_negative: total_months = -total_months res = f"INTERVAL '{total_months}' MONTH" - else: - # Fallback to full format - sign_prefix = "-" if is_negative else "" - res = f"INTERVAL '{sign_prefix}{years}-{months}' YEAR TO MONTH" elif isinstance(cell, (str, datetime.timedelta)) and isinstance( datatype, DayTimeIntervalType ): diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index e3c11a16db..f25d82c1c6 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -3000,9 +3000,9 @@ def test_show_interval_formatting(session): """ ) - # Additional edge cases to hit missing lines + # Additional edge cases for comprehensive coverage - # Positive number without dash for single month (lines 5161-5163, 5171-5173) + # Positive number without dash for single month df = session.sql("SELECT INTERVAL '+7' MONTH as positive_single_month") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3014,7 +3014,7 @@ def test_show_interval_formatting(session): """ ) - # Negative number for single month (lines 5165-5166, 5171-5173) + # Negative number for single month df = session.sql("SELECT INTERVAL '-12' MONTH as negative_single_month") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3026,7 +3026,7 @@ def test_show_interval_formatting(session): """ ) - # Positive number without dash for single year (lines 5174-5181) + # Positive number without dash for single year df = session.sql("SELECT INTERVAL '+4' YEAR as positive_single_year") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3038,7 +3038,7 @@ def test_show_interval_formatting(session): """ ) - # Positive number with no sign, single number for fallback (lines 5184-5185) + # Positive number with no sign, single number for 
fallback df = session.sql("SELECT INTERVAL '42' MONTH as plain_number_month") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3050,7 +3050,7 @@ def test_show_interval_formatting(session): """ ) - # Edge case: positive single dash for months (lines 5148-5149) + # Edge case: positive single dash for months df = session.sql("SELECT INTERVAL '+8' MONTH as plus_month_edge") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3062,9 +3062,9 @@ def test_show_interval_formatting(session): """ ) - # Day-time intervals for missing lines + # Day-time intervals for additional coverage - # Single minute-only interval to hit line 5326 + # Single minute-only interval df = session.sql("SELECT INTERVAL '5' MINUTE as single_minute_only") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3076,7 +3076,7 @@ def test_show_interval_formatting(session): """ ) - # Single hour-only interval to hit line 5310 + # Single hour-only interval df = session.sql("SELECT INTERVAL '7' HOUR as single_hour_only") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3088,7 +3088,7 @@ def test_show_interval_formatting(session): """ ) - # Single second-only interval to hit lines 5212, 5216-5217 + # Single second-only interval df = session.sql("SELECT INTERVAL '8' SECOND as single_second_only") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3112,7 +3112,7 @@ def test_show_interval_formatting(session): """ ) - # Fractional seconds < 1 to hit line 5271 (single digit second with fractional) + # Fractional seconds < 1 df = session.sql("SELECT INTERVAL '0.789' SECOND as sub_second_frac") assert df._show_string_spark(truncate=False) == dedent( """\ @@ -3124,7 +3124,7 @@ def test_show_interval_formatting(session): """ ) - # Minute to second with fractional to hit lines 5336, 5341 + # Minute to second with fractional df = session.sql( "SELECT INTERVAL '8:45.321' MINUTE TO SECOND as minute_to_second_frac" ) @@ -3152,6 +3152,56 @@ def 
test_show_interval_formatting(session): """ ) + # Additional test cases for interval formatting coverage + + # Test DAY TO MINUTE formatting + df = session.sql("SELECT INTERVAL '2 05:30' DAY TO MINUTE as day_to_minute") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------+ + |"DAY_TO_MINUTE" | + +--------------------------------+ + |INTERVAL '2 05:30' DAY TO MINUTE| + +--------------------------------+ + """ + ) + + # Test MINUTE TO SECOND with integer seconds + df = session.sql("SELECT INTERVAL '15:30' MINUTE TO SECOND as minute_to_second_int") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------------+ + |"MINUTE_TO_SECOND_INT" | + +---------------------------------+ + |INTERVAL '15:30' MINUTE TO SECOND| + +---------------------------------+ + """ + ) + + # Test single field interval + df = session.sql("SELECT INTERVAL '5' HOUR as single_hour_field") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------+ + |"SINGLE_HOUR_FIELD"| + +-------------------+ + |INTERVAL '05' HOUR | + +-------------------+ + """ + ) + + # Test multi-field interval + df = session.sql("SELECT INTERVAL '2:30:45' HOUR TO SECOND as multi_field") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"MULTI_FIELD" | + +----------------------------------+ + |INTERVAL '02:30:45' HOUR TO SECOND| + +----------------------------------+ + """ + ) + @pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data): From 69adf6df885c268b21ed76b4e4741a773b582f8c Mon Sep 17 00:00:00 2001 From: Felix He Date: Fri, 19 Sep 2025 22:16:41 -0700 Subject: [PATCH 06/12] SNOW-2346552: more coverage --- src/snowflake/snowpark/dataframe.py | 35 +++++++++++++---------------- tests/integ/test_dataframe.py | 13 ++++++++++- 2 files changed, 27 insertions(+), 21 
deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index d65af74110..122db6ee52 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -5268,9 +5268,7 @@ def format_day_time_interval(total_seconds_float: float) -> str: mins = int(remaining_after_hours) // 60 secs = remaining_after_hours - (mins * 60) - if end_field == DayTimeIntervalType.HOUR: - return f"{sign}{total_hours:02d}" - elif end_field == DayTimeIntervalType.MINUTE: + if end_field == DayTimeIntervalType.MINUTE: return f"{sign}{total_hours:02d}:{mins:02d}" else: # TO SECOND if secs == int(secs): @@ -5284,23 +5282,20 @@ def format_day_time_interval(total_seconds_float: float) -> str: total_minutes = int(abs_total_seconds) // 60 remaining_secs = abs_total_seconds - (total_minutes * 60) - if end_field == DayTimeIntervalType.MINUTE: - return f"{sign}{total_minutes}" - else: # TO SECOND - if remaining_secs == int(remaining_secs): - minutes_str = ( - f"{total_minutes:02d}" - if total_minutes < 10 - else f"{total_minutes}" - ) - return f"{sign}{minutes_str}:{int(remaining_secs):02d}" - else: - minutes_str = ( - f"{total_minutes:02d}" - if total_minutes < 10 - else f"{total_minutes}" - ) - return f"{sign}{minutes_str}:{remaining_secs:06.3f}" + if remaining_secs == int(remaining_secs): + minutes_str = ( + f"{total_minutes:02d}" + if total_minutes < 10 + else f"{total_minutes}" + ) + return f"{sign}{minutes_str}:{int(remaining_secs):02d}" + else: + minutes_str = ( + f"{total_minutes:02d}" + if total_minutes < 10 + else f"{total_minutes}" + ) + return f"{sign}{minutes_str}:{remaining_secs:06.3f}" else: # Fallback to basic format if seconds == int(seconds): diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index f25d82c1c6..0a9ef8e266 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -3138,7 +3138,7 @@ def test_show_interval_formatting(session): """ ) - # Large minute to 
second with fractional to hit lines 5336, 5341 with large minutes + # Large minute to second with fractional df = session.sql( "SELECT INTERVAL '123:45.678' MINUTE TO SECOND as large_minute_frac" ) @@ -3202,6 +3202,17 @@ def test_show_interval_formatting(session): """ ) + df = session.sql("SELECT INTERVAL '5.000' SECOND as zero_frac_test") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"ZERO_FRAC_TEST" | + +--------------------+ + |INTERVAL '05' SECOND| + +--------------------+ + """ + ) + @pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data): From aff9068c620f4c3fa65a68188b2472e0ad7867eb Mon Sep 17 00:00:00 2001 From: Felix He Date: Mon, 22 Sep 2025 10:33:07 -0700 Subject: [PATCH 07/12] SNOW-2346552: removed handles cases --- src/snowflake/snowpark/dataframe.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index 122db6ee52..19f3d32942 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -5130,6 +5130,11 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: cell.startswith("+") or cell.startswith("-") ) and "-" in cell[1:] + # Default initialization + years = "0" + months = "0" + is_negative = False + if has_internal_dash: # Format like "+1-03" or "-1-03" or "-1-6" (compound year-month) is_negative = cell.startswith("-") @@ -5296,12 +5301,6 @@ def format_day_time_interval(total_seconds_float: float) -> str: else f"{total_minutes}" ) return f"{sign}{minutes_str}:{remaining_secs:06.3f}" - else: - # Fallback to basic format - if seconds == int(seconds): - return f"{sign}{days} {hours:02d}:{minutes:02d}:{int(seconds):02d}" - else: - return f"{sign}{days} {hours:02d}:{minutes:02d}:{seconds:06.3f}" if isinstance(cell, datetime.timedelta): total_seconds_float = cell.total_seconds() From 
a22b55b36599f68c2d8d26556b6af5abec4d5c85 Mon Sep 17 00:00:00 2001 From: Felix He Date: Mon, 22 Sep 2025 15:51:18 -0700 Subject: [PATCH 08/12] SNOW-2346552: extracted and cleaned up code --- .../snowpark/_internal/type_utils.py | 210 ++++++++++++++++++ src/snowflake/snowpark/dataframe.py | 204 +---------------- 2 files changed, 216 insertions(+), 198 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index bf06fa8e7d..1e0ee0de23 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -1388,6 +1388,216 @@ def most_permissive_type(datatype: DataType) -> DataType: return copy.deepcopy(datatype) +def format_year_month_interval_for_display( + cell: str, start_field: int, end_field: int +) -> str: + """ + Format a YearMonthIntervalType string for display in _show_string_spark(). + + Args: + cell: The string representation of the interval (e.g., "+1-6", "-2-03", "24") + start_field: Start field constant from YearMonthIntervalType (YEAR=0, MONTH=1) + end_field: End field constant from YearMonthIntervalType (YEAR=0, MONTH=1) + + Returns: + Formatted interval string (e.g., "INTERVAL '1-6' YEAR TO MONTH", "INTERVAL '24' MONTH") + """ + # Handle different input formats + # Check for compound format (year-month) vs simple number + has_internal_dash = (cell.startswith("+") or cell.startswith("-")) and "-" in cell[ + 1: + ] + + # Default initialization + years = "0" + months = "0" + is_negative = False + + if has_internal_dash: + # Format like "+1-03" or "-1-03" or "-1-6" (compound year-month) + is_negative = cell.startswith("-") + + # Remove the sign prefix and parse the remaining "year-month" part + remaining = cell[1:] # Remove the "+" or "-" prefix: "1-6" + if "-" in remaining: + parts = remaining.split("-", 1) # Split only on first dash: ["1", "6"] + years = str(int(parts[0])) + months = str(int(parts[1])) + + # Format based on start/end field + 
sign_prefix = "-" if is_negative else "" + + if ( + start_field == YearMonthIntervalType.YEAR + and end_field == YearMonthIntervalType.MONTH + ): + # Full range: YEAR TO MONTH + return f"INTERVAL '{sign_prefix}{years}-{months}' YEAR TO MONTH" + elif ( + start_field == YearMonthIntervalType.YEAR + and end_field == YearMonthIntervalType.YEAR + ): + # Years only: YEAR + return f"INTERVAL '{sign_prefix}{years}' YEAR" + elif ( + start_field == YearMonthIntervalType.MONTH + and end_field == YearMonthIntervalType.MONTH + ): + # Months only: MONTH - calculate total months + total_months = int(years) * 12 + int(months) + if is_negative: + total_months = -total_months + return f"INTERVAL '{total_months}' MONTH" + + +def format_day_time_interval_for_display(cell, start_field: int, end_field: int) -> str: + """ + Format a DayTimeIntervalType value for display in _show_string_spark(). + + Args: + cell: Either a datetime.timedelta object or string representation + start_field: Start field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3) + end_field: End field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3) + + Returns: + Formatted interval string (e.g., "INTERVAL '01:30:45' HOUR TO SECOND") + """ + import datetime + + if isinstance(cell, datetime.timedelta): + total_seconds_float = cell.total_seconds() + interval_str = format_day_time_interval( + total_seconds_float, start_field, end_field + ) + elif isinstance(cell, str): + if "INTERVAL" not in cell: + # Raw string that needs to be formatted + interval_str = cell + else: + # This is already a formatted interval string, use as-is + return cell.replace("\n", "\\n") + else: + return str(cell).replace("\n", "\\n") + + field_names = { + DayTimeIntervalType.DAY: "DAY", + DayTimeIntervalType.HOUR: "HOUR", + DayTimeIntervalType.MINUTE: "MINUTE", + DayTimeIntervalType.SECOND: "SECOND", + } + + start_name = field_names.get(start_field, "DAY") + end_name = field_names.get(end_field, "SECOND") + + 
if start_field == end_field: + return f"INTERVAL '{interval_str}' {start_name}" + else: + return f"INTERVAL '{interval_str}' {start_name} TO {end_name}" + + +def format_day_time_interval( + total_seconds_float: float, start_field: int, end_field: int +) -> str: + """ + Format a DayTimeIntervalType value for display in _show_string_spark(). + + Args: + total_seconds_float: Total seconds as a float (can be negative) + start_field: Start field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3) + end_field: End field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3) + + Returns: + Formatted interval string (e.g., "01:30:45", "2 12:30", "05", etc.) + """ + is_negative = total_seconds_float < 0 + abs_total_seconds = abs(total_seconds_float) + + days = int(abs_total_seconds) // 86400 + remaining_seconds = abs_total_seconds - (days * 86400) + hours = int(remaining_seconds) // 3600 + remaining_after_hours = remaining_seconds - (hours * 3600) + minutes = int(remaining_after_hours) // 60 + seconds = remaining_after_hours - (minutes * 60) + + sign = "-" if is_negative else "" + + def format_with_leading_zero(value: int) -> str: + """Format integer with leading zero if < 10, otherwise as-is""" + return f"{value:02d}" if value < 10 else f"{value}" + + # For single field intervals, extract just that component + if start_field == end_field: + if start_field == DayTimeIntervalType.DAY: + return f"{sign}{days}" + elif start_field == DayTimeIntervalType.HOUR: + total_hours = int(abs_total_seconds) // 3600 + return f"{sign}{format_with_leading_zero(total_hours)}" + elif start_field == DayTimeIntervalType.MINUTE: + total_minutes = int(abs_total_seconds) // 60 + return f"{sign}{format_with_leading_zero(total_minutes)}" + elif start_field == DayTimeIntervalType.SECOND: + # Handle fractional seconds - use total seconds, not just remainder + if abs_total_seconds == int(abs_total_seconds): + total_secs_int = int(abs_total_seconds) + return 
f"{sign}{format_with_leading_zero(total_secs_int)}" + else: + # For fractional seconds, format with leading zero if < 10 + if abs_total_seconds < 10: + # Format with leading zero: split into integer and fractional parts + integer_part = int(abs_total_seconds) + fractional_part = abs_total_seconds - integer_part + if fractional_part == 0: + return f"{sign}{integer_part:02d}" + else: + # Format fractional part and remove leading '0.' + frac_str = f"{fractional_part:.6f}"[2:].rstrip("0") + return f"{sign}{integer_part:02d}.{frac_str}" + else: + return f"{sign}{abs_total_seconds:g}" + + # For multi-field intervals, format based on start/end fields + if start_field == DayTimeIntervalType.DAY: + # DAY TO X format: truncate based on end_field + if end_field == DayTimeIntervalType.HOUR: + # DAY TO HOUR: "D HH" + return f"{sign}{days} {format_with_leading_zero(hours)}" + elif end_field == DayTimeIntervalType.MINUTE: + # DAY TO MINUTE: "D HH:MM" + hours_str = format_with_leading_zero(hours) + return f"{sign}{days} {hours_str}:{minutes:02d}" + else: + # DAY TO SECOND: "D HH:MM:SS" + hours_str = format_with_leading_zero(hours) + if seconds == int(seconds): + return f"{sign}{days} {hours_str}:{minutes:02d}:{int(seconds):02d}" + else: + return f"{sign}{days} {hours_str}:{minutes:02d}:{seconds:06.3f}" + elif start_field == DayTimeIntervalType.HOUR: + # HOUR TO X format: "HH:MM:SS" (no days) + total_hours = int(abs_total_seconds) // 3600 + remaining_after_hours = abs_total_seconds - (total_hours * 3600) + mins = int(remaining_after_hours) // 60 + secs = remaining_after_hours - (mins * 60) + + if end_field == DayTimeIntervalType.MINUTE: + return f"{sign}{format_with_leading_zero(total_hours)}:{mins:02d}" + else: # TO SECOND + if secs == int(secs): + return f"{sign}{format_with_leading_zero(total_hours)}:{mins:02d}:{int(secs):02d}" + else: + return f"{sign}{format_with_leading_zero(total_hours)}:{mins:02d}:{secs:06.3f}" + elif start_field == DayTimeIntervalType.MINUTE: + # MINUTE TO 
X format: "MM:SS" (no days or hours) + total_minutes = int(abs_total_seconds) // 60 + remaining_secs = abs_total_seconds - (total_minutes * 60) + + minutes_str = format_with_leading_zero(total_minutes) + if remaining_secs == int(remaining_secs): + return f"{sign}{minutes_str}:{int(remaining_secs):02d}" + else: + return f"{sign}{minutes_str}:{remaining_secs:06.3f}" + + # Type hints ColumnOrName = Union["snowflake.snowpark.column.Column", str] ColumnOrLiteralStr = Union["snowflake.snowpark.column.Column", str] diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index 19f3d32942..0ad8c00420 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -137,6 +137,8 @@ ColumnOrName, ColumnOrSqlExpr, LiteralType, + format_day_time_interval_for_display, + format_year_month_interval_for_display, snow_type_to_dtype_str, type_string_to_type_object, ) @@ -5118,213 +5120,19 @@ def format_timestamp_spark(dt: datetime.datetime) -> str: else: res = str(cell).replace("e+", "E").replace("e-", "E-") elif isinstance(cell, str) and isinstance(datatype, YearMonthIntervalType): - # Determine the appropriate range based on datatype fields start_field = getattr( datatype, "start_field", YearMonthIntervalType.YEAR ) end_field = getattr(datatype, "end_field", YearMonthIntervalType.MONTH) - - # Handle different input formats - # Check for compound format (year-month) vs simple number - has_internal_dash = ( - cell.startswith("+") or cell.startswith("-") - ) and "-" in cell[1:] - - # Default initialization - years = "0" - months = "0" - is_negative = False - - if has_internal_dash: - # Format like "+1-03" or "-1-03" or "-1-6" (compound year-month) - is_negative = cell.startswith("-") - # Handle the case where there might be multiple dashes - if is_negative: - # For negative: "-1-6" -> split on the second dash - remaining = cell[1:] # "1-6" - if "-" in remaining: - parts = remaining.split( - "-", 1 - ) # Split only on first 
dash: ["1", "6"] - years = str(int(parts[0])) - months = str(int(parts[1])) - else: - # For positive: "+1-6" -> split on the second dash - remaining = cell[1:] # "1-6" - if "-" in remaining: - parts = remaining.split( - "-", 1 - ) # Split only on first dash: ["1", "6"] - years = str(int(parts[0])) - months = str(int(parts[1])) - - # Format based on start/end field - if ( - start_field == YearMonthIntervalType.YEAR - and end_field == YearMonthIntervalType.MONTH - ): - # Full range: YEAR TO MONTH - sign_prefix = "-" if is_negative else "" - res = f"INTERVAL '{sign_prefix}{years}-{months}' YEAR TO MONTH" - elif ( - start_field == YearMonthIntervalType.YEAR - and end_field == YearMonthIntervalType.YEAR - ): - # Years only: YEAR - sign_prefix = "-" if is_negative else "" - res = f"INTERVAL '{sign_prefix}{years}' YEAR" - elif ( - start_field == YearMonthIntervalType.MONTH - and end_field == YearMonthIntervalType.MONTH - ): - # Months only: MONTH - calculate total months - total_months = int(years) * 12 + int(months) - if is_negative: - total_months = -total_months - res = f"INTERVAL '{total_months}' MONTH" + res = format_year_month_interval_for_display( + cell, start_field, end_field + ) elif isinstance(cell, (str, datetime.timedelta)) and isinstance( datatype, DayTimeIntervalType ): start_field = getattr(datatype, "start_field", DayTimeIntervalType.DAY) end_field = getattr(datatype, "end_field", DayTimeIntervalType.SECOND) - - def format_day_time_interval(total_seconds_float: float) -> str: - is_negative = total_seconds_float < 0 - abs_total_seconds = abs(total_seconds_float) - - days = int(abs_total_seconds) // 86400 - remaining_seconds = abs_total_seconds - (days * 86400) - hours = int(remaining_seconds) // 3600 - remaining_after_hours = remaining_seconds - (hours * 3600) - minutes = int(remaining_after_hours) // 60 - seconds = remaining_after_hours - (minutes * 60) - - sign = "-" if is_negative else "" - - # For single field intervals, extract just that component - 
if start_field == end_field: - if start_field == DayTimeIntervalType.DAY: - return f"{sign}{days}" - elif start_field == DayTimeIntervalType.HOUR: - total_hours = int(abs_total_seconds) // 3600 - return ( - f"{sign}{total_hours:02d}" - if total_hours < 10 - else f"{sign}{total_hours}" - ) - elif start_field == DayTimeIntervalType.MINUTE: - total_minutes = int(abs_total_seconds) // 60 - return ( - f"{sign}{total_minutes:02d}" - if total_minutes < 10 - else f"{sign}{total_minutes}" - ) - elif start_field == DayTimeIntervalType.SECOND: - # Handle fractional seconds - use total seconds, not just remainder - if abs_total_seconds == int(abs_total_seconds): - total_secs_int = int(abs_total_seconds) - return ( - f"{sign}{total_secs_int:02d}" - if total_secs_int < 10 - else f"{sign}{total_secs_int}" - ) - else: - # For fractional seconds, format with leading zero if < 10 - if abs_total_seconds < 10: - # Format with leading zero: split into integer and fractional parts - integer_part = int(abs_total_seconds) - fractional_part = abs_total_seconds - integer_part - if fractional_part == 0: - return f"{sign}{integer_part:02d}" - else: - # Format fractional part and remove leading '0.' 
- frac_str = f"{fractional_part:.6f}"[2:].rstrip( - "0" - ) - return f"{sign}{integer_part:02d}.{frac_str}" - else: - return f"{sign}{abs_total_seconds:g}" - - # For multi-field intervals, format based on start/end fields - if start_field == DayTimeIntervalType.DAY: - # DAY TO X format: truncate based on end_field - if end_field == DayTimeIntervalType.HOUR: - # DAY TO HOUR: "D HH" - return ( - f"{sign}{days} {hours:02d}" - if hours < 10 - else f"{sign}{days} {hours}" - ) - elif end_field == DayTimeIntervalType.MINUTE: - # DAY TO MINUTE: "D HH:MM" - hours_str = f"{hours:02d}" if hours < 10 else f"{hours}" - return f"{sign}{days} {hours_str}:{minutes:02d}" - else: - # DAY TO SECOND: "D HH:MM:SS" - hours_str = f"{hours:02d}" if hours < 10 else f"{hours}" - if seconds == int(seconds): - return f"{sign}{days} {hours_str}:{minutes:02d}:{int(seconds):02d}" - else: - return f"{sign}{days} {hours_str}:{minutes:02d}:{seconds:06.3f}" - elif start_field == DayTimeIntervalType.HOUR: - # HOUR TO X format: "HH:MM:SS" (no days) - total_hours = int(abs_total_seconds) // 3600 - remaining_after_hours = abs_total_seconds - (total_hours * 3600) - mins = int(remaining_after_hours) // 60 - secs = remaining_after_hours - (mins * 60) - - if end_field == DayTimeIntervalType.MINUTE: - return f"{sign}{total_hours:02d}:{mins:02d}" - else: # TO SECOND - if secs == int(secs): - return f"{sign}{total_hours:02d}:{mins:02d}:{int(secs):02d}" - else: - return ( - f"{sign}{total_hours:02d}:{mins:02d}:{secs:06.3f}" - ) - elif start_field == DayTimeIntervalType.MINUTE: - # MINUTE TO X format: "MM:SS" (no days or hours) - total_minutes = int(abs_total_seconds) // 60 - remaining_secs = abs_total_seconds - (total_minutes * 60) - - if remaining_secs == int(remaining_secs): - minutes_str = ( - f"{total_minutes:02d}" - if total_minutes < 10 - else f"{total_minutes}" - ) - return f"{sign}{minutes_str}:{int(remaining_secs):02d}" - else: - minutes_str = ( - f"{total_minutes:02d}" - if total_minutes < 10 - else 
f"{total_minutes}" - ) - return f"{sign}{minutes_str}:{remaining_secs:06.3f}" - - if isinstance(cell, datetime.timedelta): - total_seconds_float = cell.total_seconds() - interval_str = format_day_time_interval(total_seconds_float) - elif isinstance(cell, str) and "INTERVAL" not in cell: - interval_str = cell - else: - res = cell - return res.replace("\n", "\\n") - - field_names = { - DayTimeIntervalType.DAY: "DAY", - DayTimeIntervalType.HOUR: "HOUR", - DayTimeIntervalType.MINUTE: "MINUTE", - DayTimeIntervalType.SECOND: "SECOND", - } - - start_name = field_names.get(start_field, "DAY") - end_name = field_names.get(end_field, "SECOND") - - if start_field == end_field: - res = f"INTERVAL '{interval_str}' {start_name}" - else: - res = f"INTERVAL '{interval_str}' {start_name} TO {end_name}" + res = format_day_time_interval_for_display(cell, start_field, end_field) else: res = str(cell) return res.replace("\n", "\\n") From 6017c036a2b3fad3d8125db90bee5117c1f01bc0 Mon Sep 17 00:00:00 2001 From: Felix He Date: Tue, 23 Sep 2025 10:47:00 -0700 Subject: [PATCH 09/12] SNOW-2346552: address comments --- src/snowflake/snowpark/_internal/type_utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 1e0ee0de23..4cbd6229d7 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -1450,7 +1450,9 @@ def format_year_month_interval_for_display( return f"INTERVAL '{total_months}' MONTH" -def format_day_time_interval_for_display(cell, start_field: int, end_field: int) -> str: +def format_day_time_interval_for_display( + cell: Union[str, datetime.timedelta], start_field: int, end_field: int +) -> str: """ Format a DayTimeIntervalType value for display in _show_string_spark(). 
@@ -1462,8 +1464,6 @@ def format_day_time_interval_for_display(cell, start_field: int, end_field: int) Returns: Formatted interval string (e.g., "INTERVAL '01:30:45' HOUR TO SECOND") """ - import datetime - if isinstance(cell, datetime.timedelta): total_seconds_float = cell.total_seconds() interval_str = format_day_time_interval( @@ -1557,17 +1557,16 @@ def format_with_leading_zero(value: int) -> str: # For multi-field intervals, format based on start/end fields if start_field == DayTimeIntervalType.DAY: + hours_str = format_with_leading_zero(hours) # DAY TO X format: truncate based on end_field if end_field == DayTimeIntervalType.HOUR: # DAY TO HOUR: "D HH" - return f"{sign}{days} {format_with_leading_zero(hours)}" + return f"{sign}{days} {hours_str}" elif end_field == DayTimeIntervalType.MINUTE: # DAY TO MINUTE: "D HH:MM" - hours_str = format_with_leading_zero(hours) return f"{sign}{days} {hours_str}:{minutes:02d}" else: # DAY TO SECOND: "D HH:MM:SS" - hours_str = format_with_leading_zero(hours) if seconds == int(seconds): return f"{sign}{days} {hours_str}:{minutes:02d}:{int(seconds):02d}" else: From 84bef45b8876d0905cb34827710b6f11b104d371 Mon Sep 17 00:00:00 2001 From: Felix He Date: Tue, 23 Sep 2025 12:42:35 -0700 Subject: [PATCH 10/12] SNOW-2346552: remove dead code --- src/snowflake/snowpark/_internal/type_utils.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 4cbd6229d7..d1d7fb6005 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -1470,12 +1470,8 @@ def format_day_time_interval_for_display( total_seconds_float, start_field, end_field ) elif isinstance(cell, str): - if "INTERVAL" not in cell: - # Raw string that needs to be formatted - interval_str = cell - else: - # This is already a formatted interval string, use as-is - return cell.replace("\n", "\\n") + # Raw 
string that needs to be formatted (e.g., "1 01:01:01.7878") + interval_str = cell else: return str(cell).replace("\n", "\\n") From d5f3f54f4c9c74749fe60d7305a1b05e61d6d728 Mon Sep 17 00:00:00 2001 From: Felix He Date: Tue, 23 Sep 2025 20:58:20 -0700 Subject: [PATCH 11/12] SNOW-2346552: add more edge cases --- .../snowpark/_internal/type_utils.py | 116 ++++++++++--- tests/integ/test_dataframe.py | 160 ++++++++++++++++++ 2 files changed, 253 insertions(+), 23 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index d1d7fb6005..350b1b5e36 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -1465,9 +1465,27 @@ def format_day_time_interval_for_display( Formatted interval string (e.g., "INTERVAL '01:30:45' HOUR TO SECOND") """ if isinstance(cell, datetime.timedelta): - total_seconds_float = cell.total_seconds() + # Heuristic: Use Decimal for extreme values near 64-bit boundary, float for normal values + total_seconds_approx = cell.total_seconds() + + # Check if we're approaching values where float precision becomes problematic + # Be conservative: use Decimal for large values to ensure precision + # This corresponds to roughly 3,170 years - normal use cases are well below this + if ( + abs(total_seconds_approx) > 1e11 + ): # ~100 gigaseconds, very conservative threshold + # Use Decimal arithmetic for precise conversion to avoid floating-point precision loss + total_seconds_value = ( + decimal.Decimal(cell.days) * decimal.Decimal(86400) + + decimal.Decimal(cell.seconds) + + decimal.Decimal(cell.microseconds) / decimal.Decimal(1_000_000) + ) + else: + # Use fast float path for normal values + total_seconds_value = cell.total_seconds() + interval_str = format_day_time_interval( - total_seconds_float, start_field, end_field + total_seconds_value, start_field, end_field ) elif isinstance(cell, str): # Raw string that needs to be formatted 
(e.g., "1 01:01:01.7878") @@ -1492,28 +1510,42 @@ def format_day_time_interval_for_display( def format_day_time_interval( - total_seconds_float: float, start_field: int, end_field: int + total_seconds_value: Union[float, decimal.Decimal], start_field: int, end_field: int ) -> str: """ Format a DayTimeIntervalType value for display in _show_string_spark(). Args: - total_seconds_float: Total seconds as a float (can be negative) + total_seconds_value: Total seconds as either float or Decimal (can be negative) start_field: Start field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3) end_field: End field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3) Returns: Formatted interval string (e.g., "01:30:45", "2 12:30", "05", etc.) """ - is_negative = total_seconds_float < 0 - abs_total_seconds = abs(total_seconds_float) + is_negative = total_seconds_value < 0 + abs_total_seconds = abs(total_seconds_value) + + # Determine if we're working with Decimal for high-precision arithmetic + use_decimal = isinstance(total_seconds_value, decimal.Decimal) days = int(abs_total_seconds) // 86400 remaining_seconds = abs_total_seconds - (days * 86400) hours = int(remaining_seconds) // 3600 remaining_after_hours = remaining_seconds - (hours * 3600) minutes = int(remaining_after_hours) // 60 - seconds = remaining_after_hours - (minutes * 60) + + # Calculate seconds more precisely to avoid floating-point accumulation errors + # Use the original total and subtract the calculated day/hour/minute components + if use_decimal: + total_non_second_time = ( + decimal.Decimal(days * 86400) + + decimal.Decimal(hours * 3600) + + decimal.Decimal(minutes * 60) + ) + else: + total_non_second_time = (days * 86400) + (hours * 3600) + (minutes * 60) + seconds = abs_total_seconds - total_non_second_time sign = "-" if is_negative else "" @@ -1521,6 +1553,53 @@ def format_with_leading_zero(value: int) -> str: """Format integer with leading zero if < 10, otherwise 
as-is""" return f"{value:02d}" if value < 10 else f"{value}" + def format_seconds_with_precision( + seconds_value: Union[float, decimal.Decimal] + ) -> str: + """Format seconds with full precision, preserving trailing zeros for proper padding""" + if isinstance(seconds_value, decimal.Decimal): + # Use Decimal precision formatting + if seconds_value == int(seconds_value): + return f"{int(seconds_value):02d}" + else: + # For fractional seconds, ensure proper leading zero padding + integer_part = int(seconds_value) + if integer_part < 10: + # Format with leading zero for the integer part + formatted = f"{seconds_value:.6f}".rstrip("0") + if formatted.endswith("."): + return f"{integer_part:02d}" + # Replace the integer part with zero-padded version + decimal_part = formatted.split(".", 1)[1] + return f"{integer_part:02d}.{decimal_part}" + else: + # For >= 10, use normal formatting + formatted = f"{seconds_value:.6f}".rstrip("0") + if formatted.endswith("."): + return f"{integer_part}" + return formatted + else: + # Float precision formatting (original logic) + if seconds_value == int(seconds_value): + return f"{int(seconds_value):02d}" + else: + # For fractional seconds, ensure proper leading zero padding + integer_part = int(seconds_value) + if integer_part < 10: + # Format with leading zero for the integer part + formatted = f"{seconds_value:.6f}".rstrip("0") + if formatted.endswith("."): + return f"{integer_part:02d}" + # Replace the integer part with zero-padded version + decimal_part = formatted.split(".", 1)[1] + return f"{integer_part:02d}.{decimal_part}" + else: + # For >= 10, use normal formatting + formatted = f"{seconds_value:.6f}".rstrip("0") + if formatted.endswith("."): + return f"{integer_part}" + return formatted + # For single field intervals, extract just that component if start_field == end_field: if start_field == DayTimeIntervalType.DAY: @@ -1537,19 +1616,8 @@ def format_with_leading_zero(value: int) -> str: total_secs_int = int(abs_total_seconds) 
return f"{sign}{format_with_leading_zero(total_secs_int)}" else: - # For fractional seconds, format with leading zero if < 10 - if abs_total_seconds < 10: - # Format with leading zero: split into integer and fractional parts - integer_part = int(abs_total_seconds) - fractional_part = abs_total_seconds - integer_part - if fractional_part == 0: - return f"{sign}{integer_part:02d}" - else: - # Format fractional part and remove leading '0.' - frac_str = f"{fractional_part:.6f}"[2:].rstrip("0") - return f"{sign}{integer_part:02d}.{frac_str}" - else: - return f"{sign}{abs_total_seconds:g}" + # Use unified formatting that handles both float and Decimal + return f"{sign}{format_seconds_with_precision(abs_total_seconds)}" # For multi-field intervals, format based on start/end fields if start_field == DayTimeIntervalType.DAY: @@ -1566,7 +1634,7 @@ def format_with_leading_zero(value: int) -> str: if seconds == int(seconds): return f"{sign}{days} {hours_str}:{minutes:02d}:{int(seconds):02d}" else: - return f"{sign}{days} {hours_str}:{minutes:02d}:{seconds:06.3f}" + return f"{sign}{days} {hours_str}:{minutes:02d}:{format_seconds_with_precision(seconds)}" elif start_field == DayTimeIntervalType.HOUR: # HOUR TO X format: "HH:MM:SS" (no days) total_hours = int(abs_total_seconds) // 3600 @@ -1580,7 +1648,7 @@ def format_with_leading_zero(value: int) -> str: if secs == int(secs): return f"{sign}{format_with_leading_zero(total_hours)}:{mins:02d}:{int(secs):02d}" else: - return f"{sign}{format_with_leading_zero(total_hours)}:{mins:02d}:{secs:06.3f}" + return f"{sign}{format_with_leading_zero(total_hours)}:{mins:02d}:{format_seconds_with_precision(secs)}" elif start_field == DayTimeIntervalType.MINUTE: # MINUTE TO X format: "MM:SS" (no days or hours) total_minutes = int(abs_total_seconds) // 60 @@ -1590,7 +1658,9 @@ def format_with_leading_zero(value: int) -> str: if remaining_secs == int(remaining_secs): return f"{sign}{minutes_str}:{int(remaining_secs):02d}" else: - return 
f"{sign}{minutes_str}:{remaining_secs:06.3f}" + return ( + f"{sign}{minutes_str}:{format_seconds_with_precision(remaining_secs)}" + ) # Type hints diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 0a9ef8e266..e6ae56fc12 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -3213,6 +3213,166 @@ def test_show_interval_formatting(session): """ ) + # === Edge Cases for Decimal Precision and Large Values === + + # Large positive DAY TO HOUR intervals + df = session.sql("SELECT INTERVAL '106751991 04' DAY TO HOUR as large_day_to_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------+ + |"LARGE_DAY_TO_HOUR" | + +-----------------------------------+ + |INTERVAL '106751991 04' DAY TO HOUR| + +-----------------------------------+ + """ + ) + + # Large positive DAY TO MINUTE intervals + df = session.sql( + "SELECT INTERVAL '106751991 04:00' DAY TO MINUTE as large_day_to_minute" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------------+ + |"LARGE_DAY_TO_MINUTE" | + +----------------------------------------+ + |INTERVAL '106751991 04:00' DAY TO MINUTE| + +----------------------------------------+ + """ + ) + + # Large positive DAY TO SECOND intervals with high precision fractional seconds + df = session.sql( + "SELECT INTERVAL '106751991 04:00:54.775807' DAY TO SECOND as large_day_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------------------------+ + |"LARGE_DAY_TO_SECOND" | + +--------------------------------------------------+ + |INTERVAL '106751991 04:00:54.775807' DAY TO SECOND| + +--------------------------------------------------+ + """ + ) + + # Large negative DAY TO HOUR intervals + df = session.sql( + "SELECT INTERVAL '-106751991 04' DAY TO HOUR as large_negative_day_to_hour" + ) + assert df._show_string_spark(truncate=False) == dedent( 
+ """\ + +------------------------------------+ + |"LARGE_NEGATIVE_DAY_TO_HOUR" | + +------------------------------------+ + |INTERVAL '-106751991 04' DAY TO HOUR| + +------------------------------------+ + """ + ) + + # Large negative DAY TO MINUTE intervals + df = session.sql( + "SELECT INTERVAL '-106751991 04:00' DAY TO MINUTE as large_negative_day_to_minute" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------------+ + |"LARGE_NEGATIVE_DAY_TO_MINUTE" | + +-----------------------------------------+ + |INTERVAL '-106751991 04:00' DAY TO MINUTE| + +-----------------------------------------+ + """ + ) + + # Large negative DAY TO SECOND intervals with high precision fractional seconds + df = session.sql( + "SELECT INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND as large_negative_day_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------------------------------+ + |"LARGE_NEGATIVE_DAY_TO_SECOND" | + +---------------------------------------------------+ + |INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND| + +---------------------------------------------------+ + """ + ) + + # Extremely large positive YEAR intervals + df = session.sql("SELECT INTERVAL '178956970' YEAR as extremely_large_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------------+ + |"EXTREMELY_LARGE_YEAR" | + +-------------------------+ + |INTERVAL '178956970' YEAR| + +-------------------------+ + """ + ) + + # Extremely large negative YEAR intervals + df = session.sql( + "SELECT INTERVAL '-178956970' YEAR as extremely_large_negative_year" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------------------+ + |"EXTREMELY_LARGE_NEGATIVE_YEAR"| + +-------------------------------+ + |INTERVAL '-178956970' YEAR | + +-------------------------------+ + """ + ) + + # Large positive DAY intervals + df = 
session.sql("SELECT INTERVAL '106751991' DAY as extremely_large_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------+ + |"EXTREMELY_LARGE_DAY" | + +------------------------+ + |INTERVAL '106751991' DAY| + +------------------------+ + """ + ) + + # Large negative DAY intervals + df = session.sql("SELECT INTERVAL '-106751991' DAY as extremely_large_negative_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------+ + |"EXTREMELY_LARGE_NEGATIVE_DAY"| + +------------------------------+ + |INTERVAL '-106751991' DAY | + +------------------------------+ + """ + ) + + # High precision positive fractional SECOND intervals + df = session.sql("SELECT INTERVAL '54.775807' SECOND as high_precision_second") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + |"HIGH_PRECISION_SECOND" | + +---------------------------+ + |INTERVAL '54.775807' SECOND| + +---------------------------+ + """ + ) + + # High precision negative fractional SECOND intervals + df = session.sql( + "SELECT INTERVAL '-54.775807' SECOND as high_precision_negative_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------+ + |"HIGH_PRECISION_NEGATIVE_SECOND"| + +--------------------------------+ + |INTERVAL '-54.775807' SECOND | + +--------------------------------+ + """ + ) + @pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data): From 2a0607e0bc229d576cbe1f8703deab61eb2ef375 Mon Sep 17 00:00:00 2001 From: Felix He Date: Tue, 23 Sep 2025 22:59:09 -0700 Subject: [PATCH 12/12] SNOW-2346552: added more coverage and removed redundant code --- .../snowpark/_internal/type_utils.py | 60 ++++---------- tests/integ/test_dataframe.py | 82 +++++++++++++++++++ 2 files changed, 100 insertions(+), 42 deletions(-) diff --git 
a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 350b1b5e36..3b61320f06 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -1490,8 +1490,6 @@ def format_day_time_interval_for_display( elif isinstance(cell, str): # Raw string that needs to be formatted (e.g., "1 01:01:01.7878") interval_str = cell - else: - return str(cell).replace("\n", "\\n") field_names = { DayTimeIntervalType.DAY: "DAY", @@ -1557,48 +1555,26 @@ def format_seconds_with_precision( seconds_value: Union[float, decimal.Decimal] ) -> str: """Format seconds with full precision, preserving trailing zeros for proper padding""" - if isinstance(seconds_value, decimal.Decimal): - # Use Decimal precision formatting - if seconds_value == int(seconds_value): - return f"{int(seconds_value):02d}" - else: - # For fractional seconds, ensure proper leading zero padding - integer_part = int(seconds_value) - if integer_part < 10: - # Format with leading zero for the integer part - formatted = f"{seconds_value:.6f}".rstrip("0") - if formatted.endswith("."): - return f"{integer_part:02d}" - # Replace the integer part with zero-padded version - decimal_part = formatted.split(".", 1)[1] - return f"{integer_part:02d}.{decimal_part}" - else: - # For >= 10, use normal formatting - formatted = f"{seconds_value:.6f}".rstrip("0") - if formatted.endswith("."): - return f"{integer_part}" - return formatted + # Unified formatting logic for both Decimal and float types + if seconds_value == int(seconds_value): + return f"{int(seconds_value):02d}" else: - # Float precision formatting (original logic) - if seconds_value == int(seconds_value): - return f"{int(seconds_value):02d}" + # For fractional seconds, ensure proper leading zero padding + integer_part = int(seconds_value) + if integer_part < 10: + # Format with leading zero for the integer part + formatted = f"{seconds_value:.6f}".rstrip("0") + if 
formatted.endswith("."): + return f"{integer_part:02d}" + # Replace the integer part with zero-padded version + decimal_part = formatted.split(".", 1)[1] + return f"{integer_part:02d}.{decimal_part}" else: - # For fractional seconds, ensure proper leading zero padding - integer_part = int(seconds_value) - if integer_part < 10: - # Format with leading zero for the integer part - formatted = f"{seconds_value:.6f}".rstrip("0") - if formatted.endswith("."): - return f"{integer_part:02d}" - # Replace the integer part with zero-padded version - decimal_part = formatted.split(".", 1)[1] - return f"{integer_part:02d}.{decimal_part}" - else: - # For >= 10, use normal formatting - formatted = f"{seconds_value:.6f}".rstrip("0") - if formatted.endswith("."): - return f"{integer_part}" - return formatted + # For >= 10, use normal formatting + formatted = f"{seconds_value:.6f}".rstrip("0") + if formatted.endswith("."): + return f"{integer_part}" + return formatted # For single field intervals, extract just that component if start_field == end_field: diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index e6ae56fc12..9d486ac728 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -3373,6 +3373,88 @@ def test_show_interval_formatting(session): """ ) + # === Targeted Tests to Hit Remaining Missing Lines === + + # Very large interval to trigger Decimal path with integer seconds + df = session.sql("SELECT INTERVAL '2000000' DAY as decimal_large_int") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------+ + |"DECIMAL_LARGE_INT" | + +----------------------+ + |INTERVAL '2000000' DAY| + +----------------------+ + """ + ) + + # Very large interval with fractional seconds < 10 to trigger Decimal path + df = session.sql( + "SELECT INTERVAL '2000000 00:00:05.123456' DAY TO SECOND as decimal_small_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + 
+------------------------------------------------+ + |"DECIMAL_SMALL_FRAC" | + +------------------------------------------------+ + |INTERVAL '2000000 00:00:05.123456' DAY TO SECOND| + +------------------------------------------------+ + """ + ) + + # Very large interval with fractional seconds >= 10 to trigger Decimal path + df = session.sql( + "SELECT INTERVAL '2000000 00:00:15.123456' DAY TO SECOND as decimal_large_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------------------+ + |"DECIMAL_LARGE_FRAC" | + +------------------------------------------------+ + |INTERVAL '2000000 00:00:15.123456' DAY TO SECOND| + +------------------------------------------------+ + """ + ) + + # Normal interval with integer seconds to trigger float path + df = session.sql("SELECT INTERVAL '00:00:05' HOUR TO SECOND as float_int_test") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"FLOAT_INT_TEST" | + +----------------------------------+ + |INTERVAL '00:00:05' HOUR TO SECOND| + +----------------------------------+ + """ + ) + + # Normal interval with fractional seconds < 10 for float path + df = session.sql( + "SELECT INTERVAL '00:00:05.123456' HOUR TO SECOND as float_small_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------------+ + |"FLOAT_SMALL_FRAC" | + +-----------------------------------------+ + |INTERVAL '00:00:05.123456' HOUR TO SECOND| + +-----------------------------------------+ + """ + ) + + # Normal interval with fractional seconds >= 10 for float path + df = session.sql( + "SELECT INTERVAL '00:00:15.123456' HOUR TO SECOND as float_large_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------------+ + |"FLOAT_LARGE_FRAC" | + +-----------------------------------------+ + |INTERVAL '00:00:15.123456' HOUR TO SECOND| + 
+-----------------------------------------+ + """ + ) + @pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data):