def format_year_month_interval_for_display(
    cell: str, start_field: int, end_field: int
) -> str:
    """
    Format a YearMonthIntervalType string for display in _show_string_spark().

    Accepted shapes of ``cell``:

    * compound ``years-months`` with an optional sign: "+1-6", "-2-03", "1-6"
    * a bare number with an optional sign: "24", "+5", "-12". A bare number is
      read in the unit of ``end_field`` (years when it is YEAR, months
      otherwise).  NOTE(review): this interpretation is an assumption; confirm
      against the strings the server actually returns.
    * an empty string, which is displayed as a zero interval.

    Args:
        cell: The string representation of the interval.
        start_field: Start field constant from YearMonthIntervalType (YEAR=0, MONTH=1)
        end_field: End field constant from YearMonthIntervalType (YEAR=0, MONTH=1)

    Returns:
        Formatted interval string (e.g., "INTERVAL '1-6' YEAR TO MONTH",
        "INTERVAL '24' MONTH"). Field combinations other than YEAR/YEAR and
        MONTH/MONTH are rendered in the full YEAR TO MONTH layout.

    Raises:
        ValueError: If a numeric component of ``cell`` is not an integer.
    """
    text = cell.strip()
    is_negative = text.startswith("-")
    # Drop a single leading sign so "+1-6", "-1-6" and "1-6" parse identically.
    magnitude = text[1:] if text.startswith(("+", "-")) else text

    # Split on the first dash only: "1-06" -> ("1", "-", "06").
    year_text, separator, month_text = magnitude.partition("-")
    if separator:
        years, months = int(year_text), int(month_text)
    elif not magnitude:
        # Nothing to parse; show a zero interval without a sign.
        years, months, is_negative = 0, 0, False
    elif end_field == YearMonthIntervalType.YEAR:
        years, months = int(magnitude), 0
    else:
        # Bare month count: normalise so YEAR TO MONTH shows e.g. "2-0" for 24.
        years, months = divmod(int(magnitude), 12)

    sign_prefix = "-" if is_negative else ""

    if (
        start_field == YearMonthIntervalType.YEAR
        and end_field == YearMonthIntervalType.YEAR
    ):
        # Years only: the month component is not displayed.
        return f"INTERVAL '{sign_prefix}{years}' YEAR"

    if (
        start_field == YearMonthIntervalType.MONTH
        and end_field == YearMonthIntervalType.MONTH
    ):
        # Months only: collapse years into a single month count.
        total_months = years * 12 + months
        if is_negative:
            total_months = -total_months
        return f"INTERVAL '{total_months}' MONTH"

    # YEAR TO MONTH, and the fallback for any unexpected field combination, so
    # the function always returns a string.
    return f"INTERVAL '{sign_prefix}{years}-{months}' YEAR TO MONTH"


def format_day_time_interval_for_display(
    cell: Union[str, datetime.timedelta], start_field: int, end_field: int
) -> str:
    """
    Format a DayTimeIntervalType value for display in _show_string_spark().

    Args:
        cell: Either a datetime.timedelta object or a string. A timedelta is
            rendered through format_day_time_interval(); a string is used
            verbatim as the quoted interval body.
        start_field: Start field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)
        end_field: End field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)

    Returns:
        Formatted interval string (e.g., "INTERVAL '01:30:45' HOUR TO SECOND")

    Raises:
        TypeError: If ``cell`` is neither a ``str`` nor a ``datetime.timedelta``.
    """
    if isinstance(cell, datetime.timedelta):
        # Build the total from the timedelta's integer components instead of
        # total_seconds(): the float result loses microsecond accuracy for
        # intervals longer than roughly 270 years, while Decimal stays exact.
        total_seconds_value = (
            decimal.Decimal(cell.days) * 86400
            + cell.seconds
            + decimal.Decimal(cell.microseconds) / 1_000_000
        )
        interval_str = format_day_time_interval(
            total_seconds_value, start_field, end_field
        )
    elif isinstance(cell, str):
        # Already textual (e.g., "1 01:01:01.7878"); shown as-is.
        interval_str = cell
    else:
        raise TypeError(
            "cell must be a str or datetime.timedelta, "
            f"got {type(cell).__name__}"
        )

    field_names = {
        DayTimeIntervalType.DAY: "DAY",
        DayTimeIntervalType.HOUR: "HOUR",
        DayTimeIntervalType.MINUTE: "MINUTE",
        DayTimeIntervalType.SECOND: "SECOND",
    }

    # Unknown field constants fall back to the widest range, DAY ... SECOND.
    start_name = field_names.get(start_field, "DAY")
    end_name = field_names.get(end_field, "SECOND")

    if start_field == end_field:
        return f"INTERVAL '{interval_str}' {start_name}"
    return f"INTERVAL '{interval_str}' {start_name} TO {end_name}"


def format_day_time_interval(
    total_seconds_value: Union[float, decimal.Decimal], start_field: int, end_field: int
) -> str:
    """
    Format the body of a DayTimeIntervalType value (the text inside the quotes).

    The value is rounded once to whole microseconds and then split with
    integer arithmetic, so no floating-point error accumulates between the
    day/hour/minute/second components and a fractional part can never round
    up past its own integer part.

    Args:
        total_seconds_value: Total seconds as either float or Decimal (can be
            negative). Pass a Decimal when microsecond accuracy matters for
            very large intervals; a float may already have lost it.
        start_field: Start field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)
        end_field: End field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)

    Returns:
        Formatted interval string (e.g., "01:30:45", "2 12:30", "05", etc.).
        The leading field carries the whole magnitude (e.g. 90 minutes is "90"
        for MINUTE and "150:30" is possible for MINUTE TO SECOND). Unrecognised
        start fields are rendered in the DAY-based layout.
    """
    sign = "-" if total_seconds_value < 0 else ""

    # round() returns an int for both float and Decimal operands.
    total_micros = round(abs(total_seconds_value) * 1_000_000)
    whole_seconds, micros = divmod(total_micros, 1_000_000)
    total_minutes, secs = divmod(whole_seconds, 60)
    total_hours, mins = divmod(total_minutes, 60)
    days, hours = divmod(total_hours, 24)

    def seconds_text(whole: int, fraction_micros: int) -> str:
        """Render seconds zero-padded to two digits, trimming trailing fractional zeros."""
        if fraction_micros == 0:
            return f"{whole:02d}"
        # fraction_micros is non-zero, so rstrip can never reach the ".".
        return f"{whole:02d}.{fraction_micros:06d}".rstrip("0")

    # Single-field intervals: the one field holds the entire magnitude and
    # anything finer than that field is truncated.
    if start_field == end_field:
        if start_field == DayTimeIntervalType.DAY:
            return f"{sign}{days}"
        if start_field == DayTimeIntervalType.HOUR:
            return f"{sign}{total_hours:02d}"
        if start_field == DayTimeIntervalType.MINUTE:
            return f"{sign}{total_minutes:02d}"
        if start_field == DayTimeIntervalType.SECOND:
            return f"{sign}{seconds_text(whole_seconds, micros)}"

    # Multi-field intervals: layout is chosen by start_field, truncated by end_field.
    if start_field == DayTimeIntervalType.HOUR:
        # HOUR TO X: "HH:MM[:SS]" with days folded into the hour count.
        if end_field == DayTimeIntervalType.MINUTE:
            return f"{sign}{total_hours:02d}:{mins:02d}"
        return f"{sign}{total_hours:02d}:{mins:02d}:{seconds_text(secs, micros)}"

    if start_field == DayTimeIntervalType.MINUTE:
        # MINUTE TO SECOND: "MM:SS" with days and hours folded into minutes.
        return f"{sign}{total_minutes:02d}:{seconds_text(secs, micros)}"

    if start_field == DayTimeIntervalType.SECOND:
        # SECOND paired with a different end field is not a valid range; show
        # the total seconds rather than returning None.
        return f"{sign}{seconds_text(whole_seconds, micros)}"

    # DAY TO X (also the fallback for unrecognised start fields).
    if end_field == DayTimeIntervalType.HOUR:
        # DAY TO HOUR: "D HH"
        return f"{sign}{days} {hours:02d}"
    if end_field == DayTimeIntervalType.MINUTE:
        # DAY TO MINUTE: "D HH:MM"
        return f"{sign}{days} {hours:02d}:{mins:02d}"
    # DAY TO SECOND: "D HH:MM:SS[.ffffff]"
    return f"{sign}{days} {hours:02d}:{mins:02d}:{seconds_text(secs, micros)}"
YearMonthIntervalType.YEAR + ) + end_field = getattr(datatype, "end_field", YearMonthIntervalType.MONTH) + res = format_year_month_interval_for_display( + cell, start_field, end_field + ) + elif isinstance(cell, (str, datetime.timedelta)) and isinstance( + datatype, DayTimeIntervalType + ): + start_field = getattr(datatype, "start_field", DayTimeIntervalType.DAY) + end_field = getattr(datatype, "end_field", DayTimeIntervalType.SECOND) + res = format_day_time_interval_for_display(cell, start_field, end_field) else: res = str(cell) return res.replace("\n", "\\n") diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index ee418e525c..9d486ac728 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -2536,6 +2536,926 @@ def assert_show_string_equals(actual: str, expected: str): ) +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="FEAT: Interval types not fully supported in local testing", +) +def test_show_interval_formatting(session): + df = session.sql("SELECT INTERVAL '1' HOUR as hour_single") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"HOUR_SINGLE" | + +------------------+ + |INTERVAL '01' HOUR| + +------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '5' MINUTE as minute_single") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"MINUTE_SINGLE" | + +--------------------+ + |INTERVAL '05' MINUTE| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '5' SECOND as second_integer") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"SECOND_INTEGER" | + +--------------------+ + |INTERVAL '05' SECOND| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '1.000001' SECOND as second_microseconds") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + 
|"SECOND_MICROSECONDS" | + +---------------------------+ + |INTERVAL '01.000001' SECOND| + +---------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '24' HOUR as hour_full_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"HOUR_FULL_DAY" | + +------------------+ + |INTERVAL '24' HOUR| + +------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '90' MINUTE as minute_over_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"MINUTE_OVER_HOUR" | + +--------------------+ + |INTERVAL '90' MINUTE| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '0' SECOND as zero_second") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"ZERO_SECOND" | + +--------------------+ + |INTERVAL '00' SECOND| + +--------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '0.000001' SECOND as microsecond") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + |"MICROSECOND" | + +---------------------------+ + |INTERVAL '00.000001' SECOND| + +---------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '2 12' DAY TO HOUR as day_to_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + |"DAY_TO_HOUR" | + +---------------------------+ + |INTERVAL '2 12' DAY TO HOUR| + +---------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '1 08:30' DAY TO MINUTE as day_to_minute") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------+ + |"DAY_TO_MINUTE" | + +--------------------------------+ + |INTERVAL '1 08:30' DAY TO MINUTE| + +--------------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '08:30' HOUR TO MINUTE as hour_to_minute") + assert df._show_string_spark(truncate=False) == dedent( + """\ + 
+-------------------------------+ + |"HOUR_TO_MINUTE" | + +-------------------------------+ + |INTERVAL '08:30' HOUR TO MINUTE| + +-------------------------------+ + """ + ) + + df = session.sql( + "SELECT INTERVAL '01:00:00.456' HOUR TO SECOND as hour_to_second_fractional" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------------+ + |"HOUR_TO_SECOND_FRACTIONAL" | + +--------------------------------------+ + |INTERVAL '01:00:00.456' HOUR TO SECOND| + +--------------------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-2' HOUR as negative_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------+ + |"NEGATIVE_HOUR" | + +-------------------+ + |INTERVAL '-02' HOUR| + +-------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-15.5' SECOND as negative_second") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------+ + |"NEGATIVE_SECOND" | + +-----------------------+ + |INTERVAL '-15.5' SECOND| + +-----------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '999999' SECOND as large_second") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------+ + |"LARGE_SECOND" | + +------------------------+ + |INTERVAL '999999' SECOND| + +------------------------+ + """ + ) + + # Year-month intervals with dash format + df = session.sql("SELECT INTERVAL '1-6' YEAR TO MONTH as year_to_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"YEAR_TO_MONTH" | + +----------------------------+ + |INTERVAL '1-6' YEAR TO MONTH| + +----------------------------+ + """ + ) + + # Negative year-month intervals + df = session.sql("SELECT INTERVAL '-2-3' YEAR TO MONTH as negative_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------+ + |"NEGATIVE_YEAR_MONTH" | + 
+-----------------------------+ + |INTERVAL '-2-3' YEAR TO MONTH| + +-----------------------------+ + """ + ) + + # Single year intervals (not YEAR TO MONTH) + df = session.sql("SELECT INTERVAL '5' YEAR as single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------+ + |"SINGLE_YEAR" | + +-----------------+ + |INTERVAL '5' YEAR| + +-----------------+ + """ + ) + + # Single month intervals (not YEAR TO MONTH) + df = session.sql("SELECT INTERVAL '8' MONTH as single_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"SINGLE_MONTH" | + +------------------+ + |INTERVAL '8' MONTH| + +------------------+ + """ + ) + + # Zero year-month intervals + df = session.sql("SELECT INTERVAL '0-0' YEAR TO MONTH as zero_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"ZERO_YEAR_MONTH" | + +----------------------------+ + |INTERVAL '0-0' YEAR TO MONTH| + +----------------------------+ + """ + ) + + # Very large day intervals + df = session.sql("SELECT INTERVAL '999' DAY as large_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"LARGE_DAY" | + +------------------+ + |INTERVAL '999' DAY| + +------------------+ + """ + ) + + # Minute to second with large minutes + df = session.sql( + "SELECT INTERVAL '150:30' MINUTE TO SECOND as large_minute_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"LARGE_MINUTE_TO_SECOND" | + +----------------------------------+ + |INTERVAL '150:30' MINUTE TO SECOND| + +----------------------------------+ + """ + ) + + # Day to second with fractional seconds + df = session.sql( + "SELECT INTERVAL '5 10:20:30.123' DAY TO SECOND as day_to_second_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------------------+ + |"DAY_TO_SECOND_FRAC" | + 
+---------------------------------------+ + |INTERVAL '5 10:20:30.123' DAY TO SECOND| + +---------------------------------------+ + """ + ) + + # Hour to second with zero padding in multi-field + df = session.sql("SELECT INTERVAL '05:00:00' HOUR TO SECOND as hour_zero_padded") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"HOUR_ZERO_PADDED" | + +----------------------------------+ + |INTERVAL '05:00:00' HOUR TO SECOND| + +----------------------------------+ + """ + ) + + # Negative day-time intervals + df = session.sql("SELECT INTERVAL '-3 05:30:45' DAY TO SECOND as negative_complex") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------+ + |"NEGATIVE_COMPLEX" | + +------------------------------------+ + |INTERVAL '-3 05:30:45' DAY TO SECOND| + +------------------------------------+ + """ + ) + + # Additional edge cases for complete coverage based on actual Snowflake output + + # Year-month compound intervals + df = session.sql("SELECT INTERVAL '1-6' YEAR TO MONTH as year_to_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"YEAR_TO_MONTH" | + +----------------------------+ + |INTERVAL '1-6' YEAR TO MONTH| + +----------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-2-3' YEAR TO MONTH as negative_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------+ + |"NEGATIVE_YEAR_MONTH" | + +-----------------------------+ + |INTERVAL '-2-3' YEAR TO MONTH| + +-----------------------------+ + """ + ) + + # Single field intervals + df = session.sql("SELECT INTERVAL '5' YEAR as single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------+ + |"SINGLE_YEAR" | + +-----------------+ + |INTERVAL '5' YEAR| + +-----------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '8' MONTH as single_month") 
+ assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"SINGLE_MONTH" | + +------------------+ + |INTERVAL '8' MONTH| + +------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '0-0' YEAR TO MONTH as zero_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"ZERO_YEAR_MONTH" | + +----------------------------+ + |INTERVAL '0-0' YEAR TO MONTH| + +----------------------------+ + """ + ) + + # Large day interval + df = session.sql("SELECT INTERVAL '999' DAY as large_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"LARGE_DAY" | + +------------------+ + |INTERVAL '999' DAY| + +------------------+ + """ + ) + + # Large minute to second interval (tests the bug we just fixed) + df = session.sql( + "SELECT INTERVAL '150:30' MINUTE TO SECOND as large_minute_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"LARGE_MINUTE_TO_SECOND" | + +----------------------------------+ + |INTERVAL '150:30' MINUTE TO SECOND| + +----------------------------------+ + """ + ) + + # Day to second with fractional seconds + df = session.sql( + "SELECT INTERVAL '5 10:20:30.123' DAY TO SECOND as day_to_second_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------------------+ + |"DAY_TO_SECOND_FRAC" | + +---------------------------------------+ + |INTERVAL '5 10:20:30.123' DAY TO SECOND| + +---------------------------------------+ + """ + ) + + # Hour to second with zero padding + df = session.sql("SELECT INTERVAL '05:00:00' HOUR TO SECOND as hour_zero_padded") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"HOUR_ZERO_PADDED" | + +----------------------------------+ + |INTERVAL '05:00:00' HOUR TO SECOND| + +----------------------------------+ + """ + ) + + # 
Negative complex interval + df = session.sql("SELECT INTERVAL '-3 05:30:45' DAY TO SECOND as negative_complex") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------+ + |"NEGATIVE_COMPLEX" | + +------------------------------------+ + |INTERVAL '-3 05:30:45' DAY TO SECOND| + +------------------------------------+ + """ + ) + + # Positive prefix intervals + df = session.sql("SELECT INTERVAL '+2-5' YEAR TO MONTH as positive_year_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------+ + |"POSITIVE_YEAR_MONTH" | + +----------------------------+ + |INTERVAL '2-5' YEAR TO MONTH| + +----------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '+3' YEAR as positive_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------+ + |"POSITIVE_YEAR" | + +-----------------+ + |INTERVAL '3' YEAR| + +-----------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '+15' MONTH as positive_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------+ + |"POSITIVE_MONTH" | + +-------------------+ + |INTERVAL '15' MONTH| + +-------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '-5' YEAR as negative_single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------+ + |"NEGATIVE_SINGLE_YEAR"| + +----------------------+ + |INTERVAL '-5' YEAR | + +----------------------+ + """ + ) + + # Additional edge cases for comprehensive coverage + + # Positive number without dash for single month + df = session.sql("SELECT INTERVAL '+7' MONTH as positive_single_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------+ + |"POSITIVE_SINGLE_MONTH"| + +-----------------------+ + |INTERVAL '7' MONTH | + +-----------------------+ + """ + ) + + # Negative number for single month + df = session.sql("SELECT INTERVAL '-12' MONTH as 
negative_single_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------+ + |"NEGATIVE_SINGLE_MONTH"| + +-----------------------+ + |INTERVAL '-12' MONTH | + +-----------------------+ + """ + ) + + # Positive number without dash for single year + df = session.sql("SELECT INTERVAL '+4' YEAR as positive_single_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------+ + |"POSITIVE_SINGLE_YEAR"| + +----------------------+ + |INTERVAL '4' YEAR | + +----------------------+ + """ + ) + + # Positive number with no sign, single number for fallback + df = session.sql("SELECT INTERVAL '42' MONTH as plain_number_month") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"PLAIN_NUMBER_MONTH"| + +--------------------+ + |INTERVAL '42' MONTH | + +--------------------+ + """ + ) + + # Edge case: positive single dash for months + df = session.sql("SELECT INTERVAL '+8' MONTH as plus_month_edge") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"PLUS_MONTH_EDGE" | + +------------------+ + |INTERVAL '8' MONTH| + +------------------+ + """ + ) + + # Day-time intervals for additional coverage + + # Single minute-only interval + df = session.sql("SELECT INTERVAL '5' MINUTE as single_minute_only") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"SINGLE_MINUTE_ONLY"| + +--------------------+ + |INTERVAL '05' MINUTE| + +--------------------+ + """ + ) + + # Single hour-only interval + df = session.sql("SELECT INTERVAL '7' HOUR as single_hour_only") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------+ + |"SINGLE_HOUR_ONLY"| + +------------------+ + |INTERVAL '07' HOUR| + +------------------+ + """ + ) + + # Single second-only interval + df = session.sql("SELECT INTERVAL '8' SECOND as single_second_only") + assert 
df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"SINGLE_SECOND_ONLY"| + +--------------------+ + |INTERVAL '08' SECOND| + +--------------------+ + """ + ) + + # Single second with fractional part to hit different branches (lines 5216-5217) + df = session.sql("SELECT INTERVAL '3.456' SECOND as fractional_second_only") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------+ + |"FRACTIONAL_SECOND_ONLY"| + +------------------------+ + |INTERVAL '03.456' SECOND| + +------------------------+ + """ + ) + + # Fractional seconds < 1 + df = session.sql("SELECT INTERVAL '0.789' SECOND as sub_second_frac") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------+ + |"SUB_SECOND_FRAC" | + +------------------------+ + |INTERVAL '00.789' SECOND| + +------------------------+ + """ + ) + + # Minute to second with fractional + df = session.sql( + "SELECT INTERVAL '8:45.321' MINUTE TO SECOND as minute_to_second_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------------------------+ + |"MINUTE_TO_SECOND_FRAC" | + +-------------------------------------+ + |INTERVAL '08:45.321' MINUTE TO SECOND| + +-------------------------------------+ + """ + ) + + # Large minute to second with fractional + df = session.sql( + "SELECT INTERVAL '123:45.678' MINUTE TO SECOND as large_minute_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------------+ + |"LARGE_MINUTE_FRAC" | + +--------------------------------------+ + |INTERVAL '123:45.678' MINUTE TO SECOND| + +--------------------------------------+ + """ + ) + + # Additional test cases for interval formatting coverage + + # Test DAY TO MINUTE formatting + df = session.sql("SELECT INTERVAL '2 05:30' DAY TO MINUTE as day_to_minute") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------+ + 
|"DAY_TO_MINUTE" | + +--------------------------------+ + |INTERVAL '2 05:30' DAY TO MINUTE| + +--------------------------------+ + """ + ) + + # Test MINUTE TO SECOND with integer seconds + df = session.sql("SELECT INTERVAL '15:30' MINUTE TO SECOND as minute_to_second_int") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------------+ + |"MINUTE_TO_SECOND_INT" | + +---------------------------------+ + |INTERVAL '15:30' MINUTE TO SECOND| + +---------------------------------+ + """ + ) + + # Test single field interval + df = session.sql("SELECT INTERVAL '5' HOUR as single_hour_field") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------+ + |"SINGLE_HOUR_FIELD"| + +-------------------+ + |INTERVAL '05' HOUR | + +-------------------+ + """ + ) + + # Test multi-field interval + df = session.sql("SELECT INTERVAL '2:30:45' HOUR TO SECOND as multi_field") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"MULTI_FIELD" | + +----------------------------------+ + |INTERVAL '02:30:45' HOUR TO SECOND| + +----------------------------------+ + """ + ) + + df = session.sql("SELECT INTERVAL '5.000' SECOND as zero_frac_test") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------+ + |"ZERO_FRAC_TEST" | + +--------------------+ + |INTERVAL '05' SECOND| + +--------------------+ + """ + ) + + # === Edge Cases for Decimal Precision and Large Values === + + # Large positive DAY TO HOUR intervals + df = session.sql("SELECT INTERVAL '106751991 04' DAY TO HOUR as large_day_to_hour") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------+ + |"LARGE_DAY_TO_HOUR" | + +-----------------------------------+ + |INTERVAL '106751991 04' DAY TO HOUR| + +-----------------------------------+ + """ + ) + + # Large positive DAY TO MINUTE intervals + df = session.sql( + "SELECT INTERVAL 
'106751991 04:00' DAY TO MINUTE as large_day_to_minute" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------------+ + |"LARGE_DAY_TO_MINUTE" | + +----------------------------------------+ + |INTERVAL '106751991 04:00' DAY TO MINUTE| + +----------------------------------------+ + """ + ) + + # Large positive DAY TO SECOND intervals with high precision fractional seconds + df = session.sql( + "SELECT INTERVAL '106751991 04:00:54.775807' DAY TO SECOND as large_day_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------------------------+ + |"LARGE_DAY_TO_SECOND" | + +--------------------------------------------------+ + |INTERVAL '106751991 04:00:54.775807' DAY TO SECOND| + +--------------------------------------------------+ + """ + ) + + # Large negative DAY TO HOUR intervals + df = session.sql( + "SELECT INTERVAL '-106751991 04' DAY TO HOUR as large_negative_day_to_hour" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------+ + |"LARGE_NEGATIVE_DAY_TO_HOUR" | + +------------------------------------+ + |INTERVAL '-106751991 04' DAY TO HOUR| + +------------------------------------+ + """ + ) + + # Large negative DAY TO MINUTE intervals + df = session.sql( + "SELECT INTERVAL '-106751991 04:00' DAY TO MINUTE as large_negative_day_to_minute" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------------+ + |"LARGE_NEGATIVE_DAY_TO_MINUTE" | + +-----------------------------------------+ + |INTERVAL '-106751991 04:00' DAY TO MINUTE| + +-----------------------------------------+ + """ + ) + + # Large negative DAY TO SECOND intervals with high precision fractional seconds + df = session.sql( + "SELECT INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND as large_negative_day_to_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + 
+---------------------------------------------------+ + |"LARGE_NEGATIVE_DAY_TO_SECOND" | + +---------------------------------------------------+ + |INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND| + +---------------------------------------------------+ + """ + ) + + # Extremely large positive YEAR intervals + df = session.sql("SELECT INTERVAL '178956970' YEAR as extremely_large_year") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------------+ + |"EXTREMELY_LARGE_YEAR" | + +-------------------------+ + |INTERVAL '178956970' YEAR| + +-------------------------+ + """ + ) + + # Extremely large negative YEAR intervals + df = session.sql( + "SELECT INTERVAL '-178956970' YEAR as extremely_large_negative_year" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-------------------------------+ + |"EXTREMELY_LARGE_NEGATIVE_YEAR"| + +-------------------------------+ + |INTERVAL '-178956970' YEAR | + +-------------------------------+ + """ + ) + + # Large positive DAY intervals + df = session.sql("SELECT INTERVAL '106751991' DAY as extremely_large_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------+ + |"EXTREMELY_LARGE_DAY" | + +------------------------+ + |INTERVAL '106751991' DAY| + +------------------------+ + """ + ) + + # Large negative DAY intervals + df = session.sql("SELECT INTERVAL '-106751991' DAY as extremely_large_negative_day") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------+ + |"EXTREMELY_LARGE_NEGATIVE_DAY"| + +------------------------------+ + |INTERVAL '-106751991' DAY | + +------------------------------+ + """ + ) + + # High precision positive fractional SECOND intervals + df = session.sql("SELECT INTERVAL '54.775807' SECOND as high_precision_second") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +---------------------------+ + |"HIGH_PRECISION_SECOND" | + 
+---------------------------+ + |INTERVAL '54.775807' SECOND| + +---------------------------+ + """ + ) + + # High precision negative fractional SECOND intervals + df = session.sql( + "SELECT INTERVAL '-54.775807' SECOND as high_precision_negative_second" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +--------------------------------+ + |"HIGH_PRECISION_NEGATIVE_SECOND"| + +--------------------------------+ + |INTERVAL '-54.775807' SECOND | + +--------------------------------+ + """ + ) + + # === Targeted Tests to Hit Remaining Missing Lines === + + # Very large interval to trigger Decimal path with integer seconds + df = session.sql("SELECT INTERVAL '2000000' DAY as decimal_large_int") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------+ + |"DECIMAL_LARGE_INT" | + +----------------------+ + |INTERVAL '2000000' DAY| + +----------------------+ + """ + ) + + # Very large interval with fractional seconds < 10 to trigger Decimal path + df = session.sql( + "SELECT INTERVAL '2000000 00:00:05.123456' DAY TO SECOND as decimal_small_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------------------+ + |"DECIMAL_SMALL_FRAC" | + +------------------------------------------------+ + |INTERVAL '2000000 00:00:05.123456' DAY TO SECOND| + +------------------------------------------------+ + """ + ) + + # Very large interval with fractional seconds >= 10 to trigger Decimal path + df = session.sql( + "SELECT INTERVAL '2000000 00:00:15.123456' DAY TO SECOND as decimal_large_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +------------------------------------------------+ + |"DECIMAL_LARGE_FRAC" | + +------------------------------------------------+ + |INTERVAL '2000000 00:00:15.123456' DAY TO SECOND| + +------------------------------------------------+ + """ + ) + + # Normal interval with integer seconds to trigger float path + df = 
session.sql("SELECT INTERVAL '00:00:05' HOUR TO SECOND as float_int_test") + assert df._show_string_spark(truncate=False) == dedent( + """\ + +----------------------------------+ + |"FLOAT_INT_TEST" | + +----------------------------------+ + |INTERVAL '00:00:05' HOUR TO SECOND| + +----------------------------------+ + """ + ) + + # Normal interval with fractional seconds < 10 for float path + df = session.sql( + "SELECT INTERVAL '00:00:05.123456' HOUR TO SECOND as float_small_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------------+ + |"FLOAT_SMALL_FRAC" | + +-----------------------------------------+ + |INTERVAL '00:00:05.123456' HOUR TO SECOND| + +-----------------------------------------+ + """ + ) + + # Normal interval with fractional seconds >= 10 for float path + df = session.sql( + "SELECT INTERVAL '00:00:15.123456' HOUR TO SECOND as float_large_frac" + ) + assert df._show_string_spark(truncate=False) == dedent( + """\ + +-----------------------------------------+ + |"FLOAT_LARGE_FRAC" | + +-----------------------------------------+ + |INTERVAL '00:00:15.123456' HOUR TO SECOND| + +-----------------------------------------+ + """ + ) + + @pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]]) def test_create_dataframe_with_single_value(session, data): expected_names = ["_1"]