Skip to content

Commit d14f99e

Browse files
authored
SNOW-2346552: IntervalType spark show string support (#3794)
1 parent 719a4d6 commit d14f99e

3 files changed

Lines changed: 1189 additions & 0 deletions

File tree

src/snowflake/snowpark/_internal/type_utils.py

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,257 @@ def most_permissive_type(datatype: DataType) -> DataType:
13881388
return copy.deepcopy(datatype)
13891389

13901390

1391+
def format_year_month_interval_for_display(
    cell: str, start_field: int, end_field: int
) -> str:
    """
    Format a YearMonthIntervalType string for display in _show_string_spark().

    Accepted shapes for ``cell`` (surrounding whitespace is ignored):

    * compound ``years-months`` with an optional sign: "+1-6", "-2-03", "1-06"
    * a bare integer with an optional sign: "24", "-18". It is read as a count
      of the ``end_field`` unit (years for YEAR, months otherwise).

    Args:
        cell: The string representation of the interval (e.g., "+1-6", "-2-03", "24")
        start_field: Start field constant from YearMonthIntervalType (YEAR=0, MONTH=1)
        end_field: End field constant from YearMonthIntervalType (YEAR=0, MONTH=1)

    Returns:
        Formatted interval string (e.g., "INTERVAL '1-6' YEAR TO MONTH", "INTERVAL '24' MONTH").
        If ``cell`` cannot be parsed, or the start/end combination is not one of
        YEAR TO MONTH, YEAR, or MONTH, ``cell`` is returned unchanged so the
        caller always receives a string.
    """
    text = cell.strip()
    is_negative = text.startswith("-")
    # Drop a single leading sign; what is left is "years-months" or a bare integer.
    unsigned = text[1:] if text[:1] in ("+", "-") else text
    year_part, dash, month_part = unsigned.partition("-")

    try:
        if dash:
            # Compound form: keep the components exactly as given (no normalisation).
            years, months = int(year_part), int(month_part)
        elif end_field == YearMonthIntervalType.YEAR:
            years, months = int(unsigned), 0
        else:
            # NOTE(review): a bare integer under YEAR TO MONTH is treated as a
            # month count; confirm against what the backend actually emits.
            years, months = divmod(int(unsigned), 12)
    except ValueError:
        # Unrecognised text: show it verbatim rather than a misleading zero interval.
        return cell

    sign_prefix = "-" if is_negative else ""
    span = (start_field, end_field)

    if span == (YearMonthIntervalType.YEAR, YearMonthIntervalType.MONTH):
        return f"INTERVAL '{sign_prefix}{years}-{months}' YEAR TO MONTH"
    if span == (YearMonthIntervalType.YEAR, YearMonthIntervalType.YEAR):
        return f"INTERVAL '{sign_prefix}{years}' YEAR"
    if span == (YearMonthIntervalType.MONTH, YearMonthIntervalType.MONTH):
        # Months only: collapse years into a signed month total.
        total_months = years * 12 + months
        if is_negative:
            total_months = -total_months
        return f"INTERVAL '{total_months}' MONTH"

    # Unsupported field combination: never fall through to an implicit None.
    return cell
1451+
1452+
1453+
def format_day_time_interval_for_display(
    cell: Union[str, datetime.timedelta], start_field: int, end_field: int
) -> str:
    """
    Format a DayTimeIntervalType value for display in _show_string_spark().

    Args:
        cell: Either a datetime.timedelta object or string representation
        start_field: Start field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)
        end_field: End field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)

    Returns:
        Formatted interval string (e.g., "INTERVAL '01:30:45' HOUR TO SECOND").
        A string ``cell`` is embedded as-is; any other non-timedelta value is
        embedded via ``str(cell)``.
    """
    if isinstance(cell, datetime.timedelta):
        # A timedelta is integer days/seconds/microseconds, so build the total
        # exactly in Decimal. timedelta.total_seconds() returns a float, which
        # can no longer resolve single microseconds once the magnitude reaches
        # roughly 2**33 seconds (~272 years), so it is not used here.
        total_seconds_value = (
            decimal.Decimal(cell.days) * 86400
            + cell.seconds
            + decimal.Decimal(cell.microseconds) / 1_000_000
        )
        interval_str = format_day_time_interval(
            total_seconds_value, start_field, end_field
        )
    else:
        # Raw string already in display form (e.g., "1 01:01:01.7878"); anything
        # else is stringified so interval_str is always bound.
        interval_str = str(cell)

    field_names = {
        DayTimeIntervalType.DAY: "DAY",
        DayTimeIntervalType.HOUR: "HOUR",
        DayTimeIntervalType.MINUTE: "MINUTE",
        DayTimeIntervalType.SECOND: "SECOND",
    }

    # Unknown field constants fall back to the widest range, DAY TO SECOND.
    start_name = field_names.get(start_field, "DAY")
    end_name = field_names.get(end_field, "SECOND")

    if start_field == end_field:
        return f"INTERVAL '{interval_str}' {start_name}"
    return f"INTERVAL '{interval_str}' {start_name} TO {end_name}"
1508+
1509+
1510+
def format_day_time_interval(
    total_seconds_value: Union[float, decimal.Decimal], start_field: int, end_field: int
) -> str:
    """
    Render the value part of a DayTimeIntervalType for _show_string_spark().

    The value is rounded once to whole microseconds and then split with integer
    arithmetic, so the fractional seconds and the day/hour/minute/second
    components always agree (a value such as 59.9999996 carries into the next
    minute instead of being shown as 59).

    Args:
        total_seconds_value: Total seconds as either float or Decimal (can be negative)
        start_field: Start field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)
        end_field: End field constant from DayTimeIntervalType (DAY=0, HOUR=1, MINUTE=2, SECOND=3)

    Returns:
        Formatted interval string (e.g., "01:30:45", "2 12:30", "05", etc.).
        Components finer than ``end_field`` are dropped (truncated). Field
        combinations not handled explicitly are rendered in the full
        "D HH:MM:SS[.ffffff]" form so a string is always returned.
    """
    sign = "-" if total_seconds_value < 0 else ""
    magnitude = abs(total_seconds_value)
    if not isinstance(magnitude, decimal.Decimal):
        # Decimal(float) is exact, so rounding below matches "%.6f" formatting.
        magnitude = decimal.Decimal(magnitude)

    # Single rounding step: everything after this line is integer arithmetic.
    total_micros = int(
        (magnitude * 1_000_000).to_integral_value(rounding=decimal.ROUND_HALF_EVEN)
    )
    whole_seconds, micros = divmod(total_micros, 1_000_000)
    # Fractional suffix with trailing zeros removed; empty for whole seconds.
    fraction = f".{micros:06d}".rstrip("0") if micros else ""

    # Single-field intervals show the total expressed in that one unit.
    if start_field == end_field:
        if start_field == DayTimeIntervalType.DAY:
            return f"{sign}{whole_seconds // 86400}"
        if start_field == DayTimeIntervalType.HOUR:
            only_hours = whole_seconds // 3600
            return f"{sign}{only_hours:02d}"
        if start_field == DayTimeIntervalType.MINUTE:
            only_minutes = whole_seconds // 60
            return f"{sign}{only_minutes:02d}"
        if start_field == DayTimeIntervalType.SECOND:
            return f"{sign}{whole_seconds:02d}{fraction}"

    # Multi-field intervals: the leading unit is unbounded, the rest wrap.
    total_minutes, secs = divmod(whole_seconds, 60)
    total_hours, mins = divmod(total_minutes, 60)
    days, hours = divmod(total_hours, 24)

    if start_field == DayTimeIntervalType.DAY:
        if end_field == DayTimeIntervalType.HOUR:
            # DAY TO HOUR: "D HH"
            return f"{sign}{days} {hours:02d}"
        if end_field == DayTimeIntervalType.MINUTE:
            # DAY TO MINUTE: "D HH:MM"
            return f"{sign}{days} {hours:02d}:{mins:02d}"
        # DAY TO SECOND: "D HH:MM:SS"
        return f"{sign}{days} {hours:02d}:{mins:02d}:{secs:02d}{fraction}"

    if start_field == DayTimeIntervalType.HOUR:
        if end_field == DayTimeIntervalType.MINUTE:
            # HOUR TO MINUTE: "HH:MM" (days folded into hours)
            return f"{sign}{total_hours:02d}:{mins:02d}"
        # HOUR TO SECOND: "HH:MM:SS"
        return f"{sign}{total_hours:02d}:{mins:02d}:{secs:02d}{fraction}"

    if start_field == DayTimeIntervalType.MINUTE:
        # MINUTE TO SECOND: "MM:SS" (days and hours folded into minutes)
        return f"{sign}{total_minutes:02d}:{secs:02d}{fraction}"

    # Unsupported start/end combination: fall back to the full form instead of
    # implicitly returning None.
    return f"{sign}{days} {hours:02d}:{mins:02d}:{secs:02d}{fraction}"
1640+
1641+
13911642
# Type hints
13921643
ColumnOrName = Union["snowflake.snowpark.column.Column", str]
13931644
ColumnOrLiteralStr = Union["snowflake.snowpark.column.Column", str]

src/snowflake/snowpark/dataframe.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@
137137
ColumnOrName,
138138
ColumnOrSqlExpr,
139139
LiteralType,
140+
format_day_time_interval_for_display,
141+
format_year_month_interval_for_display,
140142
snow_type_to_dtype_str,
141143
type_string_to_type_object,
142144
)
@@ -206,6 +208,7 @@
206208
from snowflake.snowpark.types import (
207209
ArrayType,
208210
DataType,
211+
DayTimeIntervalType,
209212
MapType,
210213
PandasDataFrameType,
211214
StringType,
@@ -215,6 +218,7 @@
215218
_FractionalType,
216219
TimestampType,
217220
TimestampTimeZone,
221+
YearMonthIntervalType,
218222
)
219223

220224
# Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable
@@ -5115,6 +5119,20 @@ def format_timestamp_spark(dt: datetime.datetime) -> str:
51155119
res = "-Infinity"
51165120
else:
51175121
res = str(cell).replace("e+", "E").replace("e-", "E-")
5122+
elif isinstance(cell, str) and isinstance(datatype, YearMonthIntervalType):
5123+
start_field = getattr(
5124+
datatype, "start_field", YearMonthIntervalType.YEAR
5125+
)
5126+
end_field = getattr(datatype, "end_field", YearMonthIntervalType.MONTH)
5127+
res = format_year_month_interval_for_display(
5128+
cell, start_field, end_field
5129+
)
5130+
elif isinstance(cell, (str, datetime.timedelta)) and isinstance(
5131+
datatype, DayTimeIntervalType
5132+
):
5133+
start_field = getattr(datatype, "start_field", DayTimeIntervalType.DAY)
5134+
end_field = getattr(datatype, "end_field", DayTimeIntervalType.SECOND)
5135+
res = format_day_time_interval_for_display(cell, start_field, end_field)
51185136
else:
51195137
res = str(cell)
51205138
return res.replace("\n", "\\n")

0 commit comments

Comments
 (0)