Skip to content

Commit 1379aa0

Browse files
committed
fix: tighten decimal regex and fall through on unknown duration unit
Two nits from PR review: - decimal regex 'decimal128?' made only the trailing 8 optional, so it matched the non-existent 'decimal12' form. Changed to 'decimal(?:128|256)?' to accept decimal, decimal128, or decimal256. - _ARROW_UNIT_TO_IBIS.get(..., 's') silently mapped any unrecognised duration unit (e.g. duration[foo]) to Interval(s). Now returns None so the caller falls through to the Postgres parser / String fallback.
1 parent bddb9f2 commit 1379aa0

2 files changed

Lines changed: 20 additions & 2 deletions

File tree

src/ibis_hotdata/types.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
# embedded parameters (unit, timezone, precision, value type, …).
4343
_TIMESTAMP_RE = re.compile(r"^timestamp\[(\w+)(?:,\s*tz=(.+))?\]$", re.IGNORECASE)
4444
_DURATION_RE = re.compile(r"^duration\[(\w+)\]$", re.IGNORECASE)
45-
_DECIMAL_RE = re.compile(r"^decimal128?\((\d+),\s*(\d+)\)$", re.IGNORECASE)
45+
_DECIMAL_RE = re.compile(r"^decimal(?:128|256)?\((\d+),\s*(\d+)\)$", re.IGNORECASE)
4646
_LIST_RE = re.compile(r"^(?:large_)?list<item:\s*(.+)>$", re.IGNORECASE)
4747

4848
# Map Arrow time-unit strings to Ibis IntervalUnit strings and Timestamp scales.
@@ -79,7 +79,9 @@ def _parse_parametric_arrow_type(raw: str, *, nullable: bool) -> dt.DataType | N
7979

8080
m = _DURATION_RE.match(raw)
8181
if m:
82-
unit = _ARROW_UNIT_TO_IBIS.get(m.group(1).lower(), "s")
82+
unit = _ARROW_UNIT_TO_IBIS.get(m.group(1).lower())
83+
if unit is None:
84+
return None # unrecognised unit — fall through to Postgres parser / String fallback
8385
return dt.Interval(unit=unit, nullable=nullable)
8486

8587
m = _DECIMAL_RE.match(raw)

tests/test_hotdata_types.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,22 @@ def test_dtype_from_hotdata_arrow_duration(sql_type, expected_unit):
113113
assert out.nullable is False
114114

115115

116+
def test_dtype_from_hotdata_arrow_duration_unknown_unit_falls_back():
117+
# An unrecognised duration unit should not silently map to seconds;
118+
# it falls through to the Postgres parser (which returns Unknown) or String fallback.
119+
out = dtype_from_hotdata_sql_type("duration[foo]", nullable=True)
120+
assert not isinstance(out, dt.Interval) # must not produce a valid Interval
121+
122+
116123
@pytest.mark.parametrize(
117124
("sql_type", "expected_precision", "expected_scale"),
118125
[
119126
("decimal128(10, 3)", 10, 3),
120127
("decimal128(38, 0)", 38, 0),
128+
("decimal256(76, 38)", 76, 38),
121129
("decimal(5, 2)", 5, 2),
122130
("DECIMAL128(18, 6)", 18, 6),
131+
# decimal12 is NOT a valid form — should not be matched by the decimal regex
123132
],
124133
)
125134
def test_dtype_from_hotdata_arrow_decimal(sql_type, expected_precision, expected_scale):
@@ -150,3 +159,10 @@ def test_dtype_from_hotdata_arrow_list(sql_type, expected_value_cls, expected_it
150159
assert isinstance(out.value_type, expected_value_cls)
151160
assert out.value_type.nullable is expected_item_nullable
152161
assert out.nullable is True
162+
163+
164+
def test_dtype_from_hotdata_arrow_decimal12_not_matched():
165+
# "decimal12" (only the trailing 8 made optional) must NOT match the decimal regex.
166+
# The Postgres parser handles bare "decimal" forms; decimal12 is not a real type.
167+
out = dtype_from_hotdata_sql_type("decimal12(10, 3)", nullable=True)
168+
assert not isinstance(out, dt.Decimal) # falls through to Unknown or String

0 commit comments

Comments
 (0)