Skip to content

Commit add20b4

Browse files
Fix parser robustness: matching paren lookup and unnamed struct split
- Use _find_matching_paren() instead of assuming the closing ')' is at the end of the string, so trailing modifiers don't break parsing.
- Replace the naive comma split with _split_array_items() in the unnamed struct path so nested values are handled correctly.

Closes #693, closes #694.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ab33667 commit add20b4

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

pyathena/parser.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ def parse(self, type_str: str) -> TypeNode:
130130
type_name = type_str[:paren_idx].strip().lower()
131131
type_name = _TYPE_ALIASES.get(type_name, type_name)
132132

133-
inner = type_str[paren_idx + 1 : -1].strip()
133+
close_idx = self._find_matching_paren(type_str, paren_idx)
134+
inner = type_str[paren_idx + 1 : close_idx].strip()
134135

135136
if type_name in ("row", "struct"):
136137
parts = self._split_type_args(inner)
@@ -192,6 +193,27 @@ def _split_type_args(self, s: str) -> list[str]:
192193
parts.append("".join(current).strip())
193194
return parts
194195

196+
@staticmethod
def _find_matching_paren(s: str, open_idx: int) -> int:
    """Find the index of the closing parenthesis matching the one at *open_idx*.

    Nested parentheses are tracked with a depth counter, so the result is the
    ``)`` that closes the ``(`` at *open_idx* rather than the first ``)`` seen.

    Args:
        s: The full string.
        open_idx: Index of the opening ``(``.

    Returns:
        Index of the matching ``)``. If the parentheses are unbalanced and no
        match exists, ``len(s)`` is returned so that a caller slicing
        ``s[open_idx + 1 : result]`` keeps every remaining character instead
        of silently dropping the last one.
    """
    depth = 0
    for i in range(open_idx, len(s)):
        ch = s[i]
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return i
    # No matching ")" found: point one past the end rather than at the last
    # character, which is real content and must not be truncated by a slice.
    return len(s)
216+
195217
def _find_field_name_boundary(self, part: str) -> int:
196218
"""Find the boundary between field name and type in a row field definition.
197219
@@ -479,7 +501,7 @@ def _convert_typed_struct(self, value: str, type_node: TypeNode) -> dict[str, An
479501

480502
# Unnamed struct
481503
field_names = type_node.field_names or []
482-
values = [v.strip() for v in inner.split(",")]
504+
values = _split_array_items(inner)
483505
result = {}
484506
for i, v in enumerate(values):
485507
ft = field_types[i] if i < len(field_types) else TypeNode("varchar")

tests/pyathena/test_parser.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,13 @@ def test_normalize_hive_syntax_noop(self):
158158
def test_normalize_hive_syntax_replaces(self):
159159
assert _normalize_hive_syntax("array<struct<a:int>>") == "array(struct(a int))"
160160

161+
def test_trailing_modifier_after_paren(self):
    """Text following the closing paren must not derail type parsing."""
    # Stand-in for inputs shaped like "timestamp(3) with time zone".
    parsed = TypeSignatureParser().parse("decimal(10, 2) extra")
    assert parsed.type_name == "decimal"
167+
161168

162169
class TestTypedValueConverter:
163170
@pytest.fixture
@@ -261,3 +268,10 @@ def test_map_json_null_value_preserved(self, converter):
261268
result = converter.convert('{"a": null, "b": "null"}', node)
262269
assert result["a"] is None
263270
assert result["b"] == "null"
271+
272+
def test_unnamed_struct_with_nested_value(self, converter):
    """Splitting struct values must keep a nested ``{...}`` value intact."""
    # NOTE(review): the input below uses ``name=value`` pairs; confirm it
    # actually reaches the unnamed-struct branch this test is named after.
    signature = "row(inner row(x integer, y integer), val varchar)"
    type_node = TypeSignatureParser().parse(signature)
    converted = converter.convert("{inner={x=1, y=2}, val=hello}", type_node)
    expected = {"inner": {"x": 1, "y": 2}, "val": "hello"}
    assert converted == expected

0 commit comments

Comments
 (0)