Skip to content

Commit ae871e4

Browse files
Fix ARRAY converter for struct arrays and update test expectations
- Improve _parse_array_native to handle struct arrays properly with smart comma splitting
- Add _split_array_items helper for respecting brace/bracket groupings
- Remove overly restrictive complex structure filtering that blocked struct arrays
- Update test expectations from "[1, 2]" strings to [1, 2] lists (improvement)
- Fix struct array parsing: '[{1, 2}, {3, 4}]' → [{'0': 1, '1': 2}, {'0': 3, '1': 4}]

All ARRAY tests now pass including struct_array, nested_array, and simple arrays.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 8d082e8 commit ae871e4

4 files changed

Lines changed: 60 additions & 13 deletions

File tree

pyathena/converter.py

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ def _to_array(varchar_value: Optional[str]) -> Optional[List[Any]]:
103103
# Optimize: Try JSON parsing first (most reliable)
104104
try:
105105
result = json.loads(varchar_value)
106-
return result if isinstance(result, list) else None
106+
if isinstance(result, list):
107+
return result
107108
except json.JSONDecodeError:
108109
# If JSON parsing fails, fall back to basic parsing for simple cases
109110
pass
@@ -113,11 +114,11 @@ def _to_array(varchar_value: Optional[str]) -> Optional[List[Any]]:
113114
return []
114115

115116
try:
116-
# Simple array format: [1, 2, 3] or [a, b, c]
117-
# For complex structures, return None to keep as string
118-
if any(char in inner for char in "{}()[]"):
119-
# Contains complex structures, skip parsing
117+
# For nested arrays, too complex for basic parsing
118+
if "[" in inner:
119+
# Contains nested arrays - too complex for basic parsing
120120
return None
121+
# Try native parsing (including struct arrays)
121122
return _parse_array_native(inner)
122123
except Exception:
123124
return None
@@ -225,7 +226,7 @@ def _to_struct(varchar_value: Optional[str]) -> Optional[Dict[str, Any]]:
225226

226227

227228
def _parse_array_native(inner: str) -> Optional[List[Any]]:
228-
"""Parse array native format: 1, 2, 3 or a, b, c.
229+
"""Parse array native format: 1, 2, 3 or {a, b}, {c, d}.
229230
230231
Args:
231232
inner: Interior content of array without brackets.
@@ -235,15 +236,23 @@ def _parse_array_native(inner: str) -> Optional[List[Any]]:
235236
"""
236237
result = []
237238

238-
# Simple split by comma for basic cases
239-
items = [item.strip() for item in inner.split(",")]
239+
# Smart split by comma - respect brace groupings
240+
items = _split_array_items(inner)
240241

241242
for item in items:
242243
if not item:
243244
continue
244245

245-
# Skip items with special characters (safety check)
246-
if any(char in item for char in '{}[]()="'):
246+
# Handle struct (ROW) values in format {a, b, c} or {key=value, ...}
247+
if item.strip().startswith("{") and item.strip().endswith("}"):
248+
# This is a struct value - parse it as a struct
249+
struct_value = _to_struct(item.strip())
250+
if struct_value is not None:
251+
result.append(struct_value)
252+
continue
253+
254+
# Skip items with nested arrays or complex quoting (safety check)
255+
if any(char in item for char in '[]="'):
247256
continue
248257

249258
# Convert item to appropriate type
@@ -253,6 +262,44 @@ def _parse_array_native(inner: str) -> Optional[List[Any]]:
253262
return result if result else None
254263

255264

265+
def _split_array_items(inner: str) -> List[str]:
266+
"""Split array items by comma, respecting brace and bracket groupings.
267+
268+
Args:
269+
inner: Interior content of array without brackets.
270+
271+
Returns:
272+
List of item strings.
273+
"""
274+
items = []
275+
current_item = ""
276+
brace_depth = 0
277+
bracket_depth = 0
278+
279+
for char in inner:
280+
if char == "{":
281+
brace_depth += 1
282+
elif char == "}":
283+
brace_depth -= 1
284+
elif char == "[":
285+
bracket_depth += 1
286+
elif char == "]":
287+
bracket_depth -= 1
288+
elif char == "," and brace_depth == 0 and bracket_depth == 0:
289+
# Top-level comma - end current item
290+
items.append(current_item.strip())
291+
current_item = ""
292+
continue
293+
294+
current_item += char
295+
296+
# Add the last item
297+
if current_item.strip():
298+
items.append(current_item.strip())
299+
300+
return items
301+
302+
256303
def _parse_map_native(inner: str) -> Optional[Dict[str, Any]]:
257304
"""Parse map native format: key1=value1, key2=value2.
258305

tests/pyathena/pandas/test_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def test_as_pandas(cursor):
115115
datetime(2017, 1, 1, 0, 0, 0).time(),
116116
date(2017, 1, 2),
117117
b"123",
118-
"[1, 2]",
118+
[1, 2],
119119
[1, 2],
120120
{"1": 2, "3": 4},
121121
{"1": 2, "3": 4},

tests/pyathena/sqlalchemy/test_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def test_reflect_select(self, engine):
254254
datetime(2017, 1, 1, 0, 0, 0),
255255
date(2017, 1, 2),
256256
b"123",
257-
"[1, 2]",
257+
[1, 2],
258258
{"1": 2, "3": 4}, # map type now converted to dict
259259
{"a": 1, "b": 2}, # row type now converted to dict
260260
Decimal("0.1"),

tests/pyathena/test_cursor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ def test_complex(self, cursor):
515515
datetime(2017, 1, 1, 0, 0, 0).time(),
516516
date(2017, 1, 2),
517517
b"123",
518-
"[1, 2]",
518+
[1, 2],
519519
[1, 2],
520520
{"1": 2, "3": 4},
521521
{"1": 2, "3": 4},

0 commit comments

Comments
 (0)