|
8 | 8 | from copy import deepcopy |
9 | 9 | from datetime import date, datetime, time |
10 | 10 | from decimal import Decimal |
11 | | -from typing import Any, Callable, Dict, Optional, Type |
| 11 | +from typing import Any, Callable, Dict, List, Optional, Type |
12 | 12 |
|
13 | 13 | from dateutil.tz import gettz |
14 | 14 |
|
@@ -78,6 +78,51 @@ def _to_json(varchar_value: Optional[str]) -> Optional[Any]: |
78 | 78 | return json.loads(varchar_value) |
79 | 79 |
|
80 | 80 |
|
def _to_array(varchar_value: Optional[str]) -> Optional[List[Any]]:
    """Convert array data to a Python list.

    Two textual formats are recognised:
        1. JSON format: '[1, 2, 3]' or '["a", "b", "c"]' (recommended).
        2. Athena native format with unquoted elements: '[a, b, c]'
           (flat arrays only).

    For complex (nested or structured) arrays, use
    CAST(array_column AS JSON) in your SQL query.

    Args:
        varchar_value: String representation of array data.

    Returns:
        The parsed list, or None when the input is None, is not wrapped in
        square brackets, or cannot be parsed by either strategy.
    """
    if varchar_value is None:
        return None

    # Quick check: anything not wrapped in brackets cannot be an array.
    if not (varchar_value.startswith("[") and varchar_value.endswith("]")):
        return None

    # JSON is tried first because it is the unambiguous representation.
    try:
        decoded = json.loads(varchar_value)
    except (json.JSONDecodeError, RecursionError):
        # JSONDecodeError: not valid JSON, but may still be the native format.
        # RecursionError: nesting deeper than the interpreter allows; treat it
        # as a parse failure instead of letting it escape the converter. The
        # bracket check below then rejects such input.
        pass
    else:
        return decoded if isinstance(decoded, list) else None

    inner = varchar_value[1:-1].strip()
    if not inner:
        return []

    # Nested or structured elements cannot be split reliably on commas, so
    # they are rejected here rather than parsed incorrectly.
    if any(char in inner for char in "{}()[]"):
        return None

    try:
        # Simple flat format: 1, 2, 3 or a, b, c
        return _parse_array_native(inner)
    except Exception:
        # Deliberate best-effort: any failure while converting elements is
        # reported as "could not parse" rather than raised to the caller.
        return None
| 124 | + |
| 125 | + |
81 | 126 | def _to_map(varchar_value: Optional[str]) -> Optional[Dict[str, Any]]: |
82 | 127 | """Convert map data to Python dictionary. |
83 | 128 |
|
@@ -179,6 +224,35 @@ def _to_struct(varchar_value: Optional[str]) -> Optional[Dict[str, Any]]: |
179 | 224 | return None |
180 | 225 |
|
181 | 226 |
|
def _parse_array_native(inner: str) -> Optional[List[Any]]:
    """Parse the interior of a flat native-format array, e.g. ``1, 2, 3``.

    The text is split on commas and each piece is stripped of surrounding
    whitespace. Empty pieces and pieces containing any of the characters
    ``{}[]()="`` are skipped; every remaining piece is passed through
    ``_convert_value``.

    Args:
        inner: Interior content of the array, without the enclosing brackets.

    Returns:
        List of converted values, or None if no piece survived the filtering.
    """
    # Characters that suggest a piece is not a plain scalar.
    unsafe_chars = '{}[]()="'

    # NOTE(review): skipped pieces are dropped silently, so the result can be
    # shorter than the source array - confirm this is what callers expect.
    parsed = [
        _convert_value(token)
        for token in (piece.strip() for piece in inner.split(","))
        if token and not any(char in token for char in unsafe_chars)
    ]

    return parsed or None
| 254 | + |
| 255 | + |
182 | 256 | def _parse_map_native(inner: str) -> Optional[Dict[str, Any]]: |
183 | 257 | """Parse map native format: key1=value1, key2=value2. |
184 | 258 |
|
@@ -302,7 +376,7 @@ def _to_default(varchar_value: Optional[str]) -> Optional[str]: |
302 | 376 | "date": _to_date, |
303 | 377 | "time": _to_time, |
304 | 378 | "varbinary": _to_binary, |
305 | | - "array": _to_default, |
| 379 | + "array": _to_array, |
306 | 380 | "map": _to_map, |
307 | 381 | "row": _to_struct, |
308 | 382 | "decimal": _to_decimal, |
|
0 commit comments