@@ -85,9 +85,7 @@ def _to_struct(varchar_value: Optional[str]) -> Optional[Dict[str, Any]]:
8585 1. JSON format: '{"key": "value", "num": 123}' (recommended)
8686 2. Athena native format: '{key=value, num=123}' (basic cases only)
8787
88- For structs containing special characters (commas, equals signs, quotes,
89- braces), use CAST(struct_column AS JSON) in your SQL query to ensure
90- proper handling.
88+ For complex structs, use CAST(struct_column AS JSON) in your SQL query.
9189
9290 Args:
9391 varchar_value: String representation of struct data
@@ -98,99 +96,96 @@ def _to_struct(varchar_value: Optional[str]) -> Optional[Dict[str, Any]]:
9896 if varchar_value is None :
9997 return None
10098
101- # First try to parse as JSON (preferred format )
99+ # First try JSON parsing (preferred)
102100 try :
103101 result = json .loads (varchar_value )
104102 return result if isinstance (result , dict ) else None
105103 except json .JSONDecodeError :
106104 pass
107105
108- # Handle Athena's native struct format: {a=1, b=2} or {Alice, 25}
109- # WARNING: This is a simplified parser that works for basic cases.
110- # Athena's actual struct format may have complex escaping rules for
111- # special characters that are not fully handled here.
112- # For complex structs, JSON format is recommended.
113- if varchar_value .startswith ("{" ) and varchar_value .endswith ("}" ):
114- try :
115- inner = varchar_value [1 :- 1 ].strip ()
116- if not inner :
117- return {}
118-
119- # Check if this is a named struct (contains =) or unnamed struct
120- # (comma-separated values)
121- if "=" in inner :
122- # Named struct format: {a=1, b=2}
123- pairs = []
124- current_pos = 0
125-
126- while current_pos < len (inner ):
127- # Find the next key=value pair
128- eq_pos = inner .find ("=" , current_pos )
129- if eq_pos == - 1 :
130- break
131-
132- # Extract key (everything before =)
133- key = inner [current_pos :eq_pos ].strip ()
134-
135- # Find the end of the value (next comma or end of string)
136- comma_pos = inner .find ("," , eq_pos + 1 )
137- if comma_pos == - 1 :
138- value = inner [eq_pos + 1 :].strip ()
139- current_pos = len (inner )
140- else :
141- value = inner [eq_pos + 1 : comma_pos ].strip ()
142- current_pos = comma_pos + 1
143-
144- # Basic validation: skip problematic pairs but continue processing others
145- # Allow basic comma separation, but reject nested structures
146- if any (char in key for char in '{}="' ) or any (char in value for char in '{}="' ):
147- # Skip this problematic pair but continue with others
148- continue
149-
150- # Add quotes to key
151- key = f'"{ key } "'
152-
153- # Handle value quoting - if it's not a number, quote it
154- if not (
155- value .isdigit ()
156- or (value .startswith ("-" ) and value [1 :].isdigit ())
157- or value .replace ("." , "" , 1 ).isdigit ()
158- or value in ("true" , "false" , "null" )
159- ):
160- value = f'"{ value } "'
161-
162- pairs .append (f"{ key } :{ value } " )
163-
164- if pairs :
165- json_str = "{" + "," .join (pairs ) + "}"
166- result = json .loads (json_str )
167- return result if isinstance (result , dict ) else None
168- else :
169- # Unnamed struct format: {Alice, 25} - convert to indexed dict
170- # Split by comma and create indexed keys
171- values = [v .strip () for v in inner .split ("," )]
172- if values :
173- # Create indexed dictionary: {"0": "Alice", "1": "25"}
174- indexed_dict : Dict [str , Any ] = {}
175- for i , value in enumerate (values ):
176- # Try to convert numbers
177- try :
178- # Check if it's an integer
179- if value .isdigit () or (value .startswith ("-" ) and value [1 :].isdigit ()):
180- indexed_dict [str (i )] = int (value )
181- # Check if it's a float
182- elif "." in value :
183- indexed_dict [str (i )] = float (value )
184- else :
185- indexed_dict [str (i )] = value
186- except ValueError :
187- indexed_dict [str (i )] = value
188- return indexed_dict
189- except (ValueError , json .JSONDecodeError , IndexError ):
190- pass
191-
192- # If all parsing attempts fail, return None
193- return None
106+ # Handle Athena native format: {a=1, b=2} or {Alice, 25}
107+ if not (varchar_value .startswith ("{" ) and varchar_value .endswith ("}" )):
108+ return None
109+
110+ inner = varchar_value [1 :- 1 ].strip ()
111+ if not inner :
112+ return {}
113+
114+ try :
115+ if "=" in inner :
116+ # Named struct: {a=1, b=2}
117+ return _parse_named_struct (inner )
118+ # Unnamed struct: {Alice, 25}
119+ return _parse_unnamed_struct (inner )
120+ except Exception :
121+ return None
122+
123+
def _parse_named_struct(inner: str) -> Optional[Dict[str, Any]]:
    """Parse the body of a named Athena struct such as ``a=1, b=2``.

    The text is split naively on commas; each piece is then split on its
    first ``=`` into a field name and a raw value. Pieces without ``=`` and
    pieces whose name or value contains ``{``, ``}``, ``=`` or ``"`` are
    skipped rather than aborting the whole parse.

    Args:
        inner: Struct content with the surrounding braces already removed.

    Returns:
        Mapping of field name to converted value, or None when no usable
        ``name=value`` pair was found.
    """
    # Characters this simple parser cannot handle safely (nesting, quoting).
    forbidden = '{}="'
    parsed: Dict[str, Any] = {}

    for segment in inner.split(","):
        name, separator, raw = segment.strip().partition("=")
        if not separator:
            # No '=' in this piece, so it is not a name=value pair.
            continue

        name = name.strip()
        raw = raw.strip()
        if any(ch in name or ch in raw for ch in forbidden):
            continue

        parsed[name] = _convert_value(raw)

    return parsed or None
154+
155+
def _parse_unnamed_struct(inner: str) -> Dict[str, Any]:
    """Parse the body of an unnamed Athena struct such as ``Alice, 25``.

    The text is split on commas and every element is stored under its
    zero-based position rendered as a string (``"0"``, ``"1"``, ...).

    Args:
        inner: Struct content with the surrounding braces already removed.

    Returns:
        Mapping of positional string keys to converted values.
    """
    indexed: Dict[str, Any] = {}
    for position, raw in enumerate(inner.split(",")):
        indexed[str(position)] = _convert_value(raw.strip())
    return indexed
167+
168+
def _convert_value(value: str) -> Any:
    """Convert a raw struct field string to the closest Python scalar.

    Recognized forms, checked in this order:

    * ``null`` / ``true`` / ``false`` (case-insensitive) become ``None``,
      ``True`` and ``False``.
    * Digit-only strings, optionally with a leading ``-``, become ``int``.
    * Strings containing a ``.`` that are otherwise digits (ignoring one
      ``-``) become ``float``.

    Anything else is returned unchanged. The numeric checks are only
    heuristics, so a string that passes them but is rejected by ``int()`` or
    ``float()`` (for example ``"1.-5"`` or a non-decimal Unicode digit) is
    also returned unchanged instead of raising.

    Args:
        value: String value to convert.

    Returns:
        The converted value as None, bool, int or float, or the original
        string when no conversion applies.
    """
    lowered = value.lower()
    if lowered == "null":
        return None
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    try:
        if value.isdigit() or (value.startswith("-") and value[1:].isdigit()):
            return int(value)
        if "." in value and value.replace(".", "", 1).replace("-", "", 1).isdigit():
            return float(value)
    except ValueError:
        # The pre-checks are looser than int()/float(); treat a rejected
        # value as plain text so one odd field does not fail the whole parse.
        return value
    return value
194189
195190
196191def _to_default (varchar_value : Optional [str ]) -> Optional [str ]:
0 commit comments