Skip to content

Commit 981e786

Browse files
Merge pull request #695 from pyathena-dev/refactor/converter-cleanup
2 parents 90e67bc + a8df601 commit 981e786

File tree

10 files changed

+121
-80
lines changed

10 files changed

+121
-80
lines changed

pyathena/arrow/converter.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,4 +115,5 @@ def __init__(self) -> None:
115115
)
116116

117117
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
118-
pass
118+
converter = self.get(type_)
119+
return converter(value)

pyathena/converter.py

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,20 +3,20 @@
33
import binascii
44
import json
55
import logging
6+
import re
67
from abc import ABCMeta, abstractmethod
78
from collections.abc import Callable
89
from copy import deepcopy
910
from datetime import date, datetime, time
1011
from decimal import Decimal
11-
from typing import Any
12+
from typing import Any, ClassVar
1213

1314
from dateutil.tz import gettz
1415

1516
from pyathena.parser import (
1617
TypedValueConverter,
1718
TypeNode,
1819
TypeSignatureParser,
19-
_normalize_hive_syntax,
2020
_split_array_items,
2121
)
2222
from pyathena.util import strtobool
@@ -551,6 +551,9 @@ class DefaultTypeConverter(Converter):
551551
['1', '2', '3']
552552
"""
553553

554+
_HIVE_SYNTAX_RE: ClassVar[re.Pattern[str]] = re.compile(r"[<>:]")
555+
_HIVE_REPLACEMENTS: ClassVar[dict[str, str]] = {"<": "(", ">": ")", ":": " "}
556+
554557
def __init__(self) -> None:
555558
super().__init__(mappings=deepcopy(_DEFAULT_CONVERTERS), default=_to_default)
556559
self._parser = TypeSignatureParser()
@@ -561,6 +564,25 @@ def __init__(self) -> None:
561564
)
562565
self._parsed_hints: dict[str, TypeNode] = {}
563566

567+
@staticmethod
568+
def _normalize_hive_syntax(type_str: str) -> str:
569+
"""Normalize Hive-style DDL syntax to Trino-style.
570+
571+
Converts angle-bracket notation (``array<struct<a:int>>``) to
572+
parenthesized notation (``array(struct(a int))``).
573+
574+
Args:
575+
type_str: Type signature string, possibly using Hive syntax.
576+
577+
Returns:
578+
Normalized type signature using Trino-style parenthesized notation.
579+
"""
580+
if "<" not in type_str:
581+
return type_str
582+
return DefaultTypeConverter._HIVE_SYNTAX_RE.sub(
583+
lambda m: DefaultTypeConverter._HIVE_REPLACEMENTS[m.group()], type_str
584+
)
585+
564586
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
565587
"""Convert a string value to the appropriate Python type.
566588
@@ -605,7 +627,7 @@ def _parse_type_hint(self, type_hint: str) -> TypeNode:
605627
Returns:
606628
Parsed TypeNode.
607629
"""
608-
normalized = _normalize_hive_syntax(type_hint)
630+
normalized = self._normalize_hive_syntax(type_hint)
609631
if normalized not in self._parsed_hints:
610632
self._parsed_hints[normalized] = self._parser.parse(normalized)
611633
return self._parsed_hints[normalized]

pyathena/pandas/converter.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,8 @@ def _dtypes(self) -> dict[str, type[Any]]:
8181
return self.__dtypes
8282

8383
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
84-
pass
84+
converter = self.get(type_)
85+
return converter(value)
8586

8687

8788
class DefaultPandasUnloadTypeConverter(Converter):
@@ -104,4 +105,5 @@ def __init__(self) -> None:
104105
)
105106

106107
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
107-
pass
108+
converter = self.get(type_)
109+
return converter(value)

pyathena/parser.py

Lines changed: 0 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import json
4-
import re
54
from collections.abc import Callable
65
from dataclasses import dataclass, field
76
from typing import Any
@@ -11,28 +10,6 @@
1110
"int": "integer",
1211
}
1312

14-
# Pattern for normalizing Hive-style type signatures to Trino-style.
15-
# Matches angle brackets and colons used in Hive DDL (e.g., array<struct<a:int>>).
16-
_HIVE_SYNTAX_RE: re.Pattern[str] = re.compile(r"[<>:]")
17-
_HIVE_REPLACEMENTS: dict[str, str] = {"<": "(", ">": ")", ":": " "}
18-
19-
20-
def _normalize_hive_syntax(type_str: str) -> str:
21-
"""Normalize Hive-style DDL syntax to Trino-style.
22-
23-
Converts angle-bracket notation (``array<struct<a:int>>``) to
24-
parenthesized notation (``array(struct(a int))``).
25-
26-
Args:
27-
type_str: Type signature string, possibly using Hive syntax.
28-
29-
Returns:
30-
Normalized type signature using Trino-style parenthesized notation.
31-
"""
32-
if "<" not in type_str:
33-
return type_str
34-
return _HIVE_SYNTAX_RE.sub(lambda m: _HIVE_REPLACEMENTS[m.group()], type_str)
35-
3613

3714
def _split_array_items(inner: str) -> list[str]:
3815
"""Split array items by comma, respecting brace and bracket groupings.

pyathena/polars/converter.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,4 +128,5 @@ def __init__(self) -> None:
128128
)
129129

130130
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
131-
pass
131+
converter = self.get(type_)
132+
return converter(value)
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
from pyathena.arrow.converter import DefaultArrowUnloadTypeConverter
2+
3+
4+
class TestDefaultArrowUnloadTypeConverter:
5+
def test_convert_delegates_to_default(self):
6+
"""convert() dispatches through the default converter instead of returning None."""
7+
converter = DefaultArrowUnloadTypeConverter()
8+
assert converter.convert("varchar", "hello") == "hello"
Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
from pyathena.pandas.converter import (
2+
DefaultPandasTypeConverter,
3+
DefaultPandasUnloadTypeConverter,
4+
)
5+
6+
7+
class TestDefaultPandasTypeConverter:
8+
def test_convert_delegates_to_mapping(self):
9+
"""convert() dispatches through self.get(type_) instead of returning None.
10+
11+
Verifies both the explicit mapping path (boolean → _to_boolean)
12+
and the default converter path (varchar → _to_default), plus
13+
None passthrough.
14+
"""
15+
converter = DefaultPandasTypeConverter()
16+
assert converter.convert("boolean", "true") is True
17+
assert converter.convert("varchar", "hello") == "hello"
18+
assert converter.convert("varchar", None) is None
19+
20+
21+
class TestDefaultPandasUnloadTypeConverter:
22+
def test_convert_delegates_to_default(self):
23+
"""convert() dispatches through the default converter instead of returning None."""
24+
converter = DefaultPandasUnloadTypeConverter()
25+
assert converter.convert("varchar", "hello") == "hello"
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
from pyathena.polars.converter import DefaultPolarsUnloadTypeConverter
2+
3+
4+
class TestDefaultPolarsUnloadTypeConverter:
5+
def test_convert_delegates_to_default(self):
6+
"""convert() dispatches through the default converter instead of returning None."""
7+
converter = DefaultPolarsUnloadTypeConverter()
8+
assert converter.convert("varchar", "hello") == "hello"

tests/pyathena/test_converter.py

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -408,3 +408,50 @@ def test_hive_syntax_caching(self):
408408
# Both should normalize to "array(integer)" in the cache
409409
assert "array(integer)" in converter._parsed_hints
410410
assert len(converter._parsed_hints) == 1
411+
412+
def test_normalize_hive_syntax_noop(self):
413+
"""Trino-style input passes through unchanged."""
414+
assert DefaultTypeConverter._normalize_hive_syntax("array(integer)") == "array(integer)"
415+
416+
def test_normalize_hive_syntax_replaces(self):
417+
assert (
418+
DefaultTypeConverter._normalize_hive_syntax("array<struct<a:int>>")
419+
== "array(struct(a int))"
420+
)
421+
422+
def test_normalize_hive_syntax_struct(self):
423+
converter = DefaultTypeConverter()
424+
result = converter.convert(
425+
"row",
426+
"{name=Alice, age=25}",
427+
type_hint="struct<name:varchar,age:int>",
428+
)
429+
assert result == {"name": "Alice", "age": 25}
430+
431+
def test_normalize_hive_syntax_nested(self):
432+
converter = DefaultTypeConverter()
433+
result = converter.convert(
434+
"array",
435+
"[{a=1, b=hello}, {a=2, b=world}]",
436+
type_hint="array<struct<a:int,b:varchar>>",
437+
)
438+
assert result == [{"a": 1, "b": "hello"}, {"a": 2, "b": "world"}]
439+
440+
def test_normalize_hive_syntax_map(self):
441+
converter = DefaultTypeConverter()
442+
result = converter.convert(
443+
"map",
444+
'{"x": 1, "y": 2}',
445+
type_hint="map<string,int>",
446+
)
447+
assert result == {"x": 1, "y": 2}
448+
449+
def test_normalize_hive_syntax_mixed(self):
450+
"""Hive angle brackets wrapping Trino-style parenthesized inner type."""
451+
converter = DefaultTypeConverter()
452+
result = converter.convert(
453+
"array",
454+
"[{a=1, b=hello}]",
455+
type_hint="array<row(a int, b varchar)>",
456+
)
457+
assert result == [{"a": 1, "b": "hello"}]

tests/pyathena/test_parser.py

Lines changed: 0 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
TypedValueConverter,
66
TypeNode,
77
TypeSignatureParser,
8-
_normalize_hive_syntax,
98
)
109

1110

@@ -109,55 +108,6 @@ def test_type_alias_in_complex_type(self):
109108
assert node.type_name == "array"
110109
assert node.children[0].type_name == "integer"
111110

112-
def test_hive_syntax_simple(self):
113-
parser = TypeSignatureParser()
114-
node = parser.parse(_normalize_hive_syntax("array<int>"))
115-
assert node.type_name == "array"
116-
assert node.children[0].type_name == "integer"
117-
118-
def test_hive_syntax_struct(self):
119-
parser = TypeSignatureParser()
120-
node = parser.parse(_normalize_hive_syntax("struct<a:int,b:varchar>"))
121-
assert node.type_name == "struct"
122-
assert node.field_names == ["a", "b"]
123-
assert node.children[0].type_name == "integer"
124-
assert node.children[1].type_name == "varchar"
125-
126-
def test_hive_syntax_nested(self):
127-
parser = TypeSignatureParser()
128-
node = parser.parse(_normalize_hive_syntax("array<struct<a:int,b:varchar>>"))
129-
assert node.type_name == "array"
130-
struct_node = node.children[0]
131-
assert struct_node.type_name == "struct"
132-
assert struct_node.field_names == ["a", "b"]
133-
assert struct_node.children[0].type_name == "integer"
134-
assert struct_node.children[1].type_name == "varchar"
135-
136-
def test_hive_syntax_map(self):
137-
parser = TypeSignatureParser()
138-
node = parser.parse(_normalize_hive_syntax("map<string,int>"))
139-
assert node.type_name == "map"
140-
assert node.children[0].type_name == "string"
141-
assert node.children[1].type_name == "integer"
142-
143-
def test_mixed_syntax(self):
144-
"""Hive angle brackets wrapping Trino-style parenthesized inner type."""
145-
parser = TypeSignatureParser()
146-
node = parser.parse(_normalize_hive_syntax("array<row(a int, b varchar)>"))
147-
assert node.type_name == "array"
148-
row_node = node.children[0]
149-
assert row_node.type_name == "row"
150-
assert row_node.field_names == ["a", "b"]
151-
assert row_node.children[0].type_name == "integer"
152-
assert row_node.children[1].type_name == "varchar"
153-
154-
def test_normalize_hive_syntax_noop(self):
155-
"""Trino-style input passes through unchanged."""
156-
assert _normalize_hive_syntax("array(integer)") == "array(integer)"
157-
158-
def test_normalize_hive_syntax_replaces(self):
159-
assert _normalize_hive_syntax("array<struct<a:int>>") == "array(struct(a int))"
160-
161111
def test_trailing_modifier_after_paren(self):
162112
"""Type with content after closing paren should not break parsing."""
163113
parser = TypeSignatureParser()

0 commit comments

Comments
 (0)