Skip to content

Commit 60e92cc

Browse files
refactor:
- reorganized expressions/values conversion types, improved their doc - added Literals for sqltypes ids and string conversion, and various type aliases, covering all paths. - using the aforementioned literals in _sqltypes signatures
1 parent d560d80 commit 60e92cc

File tree

2 files changed

+112
-28
lines changed

2 files changed

+112
-28
lines changed

_duckdb-stubs/_sqltypes.pyi

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import duckdb
22
import typing
3+
from ._typing import StrIntoDType, DTypeIdentifiers
34

45
__all__: list[str] = [
56
"BIGINT",
@@ -39,13 +40,13 @@ class DuckDBPyType:
3940
def __getitem__(self, name: str) -> DuckDBPyType: ...
4041
def __hash__(self) -> int: ...
4142
@typing.overload
42-
def __init__(self, type_str: str, connection: duckdb.DuckDBPyConnection) -> None: ...
43+
def __init__(self, type_str: StrIntoDType, connection: duckdb.DuckDBPyConnection) -> None: ...
4344
@typing.overload
4445
def __init__(self, obj: object) -> None: ...
4546
@property
4647
def children(self) -> list[tuple[str, DuckDBPyType | int | list[str]]]: ...
4748
@property
48-
def id(self) -> str: ...
49+
def id(self) -> DTypeIdentifiers: ...
4950

5051
BIGINT: DuckDBPyType # value = BIGINT
5152
BIT: DuckDBPyType # value = BIT

_duckdb-stubs/_typing.pyi

Lines changed: 109 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from __future__ import annotations
22

3-
from typing import TypeAlias, TYPE_CHECKING, Protocol, Any, TypeVar, Generic
3+
from typing import TypeAlias, TYPE_CHECKING, Protocol, Any, TypeVar, Generic, Literal
44
from datetime import date, datetime, time, timedelta
55
from decimal import Decimal
66
from uuid import UUID
7-
from collections.abc import Mapping, Iterator
7+
from collections.abc import Mapping, Iterator, Sequence
88

99
if TYPE_CHECKING:
1010
from ._expression import Expression
@@ -52,44 +52,127 @@ class NPArrayLike(NPProtocol, Generic[_S_co, _D_co], Protocol):
5252
@property
5353
def size(self) -> int: ...
5454

55-
NumericLiteral: TypeAlias = int | float | Decimal
56-
"""Python objects that can be converted to a numerical `ConstantExpression` (integer or floating points numbers.)"""
55+
# Expression and values conversions
56+
57+
NumericLiteral: TypeAlias = int | float
58+
"""Python objects that can be converted to a numerical `Expression` or `DuckDBPyType` (integer or floating-point numbers)."""
5759
TemporalLiteral: TypeAlias = date | datetime | time | timedelta
58-
BlobLiteral: TypeAlias = bytes | bytearray | memoryview
59-
"""Python objects that can be converted to a `BLOB` `ConstantExpression`.
60+
BlobLiteral: TypeAlias = bytes | bytearray
61+
"""Python objects that can be converted to a `BLOB` `ConstantExpression` or `DuckDBPyType`.
6062
6163
Note:
6264
`bytes` can also be converted to a `BITSTRING`.
6365
"""
64-
NonNestedLiteral: TypeAlias = NumericLiteral | TemporalLiteral | str | bool | BlobLiteral | UUID
65-
PythonLiteral: TypeAlias = (
66-
NonNestedLiteral
67-
| list[PythonLiteral]
68-
| tuple[PythonLiteral, ...]
69-
| dict[NonNestedLiteral, PythonLiteral]
70-
| NPArrayLike[Any, Any]
71-
| None
72-
)
66+
ScalarLiteral: TypeAlias = NumericLiteral | BlobLiteral | str | bool
67+
NonNestedLiteral: TypeAlias = ScalarLiteral | TemporalLiteral | UUID | Decimal | memoryview
68+
69+
# NOTE:
70+
# Using `Sequence` and `Mapping` instead of `list | tuple` and `dict` would make the covariance of the element types work.
71+
# Thus, this would avoid the need to use `Any` for them.
72+
# However, this would also be incorrect at runtime, since only the 3 aforementioned container types are accepted.
73+
NestedLiteral: TypeAlias = list[Any] | tuple[Any, ...] | dict[Any, Any] | NPArrayLike[Any, Any]
74+
"""Container types that can be converted to a nested `ConstantExpression` (e.g. to `ARRAY` or `STRUCT`).
75+
76+
These types can be arbitrarily nested, as long as their leaf values are `PythonLiteral`."""
77+
78+
PythonLiteral: TypeAlias = NonNestedLiteral | NestedLiteral | None
7379
"""Python objects that can be converted to a `ConstantExpression`."""
80+
81+
IntoExprColumn: TypeAlias = Expression | str
82+
"""Types that are, or can be used as a `ColumnExpression`."""
83+
84+
IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral
85+
"""Any type that can be converted to an `Expression` (or is already one).
86+
87+
See Also:
88+
https://duckdb.org/docs/stable/clients/python/conversion
89+
"""
90+
7491
# the field_ids argument to to_parquet and write_parquet has a recursive structure
7592
ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType]
7693
IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression
7794
"""Types that can be converted to a table."""
78-
IntoDType: TypeAlias = DuckDBPyType | str
79-
"""Types that can be converted to a `DuckDBPyType`.
95+
# Datatypes conversions
8096

81-
Passing `INTEGER` is equivalent to passing `DuckDBPyType("INTEGER")` or `DuckDBPyType.INTEGER`.
97+
Builtins: TypeAlias = Literal[
98+
"bigint",
99+
"bit",
100+
"bignum",
101+
"blob",
102+
"boolean",
103+
"date",
104+
"double",
105+
"float",
106+
"hugeint",
107+
"integer",
108+
"interval",
109+
"smallint",
110+
"null",
111+
"time_tz",
112+
"time",
113+
"timestamp_ms",
114+
"timestamp_ns",
115+
"timestamp_s",
116+
"timestamp_tz",
117+
"timestamp",
118+
"tinyint",
119+
"ubigint",
120+
"uhugeint",
121+
"uinteger",
122+
"usmallint",
123+
"utinyint",
124+
"uuid",
125+
"varchar",
126+
]
127+
"""Literal strings convertible into `DuckDBPyType` instances.
82128
83129
Note:
84-
A `StrEnum` will be handled the same way as a `str`.
130+
Passing the same values in uppercase is also accepted.
131+
We use lowercase here to be able to reuse this `Literal` in the `DTypeIdentifiers` `Literal`.
85132
"""
86-
IntoNestedDType: TypeAlias = dict[str, IntoDType] | list[IntoDType]
87-
"""Types that can be converted to a nested `DuckDBPyType` (e.g. for struct or union types)."""
88-
IntoExprColumn: TypeAlias = Expression | str
89-
"""Types that are, or can be used as a `ColumnExpression`."""
90-
IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral
91-
"""Any type that can be converted to an `Expression` (or is already one).
133+
134+
NestedIds: TypeAlias = Literal["list", "struct", "array", "enum", "map", "decimal", "union"]
135+
"""Identifiers for nested types in `DuckDBPyType.id`."""
136+
137+
DTypeIdentifiers: TypeAlias = Builtins | NestedIds
138+
"""All possible identifiers for `DuckDBPyType.id`."""
139+
140+
# Declared as an explicit `TypeAlias`, like every other alias in this module.
# NOTE(review): `str` already subsumes the literal members for type checking;
# they are presumably kept for documentation/autocompletion — confirm.
StrIntoDType: TypeAlias = Builtins | Literal["json"] | str
141+
"""Any `str` that can be converted into a `DuckDBPyType`.
142+
143+
The dtypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`.
144+
145+
Note:
146+
A `StrEnum` will be handled the same way as a `str`."""
147+
148+
# NOTE:
149+
# the `dict` and `list` types are `Any` due to the same limitation mentioned in `NestedLiteral`.
150+
IntoDType: TypeAlias = (
151+
DuckDBPyType
152+
| StrIntoDType
153+
| type[NPScalarTypeLike]
154+
| type[ScalarLiteral]
155+
| type[list[Any]]
156+
| type[dict[Any, Any]]
157+
| dict[Any, Any]
158+
)
159+
"""All types that can be converted to a `DuckDBPyType`.
160+
161+
They can be arbitrarily nested as long as their leaf values are convertible to `DuckDBPyType`.
92162
93163
See Also:
94-
https://duckdb.org/docs/stable/clients/python/conversion
164+
https://duckdb.org/docs/stable/clients/python/types
165+
"""
166+
167+
# NOTE: here we keep the covariance "hack" and warn the user in the docstring,
168+
# because otherwise we can just resort to `Any` for the `dict` and `list` types.
169+
IntoNestedDType: TypeAlias = Mapping[str, IntoDType] | Sequence[IntoDType]
170+
"""Types that can be converted either into:
171+
172+
- a nested `DuckDBPyType` (e.g. `STRUCT` or `UNION`)
173+
- a schema for file reads
174+
175+
Warning:
176+
Only `dict` and `list` containers are accepted at runtime.
177+
We use `Mapping` and `Sequence` here to satisfy the covariance of the element types.
95178
"""

0 commit comments

Comments
 (0)