Skip to content

Commit 2d6e507

Browse files
committed
Merge branch 'main' into bugfix/use-balanced-or-tree
2 parents 7733923 + 7a6a7c8 commit 2d6e507

28 files changed

Lines changed: 770 additions & 172 deletions

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v2.23.1
65+
uses: pypa/cibuildwheel@v2.23.2
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v2.23.1
60+
uses: pypa/cibuildwheel@v2.23.2
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

dev/provision.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@
328328
CREATE TABLE {catalog_name}.default.test_table_empty_list_and_map (
329329
col_list array<int>,
330330
col_map map<int, int>,
331+
col_struct struct<test:int>,
331332
col_list_with_struct array<struct<test:int>>
332333
)
333334
USING iceberg
@@ -340,8 +341,8 @@
340341
spark.sql(
341342
f"""
342343
INSERT INTO {catalog_name}.default.test_table_empty_list_and_map
343-
VALUES (null, null, null),
344-
(array(), map(), array(struct(1)))
344+
VALUES (null, null, null, null),
345+
(array(), map(), struct(1), array(struct(1)))
345346
"""
346347
)
347348

poetry.lock

Lines changed: 93 additions & 93 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/avro/reader.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,14 @@ class TimestampReader(IntegerReader):
175175
"""
176176

177177

178+
class TimestampNanoReader(IntegerReader):
179+
"""Reads a nanosecond granularity timestamp from the stream.
180+
181+
Long is decoded as python integer which represents
182+
the number of nanoseconds from the unix epoch, 1 January 1970.
183+
"""
184+
185+
178186
class TimestamptzReader(IntegerReader):
179187
"""Reads a microsecond granularity timestamptz from the stream.
180188
@@ -185,6 +193,16 @@ class TimestamptzReader(IntegerReader):
185193
"""
186194

187195

196+
class TimestamptzNanoReader(IntegerReader):
197+
"""Reads a microsecond granularity timestamptz from the stream.
198+
199+
Long is decoded as python integer which represents
200+
the number of nanoseconds from the unix epoch, 1 January 1970.
201+
202+
Adjusted to UTC.
203+
"""
204+
205+
188206
class StringReader(Reader):
189207
def read(self, decoder: BinaryDecoder) -> str:
190208
return decoder.read_utf8()

pyiceberg/avro/resolver.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@
4444
StringReader,
4545
StructReader,
4646
TimeReader,
47+
TimestampNanoReader,
4748
TimestampReader,
49+
TimestamptzNanoReader,
4850
TimestamptzReader,
4951
UnknownReader,
5052
UUIDReader,
@@ -64,6 +66,8 @@
6466
OptionWriter,
6567
StringWriter,
6668
StructWriter,
69+
TimestampNanoWriter,
70+
TimestamptzNanoWriter,
6771
TimestamptzWriter,
6872
TimestampWriter,
6973
TimeWriter,
@@ -99,7 +103,9 @@
99103
PrimitiveType,
100104
StringType,
101105
StructType,
106+
TimestampNanoType,
102107
TimestampType,
108+
TimestamptzNanoType,
103109
TimestamptzType,
104110
TimeType,
105111
UnknownType,
@@ -184,9 +190,15 @@ def visit_time(self, time_type: TimeType) -> Writer:
184190
def visit_timestamp(self, timestamp_type: TimestampType) -> Writer:
185191
return TimestampWriter()
186192

193+
def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType) -> Writer:
194+
return TimestampNanoWriter()
195+
187196
def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> Writer:
188197
return TimestamptzWriter()
189198

199+
def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) -> Writer:
200+
return TimestamptzNanoWriter()
201+
190202
def visit_string(self, string_type: StringType) -> Writer:
191203
return StringWriter()
192204

@@ -332,9 +344,15 @@ def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Wri
332344
def visit_timestamp(self, timestamp_type: TimestampType, partner: Optional[IcebergType]) -> Writer:
333345
return TimestampWriter()
334346

347+
def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, partner: Optional[IcebergType]) -> Writer:
348+
return TimestampNanoWriter()
349+
335350
def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: Optional[IcebergType]) -> Writer:
336351
return TimestamptzWriter()
337352

353+
def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, partner: Optional[IcebergType]) -> Writer:
354+
return TimestamptzNanoWriter()
355+
338356
def visit_string(self, string_type: StringType, partner: Optional[IcebergType]) -> Writer:
339357
return StringWriter()
340358

@@ -465,9 +483,15 @@ def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Rea
465483
def visit_timestamp(self, timestamp_type: TimestampType, partner: Optional[IcebergType]) -> Reader:
466484
return TimestampReader()
467485

486+
def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, partner: Optional[IcebergType]) -> Reader:
487+
return TimestampNanoReader()
488+
468489
def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: Optional[IcebergType]) -> Reader:
469490
return TimestamptzReader()
470491

492+
def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, partner: Optional[IcebergType]) -> Reader:
493+
return TimestamptzNanoReader()
494+
471495
def visit_string(self, string_type: StringType, partner: Optional[IcebergType]) -> Reader:
472496
return StringReader()
473497

pyiceberg/avro/writer.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,24 @@ def write(self, encoder: BinaryEncoder, val: int) -> None:
9595
encoder.write_int(val)
9696

9797

98+
@dataclass(frozen=True)
99+
class TimestampNanoWriter(Writer):
100+
def write(self, encoder: BinaryEncoder, val: int) -> None:
101+
encoder.write_int(val)
102+
103+
98104
@dataclass(frozen=True)
99105
class TimestamptzWriter(Writer):
100106
def write(self, encoder: BinaryEncoder, val: int) -> None:
101107
encoder.write_int(val)
102108

103109

110+
@dataclass(frozen=True)
111+
class TimestamptzNanoWriter(Writer):
112+
def write(self, encoder: BinaryEncoder, val: int) -> None:
113+
encoder.write_int(val)
114+
115+
104116
@dataclass(frozen=True)
105117
class StringWriter(Writer):
106118
def write(self, encoder: BinaryEncoder, val: Any) -> None:

pyiceberg/conversions.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@
5555
LongType,
5656
PrimitiveType,
5757
StringType,
58+
TimestampNanoType,
5859
TimestampType,
60+
TimestamptzNanoType,
5961
TimestamptzType,
6062
TimeType,
6163
UnknownType,
@@ -66,6 +68,7 @@
6668
date_str_to_days,
6769
date_to_days,
6870
datetime_to_micros,
71+
datetime_to_nanos,
6972
days_to_date,
7073
micros_to_time,
7174
micros_to_timestamp,
@@ -127,7 +130,9 @@ def _(primitive_type: BooleanType, value_str: str) -> Union[int, float, str, uui
127130
@partition_to_py.register(DateType)
128131
@partition_to_py.register(TimeType)
129132
@partition_to_py.register(TimestampType)
133+
@partition_to_py.register(TimestampNanoType)
130134
@partition_to_py.register(TimestamptzType)
135+
@partition_to_py.register(TimestamptzNanoType)
131136
@handle_none
132137
def _(primitive_type: PrimitiveType, value_str: str) -> int:
133138
"""Convert a string to an integer value.
@@ -213,12 +218,20 @@ def _(_: PrimitiveType, value: int) -> bytes:
213218

214219
@to_bytes.register(TimestampType)
215220
@to_bytes.register(TimestamptzType)
216-
def _(_: TimestampType, value: Union[datetime, int]) -> bytes:
221+
def _(_: PrimitiveType, value: Union[datetime, int]) -> bytes:
217222
if isinstance(value, datetime):
218223
value = datetime_to_micros(value)
219224
return _LONG_STRUCT.pack(value)
220225

221226

227+
@to_bytes.register(TimestampNanoType)
228+
@to_bytes.register(TimestamptzNanoType)
229+
def _(_: PrimitiveType, value: Union[datetime, int]) -> bytes:
230+
if isinstance(value, datetime):
231+
value = datetime_to_nanos(value)
232+
return _LONG_STRUCT.pack(value)
233+
234+
222235
@to_bytes.register(DateType)
223236
def _(_: DateType, value: Union[date, int]) -> bytes:
224237
if isinstance(value, date):
@@ -319,6 +332,8 @@ def _(_: PrimitiveType, b: bytes) -> int:
319332
@from_bytes.register(TimeType)
320333
@from_bytes.register(TimestampType)
321334
@from_bytes.register(TimestamptzType)
335+
@from_bytes.register(TimestampNanoType)
336+
@from_bytes.register(TimestamptzNanoType)
322337
def _(_: PrimitiveType, b: bytes) -> int:
323338
return _LONG_STRUCT.unpack(b)[0]
324339

pyiceberg/io/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
S3_PROXY_URI = "s3.proxy-uri"
6464
S3_CONNECT_TIMEOUT = "s3.connect-timeout"
6565
S3_REQUEST_TIMEOUT = "s3.request-timeout"
66+
S3_SIGNER = "s3.signer"
6667
S3_SIGNER_URI = "s3.signer.uri"
6768
S3_SIGNER_ENDPOINT = "s3.signer.endpoint"
6869
S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign"

pyiceberg/io/fsspec.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
S3_REQUEST_TIMEOUT,
6868
S3_SECRET_ACCESS_KEY,
6969
S3_SESSION_TOKEN,
70+
S3_SIGNER,
7071
S3_SIGNER_ENDPOINT,
7172
S3_SIGNER_ENDPOINT_DEFAULT,
7273
S3_SIGNER_URI,
@@ -137,7 +138,7 @@ def _s3(properties: Properties) -> AbstractFileSystem:
137138
config_kwargs = {}
138139
register_events: Dict[str, Callable[[Properties], None]] = {}
139140

140-
if signer := properties.get("s3.signer"):
141+
if signer := properties.get(S3_SIGNER):
141142
logger.info("Loading signer %s", signer)
142143
if signer_func := SIGNERS.get(signer):
143144
signer_func_with_properties = partial(signer_func, properties)

0 commit comments

Comments
 (0)