Skip to content

Commit aba4953

Browse files
committed
fixes
Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent 1e1f1fa commit aba4953

11 files changed

Lines changed: 161 additions & 98 deletions

File tree

AGENTS.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,22 @@ Makefile target is used.
7272

7373
If you touch documentation run doc tests via `cargo test --doc`.
7474

75+
For Python binding changes under `vortex-python/`, run the narrow Python checks that match the
76+
files touched before broader test suites. Useful checks include:
77+
78+
```bash
79+
python -m py_compile <changed-python-files>
80+
uv run --all-packages --reinstall-package vortex-data pytest <changed-python-tests>
81+
```
82+
83+
If Python docstrings, `docs/api/python/`, or Sphinx configuration change, also run the docs checks
84+
from a clean Sphinx environment:
85+
86+
```bash
87+
uv run --all-packages make -C docs clean html
88+
uv run --all-packages make -C docs clean doctest
89+
```
90+
7591
## Linting, Formatting, and Generated Files
7692

7793
Run verification that matches the files changed. Do not run expensive Rust checks for changes that
@@ -80,6 +96,16 @@ with no Rust/API behavior impact. For docs/config-only changes, validate formatt
8096
or with a targeted doc/config command, and verify symlink or path changes with `ls`, `find`, and
8197
`git status`.
8298

99+
For Python binding changes under `vortex-python/`, run the relevant Python lint and type checks:
100+
101+
```bash
102+
uv run basedpyright vortex-python
103+
uv run ruff check <changed-python-files>
104+
```
105+
106+
If PyO3 Rust files in `vortex-python/src/` change, include `cargo +nightly fmt --check -p
107+
vortex-python`. Always finish Python binding work with `git diff --check`.
108+
83109
For Rust code, public API, feature flag, or generated-file changes, run these before stopping:
84110

85111
```bash

docs/api/python/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ API Reference
6262
arrays
6363
expr
6464
compress
65+
session
6566
io
6667
store
6768
dataset

docs/api/python/session.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Session
2+
=======
3+
4+
.. autoclass:: vortex.Session
5+
:members:

vortex-python/python/vortex/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@
7979
from .arrays import (
8080
Array,
8181
PyArray,
82-
_unpickle_array, # pyright: ignore[reportPrivateUsage]
8382
array,
8483
)
8584
from .file import VortexFile, open
@@ -193,7 +192,6 @@
193192
"dumps",
194193
"load",
195194
"loads",
196-
"_unpickle_array",
197195
# File
198196
"VortexFile",
199197
"open",
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
# NOTE(review): type stub only; the implementation is not visible here.
# Presumably sets the number of worker threads to `n` — confirm against the native module.
def set_worker_threads(n: int) -> None: ...
5+
# NOTE(review): presumably sizes the worker pool from the machine's available
# parallelism — confirm against the native module.
def set_worker_threads_to_available_parallelism() -> None: ...
6+
# NOTE(review): presumably returns the current number of worker threads — confirm.
def worker_count() -> int: ...

vortex-python/python/vortex/arrays.py

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,15 @@
33
from __future__ import annotations
44

55
import abc
6-
from collections.abc import Callable, Sequence
6+
from collections.abc import Callable
77
from typing import TYPE_CHECKING, Any
88

99
import pyarrow
1010
from typing_extensions import override
1111

1212
import vortex._lib.arrays as _arrays # pyright: ignore[reportMissingModuleSource]
1313
from vortex._lib.dtype import DType # pyright: ignore[reportMissingModuleSource]
14-
from vortex._lib.serde import ( # pyright: ignore[reportMissingModuleSource]
15-
ArrayContext,
16-
SerializedArray,
17-
decode_ipc_array_buffers,
18-
)
14+
from vortex._lib.serde import ArrayContext, SerializedArray # pyright: ignore[reportMissingModuleSource]
1915
from vortex._lib.session import Session # pyright: ignore[reportMissingModuleSource]
2016

2117
try:
@@ -507,20 +503,3 @@ def decode(cls, parts: SerializedArray, ctx: ArrayContext, dtype: DType, len: in
507503
that represent the current array. Implementations of this function should validate this
508504
information, and then construct a new array.
509505
"""
510-
511-
512-
def _unpickle_array(
513-
array_buffers: Sequence[bytes | memoryview],
514-
dtype_buffers: Sequence[bytes | memoryview],
515-
*,
516-
session: Session,
517-
) -> Array: # pyright: ignore[reportUnusedFunction]
518-
"""Unpickle a Vortex array from IPC-encoded buffer lists.
519-
520-
This is an internal function used by the pickle module for both protocol 4 and 5.
521-
522-
For protocol 4, receives list[bytes] from __reduce__.
523-
For protocol 5, receives list[PickleBuffer/memoryview] from __reduce_ex__.
524-
Both use decode_ipc_array_buffers which concatenates the buffers during deserialization.
525-
"""
526-
return decode_ipc_array_buffers(array_buffers, dtype_buffers, session=session)

vortex-python/python/vortex/pickle.py

Lines changed: 63 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
import io
77
import pickle as _pickle
88
from ast import literal_eval
9-
from typing import Any
9+
from collections.abc import Callable, Iterable, Sequence
10+
from typing import BinaryIO, TypeAlias, TypeGuard, cast
11+
12+
from typing_extensions import override
1013

1114
from vortex._lib.arrays import Array # pyright: ignore[reportMissingModuleSource]
1215
from vortex._lib.serde import ( # pyright: ignore[reportMissingModuleSource]
@@ -18,27 +21,61 @@
1821
# Tag stored in the first slot of a persistent id; ids with any other tag are rejected on load.
_ARRAY_PERSISTENT_ID = "vortex.array"
1922
# Version stored in the second slot of a persistent id; ids with any other version are rejected on load.
_ARRAY_PERSISTENT_ID_VERSION = 1
2023

24+
# A sequence of buffers, each either ``bytes`` or ``memoryview``.
_BufferSequence: TypeAlias = Sequence[bytes | memoryview]
25+
# Shape of a validated persistent id: (tag, version, array buffers, dtype buffers).
_ArrayPersistentId: TypeAlias = tuple[str, int, _BufferSequence, _BufferSequence]
26+
# Type of the ``buffer_callback`` argument forwarded to ``pickle.Pickler``.
_BufferCallback: TypeAlias = Callable[[_pickle.PickleBuffer], object | None]
27+
# Type of the ``buffers`` argument forwarded to ``pickle.Unpickler``.
_OutOfBandBuffers: TypeAlias = Iterable[bytes | bytearray | memoryview | _pickle.PickleBuffer]
28+
29+
30+
def _is_buffer_sequence(obj: object) -> TypeGuard[_BufferSequence]:
31+
return isinstance(obj, Sequence) and all(isinstance(buffer, bytes | memoryview) for buffer in obj)
32+
33+
34+
def _parse_array_persistent_id(pid: object) -> _ArrayPersistentId:
35+
parsed_pid: object = pid
36+
if isinstance(parsed_pid, str):
37+
try:
38+
parsed_pid = cast(object, literal_eval(parsed_pid))
39+
except (SyntaxError, ValueError) as err:
40+
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}") from err
41+
42+
if not isinstance(parsed_pid, tuple):
43+
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}")
44+
45+
parsed_tuple = cast(tuple[object, ...], parsed_pid)
46+
if len(parsed_tuple) != 4:
47+
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}")
48+
49+
tag, version, array_buffers, dtype_buffers = parsed_tuple
50+
if tag != _ARRAY_PERSISTENT_ID or version != _ARRAY_PERSISTENT_ID_VERSION:
51+
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}")
52+
if not _is_buffer_sequence(array_buffers) or not _is_buffer_sequence(dtype_buffers):
53+
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}")
54+
55+
return (_ARRAY_PERSISTENT_ID, _ARRAY_PERSISTENT_ID_VERSION, array_buffers, dtype_buffers)
56+
2157

2258
class Pickler(_pickle.Pickler):
2359
"""Pickler that serializes Vortex arrays using an explicit session."""
2460

2561
def __init__(
2662
self,
27-
file: Any, # pyright: ignore[reportExplicitAny]
63+
file: BinaryIO,
2864
*,
2965
session: Session,
3066
protocol: int | None = None,
3167
fix_imports: bool = True,
32-
buffer_callback: Any | None = None, # pyright: ignore[reportExplicitAny]
68+
buffer_callback: _BufferCallback | None = None,
3369
) -> None:
3470
super().__init__(
3571
file,
3672
protocol=protocol,
3773
fix_imports=fix_imports,
3874
buffer_callback=buffer_callback,
3975
)
40-
self._session = session
76+
self._session: Session = session
4177

78+
@override
4279
def persistent_id(self, obj: object) -> object | None:
4380
if isinstance(obj, Array):
4481
array_buffers, dtype_buffers = encode_ipc_array_buffers(obj, session=self._session)
@@ -51,13 +88,13 @@ class Unpickler(_pickle.Unpickler):
5188

5289
def __init__(
5390
self,
54-
file: Any, # pyright: ignore[reportExplicitAny]
91+
file: BinaryIO,
5592
*,
5693
session: Session,
5794
fix_imports: bool = True,
5895
encoding: str = "ASCII",
5996
errors: str = "strict",
60-
buffers: Any | None = None, # pyright: ignore[reportExplicitAny]
97+
buffers: _OutOfBandBuffers | None = None,
6198
) -> None:
6299
super().__init__(
63100
file,
@@ -66,33 +103,22 @@ def __init__(
66103
errors=errors,
67104
buffers=buffers,
68105
)
69-
self._session = session
106+
self._session: Session = session
70107

108+
@override
71109
def persistent_load(self, pid: object) -> object:
72-
if isinstance(pid, str):
73-
try:
74-
pid = literal_eval(pid)
75-
except (SyntaxError, ValueError) as err:
76-
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}") from err
77-
78-
if not isinstance(pid, tuple) or len(pid) != 4:
79-
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}")
80-
81-
tag, version, array_buffers, dtype_buffers = pid
82-
if tag != _ARRAY_PERSISTENT_ID or version != _ARRAY_PERSISTENT_ID_VERSION:
83-
raise _pickle.UnpicklingError(f"unsupported persistent id: {pid!r}")
84-
110+
_, _, array_buffers, dtype_buffers = _parse_array_persistent_id(pid)
85111
return decode_ipc_array_buffers(array_buffers, dtype_buffers, session=self._session)
86112

87113

88114
def dump(
89115
obj: object,
90-
file: Any, # pyright: ignore[reportExplicitAny]
116+
file: BinaryIO,
91117
*,
92118
session: Session,
93119
protocol: int | None = None,
94120
fix_imports: bool = True,
95-
buffer_callback: Any | None = None, # pyright: ignore[reportExplicitAny]
121+
buffer_callback: _BufferCallback | None = None,
96122
) -> None:
97123
Pickler(
98124
file,
@@ -109,7 +135,7 @@ def dumps(
109135
session: Session,
110136
protocol: int | None = None,
111137
fix_imports: bool = True,
112-
buffer_callback: Any | None = None, # pyright: ignore[reportExplicitAny]
138+
buffer_callback: _BufferCallback | None = None,
113139
) -> bytes:
114140
file = io.BytesIO()
115141
dump(
@@ -124,22 +150,25 @@ def dumps(
124150

125151

126152
def load(
127-
file: Any, # pyright: ignore[reportExplicitAny]
153+
file: BinaryIO,
128154
*,
129155
session: Session,
130156
fix_imports: bool = True,
131157
encoding: str = "ASCII",
132158
errors: str = "strict",
133-
buffers: Any | None = None, # pyright: ignore[reportExplicitAny]
159+
buffers: _OutOfBandBuffers | None = None,
134160
) -> object:
135-
return Unpickler(
136-
file,
137-
session=session,
138-
fix_imports=fix_imports,
139-
encoding=encoding,
140-
errors=errors,
141-
buffers=buffers,
142-
).load()
161+
return cast(
162+
object,
163+
Unpickler(
164+
file,
165+
session=session,
166+
fix_imports=fix_imports,
167+
encoding=encoding,
168+
errors=errors,
169+
buffers=buffers,
170+
).load(),
171+
)
143172

144173

145174
def loads(
@@ -149,7 +178,7 @@ def loads(
149178
fix_imports: bool = True,
150179
encoding: str = "ASCII",
151180
errors: str = "strict",
152-
buffers: Any | None = None, # pyright: ignore[reportExplicitAny]
181+
buffers: _OutOfBandBuffers | None = None,
153182
) -> object:
154183
return load(
155184
io.BytesIO(data),

vortex-python/src/session.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ pub(crate) fn init(py: Python, parent: &Bound<PyModule>) -> PyResult<()> {
2424
Ok(())
2525
}
2626

27+
/// A Vortex execution session.
28+
///
29+
/// Sessions hold runtime state and registries used by Vortex operations. Create one session and
30+
/// pass it to APIs that read, write, convert, compress, or otherwise execute arrays.
2731
#[pyclass(name = "Session", module = "vortex", frozen)]
2832
pub struct PyVortexSession {
2933
inner: VortexSession,

0 commit comments

Comments
 (0)