From ebc6bf8f0e5414ee5872211980887db747440609 Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Mon, 8 Jun 2026 10:31:52 +0200 Subject: [PATCH] Fix pyarrow import `neo4j.vector.Vector` uses `pyarrow.compute.count` but the driver never loads the module `pyarrow.compute`. We've not seen issues in tests because we either have all optional dependencies or none installed. And the current versions of `pandas` (another optional dependency), when loaded, in turn load `pyarrow.compute`. However, this is not guaranteed to remain this way and won't work for users that only have pyarrow (but not pandas) installed. Furthermore, this PR silences pyarrow's type annotations for now, as they're not quite up to snuff yet. --- pyproject.toml | 7 +++++++ src/neo4j/_optional_deps.py | 3 +++ src/neo4j/vector.py | 4 +++- tests/unit/common/vector/test_vector.py | 5 +++-- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 23a985584..ee62fcb55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -225,6 +225,13 @@ module = [ ] ignore_missing_imports = true +[[tool.mypy.overrides]] +# https://github.com/apache/arrow/issues/50123 +module = [ + "pyarrow.*", +] +follow_imports = "skip" + [tool.ruff] line-length = 79 extend-exclude = [ diff --git a/src/neo4j/_optional_deps.py b/src/neo4j/_optional_deps.py index a8bb4f6a0..70f6246ae 100644 --- a/src/neo4j/_optional_deps.py +++ b/src/neo4j/_optional_deps.py @@ -32,13 +32,16 @@ import pandas as pd # type: ignore[no-redef] pa: t.Any = None +pa_compute: t.Any = None with suppress(ImportError): import pyarrow as pa # type: ignore[no-redef] + import pyarrow.compute as pa_compute # type: ignore[no-redef] __all__ = [ "np", "pa", + "pa_compute", "pd", ] diff --git a/src/neo4j/vector.py b/src/neo4j/vector.py index ca11568c9..b3b090668 100644 --- a/src/neo4j/vector.py +++ b/src/neo4j/vector.py @@ -32,10 +32,12 @@ # This beautiful construct helps sphinx to properly resolve the type hints. 
import numpy as _np import pyarrow as _pa + import pyarrow.compute as _pa_compute else: from ._optional_deps import ( np as _np, pa as _pa, + pa_compute as _pa_compute, ) @@ -795,7 +797,7 @@ def to_numpy(self) -> _np.ndarray: ... def from_pyarrow(cls, data: _pa.Array, /) -> _t.Self: width = data.type.byte_width assert cls.size == width - if _pa.compute.count(data, mode="only_null").as_py(): + if _pa_compute.count(data, mode="only_null").as_py(): raise ValueError("PyArrow array must not contain any null values.") _, buffer = data.buffers() buffer = buffer[ diff --git a/tests/unit/common/vector/test_vector.py b/tests/unit/common/vector/test_vector.py index 05aacaca4..219b60e92 100644 --- a/tests/unit/common/vector/test_vector.py +++ b/tests/unit/common/vector/test_vector.py @@ -28,6 +28,7 @@ from neo4j._optional_deps import ( np, pa, + pa_compute, ) from neo4j.vector import ( _swap_endian, @@ -1044,7 +1045,7 @@ def test_to_pyarrow_random( v = _vector_from_data(data_be, dtype, endian) array = v.to_pyarrow() assert array.type == pa_type - assert pa.compute.count(array, mode="only_null").as_py() == 0 + assert pa_compute.count(array, mode="only_null").as_py() == 0 buffers = array.buffers() assert len(buffers) == 2 assert buffers[0] is None @@ -1076,7 +1077,7 @@ def test_to_pyarrow_special_values( v = _vector_from_data(data_be, dtype, endian) array = v.to_pyarrow() assert array.type == pa_type - assert pa.compute.count(array, mode="only_null").as_py() == 0 + assert pa_compute.count(array, mode="only_null").as_py() == 0 buffers = array.buffers() assert len(buffers) == 2 assert buffers[0] is None