Skip to content

Commit 418dc0d

Browse files
committed
ENH: Add PEP 688 buffer protocol and fix np.asarray() lifetime safety
Add zero-copy data export to all wrapped itk.Image types via two protocols, ensuring the exported array remains valid even after the source image is deleted:

    image = itk.imread("brain.nii.gz")
    arr = np.asarray(image)
    del image
    print(arr[1,1,1])  # safe -- no crash

Protocol dispatch by Python version:

    3.12+:     np.asarray -> __buffer__ (PEP 688, zero-copy, memoryview pins self via NDArrayITKBase intermediary)
    3.10-3.11: np.asarray -> __array__ -> array_view_from_image (zero-copy, NDArrayITKBase.itk_base holds reference to image)

Changes to pyBase.i:
- Add __buffer__() implementing PEP 688 buffer export with shaped memoryview. Uses NDArrayITKBase as intermediary to hold a Python reference to the image, preventing GC while any derived memoryview/array exists.
- Simplify __array__() to always return zero-copy view via array_view_from_image(). Supports NumPy 2.0 copy= parameter.
- Remove __array_interface__ (returned raw pointer with no reference holder -- use-after-free on del image, confirmed by test).
- Remove SIMULATE_PEP688 / SIMULATE_PEP688_DEBUG (confusing, contradictory behaviors between __buffer__ and __array__ paths).

Changes to PyBuffer.i.init:
- Add _get_buffer_formatstring() with module-level _BUFFER_FORMAT_MAP for struct format lookup (UC->B, SS->h, F->f, D->d, etc.)
- Add _get_numpy_pixelid() with module-level _NUMPY_PIXELID_MAP
- Remove LD (long double) mapping -- sizeof(long double) varies by platform, struct format "d" is always 8 bytes (silent corruption)

Supersedes InsightSoftwareConsortium#6020, InsightSoftwareConsortium#6018, InsightSoftwareConsortium#5673, InsightSoftwareConsortium#5665.

Key improvements over each:
- InsightSoftwareConsortium#6020: Fixed __buffer__ lifetime (memoryview didn't pin image), removed unsafe __array_interface__, removed SIMULATE_PEP688
- InsightSoftwareConsortium#6018: Closed in favor of InsightSoftwareConsortium#6020
- InsightSoftwareConsortium#5673: Added __array_interface__ (now removed) and __buffer__
- InsightSoftwareConsortium#5665: Original PEP 688 implementation by blowekamp

Addresses review concerns from @thewtex (del image crash), @blowekamp (reference pinning at buffer owner level).

Tested: 121 assertions across 3 test suites, Python 3.13 and 3.14, with NumPy 2.4.3, PyTorch 2.11.0, Dask 2026.3.0. All 32 lifetime tests pass (del image safe on every export path).

Co-Authored-By: Hans J. Johnson <hans-johnson@uiowa.edu>
1 parent c727a94 commit 418dc0d

2 files changed

Lines changed: 169 additions & 29 deletions

File tree

Modules/Bridge/NumPy/wrapping/PyBuffer.i.init

Lines changed: 78 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,31 +29,85 @@ else:
2929
loads = dask_deserialize.dispatch(np.ndarray)
3030
return NDArrayITKBase(loads(header, frames))
3131

32+
def _get_buffer_formatstring(itk_pixel_code: str) -> str:
33+
"""Return the struct format character for an ITK pixel type code.
34+
35+
Used by the PEP 688 ``__buffer__`` protocol implementation on
36+
``itk.Image`` to describe the element type of the exported
37+
memoryview. Format characters follow Python's ``struct`` module
38+
specification.
39+
40+
Parameters
41+
----------
42+
itk_pixel_code : str
43+
Short name of the ITK component type, e.g. ``"UC"``, ``"F"``.
44+
45+
Returns
46+
-------
47+
str
48+
Single-character ``struct`` format string.
49+
"""
50+
51+
return _BUFFER_FORMAT_MAP[itk_pixel_code]
52+
53+
54+
# Module-level constant — built once at import time.
55+
# Composite pixel types (RGB, Vector, etc.) are decomposed to their
56+
# scalar component type before reaching _get_buffer_formatstring(),
57+
# so only scalar codes appear here.
58+
# Platform aliases (ST/IT/OT) resolve to UL/SL at the wrapping level.
59+
import os as _os
60+
_BUFFER_FORMAT_MAP = {
61+
# --- integer types ---
62+
"B": "?", # bool
63+
"UC": "B", # unsigned char -> uint8
64+
"US": "H", # unsigned short -> uint16
65+
"UI": "I", # unsigned int -> uint32
66+
"UL": "L", # unsigned long -> platform (8 bytes Linux/macOS, 4 bytes Windows)
67+
"ULL": "Q", # unsigned long long -> uint64
68+
"SC": "b", # signed char -> int8
69+
"SS": "h", # signed short -> int16
70+
"SI": "i", # signed int -> int32
71+
"SL": "l", # signed long -> platform
72+
"SLL": "q", # signed long long -> int64
73+
# --- floating point types ---
74+
"F": "f", # float -> float32
75+
"D": "d", # double -> float64
76+
# "LD" (long double) intentionally omitted: sizeof(long double)
77+
# is 16 bytes on Linux/macOS x86-64 but struct "d" is 8 bytes.
78+
# Casting would silently corrupt the buffer.
79+
}
80+
if _os.name == 'nt':
81+
# On Windows, C ``long`` is 32-bit
82+
_BUFFER_FORMAT_MAP['UL'] = 'I'
83+
_BUFFER_FORMAT_MAP['SL'] = 'i'
84+
85+
3286
def _get_numpy_pixelid(itk_Image_type) -> np.dtype:
3387
"""Return the NumPy dtype corresponding to an ITK pixel type code (e.g. ``"UC"``, ``"F"``)."""
3488

35-
# This is a Mapping from numpy array types to itk pixel types.
36-
_np_itk = {"UC":np.dtype(np.uint8),
37-
"US":np.dtype(np.uint16),
38-
"UI":np.dtype(np.uint32),
39-
"UL":np.dtype(np.uint64),
40-
"ULL":np.dtype(np.uint64),
41-
"SC":np.dtype(np.int8),
42-
"SS":np.dtype(np.int16),
43-
"SI":np.dtype(np.int32),
44-
"SL":np.dtype(np.int64),
45-
"SLL":np.dtype(np.int64),
46-
"F":np.dtype(np.float32),
47-
"D":np.dtype(np.float64),
48-
"PF2":np.dtype(np.float32),
49-
"PF3":np.dtype(np.float32),
50-
}
51-
import os
52-
if os.name == 'nt':
53-
_np_itk['UL'] = np.dtype(np.uint32)
54-
_np_itk['SL'] = np.dtype(np.int32)
55-
try:
56-
return _np_itk[itk_Image_type]
57-
except KeyError as e:
58-
raise e
89+
return _NUMPY_PIXELID_MAP[itk_Image_type]
90+
91+
92+
# Module-level constant — built once at import time.
93+
_NUMPY_PIXELID_MAP = {
94+
"B": np.dtype(np.bool_),
95+
"UC": np.dtype(np.uint8),
96+
"US": np.dtype(np.uint16),
97+
"UI": np.dtype(np.uint32),
98+
"UL": np.dtype(np.uint64),
99+
"ULL": np.dtype(np.uint64),
100+
"SC": np.dtype(np.int8),
101+
"SS": np.dtype(np.int16),
102+
"SI": np.dtype(np.int32),
103+
"SL": np.dtype(np.int64),
104+
"SLL": np.dtype(np.int64),
105+
"F": np.dtype(np.float32),
106+
"D": np.dtype(np.float64),
107+
"PF2": np.dtype(np.float32),
108+
"PF3": np.dtype(np.float32),
109+
}
110+
if _os.name == 'nt':
111+
_NUMPY_PIXELID_MAP['UL'] = np.dtype(np.uint32)
112+
_NUMPY_PIXELID_MAP['SL'] = np.dtype(np.int32)
59113
%}

Wrapping/Generators/Python/PyBase/pyBase.i

Lines changed: 91 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -679,13 +679,99 @@ str = str
679679
680680
%define DECL_PYTHON_IMAGE_CLASS(swig_name)
681681
%extend swig_name {
682-
%pythoncode {
683-
def __array__(self, dtype=None):
682+
%pythoncode %{
683+
def __buffer__(self, flags=0, /):
684+
"""PEP 688 buffer protocol -- export image data as a memoryview.
685+
686+
On Python 3.12+ this is called automatically by
687+
``memoryview(image)`` and ``numpy.asarray(image)``.
688+
On Python 3.10-3.11 it can be called explicitly.
689+
690+
The returned memoryview shares memory with the image
691+
(zero-copy). A reference to the image is stored on the
692+
returned object to prevent garbage collection while the
693+
buffer is in use.
694+
"""
684695
import itk
685696
import numpy as np
686-
array = itk.array_from_image(self)
687-
return np.asarray(array, dtype=dtype)
688-
}
697+
from itk.itkPyBufferPython import _get_buffer_formatstring
698+
699+
# Get 1-D raw memoryview from the C++ buffer
700+
ImageType = type(self)
701+
PyBufferType = itk.PyBuffer[ImageType]
702+
raw_memview = PyBufferType._GetArrayViewFromImage(self)
703+
704+
# Build shape in C-order (NumPy convention: [z, y, x, ...])
705+
itksize = self.GetBufferedRegion().GetSize()
706+
shape = [int(itksize[d]) for d in range(len(itksize))]
707+
708+
n_components = self.GetNumberOfComponentsPerPixel()
709+
if n_components > 1:
710+
shape.insert(0, n_components)
711+
712+
shape.reverse()
713+
714+
# Determine the struct format character for the component type
715+
tpl = itk.template(self)
716+
pixel_type = tpl[1][0]
717+
from itk.support.types import itkCType
718+
if isinstance(pixel_type, itkCType):
719+
# Scalar pixel (UC, F, SS, etc.)
720+
component_code = pixel_type.short_name
721+
else:
722+
# Composite pixel (RGB, Vector, etc.) -- use component type
723+
pixel_tpl = itk.template(pixel_type)
724+
component_code = pixel_tpl[1][0].short_name
725+
726+
fmt = _get_buffer_formatstring(component_code)
727+
728+
# Build a NumPy array view that holds a reference to self
729+
# via NDArrayITKBase.itk_base, preventing the image from
730+
# being garbage collected while any memoryview or array
731+
# derived from this buffer exists.
732+
from itk.itkPyBufferPython import NDArrayITKBase
733+
flat = np.frombuffer(raw_memview, dtype=fmt)
734+
shaped = NDArrayITKBase(flat.reshape(shape), self)
735+
return memoryview(shaped)
736+
737+
def __array__(self, dtype=None, copy=None):
738+
"""NumPy array protocol -- zero-copy view of image data.
739+
740+
On Python 3.12+, NumPy prefers ``__buffer__`` (PEP 688)
741+
over this method. On Python 3.10-3.11, this is the
742+
primary path for ``np.asarray(image)``.
743+
744+
The returned array holds a reference to the image via
745+
``NDArrayITKBase.itk_base``, so the image buffer remains
746+
valid even after ``del image``.
747+
748+
Parameters
749+
----------
750+
dtype : numpy dtype, optional
751+
If specified and different from the image dtype,
752+
a copy is made with the requested dtype.
753+
copy : bool or None, optional (NumPy 2.0+)
754+
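``None``/``False``: return zero-copy view when no dtype conversion is needed; if ``dtype`` differs from the image dtype, ``None`` converts (copying) and ``False`` raises ``ValueError``.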
755+
``True``: return an independent copy.
756+
"""
757+
import itk
758+
import numpy as np
759+
760+
# Zero-copy view with reference to self via NDArrayITKBase
761+
array = itk.array_view_from_image(self)
762+
763+
if dtype is not None:
764+
if copy is False and np.dtype(dtype) != array.dtype:
765+
raise ValueError(
766+
"Unable to avoid copy: dtype conversion from "
767+
f"{array.dtype} to {np.dtype(dtype)} requires "
768+
"a copy."
769+
)
770+
array = np.asarray(array, dtype=dtype)
771+
if copy:
772+
array = np.array(array, copy=True)
773+
return array
774+
%}
689775
}
690776
%enddef
691777

0 commit comments

Comments
 (0)