Skip to content

Commit 45a5096

Browse files
committed
ENH: Add PEP 688 buffer protocol and fix np.asarray() lifetime safety
Add zero-copy data export to all wrapped itk.Image types via three protocols, ensuring the exported array remains valid even after the source image is deleted:

    image = itk.imread("brain.nii.gz")
    arr = np.asarray(image)
    del image
    print(arr[1,1,1])  # safe -- no crash

Protocol dispatch by Python version:
- 3.12+: np.asarray -> __buffer__ (PEP 688, zero-copy, memoryview pins self via NDArrayITKBase intermediary)
- 3.10-3.11: np.asarray -> __array_interface__ (zero-copy, NumPy sets arr.base = self, preventing image GC)
- All versions: image.__array__() returns NDArrayITKBase with .itk_base = image for explicit __array__ calls.

Changes to pyBase.i:
- Add __buffer__() implementing PEP 688 buffer export with shaped memoryview. Uses NDArrayITKBase as intermediary to hold a Python reference to the image, preventing GC while any derived memoryview/array exists. The flags parameter is accepted for PEP 688 compliance but not inspected (ITK buffers are always writable).
- Add __array_interface__ property returning the NumPy v3 array interface dict. NumPy sets arr.base = self when creating arrays from this interface, providing correct lifetime on 3.10-3.11.
- Simplify __array__() to always return a zero-copy view via array_view_from_image(). Supports the NumPy 2.0 copy= parameter. copy=True returns a plain ndarray so the image can be garbage-collected.

Changes to PyBuffer.i.init:
- Add _BUFFER_FORMAT_MAP constant (before function definition)
- Add _get_buffer_formatstring() with descriptive KeyError
- Add _NUMPY_PIXELID_MAP and _get_numpy_pixelid()
- Remove LD (long double) mapping (silent corruption)

Tested: Python 3.10-3.14, all pass (0 failures).

Supersedes InsightSoftwareConsortium#6020, InsightSoftwareConsortium#6018, InsightSoftwareConsortium#5673, InsightSoftwareConsortium#5665.
Addresses @thewtex (del image crash), @blowekamp (ref pinning).

Co-Authored-By: Hans J. Johnson <hans-johnson@uiowa.edu>
1 parent 86f75a9 commit 45a5096

2 files changed

Lines changed: 233 additions & 29 deletions

File tree

Modules/Bridge/NumPy/wrapping/PyBuffer.i.init

Lines changed: 88 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,31 +29,95 @@ else:
2929
loads = dask_deserialize.dispatch(np.ndarray)
3030
return NDArrayITKBase(loads(header, frames))
3131

32+
# Module-level constant — built once at import time.
33+
# Composite pixel types (RGB, Vector, etc.) are decomposed to their
34+
# scalar component type before reaching _get_buffer_formatstring(),
35+
# so only scalar codes appear here.
36+
# Platform aliases (ST/IT/OT) resolve to UL/SL at the wrapping level.
37+
import os as _os
38+
# Lookup table: ITK scalar component short name -> ``struct`` format
# character. Entries are declared integers first, then floating point.
_BUFFER_FORMAT_MAP = dict(
    # Integer-like component types.
    B="?",    # bool
    UC="B",   # unsigned char -> uint8
    US="H",   # unsigned short -> uint16
    UI="I",   # unsigned int -> uint32
    UL="L",   # unsigned long -> platform (8 bytes Linux/macOS, 4 bytes Windows)
    ULL="Q",  # unsigned long long -> uint64
    SC="b",   # signed char -> int8
    SS="h",   # signed short -> int16
    SI="i",   # signed int -> int32
    SL="l",   # signed long -> platform
    SLL="q",  # signed long long -> int64
    # Floating point component types.
    F="f",    # float -> float32
    D="d",    # double -> float64
    # There is deliberately no "LD" (long double) entry:
    # sizeof(long double) is 16 bytes on Linux/macOS x86-64 while the
    # struct "d" format is 8 bytes, so mapping it would silently
    # corrupt the exported buffer.
)
58+
59+
def _get_buffer_formatstring(itk_pixel_code: str) -> str:
    """Translate an ITK component type code into a ``struct`` format character.

    The result describes the element type of the memoryview exported by
    the PEP 688 ``__buffer__`` implementation on ``itk.Image``. The
    characters follow the conventions of Python's ``struct`` module.

    Parameters
    ----------
    itk_pixel_code : str
        Short name of the ITK component type, e.g. ``"UC"``, ``"F"``.

    Returns
    -------
    str
        Single-character ``struct`` format string.

    Raises
    ------
    KeyError
        If ``itk_pixel_code`` has no entry in ``_BUFFER_FORMAT_MAP``.
    """
    format_char = _BUFFER_FORMAT_MAP.get(itk_pixel_code)
    if format_char is not None:
        return format_char

    # Unknown code: report it together with every code that is accepted.
    supported_codes = sorted(_BUFFER_FORMAT_MAP)
    raise KeyError(
        f"Unsupported ITK pixel type code {itk_pixel_code!r} for "
        f"buffer export. Supported codes: "
        f"{supported_codes}"
    ) from None
90+
if _os.name == 'nt':
    # C ``long`` is 32-bit on Windows, so the UL/SL codes use the
    # 32-bit int format characters there.
    _BUFFER_FORMAT_MAP.update(UL='I', SL='i')
94+
95+
3296
def _get_numpy_pixelid(itk_Image_type) -> np.dtype:
    """Return the NumPy dtype registered for an ITK pixel type code.

    Parameters
    ----------
    itk_Image_type : str
        ITK type code used as a key of ``_NUMPY_PIXELID_MAP``,
        e.g. ``"UC"``, ``"F"``.

    Returns
    -------
    numpy.dtype
        The dtype stored in ``_NUMPY_PIXELID_MAP`` for that code.

    Raises
    ------
    KeyError
        If ``itk_Image_type`` has no entry in ``_NUMPY_PIXELID_MAP``.
    """
    try:
        return _NUMPY_PIXELID_MAP[itk_Image_type]
    except KeyError:
        # Same exception type as a bare lookup, but name the offending
        # code and list the accepted ones, as _get_buffer_formatstring does.
        raise KeyError(
            f"Unsupported ITK pixel type code {itk_Image_type!r} for "
            f"NumPy conversion. Supported codes: "
            f"{sorted(_NUMPY_PIXELID_MAP)}"
        ) from None
100+
101+
102+
# Module-level constant — built once at import time.
103+
# ITK type code -> NumPy dtype, built from (code, scalar type) pairs so
# each entry is wrapped in ``np.dtype`` in exactly one place.
_NUMPY_PIXELID_MAP = {
    type_code: np.dtype(scalar_type)
    for type_code, scalar_type in (
        ("B", np.bool_),
        ("UC", np.uint8),
        ("US", np.uint16),
        ("UI", np.uint32),
        ("UL", np.uint64),
        ("ULL", np.uint64),
        ("SC", np.int8),
        ("SS", np.int16),
        ("SI", np.int32),
        ("SL", np.int64),
        ("SLL", np.int64),
        ("F", np.float32),
        ("D", np.float64),
        ("PF2", np.float32),
        ("PF3", np.float32),
    )
}
if _os.name == 'nt':
    # On Windows the UL/SL codes are remapped to the 32-bit dtypes.
    _NUMPY_PIXELID_MAP.update(
        UL=np.dtype(np.uint32),
        SL=np.dtype(np.int32),
    )
59123
%}

Wrapping/Generators/Python/PyBase/pyBase.i

Lines changed: 145 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -679,13 +679,153 @@ str = str
679679
680680
%define DECL_PYTHON_IMAGE_CLASS(swig_name)
681681
%extend swig_name {
682-
%pythoncode {
683-
def __array__(self, dtype=None):
682+
%pythoncode %{
683+
def __buffer__(self, flags=0, /):
    """Export the image pixel data as a shaped memoryview (PEP 688).

    On Python 3.12+ this hook is invoked automatically by
    ``memoryview(image)`` and ``numpy.asarray(image)``; on
    Python 3.10-3.11 it can still be called explicitly.

    The memoryview is created over an ``NDArrayITKBase`` array that
    wraps the image buffer without copying. That array is constructed
    with the image as its second argument so the image is kept
    referenced while the exported buffer is in use.

    ``flags`` exists only for PEP 688 signature compliance and is not
    inspected: ITK image buffers are always writable, so every flag
    combination (including PyBUF_WRITABLE) is satisfied.
    """
    import itk
    import numpy as np
    from itk.itkPyBufferPython import _get_buffer_formatstring

    # Flat (1-D) raw view over the C++ pixel buffer.
    buffer_view = itk.PyBuffer[type(self)]._GetArrayViewFromImage(self)

    # ITK reports sizes as (x, y, z, ...); NumPy's C-order wants
    # [z, y, x, ...] with the per-pixel components as the last axis.
    region_size = self.GetBufferedRegion().GetSize()
    dims = [int(region_size[axis]) for axis in range(len(region_size))]
    component_count = self.GetNumberOfComponentsPerPixel()
    dims.reverse()
    if component_count > 1:
        dims.append(component_count)

    # Resolve the scalar component type: the pixel template argument
    # itself for scalar images, or the pixel type's own first template
    # argument for composite pixels (RGB, Vector, etc.).
    scalar_type = itk.template(self)[1][0]
    from itk.support.types import itkCType
    if not isinstance(scalar_type, itkCType):
        scalar_type = itk.template(scalar_type)[1][0]
    element_format = _get_buffer_formatstring(scalar_type.short_name)

    # Wrap the typed, reshaped view in NDArrayITKBase (which is handed
    # the image) before exporting, so the memoryview's exporter keeps
    # the image referenced.
    from itk.itkPyBufferPython import NDArrayITKBase
    typed_flat = np.frombuffer(buffer_view, dtype=element_format)
    pinned = NDArrayITKBase(typed_flat.reshape(dims), self)
    return memoryview(pinned)
740+
741+
def _get_array_interface(self):
    """Build the NumPy array interface (version 3) dict for this image.

    The dict describes the image buffer in place (no copy): its shape
    in C-order, the component dtype string, and the raw data pointer
    flagged as writable.

    When NumPy creates an array from ``__array_interface__`` it sets
    ``arr.base = self`` (the image), which keeps the image from being
    garbage collected while the array exists. This gives
    ``np.asarray(image)`` the correct lifetime on all Python versions.

    On Python 3.12+, NumPy prefers ``__buffer__`` (PEP 688) over this
    interface; that path gets its lifetime guarantee from the
    NDArrayITKBase intermediary instead.
    """
    import itk
    import numpy as np
    from itk.itkPyBufferPython import _get_numpy_pixelid

    buffer_view = itk.PyBuffer[type(self)]._GetArrayViewFromImage(self)

    # C-order extents ([z, y, x]); multi-component pixels add a
    # trailing axis.
    region_size = self.GetBufferedRegion().GetSize()
    extents = [int(region_size[axis]) for axis in range(len(region_size))]
    extents.reverse()
    component_count = self.GetNumberOfComponentsPerPixel()
    if component_count > 1:
        extents.append(component_count)

    # Scalar images expose the component type directly; composite
    # pixels expose it as their own first template argument.
    scalar_type = itk.template(self)[1][0]
    from itk.support.types import itkCType
    if not isinstance(scalar_type, itkCType):
        scalar_type = itk.template(scalar_type)[1][0]
    dtype = _get_numpy_pixelid(scalar_type.short_name)

    # A throwaway array over the raw view is used only to read the
    # address of the underlying buffer.
    address = np.asarray(buffer_view).__array_interface__['data'][0]

    return dict(
        version=3,
        shape=tuple(extents),
        typestr=dtype.str,
        data=(address, False),  # second item False => not read-only
        strides=None,           # None => C-contiguous
    )
__array_interface__ = property(_get_array_interface)
792+
793+
def __array__(self, dtype=None, copy=None):
    """NumPy array protocol -- view (or copy) of the image data.

    On Python 3.12+, NumPy prefers ``__buffer__`` (PEP 688) over this
    method. On Python 3.10-3.11, NumPy uses ``__array_interface__``
    for ``np.asarray()``, so this method is mainly reached through
    explicit ``image.__array__()`` or ``np.array(image)`` calls.

    Parameters
    ----------
    dtype : numpy dtype, optional
        Requested element type. If it differs from the image dtype
        the data must be converted, which requires a copy.
    copy : bool or None, optional (NumPy 2.0+)
        ``None``: return a zero-copy view unless ``dtype`` forces a
        conversion.
        ``False``: return a zero-copy view; raise if a conversion
        would be needed.
        ``True``: return an independent copy.

    Returns
    -------
    numpy.ndarray
        The array returned by ``itk.array_view_from_image`` when no
        ``dtype`` and no copy are requested; otherwise the result of
        ``np.asarray`` / ``np.array`` on that view.

    Raises
    ------
    ValueError
        If ``copy`` is ``False`` and ``dtype`` differs from the image
        dtype.
    """
    import itk
    import numpy as np

    # Zero-copy view with reference to self via NDArrayITKBase
    view = itk.array_view_from_image(self)

    if copy:
        # Convert and copy in one step. The previous two-step form
        # (np.asarray with dtype, then np.array with copy=True)
        # allocated twice whenever the dtype differed.
        return np.array(view, dtype=dtype, copy=True)

    if dtype is None:
        return view

    if copy is False and np.dtype(dtype) != view.dtype:
        raise ValueError(
            "Unable to avoid copy: dtype conversion from "
            f"{view.dtype} to {np.dtype(dtype)} requires "
            "a copy."
        )
    return np.asarray(view, dtype=dtype)
828+
%}
689829
}
690830
%enddef
691831

0 commit comments

Comments
 (0)