3636INTARRAY = Union [UINTARRAY , NDArray [INT ]]
3737
3838
def np_isin(arr: np.ndarray, labels, invert: bool = False) -> np.ndarray:
    """Fast ``np.isin`` for unsigned-integer label arrays via a boolean lookup table.

    For unsigned-integer segmentation masks this replaces the general ``np.isin``
    algorithm with a single ``lut[arr]`` gather (reported as ~3-6x faster when
    testing membership in more than one label). The single-label case uses
    ``arr == label``. Everything else falls back to ``np.isin``: non-unsigned
    ``arr`` dtypes, non-integer labels, empty ``arr``, or a very large value
    range in ``arr``.

    Args:
        arr (np.ndarray): Input array.
        labels: A single label or any iterable of labels (list, tuple, set, range,
            ndarray of any shape, ...) to test membership against.
        invert (bool, optional): If True, return the complement (equivalent to
            ``np.isin(arr, labels, invert=True)``). Defaults to False.

    Returns:
        np.ndarray: Boolean mask, same shape as ``arr``.
    """
    # Normalise ``labels`` to a flat 1-D array so that scalars, 0-d arrays, sets,
    # ranges and multi-dimensional arrays are all handled uniformly.
    if isinstance(labels, np.ndarray):
        label_arr = labels.ravel()
    elif isinstance(labels, (str, bytes)):
        # Strings are iterable, but are treated as one label rather than as characters.
        label_arr = np.asarray([labels])
    else:
        try:
            items = list(labels)
        except TypeError:  # not iterable -> a single scalar label
            items = [labels]
        label_arr = np.asarray(items).ravel()

    if label_arr.size == 0:
        return np.ones(arr.shape, dtype=bool) if invert else np.zeros(arr.shape, dtype=bool)

    if label_arr.size == 1:
        # ``item(0)`` yields a plain Python scalar, so the comparison does not
        # depend on the dtype NumPy inferred for the label array.
        res = arr == label_arr.item(0)
        return ~res if invert else res

    lut_limit = 2**20  # keep the lookup table small (same threshold as np_unique's bincount path)
    if (
        arr.size > 0  # ``arr.max()`` is undefined for empty arrays
        and np.issubdtype(arr.dtype, np.unsignedinteger)
        and np.issubdtype(label_arr.dtype, np.integer)  # float/bool/object labels cannot index the table
    ):
        arr_max = int(arr.max())
        if arr_max + 1 < lut_limit:
            # Labels that are negative or larger than ``arr.max()`` can never match
            # an unsigned array, so they are dropped instead of enlarging the table.
            in_range = label_arr[(label_arr >= 0) & (label_arr <= arr_max)]
            lut = np.zeros(arr_max + 1, dtype=bool)
            lut[in_range.astype(np.intp)] = True
            res = lut[arr]
            return ~res if invert else res

    return np.isin(arr, label_arr, invert=invert)
71+
72+
3973def np_extract_label (
4074 arr : np .ndarray ,
4175 label : int | list [int ],
@@ -69,7 +103,7 @@ def np_extract_label(
69103
70104 if isinstance (label , list ):
71105 assert 0 not in label , "label 0 is not supported in list mode"
72- arr_msk = np . isin (arr , label )
106+ arr_msk = np_isin (arr , label )
73107 arr [arr_msk ] = to_label
74108 arr [~ arr_msk ] = 0
75109 return arr
@@ -125,10 +159,12 @@ def np_volume(arr: UINTARRAY, include_zero: bool = False) -> dict[int, int]:
125159 Returns:
126160 dict[int, int]: Mapping from label value to number of voxels with that label.
127161 """
162+ # np.bincount wins decisively when there are many labels (e.g. connected-component maps);
163+ # cc3d statistics is faster for the few-label case typical of anatomical segmentations.
164+ counts = np .bincount (arr .ravel ()) if int (arr .max ()) > 256 else cc3dstatistics (arr , use_crop = not include_zero )["voxel_counts" ]
128165 if include_zero :
129- return {idx : i for idx , i in dict (enumerate (cc3dstatistics (arr , use_crop = False )["voxel_counts" ])).items () if i > 0 }
130- else :
131- return {idx : i for idx , i in dict (enumerate (cc3dstatistics (arr )["voxel_counts" ])).items () if i > 0 and idx != 0 }
166+ return {idx : i for idx , i in enumerate (counts ) if i > 0 }
167+ return {idx : i for idx , i in enumerate (counts ) if i > 0 and idx != 0 }
132168
133169
134170def np_is_empty (arr : UINTARRAY | INTARRAY ) -> bool :
@@ -253,8 +289,8 @@ def np_center_of_mass(arr: UINTARRAY) -> dict[int, COORDINATE]:
253289 """
254290 stats = cc3dstatistics (arr , use_crop = False )
255291 # Does not use the other calls for speed reasons
256- unique = [ idx for idx , i in enumerate ( stats ["voxel_counts" ]) if i > 0 and idx != 0 ]
257- return {idx : v for idx , v in enumerate (stats ["centroids" ]) if idx in unique }
292+ vc = stats ["voxel_counts" ]
293+ return {idx : v for idx , v in enumerate (stats ["centroids" ]) if idx != 0 and vc [ idx ] > 0 }
258294
259295
260296def np_bounding_boxes (arr : UINTARRAY ) -> dict [int , tuple [slice , slice , slice ]]:
@@ -270,8 +306,8 @@ def np_bounding_boxes(arr: UINTARRAY) -> dict[int, tuple[slice, slice, slice]]:
270306 """
271307 stats = cc3dstatistics (arr )
272308 # Does not use the other calls for speed reasons
273- unique = [ idx for idx , i in enumerate ( stats ["voxel_counts" ]) if i > 0 and idx != 0 ]
274- return {idx : v for idx , v in enumerate (stats ["bounding_boxes" ]) if idx in unique }
309+ vc = stats ["voxel_counts" ]
310+ return {idx : v for idx , v in enumerate (stats ["bounding_boxes" ]) if idx != 0 and vc [ idx ] > 0 }
275311
276312
277313def np_contacts (arr : UINTARRAY , connectivity : int ) -> dict [tuple [int , int ], int ]:
@@ -383,14 +419,14 @@ def np_erode_msk_euclid(arr: np.ndarray, n_pixel: int = 3, use_crop=True, labels
383419 if use_crop :
384420 arr_bin = arr .copy ()
385421 if labels is not None :
386- arr_bin [np . isin (arr_bin , labels , invert = True )] = 0
422+ arr_bin [np_isin (arr_bin , labels , invert = True )] = 0
387423 crop = np_bbox_binary (arr_bin , px_dist = 1 + n_pixel , raise_error = False )
388424 arrc = arr [crop ]
389425 else :
390426 arrc = arr
391427 if labels is not None :
392428 arrc = arrc .copy ()
393- arrc [np . isin (arrc , labels , invert = True )] = 0
429+ arrc [np_isin (arrc , labels , invert = True )] = 0
394430
395431 if mask is not None :
396432 mask = mask .copy ()
@@ -426,17 +462,18 @@ def np_dilate_msk_euclid(arr: np.ndarray, n_pixel: int = 3, use_crop=True, label
426462
427463 Assigns each newly covered voxel to the nearest existing label.
428464 """
465+ arr_bin = arr .copy ()
466+ if labels is not None :
467+ arr_bin [np_isin (arr_bin , labels , invert = True )] = 0
468+
429469 if use_crop :
430- arr_bin = arr .copy ()
431- if labels is not None :
432- arr_bin [np .isin (arr_bin , labels , invert = True )] = 0
433470 crop = np_bbox_binary (arr_bin , px_dist = 1 + n_pixel , raise_error = False )
434471 arrc = arr [crop ]
435472 else :
436473 arrc = arr
437474 if labels is not None :
438475 arrc = arrc .copy ()
439- arrc [np . isin (arr_bin , labels , invert = True )] = 0
476+ arrc [np_isin (arr_bin , labels , invert = True )] = 0
440477 if mask is not None :
441478 mask [mask != 0 ] = 1
442479 if use_crop :
@@ -500,7 +537,7 @@ def np_dilate_msk(
500537 if use_crop :
501538 # try:
502539 arr_bin = arr .copy ()
503- arr_bin [np . isin (arr_bin , labels , invert = True )] = 0
540+ arr_bin [np_isin (arr_bin , labels , invert = True )] = 0
504541 crop = np_bbox_binary (arr_bin , px_dist = 1 + n_pixel , raise_error = False )
505542 arrc = arr [crop ]
506543 else :
@@ -521,8 +558,7 @@ def np_dilate_msk(
521558 out = arrc
522559 for _ in range (n_pixel ):
523560 for i in labels :
524- data = out .copy ()
525- data [i != data ] = 0
561+ data = out == i # boolean mask; _binary_dilation casts to bool anyway, so this is exact and avoids a full copy
526562 if use_crop :
527563 lcrop = np_bbox_binary (data , px_dist = 2 + n_pixel , raise_error = False )
528564 data = data [lcrop ]
@@ -575,7 +611,7 @@ def np_erode_msk(
575611 labels : list [int ] = _to_labels (arr , label_ref )
576612
577613 if use_crop :
578- crop = np_bbox_binary (np . isin (arr , labels , invert = False ), px_dist = 1 + n_pixel , raise_error = False )
614+ crop = np_bbox_binary (np_isin (arr , labels , invert = False ), px_dist = 1 + n_pixel , raise_error = False )
579615 arrc = arr [crop ]
580616 else :
581617 arrc = arr
@@ -703,9 +739,16 @@ def np_bbox_binary(img: np.ndarray, px_dist: int | Sequence[int] | np.ndarray =
703739 assert len (px_dist ) == n , f"dimension mismatch, got img shape { shp } and px_dist { px_dist } "
704740
705741 bbox : list [float ] = []
706- for ax in itertools .combinations (reversed (range (n )), n - 1 ):
707- nonzero = np .any (a = img , axis = ax )
708- bbox .extend (np .where (nonzero )[0 ][[0 , - 1 ]]) # type: ignore
742+ if n == 3 :
743+ # 2 full passes instead of 3: two axis extents come from a shared 2D projection (cheap),
744+ # only the third axis needs a second full reduction.
745+ p = np .any (img , axis = 2 )
746+ for nonzero in (np .any (p , axis = 1 ), np .any (p , axis = 0 ), np .any (img , axis = (0 , 1 ))):
747+ bbox .extend (np .where (nonzero )[0 ][[0 , - 1 ]]) # type: ignore
748+ else :
749+ for ax in itertools .combinations (reversed (range (n )), n - 1 ):
750+ nonzero = np .any (a = img , axis = ax )
751+ bbox .extend (np .where (nonzero )[0 ][[0 , - 1 ]]) # type: ignore
709752 out : tuple [slice , ...] = tuple (
710753 slice (
711754 max (bbox [i ] - px_dist [i // 2 ], 0 ),
@@ -867,7 +910,7 @@ def np_connected_components(
867910 labels : Sequence [int ] = _to_labels (arr , label_ref )
868911 if include_zero :
869912 arr [arr == 0 ] = arr .max () + 1
870- arr [np . isin (arr , labels , invert = True )] = 0
913+ arr [np_isin (arr , labels , invert = True )] = 0
871914 cc_map , n = _connected_components (arr , connectivity = connectivity , return_N = True )
872915 return cc_map , n
873916
@@ -952,7 +995,7 @@ def np_filter_connected_components(
952995
953996 arr2 = arr .copy ()
954997 labels : Sequence [int ] = _to_labels (arr , label_ref )
955- arr2 [np . isin (arr2 , labels , invert = True )] = 0 # type:ignore
998+ arr2 [np_isin (arr2 , labels , invert = True )] = 0 # type:ignore
956999
9571000 labels_out , n = _connected_components (arr2 , connectivity = connectivity , return_N = True )
9581001 largest_k_components_org = largest_k_components
0 commit comments