Skip to content

Commit 6cbafb7

Browse files
authored
Rename NormalizeLabelsInDatasetd to RemapLabelsToSequentiald and fix label ordering bug (#8680)
### Description

Rename `NormalizeLabelsInDatasetd` to `RemapLabelsToSequentiald` to better describe its actual functionality. The old name was confusing because it suggests normalization, when the transform actually remaps arbitrary label values to sequential indices (0, 1, 2, 3, ...).

Fixes #7800

### Bug Fix

Fixed a bug where the order of labels in the input dictionary affected the output. Previously, if background appeared first (e.g., `{background: 0, organ1: 1, organ2: 2}`), the transform would skip index 1 and produce `{background: 0, organ1: 2, organ2: 3}`. This was caused by `enumerate` starting at 1 for all items but skipping background without adjusting the index. The fix excludes background from enumeration and handles it separately.

### Changes

- Renamed `NormalizeLabelsInDatasetd` to `RemapLabelsToSequentiald`
- Fixed label ordering bug by excluding background from enumeration
- Kept `NormalizeLabelsInDatasetd` as deprecated alias for backward compatibility
- Enhanced documentation to clearly explain remapping behavior
- Added alphabetical sorting for deterministic output ordering
- Added tests for deprecated name warning and proper remapping

### Types of changes

<!--- Put an `x` in all the boxes that apply, and remove the not applicable items -->
- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [x] New tests added to cover the changes.
- [x] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`.
- [x] In-line docstrings updated.

---------

Signed-off-by: Soumya Snigdha Kundu <soumya_snigdha.kundu@kcl.ac.uk>
1 parent 57fdd59 commit 6cbafb7

File tree

2 files changed

+156
-16
lines changed

2 files changed

+156
-16
lines changed

monai/apps/deepedit/transforms.py

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from monai.data import MetaTensor
2525
from monai.networks.layers import GaussianFilter
2626
from monai.transforms.transform import MapTransform, Randomizable, Transform
27-
from monai.utils import min_version, optional_import
27+
from monai.utils import deprecated, min_version, optional_import
2828

2929
measure, _ = optional_import("skimage.measure", "0.14.2", min_version)
3030

@@ -84,18 +84,44 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
8484
return d
8585

8686

87-
class NormalizeLabelsInDatasetd(MapTransform):
87+
class RemapLabelsToSequentiald(MapTransform):
88+
"""
89+
Remap label values from a dataset-specific schema to sequential indices (0, 1, 2, 3, ...).
90+
91+
This transform takes labels with arbitrary values defined in a label dictionary and remaps them
92+
to a sequential range starting from 1 (with background always set to 0). This is useful for
93+
standardizing labels across different datasets or ensuring labels are in a contiguous range.
94+
95+
The output label indices are assigned in alphabetical order by label name to ensure
96+
deterministic behavior regardless of input dictionary ordering.
97+
98+
Args:
99+
keys: The ``keys`` parameter will be used to get and set the actual data item to transform
100+
label_names: Dictionary mapping label names to their current values in the dataset.
101+
For example: {"spleen": 1, "liver": 6, "background": 0}
102+
Will be remapped to: {"background": 0, "liver": 1, "spleen": 2}
103+
(alphabetically sorted, excluding background)
104+
allow_missing_keys: If True, missing keys in the data dictionary will not raise an error
105+
106+
Example:
107+
>>> transform = RemapLabelsToSequentiald(
108+
... keys="label",
109+
... label_names={"liver": 6, "spleen": 1, "background": 0}
110+
... )
111+
>>> # Input label has values [0, 1, 6]
112+
>>> # Output label will have values [0, 1, 2] (background=0, liver=1, spleen=2)
113+
>>> # And updates d["label_names"] to {"background": 0, "liver": 1, "spleen": 2}
114+
115+
Note:
116+
- Background label (if present) is always mapped to 0
117+
- Non-background labels are mapped to sequential indices 1, 2, 3, ... in alphabetical order
118+
- Undefined labels (not in label_names) will be set to 0 (background)
119+
- The transform updates the data dictionary with a new "label_names" key containing the remapped values
120+
"""
88121

89122
def __init__(
90123
self, keys: KeysCollection, label_names: dict[str, int] | None = None, allow_missing_keys: bool = False
91124
):
92-
"""
93-
Normalize label values according to label names dictionary
94-
95-
Args:
96-
keys: The ``keys`` parameter will be used to get and set the actual data item to transform
97-
label_names: all label names
98-
"""
99125
super().__init__(keys, allow_missing_keys)
100126

101127
self.label_names = label_names or {}
@@ -106,13 +132,18 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
106132
# Dictionary containing new label numbers
107133
new_label_names = {}
108134
label = np.zeros(d[key].shape)
109-
# Making sure the range values and number of labels are the same
110-
for idx, (key_label, val_label) in enumerate(self.label_names.items(), start=1):
111-
if key_label != "background":
112-
new_label_names[key_label] = idx
113-
label[d[key] == val_label] = idx
114-
if key_label == "background":
115-
new_label_names["background"] = 0
135+
136+
# Sort label names to ensure deterministic ordering (exclude background)
137+
sorted_labels = sorted([(k, v) for k, v in self.label_names.items() if k != "background"])
138+
139+
# Always set background to 0 first
140+
if "background" in self.label_names:
141+
new_label_names["background"] = 0
142+
143+
# Assign sequential indices to sorted non-background labels
144+
for idx, (key_label, val_label) in enumerate(sorted_labels, start=1):
145+
new_label_names[key_label] = idx
146+
label[d[key] == val_label] = idx
116147

117148
d["label_names"] = new_label_names
118149
if isinstance(d[key], MetaTensor):
@@ -122,6 +153,20 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
122153
return d
123154

124155

156+
@deprecated(since="1.6", removed="1.8", msg_suffix="Use `RemapLabelsToSequentiald` instead.")
class NormalizeLabelsInDatasetd(RemapLabelsToSequentiald):
    """
    Deprecated alias of :class:`RemapLabelsToSequentiald`, kept for backward compatibility.

    .. deprecated:: 1.6.0
        `NormalizeLabelsInDatasetd` is deprecated and will be removed in version 1.8.0.
        Use :class:`RemapLabelsToSequentiald` instead; the new name better describes
        the transform's functionality.
    """
168+
169+
125170
class SingleLabelSelectiond(MapTransform):
126171

127172
def __init__(

tests/apps/deepedit/test_deepedit_transforms.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
FindAllValidSlicesMissingLabelsd,
2626
FindDiscrepancyRegionsDeepEditd,
2727
NormalizeLabelsInDatasetd,
28+
RemapLabelsToSequentiald,
2829
ResizeGuidanceMultipleLabelDeepEditd,
2930
SingleLabelSelectiond,
3031
SplitPredsLabeld,
@@ -282,6 +283,100 @@ def test_correct_results(self, arguments, input_data, expected_result):
282283
result = add_fn(input_data)
283284
self.assertEqual(len(np.unique(result["label"])), expected_result)
284285

286+
def test_ordering_determinism(self):
    """Check that the remapping is alphabetical and independent of ``label_names`` insertion order."""
    # Source values in the array: background=0, spleen=1, liver=6, kidney=3
    source = np.array([[[0, 1, 6, 3]]])

    # The same label schema written in two different insertion orders
    orderings = (
        {"liver": 6, "kidney": 3, "spleen": 1, "background": 0},
        {"spleen": 1, "kidney": 3, "liver": 6, "background": 0},
    )
    outputs = [
        RemapLabelsToSequentiald(keys="label", label_names=names)({"label": source.copy()}) for names in orderings
    ]
    first, second = outputs

    # Both insertion orders must agree with each other ...
    np.testing.assert_array_equal(first["label"], second["label"])

    # ... and with the alphabetical assignment kidney=1, liver=2, spleen=3 (background stays 0)
    np.testing.assert_array_equal(first["label"], np.array([[[0, 3, 2, 1]]]))

    # The remapped name dictionary must be identical for both orderings as well
    remapped_names = {"background": 0, "kidney": 1, "liver": 2, "spleen": 3}
    for out in outputs:
        self.assertEqual(out["label_names"], remapped_names)
316+
317+
def test_multiple_labels(self):
    """Remap three non-background labels that are supplied in non-alphabetical order."""
    # Source values in the array: background=0, spleen=1, kidney=2, liver=5
    sample = {"label": np.array([[[0, 1, 2, 5]]])}
    remap = RemapLabelsToSequentiald(
        keys="label", label_names={"spleen": 1, "kidney": 2, "liver": 5, "background": 0}
    )
    out = remap(sample)

    # Alphabetical assignment: kidney -> 1, liver -> 2, spleen -> 3; background stays 0
    np.testing.assert_array_equal(out["label"], np.array([[[0, 3, 1, 2]]]))
    self.assertEqual(out["label_names"], {"background": 0, "kidney": 1, "liver": 2, "spleen": 3})
330+
331+
def test_deprecated_name_warning(self):
    """Test that NormalizeLabelsInDatasetd is properly deprecated.

    The deprecation warning only triggers when MONAI version >= 1.6 (since="1.6").
    This test verifies:
    1. The actual NormalizeLabelsInDatasetd class is marked as deprecated in docstring
    2. The class is a subclass of RemapLabelsToSequentiald
    3. The deprecation mechanism works correctly (tested via version_val simulation)
    4. The actual class still remaps labels exactly like RemapLabelsToSequentiald
    """
    import warnings

    from monai.utils import deprecated

    # Verify NormalizeLabelsInDatasetd docstring indicates deprecation
    self.assertIn("deprecated", NormalizeLabelsInDatasetd.__doc__.lower())
    self.assertIn("RemapLabelsToSequentiald", NormalizeLabelsInDatasetd.__doc__)

    # Verify NormalizeLabelsInDatasetd is a subclass of RemapLabelsToSequentiald
    self.assertTrue(issubclass(NormalizeLabelsInDatasetd, RemapLabelsToSequentiald))

    # Test the deprecation mechanism using version_val to simulate version 1.6
    # This verifies the @deprecated decorator behavior that NormalizeLabelsInDatasetd uses
    @deprecated(
        since="1.6",
        removed="1.8",
        msg_suffix="Use `RemapLabelsToSequentiald` instead.",
        version_val="1.6",  # Simulate version 1.6 to trigger warning
    )
    class DeprecatedNormalizeLabels(RemapLabelsToSequentiald):
        pass

    data = {"label": np.array([[[0, 1]]])}

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        transform = DeprecatedNormalizeLabels(keys="label", label_names={"spleen": 1, "background": 0})
        _ = transform(data)

    # Only count the deprecation notice itself: with the "always" filter active, any
    # unrelated warning raised by a dependency while the transform runs is recorded too
    # and must not make this test fail.
    deprecation_notices = [
        item
        for item in caught
        if issubclass(item.category, FutureWarning) and "RemapLabelsToSequentiald" in str(item.message)
    ]
    self.assertEqual(len(deprecation_notices), 1)

    # Verify the actual NormalizeLabelsInDatasetd class still produces the remapped output,
    # not merely a dictionary that happens to contain the "label" key
    transform_actual = NormalizeLabelsInDatasetd(keys="label", label_names={"spleen": 1, "background": 0})
    result = transform_actual({"label": np.array([[[0, 1]]])})
    np.testing.assert_array_equal(result["label"], np.array([[[0, 1]]]))
    self.assertEqual(result["label_names"], {"background": 0, "spleen": 1})
379+
285380

286381
class TestResizeGuidanceMultipleLabelCustomd(unittest.TestCase):
287382

0 commit comments

Comments
 (0)