Skip to content

Commit 6cbafb7

Browse files
authored
Rename NormalizeLabelsInDatasetd to RemapLabelsToSequentiald and fix label ordering bug (Project-MONAI#8680)
### Description

Rename `NormalizeLabelsInDatasetd` to `RemapLabelsToSequentiald` to better describe its actual functionality. The old name was confusing as it suggests normalization when it actually remaps arbitrary label values to sequential indices (0, 1, 2, 3, ...).

Fixes Project-MONAI#7800

### Bug Fix

Fixed a bug where the order of labels in the input dictionary affected the output. Previously, if background appeared first (e.g., `{background: 0, organ1: 1, organ2: 2}`), the transform would skip index 1 and produce `{background: 0, organ1: 2, organ2: 3}`. This was caused by enumerate starting at 1 for all items but skipping background without adjusting the index. The fix excludes background from enumeration and handles it separately.

### Changes

- Renamed `NormalizeLabelsInDatasetd` to `RemapLabelsToSequentiald`
- Fixed label ordering bug by excluding background from enumeration
- Kept `NormalizeLabelsInDatasetd` as deprecated alias for backward compatibility
- Enhanced documentation to clearly explain remapping behavior
- Added alphabetical sorting for deterministic output ordering
- Added tests for deprecated name warning and proper remapping

### Types of changes

<!--- Put an `x` in all the boxes that apply, and remove the not applicable items -->
- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [x] New tests added to cover the changes.
- [x] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`.
- [x] In-line docstrings updated.

---------

Signed-off-by: Soumya Snigdha Kundu <soumya_snigdha.kundu@kcl.ac.uk>
1 parent 57fdd59 commit 6cbafb7

2 files changed

Lines changed: 156 additions & 16 deletions

File tree

monai/apps/deepedit/transforms.py

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from monai.data import MetaTensor
2525
from monai.networks.layers import GaussianFilter
2626
from monai.transforms.transform import MapTransform, Randomizable, Transform
27-
from monai.utils import min_version, optional_import
27+
from monai.utils import deprecated, min_version, optional_import
2828

2929
measure, _ = optional_import("skimage.measure", "0.14.2", min_version)
3030

@@ -84,18 +84,44 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
8484
return d
8585

8686

87-
class NormalizeLabelsInDatasetd(MapTransform):
87+
class RemapLabelsToSequentiald(MapTransform):
88+
"""
89+
Remap label values from a dataset-specific schema to sequential indices (0, 1, 2, 3, ...).
90+
91+
This transform takes labels with arbitrary values defined in a label dictionary and remaps them
92+
to a sequential range starting from 1 (with background always set to 0). This is useful for
93+
standardizing labels across different datasets or ensuring labels are in a contiguous range.
94+
95+
The output label indices are assigned in alphabetical order by label name to ensure
96+
deterministic behavior regardless of input dictionary ordering.
97+
98+
Args:
99+
keys: The ``keys`` parameter will be used to get and set the actual data item to transform
100+
label_names: Dictionary mapping label names to their current values in the dataset.
101+
For example: {"spleen": 1, "liver": 6, "background": 0}
102+
Will be remapped to: {"background": 0, "liver": 1, "spleen": 2}
103+
(alphabetically sorted, excluding background)
104+
allow_missing_keys: If True, missing keys in the data dictionary will not raise an error
105+
106+
Example:
107+
>>> transform = RemapLabelsToSequentiald(
108+
... keys="label",
109+
... label_names={"liver": 6, "spleen": 1, "background": 0}
110+
... )
111+
>>> # Input label has values [0, 1, 6]
112+
>>> # Output label will have values [0, 1, 2] (background=0, liver=1, spleen=2)
113+
>>> # And updates d["label_names"] to {"background": 0, "liver": 1, "spleen": 2}
114+
115+
Note:
116+
- Background label (if present) is always mapped to 0
117+
- Non-background labels are mapped to sequential indices 1, 2, 3, ... in alphabetical order
118+
- Undefined labels (not in label_names) will be set to 0 (background)
119+
- The transform updates the data dictionary with a new "label_names" key containing the remapped values
120+
"""
88121

89122
def __init__(
90123
self, keys: KeysCollection, label_names: dict[str, int] | None = None, allow_missing_keys: bool = False
91124
):
92-
"""
93-
Normalize label values according to label names dictionary
94-
95-
Args:
96-
keys: The ``keys`` parameter will be used to get and set the actual data item to transform
97-
label_names: all label names
98-
"""
99125
super().__init__(keys, allow_missing_keys)
100126

101127
self.label_names = label_names or {}
@@ -106,13 +132,18 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
106132
# Dictionary containing new label numbers
107133
new_label_names = {}
108134
label = np.zeros(d[key].shape)
109-
# Making sure the range values and number of labels are the same
110-
for idx, (key_label, val_label) in enumerate(self.label_names.items(), start=1):
111-
if key_label != "background":
112-
new_label_names[key_label] = idx
113-
label[d[key] == val_label] = idx
114-
if key_label == "background":
115-
new_label_names["background"] = 0
135+
136+
# Sort label names to ensure deterministic ordering (exclude background)
137+
sorted_labels = sorted([(k, v) for k, v in self.label_names.items() if k != "background"])
138+
139+
# Always set background to 0 first
140+
if "background" in self.label_names:
141+
new_label_names["background"] = 0
142+
143+
# Assign sequential indices to sorted non-background labels
144+
for idx, (key_label, val_label) in enumerate(sorted_labels, start=1):
145+
new_label_names[key_label] = idx
146+
label[d[key] == val_label] = idx
116147

117148
d["label_names"] = new_label_names
118149
if isinstance(d[key], MetaTensor):
@@ -122,6 +153,20 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
122153
return d
123154

124155

156+
@deprecated(since="1.6", removed="1.8", msg_suffix="Use `RemapLabelsToSequentiald` instead.")
157+
class NormalizeLabelsInDatasetd(RemapLabelsToSequentiald):
158+
"""
159+
.. deprecated:: 1.6.0
160+
`NormalizeLabelsInDatasetd` is deprecated and will be removed in version 1.8.0.
161+
Use :class:`RemapLabelsToSequentiald` instead.
162+
163+
This class is maintained for backward compatibility. Please use RemapLabelsToSequentiald
164+
which better describes the transform's functionality.
165+
"""
166+
167+
pass
168+
169+
125170
class SingleLabelSelectiond(MapTransform):
126171

127172
def __init__(

tests/apps/deepedit/test_deepedit_transforms.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
FindAllValidSlicesMissingLabelsd,
2626
FindDiscrepancyRegionsDeepEditd,
2727
NormalizeLabelsInDatasetd,
28+
RemapLabelsToSequentiald,
2829
ResizeGuidanceMultipleLabelDeepEditd,
2930
SingleLabelSelectiond,
3031
SplitPredsLabeld,
@@ -282,6 +283,100 @@ def test_correct_results(self, arguments, input_data, expected_result):
282283
result = add_fn(input_data)
283284
self.assertEqual(len(np.unique(result["label"])), expected_result)
284285

286+
def test_ordering_determinism(self):
287+
"""Test that different input ordering produces the same output (alphabetical)"""
288+
# Create a label array with different label values
289+
label = np.array([[[0, 1, 6, 3]]]) # background=0, spleen=1, liver=6, kidney=3
290+
291+
# Test case 1: liver first, then kidney, then spleen
292+
data1 = {"label": label.copy()}
293+
transform1 = RemapLabelsToSequentiald(
294+
keys="label", label_names={"liver": 6, "kidney": 3, "spleen": 1, "background": 0}
295+
)
296+
result1 = transform1(data1)
297+
298+
# Test case 2: spleen first, then kidney, then liver (different order)
299+
data2 = {"label": label.copy()}
300+
transform2 = RemapLabelsToSequentiald(
301+
keys="label", label_names={"spleen": 1, "kidney": 3, "liver": 6, "background": 0}
302+
)
303+
result2 = transform2(data2)
304+
305+
# Both should produce the same output (alphabetically sorted)
306+
# Expected mapping: background=0, kidney=1, liver=2, spleen=3
307+
np.testing.assert_array_equal(result1["label"], result2["label"])
308+
309+
# Verify the actual mapping is alphabetical
310+
expected_output = np.array([[[0, 3, 2, 1]]]) # kidney=1, liver=2, spleen=3, background=0
311+
np.testing.assert_array_equal(result1["label"], expected_output)
312+
313+
# Verify label_names is correct
314+
self.assertEqual(result1["label_names"], {"background": 0, "kidney": 1, "liver": 2, "spleen": 3})
315+
self.assertEqual(result2["label_names"], {"background": 0, "kidney": 1, "liver": 2, "spleen": 3})
316+
317+
def test_multiple_labels(self):
318+
"""Test with multiple non-background labels"""
319+
label = np.array([[[0, 1, 2, 5]]]) # background, spleen, kidney, liver
320+
data = {"label": label.copy()}
321+
transform = RemapLabelsToSequentiald(
322+
keys="label", label_names={"spleen": 1, "kidney": 2, "liver": 5, "background": 0}
323+
)
324+
result = transform(data)
325+
326+
# Expected: background=0, kidney=1, liver=2, spleen=3 (alphabetical)
327+
expected = np.array([[[0, 3, 1, 2]]])
328+
np.testing.assert_array_equal(result["label"], expected)
329+
self.assertEqual(result["label_names"], {"background": 0, "kidney": 1, "liver": 2, "spleen": 3})
330+
331+
def test_deprecated_name_warning(self):
332+
"""Test that NormalizeLabelsInDatasetd is properly deprecated.
333+
334+
The deprecation warning only triggers when MONAI version >= 1.6 (since="1.6").
335+
This test verifies:
336+
1. The actual NormalizeLabelsInDatasetd class is marked as deprecated in docstring
337+
2. The class is a subclass of RemapLabelsToSequentiald
338+
3. The deprecation mechanism works correctly (tested via version_val simulation)
339+
4. The actual class functions correctly
340+
"""
341+
import warnings
342+
343+
from monai.utils import deprecated
344+
345+
# Verify NormalizeLabelsInDatasetd docstring indicates deprecation
346+
self.assertIn("deprecated", NormalizeLabelsInDatasetd.__doc__.lower())
347+
self.assertIn("RemapLabelsToSequentiald", NormalizeLabelsInDatasetd.__doc__)
348+
349+
# Verify NormalizeLabelsInDatasetd is a subclass of RemapLabelsToSequentiald
350+
self.assertTrue(issubclass(NormalizeLabelsInDatasetd, RemapLabelsToSequentiald))
351+
352+
# Test the deprecation mechanism using version_val to simulate version 1.6
353+
# This verifies the @deprecated decorator behavior that NormalizeLabelsInDatasetd uses
354+
@deprecated(
355+
since="1.6",
356+
removed="1.8",
357+
msg_suffix="Use `RemapLabelsToSequentiald` instead.",
358+
version_val="1.6", # Simulate version 1.6 to trigger warning
359+
)
360+
class DeprecatedNormalizeLabels(RemapLabelsToSequentiald):
361+
pass
362+
363+
data = {"label": np.array([[[0, 1]]])}
364+
365+
with warnings.catch_warnings(record=True) as w:
366+
warnings.simplefilter("always")
367+
transform = DeprecatedNormalizeLabels(keys="label", label_names={"spleen": 1, "background": 0})
368+
_ = transform(data)
369+
370+
# Check that a deprecation warning was raised
371+
self.assertEqual(len(w), 1)
372+
self.assertTrue(issubclass(w[0].category, FutureWarning))
373+
self.assertIn("RemapLabelsToSequentiald", str(w[0].message))
374+
375+
# Verify the actual NormalizeLabelsInDatasetd class works correctly
376+
transform_actual = NormalizeLabelsInDatasetd(keys="label", label_names={"spleen": 1, "background": 0})
377+
result = transform_actual({"label": np.array([[[0, 1]]])})
378+
self.assertIn("label", result)
379+
285380

286381
class TestResizeGuidanceMultipleLabelCustomd(unittest.TestCase):
287382

0 commit comments

Comments
 (0)