Skip to content

Commit 6b4e409

Browse files
author
Chris Bridge
committed
Remove sort_key parameter in favour of boolean sort option
1 parent 2398bf6 commit 6b4e409

3 files changed

Lines changed: 125 additions & 17 deletions

File tree

docs/legacy.rst

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,12 @@ multiframe instance. The default behavior sorts first by the "SeriesNumber"
159159
attribute (for cases with multiple series), then by the "InstanceNumber"
160160
attribute, then by the "SOPInstanceUID" attribute (to give a predictable sort
161161
order in cases where instance number is not present). If you prefer an
162-
alternative sorting scheme, you can pass a callable to the ``sort_key``
163-
argument of the form accepted by the Python built-in ``sorted`` function. For
164-
example, to sort by ``KVP`` and then ``SliceLocation``:
162+
alternative sorting scheme, you can disable the default sorting by passing
163+
``sort=False`` and then pass the datasets in whatever order you prefer. There
164+
is no strict requirement that frames be sorted, but it would be best practice
165+
to use some sort of logical sorting scheme.
166+
167+
For example, to sort by ``KVP`` and then ``SliceLocation``:
165168

166169
.. code-block:: python
167170
@@ -180,6 +183,48 @@ example, to sort by ``KVP`` and then ``SliceLocation``:
180183
# Read in the files
181184
ct_series = [dcmread(f) for f in legacy_ct_files]
182185
186+
# Sort using KVP and slice location
187+
ct_series = sorted(
188+
ct_series, key=lambda dcm: (dcm.KVP, dcm.SliceLocation)
189+
)
190+
191+
# Create multiframe instance with custom sort key
192+
multiframe = hd.legacy.LegacyConvertedEnhancedCTImage(
193+
ct_series,
194+
series_number=1,
195+
instance_number=1,
196+
series_instance_uid=hd.UID(),
197+
sop_instance_uid=hd.UID(),
198+
series_description="Enhanced Test Files",
199+
sort=False, # disable default sorting
200+
)
201+
202+
# Save out the new multiframe conversion
203+
multiframe.save_as("legacy_converted_ct.dcm")
204+
205+
Alternatively, you could use highdicom's ``sort_datasets`` function to sort the
206+
datasets based purely on spatial location:
207+
208+
.. code-block:: python
209+
210+
import highdicom as hd
211+
from pydicom import dcmread
212+
from pydicom.data import get_testdata_file
213+
214+
215+
# Use this series of files from the pydicom test data
216+
legacy_ct_files = [
217+
get_testdata_file('dicomdirtests/77654033/CT2/17136'),
218+
get_testdata_file('dicomdirtests/77654033/CT2/17196'),
219+
get_testdata_file('dicomdirtests/77654033/CT2/17166'),
220+
]
221+
222+
# Read in the files
223+
ct_series = [dcmread(f) for f in legacy_ct_files]
224+
225+
# Sort frames spatially
226+
ct_series = hd.spatial.sort_datasets(ct_series)
227+
183228
# Create multiframe instance with custom sort key
184229
multiframe = hd.legacy.LegacyConvertedEnhancedCTImage(
185230
ct_series,
@@ -188,7 +233,7 @@ example, to sort by ``KVP`` and then ``SliceLocation``:
188233
series_instance_uid=hd.UID(),
189234
sop_instance_uid=hd.UID(),
190235
series_description="Enhanced Test Files",
191-
sort_key=lambda dcm: (dcm.KVP, dcm.SliceLocation),
236+
sort=False, # disable default sorting
192237
)
193238
194239
# Save out the new multiframe conversion

src/highdicom/legacy/sop.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ def get_source_keywords(self) -> list[str]:
198198
return [self.dest_kw]
199199

200200

201-
def default_sort_key(x: Dataset) -> Tuple[Union[int, str, UID], ...]:
201+
def _default_sort_key(x: Dataset) -> Tuple[Union[int, str, UID], ...]:
202202
"""The default sort key to sort single frames before conversion.
203203
204204
Parameters
@@ -241,7 +241,7 @@ def __init__(
241241
transfer_syntax_uid: str | None = None,
242242
use_extended_offset_table: bool = False,
243243
require_volume: bool = False,
244-
sort_key: Callable | None = None,
244+
sort: bool = True,
245245
workers: int | Executor = 0,
246246
) -> None:
247247
"""
@@ -270,18 +270,24 @@ def __init__(
270270
require_volume: bool, optional
271271
Raise an error if the legacy datasets do not form a regularly-spaced
272272
volume.
273-
sort_key: Callable | None, optional
274-
A function by which the single-frame instances will be sorted to
275-
determine the order of frames in the newly created instance.
273+
sort: bool, optional
274+
Whether to sort datasets before placing them into the legacy
275+
instance. If True, datasets will be sorted using the default sort
276+
key. If False, frames of the new instance will be in the same order
277+
as the list of legacy instances were passed.
276278
workers: int | concurrent.futures.Executor, optional
277279
Number of worker processes (or executor object) to use for frame
278280
compression, if compression or transcoding is needed.
279281
280282
"""
281-
if sort_key is None:
282-
sort_key = default_sort_key
283+
if sort:
284+
self._legacy_datasets = sorted(
285+
list(legacy_datasets),
286+
key=_default_sort_key
287+
)
288+
else:
289+
self._legacy_datasets = list(legacy_datasets)
283290

284-
self._legacy_datasets = sorted(list(legacy_datasets), key=sort_key)
285291
self._destination = destination
286292
self._transfer_syntax_uid = transfer_syntax_uid
287293
self._workers = workers
@@ -2079,7 +2085,7 @@ def __init__(
20792085
transfer_syntax_uid: str | None = None,
20802086
use_extended_offset_table: bool = False,
20812087
require_volume: bool = False,
2082-
sort_key: Callable | None = None,
2088+
sort: bool = True,
20832089
contributing_equipment: Sequence[ContributingEquipment] | None = None,
20842090
workers: int | Executor = 0,
20852091
**kwargs: Any,
@@ -2121,9 +2127,15 @@ def __init__(
21212127
the standard, but users may wish to additionally impose this
21222128
stricter requirement to ensure the resulting files are easier
21232129
to work with.
2124-
sort_key: Callable | None, optional
2125-
A function by which the single-frame instances will be sorted to
2126-
determine the order of frames in the newly created instance.
2130+
sort: bool, optional
2131+
Sort the legacy datasets before placing them into the new instance
2132+
as frames. If False, frames are placed in the same order legacy
2133+
datasets were passed. When True, sorting is performed first by
2134+
Series Number, then Instance Number, then SOP Instance UID. To use
2135+
an alternative sort order, pre-sort the legacy datasets, and pass
2136+
``sort=False``. Though there are no requirements on sort order in
2137+
the standard, it is generally a best practice to use some sensible
2138+
sort order.
21272139
contributing_equipment: Sequence[highdicom.ContributingEquipment] | None, optional
21282140
Additional equipment that has contributed to the acquisition,
21292141
creation or modification of this instance.
@@ -2227,7 +2239,7 @@ def __init__(
22272239
workers=workers,
22282240
transfer_syntax_uid=transfer_syntax_uid,
22292241
use_extended_offset_table=use_extended_offset_table,
2230-
sort_key=sort_key,
2242+
sort=sort,
22312243
require_volume=require_volume,
22322244
)
22332245

tests/test_legacy.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,57 @@ def test_mixed_series(modality: Modality):
10901090
)
10911091

10921092

1093+
def test_unsorted():
1094+
# Combining two series is valid if other attributes are consistent
1095+
data_generator = DicomGenerator(5)
1096+
legacy_datasets = data_generator.generate_mixed_framesets(
1097+
Modality.PT, 1, True, True
1098+
)
1099+
1100+
# Reverse the order
1101+
reversed_legacy_datasets = legacy_datasets[::-1]
1102+
1103+
converted_sorted = LegacyConvertedEnhancedPETImage(
1104+
legacy_datasets=reversed_legacy_datasets,
1105+
series_instance_uid=UID(),
1106+
series_number=1,
1107+
sop_instance_uid=UID(),
1108+
instance_number=1,
1109+
sort=True,
1110+
)
1111+
1112+
# Frames should be sorted by instance number
1113+
for ds, pffg in zip(
1114+
legacy_datasets,
1115+
converted_sorted.PerFrameFunctionalGroupsSequence,
1116+
):
1117+
frame_source_uid = (
1118+
pffg.ConversionSourceAttributesSequence[0]
1119+
.ReferencedSOPInstanceUID
1120+
)
1121+
assert frame_source_uid == ds.SOPInstanceUID
1122+
1123+
converted_unsorted = LegacyConvertedEnhancedPETImage(
1124+
legacy_datasets=reversed_legacy_datasets,
1125+
series_instance_uid=UID(),
1126+
series_number=1,
1127+
sop_instance_uid=UID(),
1128+
instance_number=1,
1129+
sort=False,
1130+
)
1131+
1132+
# Frames should be in same order they were passed
1133+
for ds, pffg in zip(
1134+
reversed_legacy_datasets,
1135+
converted_unsorted.PerFrameFunctionalGroupsSequence,
1136+
):
1137+
frame_source_uid = (
1138+
pffg.ConversionSourceAttributesSequence[0]
1139+
.ReferencedSOPInstanceUID
1140+
)
1141+
assert frame_source_uid == ds.SOPInstanceUID
1142+
1143+
10931144
def test_body_part_mapping(modality: Modality):
10941145
"""Test that BodyPartExamined is correctly mapped to a coded
10951146
AnatomicRegionSequence.

0 commit comments

Comments
 (0)