Skip to content

Commit d35ecdc

Browse files
authored
Merge pull request #105 from Hendrik-code/development_robert
Development robert
2 parents f639bec + b5fad17 commit d35ecdc

14 files changed

Lines changed: 221 additions & 87 deletions

TPTBox/core/bids_constants.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@
163163
"localizer",
164164
"difference",
165165
"labels",
166+
"report",
167+
"pet",
166168
]
167169
# https://bids-specification.readthedocs.io/en/stable/appendices/entity-table.html
168170
formats_relaxed = [*formats, "t2", "t1", "t2c", "t1c", "mr", "snapshot", "t1dixon", "dwi", "ctb"]
@@ -221,7 +223,7 @@
221223
"OPT": "Ophthalmic Tomography",
222224
"OPV": "Ophthalmic Visual Field",
223225
"OSS": "Optical Surface Scan",
224-
"OT": "Other ",
226+
"OT": "Other",
225227
"PLAN": "Plan",
226228
"PR": "Presentation State",
227229
"PT": "Positron emission tomography (PET)",

TPTBox/core/bids_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ def get_changed_bids(
687687
auto_add_run_id=False,
688688
additional_folder: str | None = None,
689689
dataset_path: str | None = None,
690-
make_parent=True,
690+
make_parent=False,
691691
non_strict_mode=False,
692692
):
693693
ds = dataset_path if dataset_path is not None else self.get_path_decomposed()[0]

TPTBox/core/dicom/dicom_extract.py

Lines changed: 90 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,68 @@ def dicom_to_nifti_multiframe(ds, nii_path):
190190
return nii_path
191191

192192

193-
def _convert_to_nifti(dicom_out_path, nii_path):
193+
def _export_pdf_from_dicom(dcm_path, out_pdf):
    """Write the document embedded in an encapsulated-PDF DICOM to ``out_pdf``.

    Args:
        dcm_path: Sequence with exactly one element. Despite the name, the
            element is used as a loaded dataset, not a path: its ``Modality``
            and ``EncapsulatedDocument`` attributes are read and it must
            support ``in`` (presumably a pydicom ``Dataset`` -- verify against
            the caller).
        out_pdf: Destination file path; the raw document bytes are written
            there. The parent directory is not created here.

    Raises:
        AssertionError: If ``dcm_path`` does not contain exactly one element.
        ValueError: If the modality is missing or not ``PDF``, or if the
            dataset carries no ``EncapsulatedDocument``.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``;
    # the exception type is kept for backward compatibility.
    if len(dcm_path) != 1:
        raise AssertionError(dcm_path)
    ds = dcm_path[0]

    # Only the modality is verified. A missing Modality is reported with the
    # same ValueError as a wrong one instead of leaking an AttributeError.
    modality = getattr(ds, "Modality", None)
    if modality is None or str(modality).upper() != "PDF":
        raise ValueError("Not a PDF DICOM")

    if "EncapsulatedDocument" not in ds:
        raise ValueError("No embedded PDF found")

    Path(out_pdf).write_bytes(ds.EncapsulatedDocument)
208+
209+
210+
def _collect_text(ds, txt_lines: list[str] | None = None):
    """Flatten the ``ContentSequence`` tree of ``ds`` into text lines.

    Every item of ``ds.ContentSequence`` is visited depth-first. For an item,
    the concept name is ``ConceptNameCodeSequence[0].CodeMeaning`` (empty
    string when unavailable) and the value is the first attribute present
    among ``TextValue``, ``CodeMeaning`` and ``NumericValue``. A line
    ``"<prefix><concept>: <value>"`` is emitted when the item has a concept
    or a value; the prefix grows with the nesting level.

    Args:
        ds: Object that may carry a ``ContentSequence`` attribute (presumably
            a structured-report pydicom dataset -- verify against the caller).
        txt_lines: Optional list to append to. It is mutated in place, which
            lets a caller accumulate lines over several datasets.

    Returns:
        The list holding all collected lines (``txt_lines`` itself when one
        was passed in).
    """
    if txt_lines is None:
        txt_lines = []

    def _help_collect_text(content_sequence, level: int = 0):
        prefix = " " * level
        for item in content_sequence:
            concept = ""
            if hasattr(item, "ConceptNameCodeSequence"):
                try:
                    concept = item.ConceptNameCodeSequence[0].CodeMeaning
                except (AttributeError, IndexError, KeyError, TypeError):
                    # Best effort: an empty/None sequence or a code item
                    # without CodeMeaning simply yields no concept name.
                    # Anything else is a real error and is not hidden.
                    concept = ""

            # First value-bearing attribute wins; order defines precedence.
            value = None
            for attr in ("TextValue", "CodeMeaning", "NumericValue"):
                if hasattr(item, attr):
                    value = getattr(item, attr)
                    break

            if concept or value is not None:
                txt_lines.append(f"{prefix}{concept}: {value}")

            # Nested content is emitted one level deeper, after its parent.
            if hasattr(item, "ContentSequence"):
                _help_collect_text(item.ContentSequence, level + 1)

    if hasattr(ds, "ContentSequence"):
        _help_collect_text(ds.ContentSequence)
    return txt_lines
245+
246+
247+
def _extract_txt_from_dicom(dcm_path, out_txt):
    """Dump the text content of one or more DICOM datasets to ``out_txt``.

    Args:
        dcm_path: Iterable of datasets (despite the name, each element is
            handed to ``_collect_text`` as a dataset, not opened as a path).
        out_txt: Destination file path. The lines collected from all datasets
            are joined with newlines and written as a single text file.
    """
    lines: list[str] = []
    for ds in dcm_path:
        lines = _collect_text(ds, lines)
    # Explicit UTF-8: report text may contain non-ASCII characters, and the
    # locale default encoding is platform dependent and may fail to encode them.
    Path(out_txt).write_text("\n".join(lines), encoding="utf-8")
252+
253+
254+
def _extract_nii_from_dicom(dicom_out_path, nii_path):
194255
"""
195256
Convert DICOM files to NIfTI format and handle common conversion errors.
196257
@@ -206,6 +267,7 @@ def _convert_to_nifti(dicom_out_path, nii_path):
206267
FunctionTimedOut: Raised if the DICOM-to-NIfTI conversion times out.
207268
ValueError: Raised for generic validation failures.
208269
"""
270+
209271
try:
210272
if isinstance(dicom_out_path, list):
211273
try:
@@ -217,6 +279,7 @@ def _convert_to_nifti(dicom_out_path, nii_path):
217279
return True
218280
except Exception as e:
219281
logger.on_debug("Multi-Frame DICOM did not work:", e)
282+
## The PDF dicom lands here
220283
convert_dicom.dicom_array_to_nifti(dicom_out_path, nii_path, True)
221284
else:
222285
# func_timeout(10, dicom2nifti.dicom_series_to_nifti, (dicom_out_path, nii_path, True))
@@ -246,6 +309,9 @@ def _convert_to_nifti(dicom_out_path, nii_path):
246309
logger.print_error()
247310

248311
return False
312+
except Exception:
313+
print(nii_path)
314+
249315
return True
250316

251317

@@ -264,7 +330,7 @@ def _get_paths(
264330
):
265331
if keys is None:
266332
keys = {}
267-
(mri_format, keys) = extract_keys_from_json(
333+
(mri_format, keys, ending) = extract_keys_from_json(
268334
simp_json,
269335
dcm_data_l,
270336
use_session,
@@ -277,7 +343,7 @@ def _get_paths(
277343
json_file_name, json_bids_name = _generate_bids_path(
278344
dataset_nifti_dir, keys, mri_format, simp_json, make_subject_chunks=make_subject_chunks, parent=parent
279345
)
280-
nii_path = str(json_file_name).replace(".json", "") + ".nii.gz"
346+
nii_path = str(json_file_name).replace(".json", "") + ending
281347
return json_file_name, json_bids_name, nii_path
282348

283349

@@ -364,11 +430,17 @@ def _from_dicom_to_nii(
364430
if exist and Path(nii_path).exists():
365431
logger.print("already exists:", json_file_name, ltype=Log_Type.STRANGE, verbose=verbose)
366432
return nii_path
367-
suc = _convert_to_nifti(dcm_data_l, nii_path)
433+
add_grid = False
434+
if nii_path.endswith(".pdf"):
435+
_export_pdf_from_dicom(dcm_data_l, nii_path)
436+
elif nii_path.endswith(".txt"):
437+
_extract_txt_from_dicom(dcm_data_l, nii_path)
438+
else:
439+
add_grid = _extract_nii_from_dicom(dcm_data_l, nii_path)
368440

369-
if suc:
441+
if add_grid:
370442
_add_grid_info_to_json(nii_path, json_file_name)
371-
return nii_path if suc else None
443+
return nii_path if add_grid else None
372444

373445

374446
def _add_grid_info_to_json(nii_path: Path | str, simp_json: Path | str, force_update=False, add=True):
@@ -390,7 +462,7 @@ def _add_grid_info_to_json(nii_path: Path | str, simp_json: Path | str, force_up
390462
return json_dict
391463

392464

393-
def _find_all_files(dcm_dirs: Path | list[Path]):
465+
def _find_all_files(dcm_dirs: Path | list[Path],verbose=False):
394466
"""
395467
Recursively find all DICOM directories or files in the given paths.
396468
@@ -400,6 +472,9 @@ def _find_all_files(dcm_dirs: Path | list[Path]):
400472
Yields:
401473
Path: Paths to directories or individual DICOM files found during the search.
402474
"""
475+
if verbose:
476+
logger.on_neutral("Start file searching")
477+
i = 0
403478
yield dcm_dirs
404479
dcm_dirs = dcm_dirs if isinstance(dcm_dirs, list) else [dcm_dirs]
405480
for dcm_dir in dcm_dirs:
@@ -408,9 +483,15 @@ def _find_all_files(dcm_dirs: Path | list[Path]):
408483
file = ""
409484
for file in files:
410485
if Path(file).is_file(): # str(file).endswith(".dcm") or str(file).endswith(".ima")
486+
if verbose:
487+
logger.on_neutral("File ",i,end="\r")
488+
i += 1
411489
yield Path(root, file).absolute().parent
412490
break
413491
else:
492+
if verbose:
493+
logger.on_neutral("File ",i,end="\r")
494+
i += 1
414495
yield Path(root, file)
415496
# if "." not in str(file):
416497
# yield Path(root, file).absolute().parent
@@ -606,7 +687,7 @@ def extract_dicom_folder(
606687
convert_dicom.settings.disable_validate_slice_increment()
607688
outs = {}
608689

609-
for p in _find_all_files(dicom_folder):
690+
for p in _find_all_files(dicom_folder,verbose=verbose):
610691
dicom_path = p
611692

612693
if str(dicom_path).endswith(".pkl"):
@@ -672,6 +753,7 @@ def process_series(key, files, parts):
672753
p, Path("/media/data/robert/datasets", "dataset-Durchleuchtung222"), False, False, validate_slice_increment=False
673754
)
674755

756+
675757
sys.exit()
676758
# s = "/home/robert/Downloads/bein/dataset-oberschenkel/rawdata/sub-1-3-46-670589-11-2889201787-2305829596-303261238-2367429497/mr/sub-1-3-46-670589-11-2889201787-2305829596-303261238-2367429497_sequ-406_mr.nii.gz"
677759
# nii2 = NII.load(s, False)

TPTBox/core/dicom/dicom_header_to_keys.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,15 @@ def extract_keys_from_json( # noqa: C901
165165
def _get(key, default=None):
    """Return ``simp_json[key]`` as a filename-safe token.

    When ``key`` is absent from ``simp_json`` the lookup falls back to
    ``keys.get(key, default)`` and that value is returned untouched.
    Otherwise the value is stringified and cleaned: underscores, spaces and
    dots become dashes, characters that are invalid in filenames are dropped,
    runs of dashes are collapsed, and leading/trailing dots and dashes are
    stripped.
    """
    if key in simp_json:
        # separators -> dash
        token = str(simp_json[key]).translate(str.maketrans("_ .", "---"))
        # remove invalid filename characters
        token = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "", token)
        # collapse repeated dashes, then strip leading/trailing dots and dashes
        return re.sub(r"-+", "-", token).strip(".-")
    return keys.get(key, default)
169177

170178
"""Extract keys from JSON based on study and series descriptions."""
171179
#### NAKO FIXED ####
@@ -174,25 +182,28 @@ def _get(key, default=None):
174182
series_description = _get("SeriesDescription", "unnamed")
175183
"""Determine the MRI format based on the series description."""
176184
if "T2_TSE" in series_description:
177-
return "T2w", {"acq": "sag", "chunk": series_description.split("_")[-1], "sequ": simp_json["SeriesNumber"], **keys}
185+
return "T2w", {"acq": "sag", "chunk": series_description.split("_")[-1], "sequ": simp_json["SeriesNumber"], **keys}, ".nii.gz"
178186
elif "3D_GRE_TRA" in series_description:
179-
return "vibe", {
180-
"acq": "ax",
181-
"part": dixon_mapping[series_description.split("_")[-1].lower()],
182-
"chunk": _get("ProtocolName", "unnamed").split("_")[-1],
183-
**keys,
184-
}
187+
return (
188+
"vibe",
189+
{
190+
"acq": "ax",
191+
"part": dixon_mapping[series_description.split("_")[-1].lower()],
192+
"chunk": _get("ProtocolName", "unnamed").split("_")[-1],
193+
**keys,
194+
},
195+
".nii.gz",
196+
)
185197
elif "ME_vibe" in series_description:
186-
return "mevibe", {
187-
"acq": "ax",
188-
"part": dixon_mapping[series_description.split("_")[-1].lower()],
189-
"sequ": simp_json["SeriesNumber"],
190-
**keys,
191-
}
198+
return (
199+
"mevibe",
200+
{"acq": "ax", "part": dixon_mapping[series_description.split("_")[-1].lower()], "sequ": simp_json["SeriesNumber"], **keys},
201+
".nii.gz",
202+
)
192203
elif "PD" in series_description:
193-
return "pd", {"acq": "iso", **keys}
204+
return "pd", {"acq": "iso", **keys}, ".nii.gz"
194205
elif "T2_HASTE" in series_description:
195-
return "T2haste", {"acq": "ax", **keys}
206+
return "T2haste", {"acq": "ax", **keys}, ".nii.gz"
196207
else:
197208
raise NotImplementedError(series_description)
198209
# GENERAL
@@ -268,6 +279,8 @@ def _get(key, default=None):
268279
found = False
269280
if modality == "ct":
270281
mri_format = "ct"
282+
elif modality.lower() == "pt":
283+
mri_format = "pet"
271284
elif modality == "xa": # Angiography
272285
biplane = False
273286
if "BIPLANE A" in image_type or "SINGLE A" in image_type:
@@ -319,9 +332,14 @@ def _get(key, default=None):
319332
" km " in series_description.lower() or series_description.startswith("km") or series_description.endswith("km")
320333
) and keys.get("ce") is None:
321334
keys["ce"] = "ContrastAgent"
335+
elif modality.lower() == "pdf":
336+
return "report", keys, ".pdf"
337+
elif modality.lower() == "sr":
338+
keys["desc"] = _get("SeriesDescription", None)
339+
return "report", keys, ".txt"
322340
else:
323-
raise NotImplementedError(f"modality='{modality.upper()}', ({modalities.get(modality.upper())})")
341+
raise NotImplementedError(f"modality='{modality}', ({modalities.get(modality.upper(), 'Non Standard Modality key')})")
324342

325343
# ".*sub.*t1.*": "subtraktion",
326344
# "subtraktion.*t1.*": "subtraktion",
327-
return mri_format, keys
345+
return mri_format, keys, ".nii.gz"

0 commit comments

Comments
 (0)