Skip to content

Commit 9025061

Browse files
committed
add PDF export support
1 parent a2e3c54 commit 9025061

3 files changed

Lines changed: 114 additions & 25 deletions

File tree

TPTBox/core/bids_constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@
163163
"localizer",
164164
"difference",
165165
"labels",
166+
"report",
166167
]
167168
# https://bids-specification.readthedocs.io/en/stable/appendices/entity-table.html
168169
formats_relaxed = [*formats, "t2", "t1", "t2c", "t1c", "mr", "snapshot", "t1dixon", "dwi", "ctb"]
@@ -221,7 +222,7 @@
221222
"OPT": "Ophthalmic Tomography",
222223
"OPV": "Ophthalmic Visual Field",
223224
"OSS": "Optical Surface Scan",
224-
"OT": "Other ",
225+
"OT": "Other",
225226
"PLAN": "Plan",
226227
"PR": "Presentation State",
227228
"PT": "Positron emission tomography (PET)",

TPTBox/core/dicom/dicom_extract.py

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,68 @@ def dicom_to_nifti_multiframe(ds, nii_path):
190190
return nii_path
191191

192192

193-
def _convert_to_nifti(dicom_out_path, nii_path):
193+
def _export_pdf_from_dicom(dcm_path, out_pdf):
    """Write the document embedded in a PDF DICOM dataset to ``out_pdf``.

    Args:
        dcm_path: Sequence holding exactly one dataset-like object. Despite the
            parameter name, the element is used as a dataset (its ``Modality``
            and ``EncapsulatedDocument`` are read), not as a file path.
        out_pdf: Destination file path. Missing parent folders are created.

    Raises:
        ValueError: If ``dcm_path`` does not hold exactly one dataset, if the
            dataset's modality is not "PDF" (case-insensitive), or if it has no
            ``EncapsulatedDocument`` element.
    """
    # Explicit check instead of ``assert`` so the guard survives ``python -O``.
    if len(dcm_path) != 1:
        raise ValueError(f"Expected exactly one DICOM dataset, got {len(dcm_path)}")
    ds = dcm_path[0]

    # Only the modality is verified here; a dataset without a Modality is
    # rejected with the same error as one with a different modality.
    if str(getattr(ds, "Modality", "")).upper() != "PDF":
        raise ValueError("Not a PDF DICOM")

    if "EncapsulatedDocument" not in ds:
        raise ValueError("No embedded PDF found")

    # NOTE(review): DICOM pads odd-length values to an even length, so the
    # payload may end with one extra NUL byte - confirm whether to trim it.
    pdf_bytes = ds.EncapsulatedDocument

    out_pdf = Path(out_pdf)
    out_pdf.parent.mkdir(parents=True, exist_ok=True)
    out_pdf.write_bytes(pdf_bytes)
208+
209+
210+
def _collect_text(ds, txt_lines: list[str] | None = None):
    """Flatten the ``ContentSequence`` tree of a dataset into text lines.

    Every content item contributes at most one line, indented by one space per
    nesting level:

    * ``"<concept>: <value>"`` when both a concept name and a value are found,
    * ``"<concept>"`` when only a concept name is found (e.g. a container),
    * ``"<value>"`` when only a value is found.

    The concept name is ``ConceptNameCodeSequence[0].CodeMeaning``. The value is
    the first non-``None`` of the item's own ``TextValue``, ``CodeMeaning`` or
    ``NumericValue``, falling back to ``ConceptCodeSequence[0].CodeMeaning`` and
    ``MeasuredValueSequence[0].NumericValue``.

    Args:
        ds: Dataset-like object; only its ``ContentSequence`` attribute is read.
        txt_lines: Optional list to append to. A new list is created when
            ``None`` is given.

    Returns:
        The list the lines were appended to (``txt_lines`` itself when given).
    """
    if txt_lines is None:
        txt_lines = []

    def _first_item_attr(item, sequence_name, attr):
        """Return ``attr`` of the first element of ``item.<sequence_name>``, or None."""
        sequence = getattr(item, sequence_name, None)
        if not sequence:
            return None
        try:
            return getattr(sequence[0], attr, None)
        except (IndexError, KeyError, TypeError):
            # Malformed sequence: treat as "no information" instead of failing.
            return None

    def _item_value(item):
        """Return the textual/coded/numeric value carried by a content item, or None."""
        for attr in ("TextValue", "CodeMeaning", "NumericValue"):
            value = getattr(item, attr, None)
            if value is not None:
                return value
        # Coded and numeric items keep their value one level down.
        for sequence_name, attr in (
            ("ConceptCodeSequence", "CodeMeaning"),
            ("MeasuredValueSequence", "NumericValue"),
        ):
            value = _first_item_attr(item, sequence_name, attr)
            if value is not None:
                return value
        return None

    def _walk(content_sequence, level: int = 0):
        """Append one line per item, then recurse into its children."""
        prefix = " " * level
        for item in content_sequence:
            concept = _first_item_attr(item, "ConceptNameCodeSequence", "CodeMeaning") or ""
            value = _item_value(item)

            if concept and value is not None:
                txt_lines.append(f"{prefix}{concept}: {value}")
            elif concept:
                # Avoid emitting "<concept>: None" for items without a value.
                txt_lines.append(f"{prefix}{concept}")
            elif value is not None:
                txt_lines.append(f"{prefix}{value}")

            if hasattr(item, "ContentSequence"):
                _walk(item.ContentSequence, level + 1)

    if hasattr(ds, "ContentSequence"):
        _walk(ds.ContentSequence)
    return txt_lines
245+
246+
247+
def _extract_txt_from_dicom(dcm_path, out_txt):
    """Write the text collected from each dataset to ``out_txt``.

    Args:
        dcm_path: Iterable of dataset-like objects. Despite the parameter name,
            each element is handed to ``_collect_text`` as a dataset.
        out_txt: Destination file path; lines are joined with ``"\\n"``.
    """
    lines = []
    for ds in dcm_path:
        lines = _collect_text(ds, lines)
    # Fixed encoding: the platform default may be unable to encode report text
    # containing non-ASCII characters.
    Path(out_txt).write_text("\n".join(lines), encoding="utf-8")
252+
253+
254+
def _extract_nii_from_dicom(dicom_out_path, nii_path):
194255
"""
195256
Convert DICOM files to NIfTI format and handle common conversion errors.
196257
@@ -206,6 +267,7 @@ def _convert_to_nifti(dicom_out_path, nii_path):
206267
FunctionTimedOut: Raised if the DICOM-to-NIfTI conversion times out.
207268
ValueError: Raised for generic validation failures.
208269
"""
270+
209271
try:
210272
if isinstance(dicom_out_path, list):
211273
try:
@@ -217,6 +279,7 @@ def _convert_to_nifti(dicom_out_path, nii_path):
217279
return True
218280
except Exception as e:
219281
logger.on_debug("Multi-Frame DICOM did not work:", e)
282+
## The PDF dicom lands here
220283
convert_dicom.dicom_array_to_nifti(dicom_out_path, nii_path, True)
221284
else:
222285
# func_timeout(10, dicom2nifti.dicom_series_to_nifti, (dicom_out_path, nii_path, True))
@@ -246,6 +309,9 @@ def _convert_to_nifti(dicom_out_path, nii_path):
246309
logger.print_error()
247310

248311
return False
312+
except Exception:
313+
print(nii_path)
314+
249315
return True
250316

251317

@@ -264,7 +330,7 @@ def _get_paths(
264330
):
265331
if keys is None:
266332
keys = {}
267-
(mri_format, keys) = extract_keys_from_json(
333+
(mri_format, keys, ending) = extract_keys_from_json(
268334
simp_json,
269335
dcm_data_l,
270336
use_session,
@@ -277,7 +343,7 @@ def _get_paths(
277343
json_file_name, json_bids_name = _generate_bids_path(
278344
dataset_nifti_dir, keys, mri_format, simp_json, make_subject_chunks=make_subject_chunks, parent=parent
279345
)
280-
nii_path = str(json_file_name).replace(".json", "") + ".nii.gz"
346+
nii_path = str(json_file_name).replace(".json", "") + ending
281347
return json_file_name, json_bids_name, nii_path
282348

283349

@@ -364,11 +430,17 @@ def _from_dicom_to_nii(
364430
if exist and Path(nii_path).exists():
365431
logger.print("already exists:", json_file_name, ltype=Log_Type.STRANGE, verbose=verbose)
366432
return nii_path
367-
suc = _convert_to_nifti(dcm_data_l, nii_path)
433+
add_grid = False
434+
if nii_path.endswith(".pdf"):
435+
_export_pdf_from_dicom(dcm_data_l, nii_path)
436+
elif nii_path.endswith(".txt"):
437+
_extract_txt_from_dicom(dcm_data_l, nii_path)
438+
else:
439+
add_grid = _extract_nii_from_dicom(dcm_data_l, nii_path)
368440

369-
if suc:
441+
if add_grid:
370442
_add_grid_info_to_json(nii_path, json_file_name)
371-
return nii_path if suc else None
443+
return nii_path if add_grid else None
372444

373445

374446
def _add_grid_info_to_json(nii_path: Path | str, simp_json: Path | str, force_update=False, add=True):

TPTBox/core/dicom/dicom_header_to_keys.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,15 @@ def extract_keys_from_json( # noqa: C901
165165
def _get(key, default=None):
166166
if key not in simp_json:
167167
return keys.get(key, default)
168-
return str(simp_json[key]).replace("_", "-").replace(" ", "-").replace(".", "-")
168+
value = str(simp_json[key]).replace("_", "-").replace(" ", "-").replace(".", "-")
169+
# remove invalid filename characters
170+
value = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "", value)
171+
# collapse repeated dashes
172+
value = re.sub(r"-+", "-", value)
173+
# strip leading/trailing dots and dashes
174+
value = value.strip(".-")
175+
176+
return value
169177

170178
"""Extract keys from JSON based on study and series descriptions."""
171179
#### NAKO FIXED ####
@@ -174,25 +182,28 @@ def _get(key, default=None):
174182
series_description = _get("SeriesDescription", "unnamed")
175183
"""Determine the MRI format based on the series description."""
176184
if "T2_TSE" in series_description:
177-
return "T2w", {"acq": "sag", "chunk": series_description.split("_")[-1], "sequ": simp_json["SeriesNumber"], **keys}
185+
return "T2w", {"acq": "sag", "chunk": series_description.split("_")[-1], "sequ": simp_json["SeriesNumber"], **keys}, ".nii.gz"
178186
elif "3D_GRE_TRA" in series_description:
179-
return "vibe", {
180-
"acq": "ax",
181-
"part": dixon_mapping[series_description.split("_")[-1].lower()],
182-
"chunk": _get("ProtocolName", "unnamed").split("_")[-1],
183-
**keys,
184-
}
187+
return (
188+
"vibe",
189+
{
190+
"acq": "ax",
191+
"part": dixon_mapping[series_description.split("_")[-1].lower()],
192+
"chunk": _get("ProtocolName", "unnamed").split("_")[-1],
193+
**keys,
194+
},
195+
".nii.gz",
196+
)
185197
elif "ME_vibe" in series_description:
186-
return "mevibe", {
187-
"acq": "ax",
188-
"part": dixon_mapping[series_description.split("_")[-1].lower()],
189-
"sequ": simp_json["SeriesNumber"],
190-
**keys,
191-
}
198+
return (
199+
"mevibe",
200+
{"acq": "ax", "part": dixon_mapping[series_description.split("_")[-1].lower()], "sequ": simp_json["SeriesNumber"], **keys},
201+
".nii.gz",
202+
)
192203
elif "PD" in series_description:
193-
return "pd", {"acq": "iso", **keys}
204+
return "pd", {"acq": "iso", **keys}, ".nii.gz"
194205
elif "T2_HASTE" in series_description:
195-
return "T2haste", {"acq": "ax", **keys}
206+
return "T2haste", {"acq": "ax", **keys}, ".nii.gz"
196207
else:
197208
raise NotImplementedError(series_description)
198209
# GENERAL
@@ -319,9 +330,14 @@ def _get(key, default=None):
319330
" km " in series_description.lower() or series_description.startswith("km") or series_description.endswith("km")
320331
) and keys.get("ce") is None:
321332
keys["ce"] = "ContrastAgent"
333+
elif modality.lower() == "pdf":
334+
return "report", keys, ".pdf"
335+
elif modality.lower() == "sr":
336+
keys["desc"] = _get("SeriesDescription", None)
337+
return "report", keys, ".txt"
322338
else:
323-
raise NotImplementedError(f"modality='{modality.upper()}', ({modalities.get(modality.upper())})")
339+
raise NotImplementedError(f"modality='{modality}', ({modalities.get(modality.upper(), 'Non Standard Modality key')})")
324340

325341
# ".*sub.*t1.*": "subtraktion",
326342
# "subtraktion.*t1.*": "subtraktion",
327-
return mri_format, keys
343+
return mri_format, keys, ".nii.gz"

0 commit comments

Comments
 (0)