Skip to content

Commit 1479dfa

Browse files
Refactored metrics to fix the outlier metric, which is computed over the whole dataset, and to enable better interfacing with Codabench
1 parent bda0d05 commit 1479dfa

9 files changed

Lines changed: 244 additions & 149 deletions

File tree

README.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,22 +81,22 @@ train/
8181
│ ├── mri_chunk_3_out_phase.nii.gz
8282
│ ├── mri_combined_in_phase.nii.gz # stitched whole-body MRI, in-phase
8383
│ ├── mri_combined_out_phase.nii.gz # stitched whole-body MRI, out-of-phase
84-
│ ├── face_seg.nii.gz # face mask (MRI space)
84+
│ ├── mri_face_mask.nii.gz # binary face mask in MRI space
8585
│ └── metadata.json # {sex, age, height, weight}
8686
├── ct-label/ # ground-truth CT (train only)
8787
│ ├── ct.nii.gz # anonymized CT in HU
88-
│ ├── body_seg.nii.gz # body mask
88+
│ ├── body_seg.nii.gz # multi-class body segmentation
8989
│ ├── organ_seg.nii.gz # TotalSegmentator organ labels
90-
│ └── face_seg.nii.gz # face mask
90+
│ └── prediction_mask.nii.gz # binary mask: 1 where predictions are evaluated (excludes face + scanner bed)
9191
├── recon/ # sinogram data (labeled train + val)
9292
│ ├── mult_nac_rd85.hs/.s # multiplicative correction sinogram
9393
│ ├── add_nac_rd85.hs/.s # additive correction sinogram (scatter + randoms)
9494
│ ├── prompts_rd85.hs/.s # prompt (raw) sinogram
9595
│ ├── offset.json # bed position and gantry offset
9696
│ ├── ct_face_and_bed.nii.gz # GT CT values at face + scanner bed (for swap-back)
97-
│ └── face_and_bed_mask.nii.gz # face + scanner bed mask
97+
│ └── face_and_bed_mask.nii.gz # binary face + scanner bed mask
9898
└── pet-label/ # ground-truth PET (labeled train only)
99-
├── acpet.nii.gz # CT-attenuation-corrected PET (reference)
99+
├── pet.nii.gz # CT-attenuation-corrected PET (reference)
100100
├── body_seg.nii.gz # body mask in PET space
101101
└── organ_seg.nii.gz # organ labels in PET space
102102
```
@@ -213,12 +213,22 @@ The exact command used to run your container is:
213213

214214
```bash
215215
docker run --rm \
216+
--memory 128g \
217+
--network none \
216218
-v /path/to/sub-XXX/features:/data/features:ro \
217219
-v /path/to/output:/data/output \
218220
<your-image>
219221
```
220222

221-
No other files or directories are mounted. Your container must not require network access at inference time.
223+
**Constraints enforced at evaluation time:**
224+
225+
| Resource | Limit |
226+
|----------|-------|
227+
| RAM | 128 GB |
228+
| Wall-clock time | 5 minutes |
229+
| Network access | None (`--network none`) |
230+
231+
No other files or directories are mounted. Make sure all model weights and dependencies are baked into your image — no downloads at inference time.
222232

223233
Submit your image name and tag via Codabench (see [website](https://bic-mac-challenge.github.io/) for registration and submission instructions).
224234

src/evaluation/eval.py

Lines changed: 61 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -14,146 +14,107 @@
1414
import argparse
1515
import os
1616
import numpy as np
17-
import nibabel as nib
1817

1918
from metrics import (
2019
compute_whole_body_suv_mae,
21-
compute_brain_outlier_score,
2220
compute_organ_bias_from_totalseg,
2321
compute_whole_body_mu_mae,
2422
)
2523

2624

27-
def main():
28-
29-
parser = argparse.ArgumentParser(
30-
description="PET Attenuation Correction Challenge — Evaluation"
31-
)
25+
def evaluate_subject(subject_path, pred_pet_path=None, pred_ct_path=None):
    """
    Compute the per-subject metrics for one subject directory.

    Parameters
    ----------
    subject_path : str
        Subject directory; ground truth is read from its ``ct-label/`` and
        ``pet-label/`` sub-directories.
    pred_pet_path : str or None
        Predicted PET NIfTI. When supplied, "Whole-body SUV MAE" and
        "Organ Bias" are computed.
    pred_ct_path : str or None
        Predicted CT NIfTI. When supplied, "CT MAE" is computed.

    Note
    ----
    Brain Outlier Score is a dataset-level metric and is not produced here;
    call compute_brain_outlier_score() with paths from multiple subjects.

    Returns
    -------
    dict
        Mapping of metric name to the value returned by the metric function.

    Raises
    ------
    ValueError
        If neither ``pred_pet_path`` nor ``pred_ct_path`` is given.
    """

    want_pet = pred_pet_path is not None
    want_ct = pred_ct_path is not None

    if not (want_pet or want_ct):
        raise ValueError("At least one of pred_pet_path or pred_ct_path must be provided.")

    def _label(modality_dir, filename):
        # Ground-truth files live at <subject>/<modality_dir>/<filename>.
        return os.path.join(subject_path, modality_dir, filename)

    scores = {}

    if want_pet:
        pet_reference = _label("pet-label", "pet.nii.gz")
        pet_body_mask = _label("pet-label", "body_seg.nii.gz")
        pet_organs = _label("pet-label", "organ_seg.nii.gz")

        scores["Whole-body SUV MAE"] = compute_whole_body_suv_mae(
            pred_pet_path=pred_pet_path,
            gt_pet_path=pet_reference,
            body_mask_path=pet_body_mask,
            organ_seg_path=pet_organs,
        )
        scores["Organ Bias"] = compute_organ_bias_from_totalseg(
            pred_path=pred_pet_path,
            gt_path=pet_reference,
            totalseg_path=pet_organs,
            body_mask_path=pet_body_mask,
        )

    if want_ct:
        scores["CT MAE"] = compute_whole_body_mu_mae(
            pred_ct_path=pred_ct_path,
            gt_ct_path=_label("ct-label", "ct.nii.gz"),
            body_mask_path=_label("ct-label", "body_seg.nii.gz"),
            organ_seg_path=_label("ct-label", "organ_seg.nii.gz"),
        )

    return scores
14189

142-
print("\n================ Evaluation Results ================")
143-
print(f"Subject: {os.path.basename(subject_path)}")
144-
print("----------------------------------------------------")
14590

146-
if not results:
147-
print("No metric selected.")
148-
else:
149-
for name, value in results.items():
150-
if name == "Organ Bias":
151-
print(f"{name:<25}: {value:.6f}%")
152-
else:
153-
print(f"{name:<25}: {value:.6f}")
91+
def main():
    """
    Command-line entry point for single-subject evaluation.

    Parses ``--subject_path`` (required) and the optional ``--pred_pet`` /
    ``--pred_ct`` prediction paths, runs evaluate_subject(), and prints one
    line per metric.

    Returns
    -------
    dict
        The {metric_name: value} mapping returned by evaluate_subject().
    """

    parser = argparse.ArgumentParser(
        description="PET Attenuation Correction Challenge — Evaluation"
    )

    parser.add_argument("--subject_path", required=True, help="Path to subject directory")
    parser.add_argument("--pred_pet", default=None, help="Path to predicted PET NIfTI")
    parser.add_argument("--pred_ct", default=None, help="Path to predicted CT NIfTI")

    args = parser.parse_args()

    # Reject the call at the CLI level so the user gets a usage message
    # instead of a traceback from evaluate_subject().
    if args.pred_pet is None and args.pred_ct is None:
        parser.error("At least one of --pred_pet or --pred_ct must be provided.")

    results = evaluate_subject(args.subject_path, args.pred_pet, args.pred_ct)

    # basename() of a path ending in a separator ("train/sub-001/") is "",
    # so strip trailing separators first to always show the folder name.
    subject_name = os.path.basename(os.path.normpath(args.subject_path))

    print("\n================ Evaluation Results ================")
    print(f"Subject: {subject_name}")
    print("----------------------------------------------------")
    for name, value in results.items():
        # Organ Bias is the only metric printed with a percent sign.
        unit = "%" if name == "Organ Bias" else ""
        print(f"{name:<25}: {value:.6f}{unit}")
    print("====================================================\n")

    return results
117+
157118

158119
if __name__ == "__main__":
159120
main()

src/evaluation/eval_dataset.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
"""
2+
Dataset-level Evaluation Script
3+
4+
Evaluates predictions across multiple subjects and reports per-subject
5+
and aggregate scores, matching the challenge leaderboard computation.
6+
7+
Brain Outlier Score is computed jointly across all subjects (as in the
8+
challenge), not averaged from per-subject values.
9+
10+
Usage:
11+
python eval_dataset.py --dataset_path <dir> --pred_dir <dir>
12+
13+
<dataset_path> root directory containing subject folders (e.g. train/)
14+
each subject must have ct-label/ and pet-label/ subdirs
15+
<pred_dir> directory containing one folder per subject, each with
16+
ct.nii.gz and pet.nii.gz
17+
18+
Example:
19+
python eval_dataset.py \\
20+
--dataset_path /data/bic-mac/train \\
21+
--pred_dir /results/my_method
22+
"""
23+
24+
import argparse
25+
import os
26+
27+
import numpy as np
28+
29+
from eval import evaluate_subject
30+
from metrics import compute_brain_outlier_score
31+
32+
33+
def evaluate(dataset_path, pred_dir, subjects=None):
    """
    Evaluate predictions across multiple subjects.

    Per-subject metrics come from evaluate_subject(); CT MAE, Whole-body SUV
    MAE and Organ Bias are averaged over subjects, while the Brain Outlier
    Score is computed once from the paths of all subjects together.

    Parameters
    ----------
    dataset_path : str
        Root directory containing subject folders with ground-truth labels.
    pred_dir : str
        Directory containing one sub-folder per subject with ct.nii.gz and pet.nii.gz.
    subjects : iterable of str, optional
        Subject IDs to evaluate. Defaults to all sub-folders in pred_dir
        (sorted by name).

    Returns
    -------
    dict
        Aggregate scores: CT MAE, Whole-body SUV MAE, Brain Outlier Score, Organ Bias.

    Raises
    ------
    ValueError
        If there are no subjects to evaluate.
    FileNotFoundError
        If a subject folder is missing from dataset_path, or a subject's
        pet.nii.gz / ct.nii.gz is missing from pred_dir. All missing paths
        are listed in the message.
    """

    if subjects is None:
        subjects = sorted(
            d for d in os.listdir(pred_dir)
            if os.path.isdir(os.path.join(pred_dir, d))
        )
    else:
        # Accept any iterable: the IDs are counted, printed and iterated.
        subjects = list(subjects)

    if not subjects:
        raise ValueError(f"No subject folders found in {pred_dir}")

    # Resolve and check every input before running any metric, so an
    # incomplete submission is reported immediately (with all offending
    # paths) rather than after earlier subjects have already been scored.
    cases = []
    missing = []
    for subject_id in subjects:
        subject_path = os.path.join(dataset_path, subject_id)
        pred_pet = os.path.join(pred_dir, subject_id, "pet.nii.gz")
        pred_ct = os.path.join(pred_dir, subject_id, "ct.nii.gz")

        if not os.path.isdir(subject_path):
            missing.append(subject_path)
        missing.extend(p for p in (pred_pet, pred_ct) if not os.path.isfile(p))

        cases.append((subject_id, subject_path, pred_pet, pred_ct))

    if missing:
        raise FileNotFoundError(
            "Missing evaluation inputs:\n  " + "\n  ".join(missing)
        )

    print(f"Evaluating {len(subjects)} subject(s): {subjects}\n")

    per_subject = {}
    pred_pet_paths = []
    gt_pet_paths = []
    organ_seg_paths = []

    for subject_id, subject_path, pred_pet, pred_ct in cases:
        results = evaluate_subject(subject_path, pred_pet, pred_ct)
        per_subject[subject_id] = results

        print(f"  {subject_id}")
        for name, value in results.items():
            unit = "%" if name == "Organ Bias" else ""
            print(f"    {name:<25}: {value:.6f}{unit}")

        # Collected for the joint (dataset-level) brain outlier computation.
        pred_pet_paths.append(pred_pet)
        gt_pet_paths.append(os.path.join(subject_path, "pet-label", "pet.nii.gz"))
        organ_seg_paths.append(os.path.join(subject_path, "pet-label", "organ_seg.nii.gz"))

    # Brain outlier — dataset-level, computed jointly across all subjects
    brain_outlier = compute_brain_outlier_score(
        pred_paths=pred_pet_paths,
        gt_paths=gt_pet_paths,
        totalseg_paths=organ_seg_paths,
    )

    all_results = list(per_subject.values())
    aggregate = {
        "CT MAE": float(np.mean([r["CT MAE"] for r in all_results])),
        "Whole-body SUV MAE": float(np.mean([r["Whole-body SUV MAE"] for r in all_results])),
        "Brain Outlier Score": float(brain_outlier),
        "Organ Bias": float(np.mean([r["Organ Bias"] for r in all_results])),
    }

    print("\n================ Aggregate Results ================")
    for name, value in aggregate.items():
        unit = "%" if name == "Organ Bias" else ""
        print(f"  {name:<25}: {value:.6f}{unit}")
    print("====================================================\n")

    return aggregate
107+
108+
109+
def main():
    """
    Command-line entry point for dataset-level evaluation.

    Reads ``--dataset_path`` and ``--pred_dir`` (both required) plus an
    optional ``--subjects`` list from the command line and forwards them to
    evaluate().
    """

    cli = argparse.ArgumentParser(description="BIC-MAC Dataset-level Evaluation")

    # The two mandatory directory options differ only in flag and help text.
    required_dirs = (
        (
            "--dataset_path",
            "Root directory containing subject folders with ground-truth labels",
        ),
        (
            "--pred_dir",
            "Directory containing one sub-folder per subject with ct.nii.gz and pet.nii.gz",
        ),
    )
    for flag, help_text in required_dirs:
        cli.add_argument(flag, required=True, help=help_text)

    cli.add_argument(
        "--subjects",
        nargs="+",
        default=None,
        help="Explicit list of subject IDs to evaluate (default: all sub-folders in pred_dir)",
    )

    opts = cli.parse_args()
    evaluate(opts.dataset_path, opts.pred_dir, opts.subjects)
133+
134+
135+
if __name__ == "__main__":
136+
main()

0 commit comments

Comments
 (0)