bic-mac-challenge
diff --git a/README.md b/README.md
Lines changed: 16 additions & 6 deletions
diff --git a/src/evaluation/eval.py b/src/evaluation/eval.py
Lines changed: 61 additions & 100 deletions
diff --git a/src/evaluation/eval_dataset.py b/src/evaluation/eval_dataset.py
Lines changed: 136 additions & 0 deletions
@@ -81,22 +81,22 @@ train/
     │   ├── mri_chunk_3_out_phase.nii.gz
     │   ├── mri_combined_in_phase.nii.gz   # stitched whole-body MRI, in-phase
     │   ├── mri_combined_out_phase.nii.gz  # stitched whole-body MRI, out-of-phase
-    │   ├── face_seg.nii.gz                # face mask (MRI space)
+    │   ├── mri_face_mask.nii.gz           # binary face mask in MRI space
     │   └── metadata.json                  # {sex, age, height, weight}
     ├── ct-label/                          # ground-truth CT (train only)
     │   ├── ct.nii.gz                      # anonymized CT in HU
-    │   ├── body_seg.nii.gz                # body mask
+    │   ├── body_seg.nii.gz                # multi-class body segmentation
     │   ├── organ_seg.nii.gz               # TotalSegmentator organ labels
-    │   └── face_seg.nii.gz                # face mask
+    │   └── prediction_mask.nii.gz         # binary mask: 1 where predictions are evaluated (excludes face + scanner bed)
     ├── recon/                             # sinogram data (labeled train + val)
     │   ├── mult_nac_rd85.hs/.s            # multiplicative correction sinogram
     │   ├── add_nac_rd85.hs/.s             # additive correction sinogram (scatter + randoms)
     │   ├── prompts_rd85.hs/.s             # prompt (raw) sinogram
     │   ├── offset.json                    # bed position and gantry offset
     │   ├── ct_face_and_bed.nii.gz         # GT CT values at face + scanner bed (for swap-back)
-    │   └── face_and_bed_mask.nii.gz       # face + scanner bed mask
+    │   └── face_and_bed_mask.nii.gz       # binary face + scanner bed mask
     └── pet-label/                         # ground-truth PET (labeled train only)
-        ├── acpet.nii.gz                   # CT-attenuation-corrected PET (reference)
+        ├── pet.nii.gz                     # CT-attenuation-corrected PET (reference)
         ├── body_seg.nii.gz                # body mask in PET space
         └── organ_seg.nii.gz               # organ labels in PET space
 ```
@@ -213,12 +213,22 @@ The exact command used to run your container is:
 
 ```bash
 docker run --rm \
+  --memory 128g \
+  --network none \
   -v /path/to/sub-XXX/features:/data/features:ro \
   -v /path/to/output:/data/output \
   <your-image>
 ```
 
-No other files or directories are mounted. Your container must not require network access at inference time.
+**Constraints enforced at evaluation time:**
+
+| Resource | Limit |
+|----------|-------|
+| RAM | 128 GB |
+| Wall-clock time | 5 minutes |
+| Network access | None (`--network none`) |
+
+No other files or directories are mounted. Make sure all model weights and dependencies are baked into your image — no downloads at inference time.
 
 Submit your image name and tag via Codabench (see [website](https://bic-mac-challenge.github.io/) for registration and submission instructions).
 
 
@@ -14,146 +14,107 @@
 import argparse
 import os
 import numpy as np
-import nibabel as nib
 
 from metrics import (
     compute_whole_body_suv_mae,
-    compute_brain_outlier_score,
     compute_organ_bias_from_totalseg,
     compute_whole_body_mu_mae,
 )
 
 
-def main():
-
-    parser = argparse.ArgumentParser(
-        description="PET Attenuation Correction Challenge — Evaluation"
-    )
+def evaluate_subject(subject_path, pred_pet_path=None, pred_ct_path=None):
+    """
+    Run metrics for a single subject.
 
-    parser.add_argument(
-        "--subject_path",
-        required=True,
-        help="Path to subject directory"
-    )
+    Parameters
+    ----------
+    subject_path : str
+        Path to the subject directory (pet-label/ is read for PET metrics, ct-label/ for CT MAE).
+    pred_pet_path : str or None
+        Path to predicted PET NIfTI. If given, runs PET metrics (SUV MAE, Organ Bias).
+    pred_ct_path : str or None
+        Path to predicted CT NIfTI. If given, runs CT MAE.
 
-    parser.add_argument(
-        "--pred_pet",
-        required=True,
-        help="Path to predicted PET NIfTI"
-    )
+    Note
+    ----
+    Brain Outlier Score is a dataset-level metric and cannot be computed per-subject.
+    Use compute_brain_outlier_score() directly with paths from multiple subjects.
 
-    parser.add_argument(
-        "--pred_ct",
-        required=True,
-        help="Path to predicted CT NIfTI"
-)
+    Returns
+    -------
+    dict
+        {metric_name: float}
+    """
 
-    parser.add_argument(
-        "-all",
-        action="store_true",
-        help="Run all metrics"
-    )
+    if pred_pet_path is None and pred_ct_path is None:
+        raise ValueError("At least one of pred_pet_path or pred_ct_path must be provided.")
 
-    parser.add_argument(
-        "-specific_metric",
-        choices=[
-            "whole_body_mae",
-            "brain_outlier",
-            "organ_bias",
-            "ct_mae",
-        ],
-        help="Run specific metric only"
-    )
-
-    args = parser.parse_args()
-
-    subject_path  = args.subject_path
     ct_label_dir  = os.path.join(subject_path, "ct-label")
     pet_label_dir = os.path.join(subject_path, "pet-label")
-    features_dir  = os.path.join(subject_path, "features")
-
-    gt_pet        = os.path.join(pet_label_dir, "pet.nii.gz")
-    gt_ct         = os.path.join(ct_label_dir,  "ct.nii.gz")
-    body_seg_pet  = os.path.join(pet_label_dir, "body_seg.nii.gz")
-    organ_seg_pet = os.path.join(pet_label_dir, "organ_seg.nii.gz")
-    body_seg_ct   = os.path.join(ct_label_dir,  "body_seg.nii.gz")
-    meta_json     = os.path.join(features_dir,  "metadata.json")
 
     results = {}
 
-    # =====================================================
-    # 1. Whole-body SUV MAE
-    # =====================================================
-
-    if args.all or args.specific_metric == "whole_body_mae":
+    if pred_pet_path is not None:
+        gt_pet        = os.path.join(pet_label_dir, "pet.nii.gz")
+        body_seg_pet  = os.path.join(pet_label_dir, "body_seg.nii.gz")
+        organ_seg_pet = os.path.join(pet_label_dir, "organ_seg.nii.gz")
 
         results["Whole-body SUV MAE"] = compute_whole_body_suv_mae(
-            pred_pet_path=args.pred_pet,
+            pred_pet_path=pred_pet_path,
             gt_pet_path=gt_pet,
             body_mask_path=body_seg_pet,
-            liver_mask_path=organ_seg_pet,
-            json_path=meta_json,
-        )
-
-    # =====================================================
-    # 2. Brain Outlier Score
-    # =====================================================
-
-    if args.all or args.specific_metric == "brain_outlier":
-
-        results["Brain Outlier Score"] = compute_brain_outlier_score(
-            pred_paths=[args.pred_pet],
-            gt_paths=[gt_pet],
-            totalseg_paths=[organ_seg_pet],
+            organ_seg_path=organ_seg_pet,
         )
 
-    # =====================================================
-    # 3. Organ Bias
-    # =====================================================
-
-    if args.all or args.specific_metric == "organ_bias":
-
-        
         results["Organ Bias"] = compute_organ_bias_from_totalseg(
-            pred_path=args.pred_pet,
+            pred_path=pred_pet_path,
             gt_path=gt_pet,
             totalseg_path=organ_seg_pet,
-            json_path=meta_json,
+            body_mask_path=body_seg_pet,
         )
 
-    
-    # =====================================================
-    # 4. CT MAE
-    # =====================================================
-
-    if args.all or args.specific_metric == "ct_mae":
+    if pred_ct_path is not None:
+        gt_ct        = os.path.join(ct_label_dir,  "ct.nii.gz")
+        body_seg_ct  = os.path.join(ct_label_dir,  "body_seg.nii.gz")
+        organ_seg_ct = os.path.join(ct_label_dir,  "organ_seg.nii.gz")
 
         results["CT MAE"] = compute_whole_body_mu_mae(
-            pred_ct_path=args.pred_ct,
+            pred_ct_path=pred_ct_path,
             gt_ct_path=gt_ct,
             body_mask_path=body_seg_ct,
-            liver_mask_path=organ_seg_pet,
+            organ_seg_path=organ_seg_ct,
         )
 
-    # =====================================================
-    # Print Results
-    # =====================================================
+    return results
 
-    print("\n================ Evaluation Results ================")
-    print(f"Subject: {os.path.basename(subject_path)}")
-    print("----------------------------------------------------")
 
-    if not results:
-        print("No metric selected.")
-    else:
-        for name, value in results.items():
-            if name == "Organ Bias":
-                print(f"{name:<25}: {value:.6f}%")
-            else:
-                print(f"{name:<25}: {value:.6f}")
+def main():
 
+    parser = argparse.ArgumentParser(
+        description="PET Attenuation Correction Challenge — Evaluation"
+    )
+
+    parser.add_argument("--subject_path", required=True, help="Path to subject directory")
+    parser.add_argument("--pred_pet",     default=None,  help="Path to predicted PET NIfTI")
+    parser.add_argument("--pred_ct",      default=None,  help="Path to predicted CT NIfTI")
+
+    args = parser.parse_args()
+
+    if args.pred_pet is None and args.pred_ct is None:
+        parser.error("At least one of --pred_pet or --pred_ct must be provided.")
+
+    results = evaluate_subject(args.subject_path, args.pred_pet, args.pred_ct)
+
+    print("\n================ Evaluation Results ================")
+    print(f"Subject: {os.path.basename(args.subject_path)}")
+    print("----------------------------------------------------")
+    for name, value in results.items():
+        unit = "%" if name == "Organ Bias" else ""
+        print(f"{name:<25}: {value:.6f}{unit}")
     print("====================================================\n")
 
+    return results
+
 
 if __name__ == "__main__":
     main()
@@ -0,0 +1,136 @@
+"""
+Dataset-level Evaluation Script
+
+Evaluates predictions across multiple subjects and reports per-subject
+and aggregate scores, matching the challenge leaderboard computation.
+
+Brain Outlier Score is computed jointly across all subjects (as in the
+challenge), not averaged from per-subject values.
+
+Usage:
+    python eval_dataset.py --dataset_path <dir> --pred_dir <dir> [--subjects ID [ID ...]]
+
+    <dataset_path>  root directory containing subject folders (e.g. train/)
+                    each subject must have ct-label/ and pet-label/ subdirs
+    <pred_dir>      directory containing one folder per subject, each with
+                    ct.nii.gz and pet.nii.gz
+
+Example:
+    python eval_dataset.py \\
+        --dataset_path /data/bic-mac/train \\
+        --pred_dir /results/my_method
+"""
+
+import argparse
+import os
+
+import numpy as np
+
+from eval import evaluate_subject
+from metrics import compute_brain_outlier_score
+
+
+def evaluate(dataset_path, pred_dir, subjects=None):
+    """
+    Evaluate predictions across multiple subjects.
+
+    Parameters
+    ----------
+    dataset_path : str
+        Root directory containing subject folders with ground-truth labels.
+    pred_dir : str
+        Directory containing one sub-folder per subject with ct.nii.gz and pet.nii.gz.
+    subjects : list of str, optional
+        Subject IDs to evaluate. Defaults to all sub-folders in pred_dir.
+
+    Returns
+    -------
+    dict
+        Aggregate scores: per-subject means of CT MAE, Whole-body SUV MAE and Organ Bias, plus the jointly computed Brain Outlier Score.
+    """
+
+    if subjects is None:
+        subjects = sorted(
+            d for d in os.listdir(pred_dir)
+            if os.path.isdir(os.path.join(pred_dir, d))
+        )
+
+    if not subjects:
+        raise ValueError(f"No subject folders found in {pred_dir}")
+
+    print(f"Evaluating {len(subjects)} subject(s): {subjects}\n")
+
+    per_subject     = {}
+    pred_pet_paths  = []
+    gt_pet_paths    = []
+    organ_seg_paths = []
+
+    for subject_id in subjects:
+        subject_path = os.path.join(dataset_path, subject_id)
+        pred_pet     = os.path.join(pred_dir, subject_id, "pet.nii.gz")
+        pred_ct      = os.path.join(pred_dir, subject_id, "ct.nii.gz")
+
+        results = evaluate_subject(subject_path, pred_pet, pred_ct)
+        per_subject[subject_id] = results
+
+        print(f"  {subject_id}")
+        for name, value in results.items():
+            unit = "%" if name == "Organ Bias" else ""
+            print(f"    {name:<25}: {value:.6f}{unit}")
+
+        pred_pet_paths.append(pred_pet)
+        gt_pet_paths.append(os.path.join(subject_path, "pet-label", "pet.nii.gz"))
+        organ_seg_paths.append(os.path.join(subject_path, "pet-label", "organ_seg.nii.gz"))
+
+    # Brain outlier — dataset-level, computed jointly across all subjects
+    brain_outlier = compute_brain_outlier_score(
+        pred_paths=pred_pet_paths,
+        gt_paths=gt_pet_paths,
+        totalseg_paths=organ_seg_paths,
+    )
+
+    all_results = list(per_subject.values())
+    aggregate = {
+        "CT MAE":              float(np.mean([r["CT MAE"]             for r in all_results])),
+        "Whole-body SUV MAE":  float(np.mean([r["Whole-body SUV MAE"] for r in all_results])),
+        "Brain Outlier Score": float(brain_outlier),
+        "Organ Bias":          float(np.mean([r["Organ Bias"]         for r in all_results])),
+    }
+
+    print("\n================ Aggregate Results ================")
+    for name, value in aggregate.items():
+        unit = "%" if name == "Organ Bias" else ""
+        print(f"  {name:<25}: {value:.6f}{unit}")
+    print("====================================================\n")
+
+    return aggregate
+
+
+def main():
+
+    parser = argparse.ArgumentParser(
+        description="BIC-MAC Dataset-level Evaluation"
+    )
+    parser.add_argument(
+        "--dataset_path",
+        required=True,
+        help="Root directory containing subject folders with ground-truth labels",
+    )
+    parser.add_argument(
+        "--pred_dir",
+        required=True,
+        help="Directory containing one sub-folder per subject with ct.nii.gz and pet.nii.gz",
+    )
+    parser.add_argument(
+        "--subjects",
+        nargs="+",
+        default=None,
+        help="Explicit list of subject IDs to evaluate (default: all sub-folders in pred_dir)",
+    )
+    args = parser.parse_args()
+
+    evaluate(args.dataset_path, args.pred_dir, args.subjects)
+
+
+if __name__ == "__main__":
+    main()