
Commit ee82c46

Fix loss balancing wiring and SNEMI SDT config
Donglai Wei committed
1 parent 69c315c · commit ee82c46

3 files changed

Lines changed: 217 additions & 2 deletions

File tree

connectomics/training/loss/balancing.py
tests/unit/test_loss_balancing.py
tutorials/neuron_snemi_sdt.yaml

connectomics/training/loss/balancing.py

Lines changed: 9 additions & 2 deletions
@@ -188,9 +188,16 @@ def build_loss_weighter(
     if not hasattr(cfg, "model") or not hasattr(cfg.model, "loss"):
         return None

-    lb_cfg = getattr(cfg.model, "loss", None)
-    if lb_cfg is None:
+    loss_cfg = getattr(cfg.model, "loss", None)
+    if loss_cfg is None:
         return None
+
+    # Prefer the schema-defined nested loss_balancing block, but keep support
+    # for older flat configs that placed strategy fields directly under model.loss.
+    lb_cfg = getattr(loss_cfg, "loss_balancing", None)
+    if lb_cfg is None or getattr(lb_cfg, "strategy", None) is None:
+        lb_cfg = loss_cfg
+
     strategy = getattr(lb_cfg, "strategy", None)
     if strategy is None:
         return None
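
With this change, build_loss_weighter first looks for the nested block at model.loss.loss_balancing and only falls back to the old flat layout when that block (or its strategy field) is missing. A minimal sketch of the nested form in the tutorials' YAML style, assuming only the field names exercised by the new unit tests (strategy, gradnorm_alpha, gradnorm_lambda, gradnorm_parameter_strategy); the surrounding keys are illustrative, not the full schema:

model:
  loss:
    profile: loss_bd                       # base loss profile, as in the tutorial below
    loss_balancing:
      strategy: gradnorm                   # "gradnorm" or "uncertainty" (both covered by the tests)
      gradnorm_alpha: 0.25
      gradnorm_lambda: 2.5
      gradnorm_parameter_strategy: first   # "first" or "last" shared-parameter selection

The older flat layout, with the same fields placed directly under model.loss, still resolves through the fallback branch above.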

tests/unit/test_loss_balancing.py

Lines changed: 69 additions & 0 deletions
from types import SimpleNamespace

import torch.nn as nn

from connectomics.training.loss import (
    GradNormLossWeighter,
    UncertaintyLossWeighter,
    build_loss_weighter,
)


def _cfg(
    strategy=None,
    *,
    gradnorm_alpha=0.5,
    gradnorm_lambda=1.0,
    gradnorm_parameter_strategy="last",
    legacy_flat=False,
):
    if legacy_flat:
        loss = SimpleNamespace(
            strategy=strategy,
            gradnorm_alpha=gradnorm_alpha,
            gradnorm_lambda=gradnorm_lambda,
            gradnorm_parameter_strategy=gradnorm_parameter_strategy,
        )
    else:
        loss = SimpleNamespace(
            loss_balancing=SimpleNamespace(
                strategy=strategy,
                gradnorm_alpha=gradnorm_alpha,
                gradnorm_lambda=gradnorm_lambda,
                gradnorm_parameter_strategy=gradnorm_parameter_strategy,
            )
        )
    return SimpleNamespace(model=SimpleNamespace(loss=loss))


def test_build_loss_weighter_uses_nested_uncertainty_strategy():
    weighter = build_loss_weighter(_cfg(strategy="uncertainty"), num_tasks=3)

    assert isinstance(weighter, UncertaintyLossWeighter)


def test_build_loss_weighter_uses_nested_gradnorm_settings():
    model = nn.Sequential(nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 2))

    weighter = build_loss_weighter(
        _cfg(
            strategy="gradnorm",
            gradnorm_alpha=0.25,
            gradnorm_lambda=2.5,
            gradnorm_parameter_strategy="first",
        ),
        num_tasks=3,
        model=model,
    )

    assert isinstance(weighter, GradNormLossWeighter)
    assert weighter.alpha == 0.25
    assert weighter.gradnorm_lambda == 2.5
    assert len(weighter.shared_parameters) == 1
    assert weighter.shared_parameters[0] is next(model.parameters())


def test_build_loss_weighter_keeps_legacy_flat_strategy_support():
    weighter = build_loss_weighter(_cfg(strategy="uncertainty", legacy_flat=True), num_tasks=2)

    assert isinstance(weighter, UncertaintyLossWeighter)

tutorials/neuron_snemi_sdt.yaml

Lines changed: 139 additions & 0 deletions
experiment_name: rsunet_snemi_lee2017_modern_sdt
description: SNEMI3D neuron affinity + SDT learning (9-channel affinity + 1-channel SDT)

_base_:
  - bases/all_profiles.yaml

default:
  system:
    profile: all-gpu-cpu
  model:
    arch:
      profile: rsunet
      input_size: [16, 224, 224]
      output_size: [16, 224, 224]
      out_channels: 10
    loss:
      profile: loss_bd
  data:
    label_transform:
      profile: label_affinity_9_sdt
    resolution: [30, 6, 6]
    dataloader:
      profile: cached
      patch_size: [16, 224, 224]
    data_transform:
      # Keep symmetric full-volume context padding on the inference input.
      pad_size: [17, 128, 128]
    augmentation:
      profile: aug_em_neuron
  inference:
    sliding_window:
      window_size: [16, 224, 224]
      sw_batch_size: 1
      keep_input_on_cpu: false
    test_time_augmentation:
      enabled: false
      #enabled: true
      patch_first_local: true
      flip_axes: all
      rotation90_axes: [[1, 2]]
      activation_profile: act_bd
      #select_channel: [0, 1, 2, 9]
      ensemble_mode: [["0:9", min], ["9:", mean]]
    postprocessing:
      enabled: true
      # crop_pad + affinity_crop[(17,0),(17,0),(17,0)] = pad_size [17,128,128]:
      # Z: 0+17=17, 17+0=17 | Y/X: 111+17=128, 128+0=128
      crop_pad: [0, 17, 111, 128, 111, 128]
    save_prediction:
      enabled: true
    decoding_profile: decoding_waterz
    evaluation:
      enabled: true
      metrics: [adapted_rand]


train:
  data:
    train:
      image: datasets/SNEMI/train-input.tif
      label: datasets/SNEMI/train-labels.tif

  optimization:
    profile: warmup_cosine_lr
    max_epochs: 200
    n_steps_per_epoch: 1000
  monitor:
    logging:
      scalar:
        loss: [train_loss_total_epoch, train_loss_affinity_total, train_loss_sdt_total]
        loss_every_n_steps: 50
      images:
        log_every_n_epochs: 10
        max_images: 8
        num_slices: 2
        channel_mode: all
    checkpoint:
      save_top_k: 3
      monitor: train_loss_total_epoch
      mode: min

test:
  data:
    test:
      path: datasets/SNEMI/
      # image: [train-input.tif, test-input_z29.h5]
      # label: [train-labels.tif, test-labels.h5]
      #image: train-input.tif
      #label: train-labels.tif
      #image: test-input_z29.h5
      image: test-input.tif
      label: test-labels.h5
      resolution: [30, 6, 6]
  inference:
    decoding:
      - profile: decoding_waterz
        kwargs:
          thresholds: 0.4
          merge_function: aff85_his256
          aff_threshold: [0.001, 0.999]

# ============================================================================
# Parameter tuning for waterz agglomeration thresholds (--mode tune)
# ============================================================================
tune:
  profile: tune_waterz
  n_trials: 25
  study_name: snemi_waterz_tuning
  data:
    val:
      image: datasets/SNEMI/test-input.tif
      label: datasets/SNEMI/test-labels.h5
      #image: datasets/SNEMI/train-input.tif
      #label: datasets/SNEMI/train-labels.tif
  # Override profile defaults for SNEMI-specific search ranges
  parameter_space:
    decoding:
      defaults:
        thresholds: 0.4
        merge_function: aff85_his256
        aff_threshold: [0.001, 0.999]
      parameters:
        merge_function:
          type: categorical
          choices: [aff85_his256, aff75_his256, aff50_his256]
          description: "Agglomeration scoring function (quantile via histogram)"
        thresholds:
          range: [0.1, 0.9]
          step: 0.05
        aff_threshold_low:
          range: [0.001, 0.3]
          step: 0.01
          param_group: aff_threshold
          tuple_index: 0
        aff_threshold_high:
          range: [0.8, 0.9999]
          step: 0.01
          param_group: aff_threshold
          tuple_index: 1