Reuse agglomeration region graph for dust merge instead of rebuilding

Donglai Wei · claude · Donglai Wei · commit d0374ad06509 · 2026-03-26T19:33:10.000-04:00
The ec4f5cf refactor switched the dust merge from reusing the agglomeration's region graph to calling waterz.merge_dust(), which rebuilds the region graph from scratch with MeanAffinity scoring. This changed results because the dust merge then used different edge weights than the agglomeration (p85 histogram quantile). Restore return_region_graph=True and invert the OneMinus uint8 scores back to affinities, (255 - score) / 255, before passing them to waterz.merge_segments(), matching the original behavior. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/connectomics/decoding/decoders/waterz.py b/connectomics/decoding/decoders/waterz.py
@@ -147,11 +147,11 @@ def decode_waterz(
         min_instance_size: Minimum instance size in voxels. Instances smaller
             than this are removed (set to background). Set to 0 to disable.
             Default: 0
-        dust_merge: Enable dust postprocessing. When WaterZ returns a
-            reusable region graph, dust merging uses it directly via
-            ``waterz.merge_segments``; otherwise it falls back to
-            ``waterz.merge_dust``. When False, the dust merge and dust
-            removal thresholds below are ignored. Default: True
+        dust_merge: Enable dust postprocessing.  Reuses the agglomeration's
+            region graph (returned by waterz) and calls
+            ``waterz.merge_segments`` directly — no redundant graph rebuild.
+            When False, the dust merge and dust removal thresholds below
+            are ignored. Default: True
         dust_merge_size: Size+affinity dust merge (zwatershed-style).
             Segments with fewer voxels than this are merged into their
             highest-affinity neighbor.  Unlike *min_instance_size* which
@@ -296,19 +296,34 @@ def decode_waterz(
         waterz_kwargs["fragments"] = fragments.astype(np.uint64, copy=False)
 
     do_dust_merge = bool(dust_merge) and dust_merge_size > 0
+    waterz_kwargs["return_region_graph"] = do_dust_merge
 
     # waterz.waterz() runs watershed + region-graph once, then incrementally
     # merges for each threshold.  Returns all segmentations (copied).
     seg_list = waterz.waterz(affs, thresholds=thresholds_list, **waterz_kwargs)
 
     # Post-process each result
     processed: List[np.ndarray] = []
-    for seg in seg_list:
-        # Size+affinity dust merge via buildRegionGraphOnly (fast path)
+    for waterz_result in seg_list:
+        if do_dust_merge:
+            seg, (rg_id, rg_sc) = waterz_result
+        else:
+            seg = waterz_result
+
+        # Size+affinity dust merge reusing the agglomeration's region graph.
+        # rg_sc is uint8 sorted ascending (low score = high affinity).
+        # Invert OneMinus/One255Minus scores to raw affinities in [0, 1].
         if do_dust_merge:
             seg = seg.astype(np.uint64, copy=False)
-            waterz.merge_dust(
-                seg, affs,
+            rg_affs = (255.0 - rg_sc.astype(np.float32)) / 255.0
+            id1 = rg_id[:, 0].astype(np.uint64)
+            id2 = rg_id[:, 1].astype(np.uint64)
+            ids, cnts = np.unique(seg, return_counts=True)
+            max_id = int(ids.max()) if len(ids) else 0
+            counts = np.zeros(max_id + 1, dtype=np.uint64)
+            counts[ids] = cnts
+            waterz.merge_segments(
+                seg, rg_affs, id1, id2, counts,
                 size_th=dust_merge_size,
                 weight_th=dust_merge_affinity,
                 dust_th=dust_remove_size,
diff --git a/tests/unit/test_decode_waterz.py b/tests/unit/test_decode_waterz.py
@@ -21,8 +21,11 @@ def waterz(self, affs, thresholds, **kwargs):
         seg[:, :, :2] = 1
         seg[:, :, 2:] = 2
         if kwargs.get("return_region_graph", False):
-            rg = [{"u": 1, "v": 2, "score": 0.2}]
-            return [(seg.copy(), rg.copy()) for _ in thresholds]
+            # rgToArr format: (rg_id (N,2) uint32, rg_sc (N,) uint8)
+            # score=51 → affinity = (255-51)/255 ≈ 0.8
+            rg_id = np.array([[1, 2]], dtype=np.uint32)
+            rg_sc = np.array([51], dtype=np.uint8)
+            return [(seg.copy(), (rg_id.copy(), rg_sc.copy())) for _ in thresholds]
         return [seg.copy() for _ in thresholds]
 
     def merge_dust(self, seg, affs, size_th, weight_th, dust_th):
@@ -131,9 +134,10 @@ def test_decode_waterz_reuses_region_graph_for_dust_when_scores_are_compatible(
     ]
 
 
-def test_decode_waterz_falls_back_to_merge_dust_for_incompatible_scores(
+def test_decode_waterz_reuses_region_graph_for_any_scoring_function(
     monkeypatch,
 ):
+    """Region graph reuse works for any scoring function, not just OneMinus."""
     fake_waterz = _FakeWaterzModule()
     monkeypatch.setattr(waterz_decoder, "waterz", fake_waterz)
     monkeypatch.setattr(waterz_decoder, "WATERZ_AVAILABLE", True)
@@ -159,13 +163,10 @@ def test_decode_waterz_falls_back_to_merge_dust_for_incompatible_scores(
             "return_region_graph": True,
         }
     ]
-    assert fake_waterz.merge_segments_calls == []
-    assert fake_waterz.merge_dust_calls == [
-        {
-            "seg_shape": (4, 4, 4),
-            "aff_shape": (3, 4, 4, 4),
-            "size_th": 100,
-            "weight_th": 0.3,
-            "dust_th": 50,
-        }
-    ]
+    assert fake_waterz.merge_dust_calls == []
+    assert len(fake_waterz.merge_segments_calls) == 1
+    call = fake_waterz.merge_segments_calls[0]
+    assert call["seg_shape"] == (4, 4, 4)
+    assert call["size_th"] == 100
+    assert call["weight_th"] == 0.3
+    assert call["dust_th"] == 50