Send peak_sign to get_template_extremum_channel_peak_shift in remove_redundant_units (#4408)

ecobost · pre-commit-ci[bot] · alejoe91 · web-flow · commit a44b28a01ef7 · 2026-02-25T10:25:09.000+01:00
Co-authored-by: pre-commit-ci[bot] &lt;66853113+pre-commit-ci[bot]@users.noreply.github.com&gt;
Co-authored-by: Alessio Buccino &lt;alejoe9187@gmail.com&gt;
diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py
@@ -170,27 +170,22 @@ def create_sorting_analyzer(
             **sparsity_kwargs,
         )
 
-    if format != "memory":
-        if format == "zarr":
-            if not is_path_remote(folder):
-                folder = clean_zarr_folder_name(folder)
-        if not is_path_remote(folder):
-            if Path(folder).is_dir():
-                if not overwrite:
-                    raise ValueError(f"Folder already exists {folder}! Use overwrite=True to overwrite it.")
-                else:
-                    shutil.rmtree(folder)
+    if format != "memory" and not is_path_remote(folder):
+        folder = clean_zarr_folder_name(folder) if format == "zarr" else folder
+        if Path(folder).is_dir():
+            if overwrite:
+                shutil.rmtree(folder)
+            else:
+                raise ValueError(f"Folder {folder} already exists! Use overwrite=True to overwrite it.")
 
     # handle sparsity
     if sparsity is not None:
         # some checks
         assert isinstance(sparsity, ChannelSparsity), "'sparsity' must be a ChannelSparsity object"
-        assert np.array_equal(
-            sorting.unit_ids, sparsity.unit_ids
-        ), "create_sorting_analyzer(): if external sparsity is given unit_ids must correspond"
-        assert np.array_equal(
-            recording.channel_ids, sparsity.channel_ids
-        ), "create_sorting_analyzer(): if external sparsity is given unit_ids must correspond"
+        error_msg = "If external sparsity is given, unit_ids must match sorting"
+        assert np.array_equal(sorting.unit_ids, sparsity.unit_ids), error_msg
+        error_msg = "If external sparsity is given, channel_ids must match recording"
+        assert np.array_equal(recording.channel_ids, sparsity.channel_ids), error_msg
     elif sparse:
         sparsity = estimate_sparsity(sorting, recording, **sparsity_kwargs)
     else:
diff --git a/src/spikeinterface/curation/remove_redundant.py b/src/spikeinterface/curation/remove_redundant.py
@@ -10,16 +10,16 @@
 
 
 def remove_redundant_units(
-    sorting_or_sorting_analyzer,
-    align=True,
-    unit_peak_shifts=None,
-    delta_time=0.4,
-    agreement_threshold=0.2,
-    duplicate_threshold=0.8,
-    remove_strategy="minimum_shift",
-    peak_sign="neg",
-    extra_outputs=False,
-) -> BaseSorting:
+    sorting_or_sorting_analyzer: BaseSorting | SortingAnalyzer,
+    align: bool = True,
+    unit_peak_shifts: dict[int, float] | None = None,
+    delta_time: float = 0.4,
+    agreement_threshold: float = 0.2,
+    duplicate_threshold: float = 0.8,
+    remove_strategy: str = "minimum_shift",
+    peak_sign: str = "neg",
+    extra_outputs: bool = False,
+) -> BaseSorting | tuple[BaseSorting, list[tuple[int, int]]]:
     """
     Removes redundant or duplicate units by comparing the sorting output with itself.
 
@@ -72,15 +72,14 @@ def remove_redundant_units(
         sorting = sorting_or_sorting_analyzer.sorting
         sorting_analyzer = sorting_or_sorting_analyzer
     else:
-        assert not align, "The 'align' option is only available when a SortingAnalyzer is used as input"
         # other remove strategies rely on sorting analyzer looking at templates
         assert remove_strategy == "max_spikes", "For a Sorting input the remove_strategy must be 'max_spikes'"
         sorting = sorting_or_sorting_analyzer
         sorting_analyzer = None
 
     if align and unit_peak_shifts is None:
         assert sorting_analyzer is not None, "For align=True must give a SortingAnalyzer or explicit unit_peak_shifts"
-        unit_peak_shifts = get_template_extremum_channel_peak_shift(sorting_analyzer)
+        unit_peak_shifts = get_template_extremum_channel_peak_shift(sorting_analyzer, peak_sign=peak_sign)
 
     if align:
         sorting_aligned = align_sorting(sorting, unit_peak_shifts)
@@ -108,25 +107,15 @@ def remove_redundant_units(
                 remove_unit_ids.append(u1)
             elif np.abs(unit_peak_shifts[u1]) < np.abs(unit_peak_shifts[u2]):
                 remove_unit_ids.append(u2)
-            else:
-                # equal shift use peak values
-                if np.abs(peak_values[u1]) < np.abs(peak_values[u2]):
-                    remove_unit_ids.append(u1)
-                else:
-                    remove_unit_ids.append(u2)
+            else:  # equal shift use peak values
+                remove_unit_ids.append(u1 if peak_values[u1] < peak_values[u2] else u2)
     elif remove_strategy == "highest_amplitude":
         for u1, u2 in redundant_unit_pairs:
-            if np.abs(peak_values[u1]) < np.abs(peak_values[u2]):
-                remove_unit_ids.append(u1)
-            else:
-                remove_unit_ids.append(u2)
+            remove_unit_ids.append(u1 if peak_values[u1] < peak_values[u2] else u2)
     elif remove_strategy == "max_spikes":
         num_spikes = sorting.count_num_spikes_per_unit(outputs="dict")
         for u1, u2 in redundant_unit_pairs:
-            if num_spikes[u1] < num_spikes[u2]:
-                remove_unit_ids.append(u1)
-            else:
-                remove_unit_ids.append(u2)
+            remove_unit_ids.append(u1 if num_spikes[u1] < num_spikes[u2] else u2)
     elif remove_strategy == "with_metrics":
         # TODO
         # @aurelien @alessio
@@ -144,7 +133,9 @@ def remove_redundant_units(
         return sorting_clean
 
 
-def find_redundant_units(sorting, delta_time: float = 0.4, agreement_threshold=0.2, duplicate_threshold=0.8):
+def find_redundant_units(
+    sorting: BaseSorting, delta_time: float = 0.4, agreement_threshold: float = 0.2, duplicate_threshold: float = 0.8
+) -> list[tuple[int, int]]:
     """
     Finds redundant or duplicate units by comparing the sorting output with itself.
 
@@ -162,9 +153,7 @@ def find_redundant_units(sorting, delta_time: float = 0.4, agreement_threshold=0
 
     Returns
     -------
-    list
-        The list of duplicate units
-    list of 2-element lists
+    list of 2-element tuples
         The list of duplicate pairs
     """
     from spikeinterface.comparison import compare_two_sorters
@@ -186,6 +175,6 @@ def find_redundant_units(sorting, delta_time: float = 0.4, agreement_threshold=0
         event_counts = comparison.event_counts1
         shared = max(n_coincidents / event_counts[unit_i], n_coincidents / event_counts[unit_j])
         if shared > duplicate_threshold:
-            redundant_unit_pairs.append([unit_i, unit_j])
+            redundant_unit_pairs.append((unit_i, unit_j))
 
     return redundant_unit_pairs