Merge pull request #4138 from alejoe91/fix-curation-api-doc

samuelgarcia · web-flow · commit 4d892e91b68e · 2025-09-19T10:51:34.000+02:00
Improve docs for curation module and model
diff --git a/doc/api.rst b/doc/api.rst
@@ -356,21 +356,36 @@ spikeinterface.curation
 .. automodule:: spikeinterface.curation
 
     .. autofunction:: apply_curation
-    .. autofunction:: get_potential_auto_merge
+    .. autofunction:: compute_merge_unit_groups
     .. autofunction:: find_redundant_units
     .. autofunction:: remove_redundant_units
     .. autofunction:: remove_duplicated_spikes
     .. autofunction:: remove_excess_spikes
-    .. autofunction:: load_model
     .. autofunction:: auto_label_units
+    .. autofunction:: load_model
     .. autofunction:: train_model
 
+Curation Model
+~~~~~~~~~~~~~~
+
+This section describes the ``pydantic`` curation model classes used to represent and manage curation actions
+such as merging and splitting units, as well as defining labels for units.
+
+.. automodule:: spikeinterface.curation.curation_model
+
+    .. autopydantic_model:: CurationModel
+    .. autopydantic_model:: Merge
+    .. autopydantic_model:: Split
+    .. autopydantic_model:: ManualLabel
+    .. autopydantic_model:: LabelDefinition
+
 Deprecated
 ~~~~~~~~~~
 .. automodule:: spikeinterface.curation
     :noindex:
 
     .. autofunction:: apply_sortingview_curation
+    .. autofunction:: get_potential_auto_merge
     .. autoclass:: CurationSorting
     .. autoclass:: MergeUnitsSorting
     .. autoclass:: SplitUnitSorting
diff --git a/doc/conf.py b/doc/conf.py
@@ -63,6 +63,7 @@
     'sphinx.ext.autosummary',
     'sphinx_gallery.gen_gallery',
     'numpydoc',
+    'sphinxcontrib.autodoc_pydantic',
     'sphinx.ext.autosectionlabel',
     'sphinx_design',
     'sphinxcontrib.jquery',
@@ -76,6 +77,8 @@
 
 numpydoc_show_class_members = False
 
+autodoc_pydantic_model_show_json = True
+autodoc_pydantic_model_show_config_summary = False
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
diff --git a/pyproject.toml b/pyproject.toml
@@ -194,12 +194,13 @@ test = [
 
 docs = [
     "Sphinx",
+    "ipython",
     "sphinx_rtd_theme>=1.2",
     "sphinx-gallery",
     "sphinx-design",
     "numpydoc",
-    "ipython",
     "sphinxcontrib-jquery",
+    "autodoc_pydantic",
 
     # for notebooks in the gallery
     "MEArec", # Use as an example
diff --git a/src/spikeinterface/core/sorting_tools.py b/src/spikeinterface/core/sorting_tools.py
@@ -365,16 +365,13 @@ def set_properties_after_merging(
 
     for key in prop_keys:
         parent_values = sorting_pre_merge.get_property(key)
-        if parent_values.dtype.kind not in default_missing_values:
-            # if the property is boolean or integer there is no missing values so we skip
-            # for instance recursive "is_merged" will not be propagated
-            continue
 
         # propagate keep values
         shape = (len(sorting_post_merge.unit_ids),) + parent_values.shape[1:]
         new_values = np.empty(shape=shape, dtype=parent_values.dtype)
         new_values[keep_post_inds] = parent_values[keep_pre_inds]
 
+        skip_property = False
         for new_id, merge_group in zip(new_unit_ids, merge_unit_groups):
             merged_indices = sorting_pre_merge.ids_to_indices(merge_group)
             merge_values = parent_values[merged_indices]
@@ -384,9 +381,15 @@ def set_properties_after_merging(
                 # and new values only if they are all similar
                 new_values[new_index] = merge_values[0]
             else:
-
-                new_values[new_index] = default_missing_values[parent_values.dtype.kind]
-        sorting_post_merge.set_property(key, new_values)
+                if parent_values.dtype.kind not in default_missing_values:
+                    # if the property doesn't have a default missing value and it is not the same
+                    # for all merged units, we skip it
+                    skip_property = True
+                    break
+                else:
+                    new_values[new_index] = default_missing_values[parent_values.dtype.kind]
+        if not skip_property:
+            sorting_post_merge.set_property(key, new_values)
 
     # set is_merged property
     is_merged = np.ones(len(sorting_post_merge.unit_ids), dtype=bool)
diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py
@@ -1072,6 +1072,10 @@ def _save_or_select_or_merge_or_split(
         if sorting_provenance is None:
             # if the original sorting object is not available anymore (kilosort folder deleted, ....), take the copy
             sorting_provenance = self.sorting
+        # copy properties that exist on self.sorting but are missing from the provenance sorting
+        for key in self.sorting.get_property_keys():
+            if key not in sorting_provenance.get_property_keys():
+                sorting_provenance.set_property(key, self.sorting.get_property(key))
 
         if merge_unit_groups is None and split_units is None:
             # when only some unit_ids then the sorting must be sliced
diff --git a/src/spikeinterface/curation/auto_merge.py b/src/spikeinterface/curation/auto_merge.py
@@ -144,6 +144,7 @@ def compute_merge_unit_groups(
         * | "feature_neighbors": focused on finding unit pairs whose spikes are close in the feature space using kNN.
           | It uses the following steps: "num_spikes", "snr", "remove_contaminated", "unit_locations",
           | "knn", "quality_score"
+
         If `preset` is None, you can specify the steps manually with the `steps` parameter.
     resolve_graph : bool, default: True
         If True, the function resolves the potential unit pairs to be merged into multiple-unit merges.
diff --git a/src/spikeinterface/curation/curation_format.py b/src/spikeinterface/curation/curation_format.py
@@ -125,60 +125,14 @@ def apply_curation_labels(
     # Please note that manual_labels is done on the unit_ids before the merge!!!
     manual_labels = curation_label_to_vectors(curation_model)
 
-    # apply on non merged / split
-    merge_new_unit_ids = [m.new_unit_id for m in curation_model.merges]
-    split_new_unit_ids = [m.new_unit_ids for m in curation_model.splits]
-    split_new_unit_ids = list(chain(*split_new_unit_ids))
-
-    merged_split_units = merge_new_unit_ids + split_new_unit_ids
     for key, values in manual_labels.items():
         all_values = np.zeros(sorting.unit_ids.size, dtype=values.dtype)
         for unit_ind, unit_id in enumerate(sorting.unit_ids):
-            if unit_id not in merged_split_units:
-                ind = list(curation_model.unit_ids).index(unit_id)
-                all_values[unit_ind] = values[ind]
+            # no merged/split filtering here: every unit_id of the sorting is looked up in curation_model.unit_ids
+            ind = list(curation_model.unit_ids).index(unit_id)
+            all_values[unit_ind] = values[ind]
         sorting.set_property(key, all_values)
 
-    for new_unit_id, merge in zip(merge_new_unit_ids, curation_model.merges):
-        old_group_ids = merge.unit_ids
-        for label_key, label_def in curation_model.label_definitions.items():
-            if label_def.exclusive:
-                group_values = []
-                for unit_id in old_group_ids:
-                    ind = list(curation_model.unit_ids).index(unit_id)
-                    value = manual_labels[label_key][ind]
-                    if value != "":
-                        group_values.append(value)
-                if len(set(group_values)) == 1:
-                    # all group has the same label or empty
-                    sorting.set_property(key, values=group_values[:1], ids=[new_unit_id])
-            else:
-                for key in label_def.label_options:
-                    group_values = []
-                    for unit_id in old_group_ids:
-                        ind = list(curation_model.unit_ids).index(unit_id)
-                        value = manual_labels[key][ind]
-                        group_values.append(value)
-                    new_value = np.any(group_values)
-                    sorting.set_property(key, values=[new_value], ids=[new_unit_id])
-
-    # splits
-    for split in curation_model.splits:
-        # propagate property of splut unit to new units
-        old_unit = split.unit_id
-        new_unit_ids = split.new_unit_ids
-        for label_key, label_def in curation_model.label_definitions.items():
-            if label_def.exclusive:
-                ind = list(curation_model.unit_ids).index(old_unit)
-                value = manual_labels[label_key][ind]
-                if value != "":
-                    sorting.set_property(label_key, values=[value] * len(new_unit_ids), ids=new_unit_ids)
-            else:
-                for key in label_def.label_options:
-                    ind = list(curation_model.unit_ids).index(old_unit)
-                    value = manual_labels[key][ind]
-                    sorting.set_property(key, values=[value] * len(new_unit_ids), ids=new_unit_ids)
-
 
 def apply_curation(
     sorting_or_analyzer: BaseSorting | SortingAnalyzer,
@@ -194,10 +148,11 @@ def apply_curation(
     Apply curation dict to a Sorting or a SortingAnalyzer.
 
     Steps are done in this order:
-      1. Apply removal using curation_dict["removed"]
-      2. Apply merges using curation_dict["merges"]
-      3. Apply splits using curation_dict["splits"]
-      4. Set labels using curation_dict["manual_labels"]
+
+      1. Apply labels using curation_dict["manual_labels"]
+      2. Apply removal using curation_dict["removed"]
+      3. Apply merges using curation_dict["merges"]
+      4. Apply splits using curation_dict["splits"]
 
     A new Sorting or SortingAnalyzer (in memory) is returned.
     The user (an adult) has the responsability to save it somewhere (or not).
@@ -243,33 +198,36 @@ def apply_curation(
     if isinstance(curation_dict_or_model, dict):
         curation_model = CurationModel(**curation_dict_or_model)
     else:
-        curation_model = curation_dict_or_model
+        curation_model = curation_dict_or_model.model_copy(deep=True)
 
     if not np.array_equal(np.asarray(curation_model.unit_ids), sorting_or_analyzer.unit_ids):
         raise ValueError("unit_ids from the curation_dict do not match the one from Sorting or SortingAnalyzer")
 
-    # 1. Remove units
+    # 1. Apply labels
+    apply_curation_labels(sorting_or_analyzer, curation_model)
+
+    # 2. Remove units
     if len(curation_model.removed) > 0:
         curated_sorting_or_analyzer = sorting_or_analyzer.remove_units(curation_model.removed)
     else:
         curated_sorting_or_analyzer = sorting_or_analyzer
 
-    # 2. Merge units
+    # 3. Merge units
     if len(curation_model.merges) > 0:
         merge_unit_groups = [m.unit_ids for m in curation_model.merges]
         merge_new_unit_ids = [m.new_unit_id for m in curation_model.merges if m.new_unit_id is not None]
         if len(merge_new_unit_ids) == 0:
             merge_new_unit_ids = None
         if isinstance(sorting_or_analyzer, BaseSorting):
-            curated_sorting_or_analyzer, _, new_unit_ids = apply_merges_to_sorting(
+            curated_sorting_or_analyzer, _, _ = apply_merges_to_sorting(
                 curated_sorting_or_analyzer,
                 merge_unit_groups=merge_unit_groups,
                 censor_ms=censor_ms,
                 new_id_strategy=new_id_strategy,
                 return_extra=True,
             )
         else:
-            curated_sorting_or_analyzer, new_unit_ids = curated_sorting_or_analyzer.merge_units(
+            curated_sorting_or_analyzer, _ = curated_sorting_or_analyzer.merge_units(
                 merge_unit_groups=merge_unit_groups,
                 censor_ms=censor_ms,
                 merging_mode=merging_mode,
@@ -280,10 +238,8 @@ def apply_curation(
                 verbose=verbose,
                 **job_kwargs,
             )
-        for i, merge_unit_id in enumerate(new_unit_ids):
-            curation_model.merges[i].new_unit_id = merge_unit_id
 
-    # 3. Split units
+    # 4. Split units
     if len(curation_model.splits) > 0:
         split_units = {}
         for split in curation_model.splits:
@@ -297,26 +253,21 @@ def apply_curation(
         if len(split_new_unit_ids) == 0:
             split_new_unit_ids = None
         if isinstance(sorting_or_analyzer, BaseSorting):
-            curated_sorting_or_analyzer, new_unit_ids = apply_splits_to_sorting(
+            curated_sorting_or_analyzer, _ = apply_splits_to_sorting(
                 curated_sorting_or_analyzer,
                 split_units,
                 new_unit_ids=split_new_unit_ids,
                 new_id_strategy=new_id_strategy,
                 return_extra=True,
             )
         else:
-            curated_sorting_or_analyzer, new_unit_ids = curated_sorting_or_analyzer.split_units(
+            curated_sorting_or_analyzer, _ = curated_sorting_or_analyzer.split_units(
                 split_units,
                 new_id_strategy=new_id_strategy,
                 return_new_unit_ids=True,
                 new_unit_ids=split_new_unit_ids,
                 format="memory",
                 verbose=verbose,
             )
-        for i, split_unit_ids in enumerate(new_unit_ids):
-            curation_model.splits[i].new_unit_ids = split_unit_ids
-
-    # 4. Apply labels
-    apply_curation_labels(curated_sorting_or_analyzer, curation_model)
 
     return curated_sorting_or_analyzer
diff --git a/src/spikeinterface/curation/curation_model.py b/src/spikeinterface/curation/curation_model.py
@@ -104,6 +104,13 @@ def add_label_definition_name(cls, label_definitions):
 
     @classmethod
     def check_manual_labels(cls, values):
+        """
+        Checks and validates the manual labels in the curation model.
+
+          * Checks if the unit_ids in each manual label exist in the unit_ids list.
+          * Validates that each label in the manual labels exists in the label_definitions.
+
+        """
         unit_ids = list(values["unit_ids"])
         manual_labels = values.get("manual_labels")
         if manual_labels is None:
@@ -135,6 +142,15 @@ def check_manual_labels(cls, values):
 
     @classmethod
     def check_merges(cls, values):
+        """
+        Checks and validates the merges in the curation model.
+
+          * Checks if the unit_ids in each merge group exist in the unit_ids list.
+          * Validates that each merge group has at least two unit IDs.
+          * Ensures that any new_unit_id provided does not already exist in the unit_ids list.
+          * Converts merges from dict format to list of Merge objects if necessary.
+
+        """
         unit_ids = list(values["unit_ids"])
         merges = values.get("merges")
         if merges is None:
@@ -184,15 +200,14 @@ def check_merges(cls, values):
     def check_splits(cls, values):
         """
         Checks and validates the splits in the curation model.
-        If `splits` is a dictionary with unit_id as key and split indices as values,
-        it converts it to a list of Split objects.
-        Each Split object is then validated:
-        - Checks if the unit_id exists in the unit_ids list.
-        - Validates the mode (indices or labels).
-        - If mode is indices, checks that indices are defined and not empty, and that there are no duplicate indices.
-        - If mode is labels, checks that labels are defined and not empty.
-        - Validates new unit IDs if provided, ensuring they are not already in the unit_ids list and match the
-          number of splits.
+
+          * Checks if the unit_id exists in the unit_ids list.
+          * Validates the mode (indices or labels).
+          * If mode is indices, checks that indices are defined and not empty, and that there are no duplicate indices.
+          * If mode is labels, checks that labels are defined and not empty.
+          * | Validates new unit IDs if provided, ensuring they are not already in the unit_ids list and match the
+            | number of splits.
+
         """
         unit_ids = list(values["unit_ids"])
         splits = values.get("splits")
@@ -279,6 +294,11 @@ def check_splits(cls, values):
 
     @classmethod
     def check_removed(cls, values):
+        """
+        Checks and validates the removed units in the curation model.
+        If `removed` is None, it initializes it as an empty list.
+        It then checks that each unit ID in `removed` exists in the `unit_ids` list.
+        """
         unit_ids = list(values["unit_ids"])
         removed = values.get("removed")
         if removed is None:
@@ -293,6 +313,11 @@ def check_removed(cls, values):
 
     @classmethod
     def convert_old_format(cls, values):
+        """
+        Converts old curation formats (v0 and v1) to the current format (v2).
+        v0 (sortingview) format is converted to v2 by extracting labels, merges, and unit IDs.
+        v1 format is updated to v2 by renaming fields and ensuring the structure matches the v2 format.
+        """
         format_version = values.get("format_version", "0")
         if format_version == "0":
             print("Conversion from format version v0 (sortingview) to v2")
diff --git a/src/spikeinterface/curation/tests/sv-sorting-curation-int.json b/src/spikeinterface/curation/tests/sv-sorting-curation-int.json
@@ -10,7 +10,7 @@
             "reject"
         ],
         "4": [
-            "noise"
+            "reject"
         ],
         "5": [
             "accept"
diff --git a/src/spikeinterface/curation/tests/sv-sorting-curation-str.json b/src/spikeinterface/curation/tests/sv-sorting-curation-str.json
@@ -10,7 +10,7 @@
             "reject"
         ],
         "d": [
-            "noise"
+            "reject"
         ],
         "e": [
             "accept"
diff --git a/src/spikeinterface/curation/tests/test_sortingview_curation.py b/src/spikeinterface/curation/tests/test_sortingview_curation.py
diff --git a/src/spikeinterface/extractors/neoextractors/openephys.py b/src/spikeinterface/extractors/neoextractors/openephys.py