@@ -24,6 +24,24 @@ class BasePhyKilosortSortingExtractor(BaseSorting):
2424 If True, empty units are removed from the sorting extractor.
2525 load_all_cluster_properties : bool, default: True
2626 If True, all cluster properties are loaded from the tsv/csv files.
27+
28+ Notes
29+ -----
30+ This extractor loads cluster properties from CSV/TSV files to enrich the sorting
31+ extractor with unit metadata such as quality labels, groups, and Kilosort metrics.
32+
33+ Cluster information is loaded in the following priority order:
34+ 1. From a dedicated cluster_info.csv/.tsv file if present
35+ 2. From all .csv/.tsv files in the folder that contain a 'cluster_id' column
36+ Typical files include cluster_group.tsv, cluster_info.tsv, cluster_KSLabel.tsv
37+ Files without a cluster_id column are automatically skipped
38+ 3. If no files are found, minimal cluster info is generated with 'unsorted' labels
39+
40+ The cluster_id column is used as the merge key to combine properties from multiple files.
41+ All loaded properties are added to the sorting extractor as unit properties, with some
42+ renamed for SpikeInterface conventions: 'group' becomes 'quality', 'cluster_id'
43+ becomes 'original_cluster_id'. These properties can be accessed via the ``sorting.get_property()``
44+ method.
2745 """
2846
2947 installation_mesg = (
@@ -84,6 +102,15 @@ def __init__(
84102 else :
85103 delimiter = ","
86104 new_property = pd .read_csv (file , delimiter = delimiter )
105+
106+ # Only merge files that contain a cluster_id column
107+ # This prevents KeyError when extraneous files don't have cluster_id
108+ # Typical aggregated files include cluster_group.tsv, cluster_info.tsv, cluster_KSLabel.tsv
109+ # See Phy docs: https://phy.readthedocs.io/en/latest/sorting_user_guide/
110+ # See: https://github.com/SpikeInterface/spikeinterface/issues/4124
111+ if "cluster_id" not in new_property .columns :
112+ continue
113+
87114 if cluster_info is None :
88115 cluster_info = new_property
89116 else :
0 commit comments