Skip to content

Commit b771821

Browse files
authored
Merge pull request #4127 from h-mayorquin/improve_phy_and_kilosort_extractors
Fix KeyError when loading Kilosort 2.5 output with files missing cluster id
2 parents b2411fd + 2b60ed2 commit b771821

2 files changed

Lines changed: 28 additions & 0 deletions

File tree

src/spikeinterface/extractors/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def __getattr__(extractor_name):
5656
"Importing classes at __init__ has been deprecated in favor of only importing function-size wrappers "
5757
"and will be removed in 0.105.0. For developers that prefer working with the class versions of extractors "
5858
"they can be imported from spikeinterface.extractors.extractor_classes"
59+
f". For class {reading_function.__name__}"
5960
)
6061
warn(dep_msg)
6162
return reading_function

src/spikeinterface/extractors/phykilosortextractors.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,24 @@ class BasePhyKilosortSortingExtractor(BaseSorting):
2424
If True, empty units are removed from the sorting extractor.
2525
load_all_cluster_properties : bool, default: True
2626
If True, all cluster properties are loaded from the tsv/csv files.
27+
28+
Notes
29+
-----
30+
This extractor loads cluster properties from CSV/TSV files to enrich the sorting
31+
extractor with unit metadata such as quality labels, groups, and Kilosort metrics.
32+
33+
Cluster information is loaded in the following priority order:
34+
1. From a dedicated cluster_info.csv/.tsv file if present
35+
2. From all .csv/.tsv files in the folder that contain a 'cluster_id' column
36+
Typical files include cluster_group.tsv, cluster_info.tsv, cluster_KSLabel.tsv
37+
Files without a cluster_id column are automatically skipped
38+
3. If no files are found, minimal cluster info is generated with 'unsorted' labels
39+
40+
The cluster_id column is used as the merge key to combine properties from multiple files.
41+
All loaded properties are added to the sorting extractor as unit properties, with some
42+
renamed for SpikeInterface conventions: 'group' becomes 'quality', 'cluster_id'
43+
becomes 'original_cluster_id'. These properties can be accessed via ``sorting.get_property()``
44+
method.
2745
"""
2846

2947
installation_mesg = (
@@ -84,6 +102,15 @@ def __init__(
84102
else:
85103
delimiter = ","
86104
new_property = pd.read_csv(file, delimiter=delimiter)
105+
106+
# Only merge files that contain a cluster_id column
107+
# This prevents KeyError when extraneous files don't have cluster_id
108+
# Typical aggregated files include cluster_group.tsv, cluster_info.tsv, cluster_KSLabel.tsv
109+
# See Phy docs: https://phy.readthedocs.io/en/latest/sorting_user_guide/
110+
# See: https://github.com/SpikeInterface/spikeinterface/issues/4124
111+
if "cluster_id" not in new_property.columns:
112+
continue
113+
87114
if cluster_info is None:
88115
cluster_info = new_property
89116
else:

0 commit comments

Comments
 (0)