-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpreprocessing.py
More file actions
226 lines (196 loc) · 8.45 KB
/
preprocessing.py
File metadata and controls
226 lines (196 loc) · 8.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# ...
# Copyright 2021 Brooks M. Musangu and Jan Drugowitsch.
# license Modified BSD, see LICENSE.txt for details.
# ...
from __future__ import division, print_function, unicode_literals
import sklearn
import numpy as np
# Names exported by ``from <this module> import *``.
__all__ = ["EventTimesToCounts"]
class EventTimesToCounts(sklearn.base.TransformerMixin):
    """
    Bins sequences of event times into event counts within evenly spaced
    time bins.

    This class supports binning sequences of event times (e.g., spike trains)
    into a matrix that contains event counts within evenly spaced time bins
    (first bin starts at time 0s). It supports specifying the bin size, and
    multiple options for how the last event time is determined.

    Parameters
    ----------
    bin_size : float, default : 0.02 [s]
        The width of each time bin in seconds. Must be positive.
    t_stop : float, optional, default : None
        The largest time considered for binning. This time is assumed to
        be the same across all event time sequences. If `t_stop` doesn't match
        a bin boundary, `extrapolate_last_bin` determines whether or not the
        last bin includes `t_stop`. Whether `t_stop` matches a bin boundary
        is decided up to a small relative tolerance, so that floating-point
        round-off (e.g., ``3 * 0.1 != 0.3``) does not drop or add a bin.
        If not given, `t_stop` is set to the largest event time across all
        sequences in the provided data (0 if there are no events). If the
        data is a `neo.SpikeTrain` object, `t_stop` is set to
        `X[0].t_stop.magnitude`.
    extrapolate_last_bin : boolean, optional, default : False
        In cases where `t_stop` does not match a bin boundary, this option
        determines whether the last bin includes `t_stop`. If `False`, the last
        bin ends before `t_stop`, and all events after this final bin are
        ignored. If `True`, the last bin includes `t_stop`, and event counts
        are up-scaled to account for the fact that `t_stop` happens before the
        end of the last time bin. For example, if `t_stop` falls into the
        middle of the last bin, all event counts that fall into the last bin
        are doubled. In this case, the `X_out` returned by :meth:`transform`
        is of type `float` as it might contain non-integer event counts for
        the last bin.

    Examples
    --------
    >>> import numpy as np
    >>> from gpfa import EventTimesToCounts

    >>> bin_size = 0.1  # [s]
    >>> t_stop = 0.8  # [s]
    >>> X = [
    ...     [0, 0.1, 0.15, 0.4, 0.5, 0.6, 0.8],
    ...     [0.05, 0.3, 0.4, 0.55, 0.7]
    ... ]

    >>> ettc = EventTimesToCounts(
    ...     bin_size=bin_size,
    ...     t_stop=t_stop,
    ...     extrapolate_last_bin=False
    ... )
    >>> ettc.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> # `t_stop` matches a bin boundary, so there is nothing to extrapolate
    >>> ettc_extrapolate_last_bin = EventTimesToCounts(
    ...     bin_size=bin_size,
    ...     t_stop=t_stop,
    ...     extrapolate_last_bin=True
    ... )
    >>> ettc_extrapolate_last_bin.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> # Using defaults t_stop=None and extrapolate_last_bin=False
    >>> ettc = EventTimesToCounts(bin_size)
    >>> ettc.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> t_stop2 = 0.88  # [s]
    >>> ettc = EventTimesToCounts(
    ...     bin_size=bin_size,
    ...     t_stop=t_stop2,
    ...     extrapolate_last_bin=False
    ... )
    >>> ettc.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> t_stop2 = 0.88  # [s]
    >>> ettc_extrapolate_last_bin = EventTimesToCounts(
    ...     bin_size=bin_size,
    ...     t_stop=t_stop2,
    ...     extrapolate_last_bin=True
    ... )
    >>> ettc_extrapolate_last_bin.transform(X)
    array([[1.  , 2.  , 0.  , 0.  , 1.  , 2.  , 0.  , 0.  , 1.25],
           [1.  , 0.  , 1.  , 0.  , 1.  , 1.  , 1.  , 0.  , 0.  ]])

    The following example only works if the `Neo package
    <https://neo.readthedocs.io/>`_ is installed.

    >>> import neo
    >>> t_stop = 0.8  # [s]
    >>> neoSpikeTrain = [
    ...     neo.SpikeTrain(X[0], units='sec', t_stop=t_stop),
    ...     neo.SpikeTrain(X[1], units='sec', t_stop=t_stop)
    ... ]
    >>> ettc = EventTimesToCounts(
    ...     bin_size=bin_size,
    ...     t_stop=None,
    ...     extrapolate_last_bin=False
    ... )
    >>> ettc.transform(neoSpikeTrain)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    Methods
    -------
    fit:
        Does nothing and returns the transformer itself
    transform:
        Transforms data from event times to binned event counts
    """

    # Slack, in units of one bin width, within which `t_stop` is considered
    # to coincide with a bin boundary. Absorbs floating-point round-off in
    # `i * bin_size` without noticeably changing the binning.
    _BOUNDARY_RTOL = 1e-9

    def __init__(self, bin_size=0.02, t_stop=None,
                 extrapolate_last_bin=False):
        self.bin_size = bin_size
        self.t_stop = t_stop
        self.extrapolate_last_bin = extrapolate_last_bin

    def fit(self, X=None, y=None):
        """
        Does nothing; the transformer has no state to learn.

        Present so that the ``fit_transform`` method inherited from
        ``sklearn.base.TransformerMixin`` (which calls ``fit`` first) works.

        Parameters
        ----------
        X : ignored
        y : ignored

        Returns
        -------
        self : EventTimesToCounts
            The unchanged transformer.
        """
        return self

    def transform(self, X):
        """
        Transforms data from event times to binned event counts

        Parameters
        ----------
        X : numpy.array or neo.SpikeTrain
            An array-like containing #sequences of event time sequences
            (usually sequences of `float`'s). Each element in `X` can contain a
            different number of event times. They are all assumed to share the
            same final time (i.e., ``t_stop``).

        Returns
        -------
        X_out : numpy.array
            A numpy matrix of size #sequences x #bins, containing the
            binned event counts. It is of type `int`, unless the last bin
            is extrapolated, in which case it is of type `float`.

        Raises
        ------
        ValueError
            If `bin_size` is not positive, if `t_stop` is negative, or if an
            element of `X` has a `units` attribute (e.g., `neo.SpikeTrain`)
            and its `t_stop` differs from the specified or computed `t_stop`.
        """
        bin_size = self.bin_size
        if not bin_size > 0:
            raise ValueError(
                f'`bin_size` must be positive, but got {bin_size}.'
            )

        # ==============================================
        # set the starting time of the trial (`t_start`)
        # and the end time (`t_stop`)
        # ==============================================
        t_start = 0
        t_stop = self.t_stop
        if t_stop is None:
            if len(X) > 0 and hasattr(X[0], 't_stop'):
                t_stop = X[0].t_stop.magnitude
            else:
                # largest event time over all non-empty sequences; taking
                # the maximum (rather than the last element) does not rely
                # on the event times being sorted
                t_stop = max(
                    (np.max(seq) for seq in X if len(seq) > 0),
                    default=0
                )
        if t_stop < t_start:
            raise ValueError(
                f'`t_stop` must not be negative, but got {t_stop}.'
            )

        # ====================================
        # get the bins based on the `bin_size`
        # ====================================
        # Count the bins that fully fit into [t_start, t_stop]. The tolerance
        # keeps a bin whose upper edge equals `t_stop` up to round-off
        # (e.g., 3 * 0.1 = 0.30000000000000004 for t_stop = 0.3).
        tol = self._BOUNDARY_RTOL
        n_full_bins = int(np.floor((t_stop - t_start) / bin_size + tol))
        edges = t_start + bin_size * np.arange(n_full_bins + 1, dtype=float)

        # =============================
        # Check edges for extrapolation
        # =============================
        # time between the last full bin's end and `t_stop`; only a
        # remainder beyond round-off counts as a partial bin
        leftover = t_stop - edges[-1]
        extrapolate_last_bin = (
            bool(self.extrapolate_last_bin) and leftover > tol * bin_size
        )
        if extrapolate_last_bin:
            # counts in the partial bin are scaled as if it had full width
            last_bin_scaling = bin_size / leftover
            edges = np.append(edges, edges[-1] + bin_size)

        # =======================
        # create an output matrix
        # =======================
        X_out = np.empty((len(X), len(edges) - 1),
                         dtype=(float if extrapolate_last_bin else int))

        # =============================================================
        # Loop over event time sequences to compute binned event counts
        # =============================================================
        for i, eventseq in enumerate(X):
            # If neo.SpikeTrain, get the timesteps
            # of each neuron via `eventseq.magnitude`
            if hasattr(eventseq, 'units'):
                if t_stop != eventseq.t_stop.magnitude:
                    raise ValueError(
                        f'The specified or computed `t_stop`: {t_stop} '
                        f'is different from the {i}_th spikeTrain `t_stop` '
                        "`t_stop` must be the same across all neurons."
                    )
                eventseq = eventseq.magnitude

            # binning happens here; events outside the edges are ignored
            X_out[i, :] = np.histogram(eventseq, edges)[0]

        # ========================
        # extrapolate the last bin
        # ========================
        if extrapolate_last_bin:
            X_out[:, -1] *= last_bin_scaling

        return X_out