BlockInverseGPFA/blockinvgpfa/preprocessing.py at e37b08149e8b72707ece4529a37529bbdcb4ca37 · DrugowitschLab/BlockInverseGPFA · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# ...
# Copyright 2021 Brooks M. Musangu and Jan Drugowitsch.
# license Modified BSD, see LICENSE.txt for details.
# ...

from __future__ import division, print_function, unicode_literals

import sklearn
import numpy as np

# Public API of this module: the only name exported by a star-import.
__all__ = ["EventTimesToCounts"]


class EventTimesToCounts(sklearn.base.TransformerMixin):
    """
    Bins sequences of event times into event counts within evenly spaced
    time bins.

    This class supports binning sequences of event times (e.g., spike trains)
    into a matrix that contains event counts within evenly spaced time bins
    (first bin starts at time 0s). It supports specifying the bin size, and
    multiple options for how the last event time is determined.

    Parameters
    ----------
    bin_size : float, default : 0.02 [s]
        The width of each time bin in seconds. Must be strictly positive.

    t_stop : float, optional, default : None
        The largest time considered for binning. This time is assumed to
        be the same across all event time sequences. If `t_stop` doesn't match
        a bin boundary, `extrapolate_last_bin` determines whether or not the
        last bin includes `t_stop`. `t_stop` is considered to match a bin
        boundary if it lies within a tiny fraction of `bin_size` of it, so
        that floating-point round-off in the computed bin edges (e.g.,
        ``3 * 0.1 != 0.3``) neither drops nor adds a bin.

        If not given, `t_stop` is set to the largest event time across all
        sequences in the provided data (0 if there are no events). If the
        data is a `neo.SpikeTrain` object, `t_stop` is set to
        `X[0].t_stop.magnitude`.

    extrapolate_last_bin : boolean, optional, default : False
        In cases where `t_stop` does not match a bin boundary, this option
        determines whether the last bin includes `t_stop`. If `False`, the last
        bin ends before `t_stop`, and all events after this final bin are
        ignored. If `True`, the last bin includes `t_stop`, and event counts
        are up-scaled to account for the fact that `t_stop` happens before the
        end of the last time bin. For example, if `t_stop` falls into the
        middle of the last bin, all event counts that fall into the last bin
        are doubled. In this case, the `X_out` that :meth:`transform` returns
        is of type `float` as it might contain non-integer event counts for
        the last bin.

    Examples
    --------
    >>> import numpy as np
    >>> from gpfa import EventTimesToCounts
    >>> bin_size = 0.1  # [s]
    >>> t_stop = 0.8  # [s]
    >>> X = [
    ...     [0, 0.1, 0.15, 0.4, 0.5, 0.6, 0.8],
    ...     [0.05, 0.3, 0.4, 0.55, 0.7]
    ...     ]
    >>> ettc = EventTimesToCounts(
    ...                           bin_size=bin_size,
    ...                           t_stop=t_stop,
    ...                           extrapolate_last_bin=False
    ...                          )
    >>> ettc.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> # `t_stop` matches a bin boundary, so nothing is extrapolated
    >>> ettc_extrapolate_last_bin = EventTimesToCounts(
    ...                             bin_size=bin_size,
    ...                             t_stop=t_stop,
    ...                             extrapolate_last_bin=True
    ...                          )
    >>> ettc_extrapolate_last_bin.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> # Using defaults t_stop=None and extrapolate_last_bin=False
    >>> ettc = EventTimesToCounts(bin_size)
    >>> ettc.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> t_stop2 = 0.88  # [s]
    >>> ettc = EventTimesToCounts(
    ...                           bin_size=bin_size,
    ...                           t_stop=t_stop2,
    ...                           extrapolate_last_bin=False
    ...                          )
    >>> ettc.transform(X)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    >>> t_stop2 = 0.88  # [s]
    >>> ettc_extrapolate_last_bin = EventTimesToCounts(
    ...                             bin_size=bin_size,
    ...                             t_stop=t_stop2,
    ...                             extrapolate_last_bin=True
    ...                          )
    >>> ettc_extrapolate_last_bin.transform(X)
    array([[1.  , 2.  , 0.  , 0.  , 1.  , 2.  , 0.  , 0.  , 1.25],
           [1.  , 0.  , 1.  , 0.  , 1.  , 1.  , 1.  , 0.  , 0.  ]])

    The following example only works if the `Neo package
    <https://neo.readthedocs.io/>`_ is installed.

    >>> import neo
    >>> t_stop = 0.8  # [s]
    >>> neoSpikeTrain = [
    ...     neo.SpikeTrain(X[0],units='sec', t_stop=t_stop),
    ...     neo.SpikeTrain(X[1], units='sec', t_stop=t_stop)
    ...     ]
    >>> ettc = EventTimesToCounts(
    ...                     bin_size=bin_size,
    ...                     t_stop=None,
    ...                     extrapolate_last_bin=False
    ...                     )
    >>> ettc.transform(neoSpikeTrain)
    array([[1, 2, 0, 0, 1, 2, 0, 1],
           [1, 0, 1, 0, 1, 1, 1, 0]])

    Methods
    -------
    fit:
        Does nothing (the transformer is stateless); returns the instance
    transform:
        Transforms data from event times to binned event counts

    """

    # `t_stop` is treated as lying on a bin boundary if it is within this
    # fraction of `bin_size` of the boundary. This absorbs the round-off of
    # the edges computed by `np.arange` (e.g., 3 * 0.1 = 0.30000000000000004).
    _EDGE_TOLERANCE = 1e-6

    def __init__(self, bin_size=0.02, t_stop=None,
                 extrapolate_last_bin=False):
        self.bin_size = bin_size
        self.t_stop = t_stop
        self.extrapolate_last_bin = extrapolate_last_bin

    def fit(self, X, y=None):
        """
        Does nothing and returns the transformer unchanged.

        The binning does not learn anything from the data. This method only
        exists such that the inherited ``fit_transform`` (which calls
        ``fit`` followed by ``transform``) can be used.

        Parameters
        ----------
        X : array-like
            Ignored.
        y : object, optional
            Ignored.

        Returns
        -------
        self : EventTimesToCounts
            This instance.
        """
        return self

    @staticmethod
    def _infer_t_stop(X):
        """
        Returns the `t_stop` implied by the data `X`.

        If the first element of `X` has a `t_stop` attribute (as
        `neo.SpikeTrain` does), its magnitude is used. Otherwise, the largest
        event time across all non-empty sequences is used, or 0 if there is
        no event at all.
        """
        if len(X) > 0 and hasattr(X[0], 't_stop'):
            return X[0].t_stop.magnitude
        # use the true maximum rather than the last element, such that
        # unsorted sequences and any sequence type (list, tuple, array)
        # are handled correctly
        return max((np.max(seq) for seq in X if len(seq) > 0), default=0)

    def transform(self, X):
        """
        Transforms data from event times to binned event counts

        Parameters
        ----------
        X : numpy.array or neo.SpikeTrain
            An array-like containing #sequences of event time sequences
            (usually sequences of `float`'s). Each element in `X` can contain a
            different number of event times. They are all assumed to share the
            same final time (i.e., ``t_stop``).

        Returns
        -------
        X_out : numpy.array
            A numpy matrix of size #sequences x #bins, containing the
            binned event counts. It is of type `float` if the last bin has
            been extrapolated, and of type `int` otherwise.

        Raises
        ------
        ValueError
            If `bin_size` is not strictly positive, if the specified or
            inferred `t_stop` is negative, or if an element of `X` has a
            `t_stop` that differs from the specified or inferred one.
        """
        bin_size = self.bin_size
        # `not ... > 0` (rather than `<= 0`) also rejects NaN
        if not bin_size > 0:
            raise ValueError(
                f'`bin_size` must be strictly positive, got {bin_size!r}.'
            )

        # ==============================================
        # set the starting time of the trial (`t_start`)
        # and the end time (`t_stop`)
        # ==============================================
        t_start = 0
        t_stop = self.t_stop
        if t_stop is None:
            t_stop = self._infer_t_stop(X)
        if not t_stop >= t_start:
            raise ValueError(
                f'`t_stop` must be non-negative, got {t_stop!r}.'
            )

        # ====================================
        # get the bins based on the `bin_size`
        # ====================================
        # the small margin added to `t_stop` makes sure that an edge that
        # coincides with `t_stop` is generated
        edges = np.arange(t_start,
                          t_stop + bin_size * 0.1,
                          bin_size)

        # =============================
        # Check edges for extrapolation
        # =============================
        # The edges are computed as multiples of `bin_size`, and might thus
        # differ from `t_stop` by round-off even if `t_stop` is meant to be
        # a bin boundary. An exact comparison would then either drop the
        # last full bin or extrapolate by a huge factor.
        on_boundary = (
            abs(edges[-1] - t_stop) <= self._EDGE_TOLERANCE * bin_size
        )
        if on_boundary:
            # snap the boundary to `t_stop` such that events at exactly
            # `t_stop` are counted in the (right-inclusive) last bin
            if edges.size > 1:
                edges[-1] = t_stop
        elif edges[-1] > t_stop:
            # we do not want any edge beyond `t_stop`
            edges = edges[:-1]

        # Extrapolate only if requested and if `t_stop` lies strictly
        # inside a bin; at this point `edges[-1] < t_stop` unless
        # `on_boundary` is set.
        extrapolate_last_bin = bool(self.extrapolate_last_bin) \
            and not on_boundary
        if extrapolate_last_bin:
            # ratio of the full bin width to the part covered up to `t_stop`
            last_bin_scaling = bin_size / (t_stop - edges[-1])
            edges = np.hstack((edges, edges[-1] + bin_size))

        # =======================
        # create an output matrix
        # =======================
        X_out = np.empty((len(X), len(edges) - 1),
                         dtype=(float if extrapolate_last_bin else int))

        # =============================================================
        # Loop over event time sequences to compute binned event counts
        # =============================================================
        for i, eventseq in enumerate(X):

            # If neo.SpikeTrain, get the timesteps
            # of each neuron via `eventseq.magnitude`
            # NOTE(review): magnitudes are used as-is, without unit
            # conversion -- presumably they are in seconds; confirm.
            if hasattr(eventseq, 'units'):
                if t_stop != eventseq.t_stop.magnitude:
                    raise ValueError(
                        f'The specified or computed `t_stop`: {t_stop} '
                        f'is different from the {i}_th spikeTrain `t_stop` '
                        "`t_stop` must be the same across all neurons."
                    )
                eventseq = eventseq.magnitude

            # binning happens here
            X_out[i, :] = np.histogram(eventseq, edges)[0]

        # ========================
        # extrapolate the last bin
        # ========================
        if extrapolate_last_bin:
            X_out[:, -1] *= last_bin_scaling

        return X_out