Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/utils.py at master · FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import matplotlib.pyplot as plt
from sklearn.decomposition import TruncatedSVD
import numpy as np
from scipy import interpolate


def dim_reduction_plot(data, label, block_flag):
    """
    Project the data with a truncated SVD and scatter the first two components,
    drawing samples of class 0 ('healthy') and class 1 ('infected') with
    different markers.

    data: 2-D array-like with one row per sample, as accepted by TruncatedSVD
    label: class of each sample (0 or 1); may be flat or a column vector,
        it is flattened before being matched against the rows of data
    block_flag: forwarded to plt.show(block=...)

    NOTE: TruncatedSVD does not centre the data, so the projection equals a
    linear PCA only when the data are already zero-mean.
    """

    svd_model = TruncatedSVD(n_components=3).fit(data)
    projected = svd_model.transform(data)

    # Flatten the labels and select rows with a boolean mask. Indexing with the
    # tuple returned by np.where on a column-vector label would also pick up
    # the (all-zero) column indices as row indices and plot spurious points.
    flat_label = np.ravel(label)
    class_styles = (
        (0, 'r', '^', 'healthy'),
        (1, 'y', 'o', 'infected'),
    )
    for class_id, colour, marker, name in class_styles:
        mask = flat_label == class_id
        plt.scatter(projected[mask, 0], projected[mask, 1], s=80, c=colour,
                    marker=marker, linewidths=0, label=name)

    # The component values carry no meaningful unit, so hide the ticks
    plt.gca().axes.get_xaxis().set_ticks([])
    plt.gca().axes.get_yaxis().set_ticks([])
    plt.title('PCA of the codes')
    plt.legend(scatterpoints=1, loc='best')
    plt.show(block=block_flag)

def ideal_kernel(labels):
    """
    Compute the ideal kernel K
    An entry k_ij = 0 if i and j have different class
    k_ij = 1 if i and j have same class

    labels: class labels, either flat with shape [N] or 2-D with shape [N, C];
        for 2-D input only the first column is compared
    returns: float array of shape [N, N]
    """
    labels = np.asarray(labels)

    # Column-vector labels keep the class id in their first column
    classes = labels[:, 0] if labels.ndim > 1 else labels

    # Broadcast an [N, 1] column against a [1, N] row to compare every pair at once
    same_class = classes[:, np.newaxis] == classes[np.newaxis, :]
    return same_class.astype(float)


def interp_data(X, X_len, restore=False, interp_kind='linear'):
    """
    Interpolate data to match the same maximum length in X_len
    If restore is True, data are interpolated back to their original length
    data are assumed to be time-major

    X: array of shape [T, N, V] (time steps, samples, variables)
    X_len: length of each of the N samples; each must not exceed T
    restore: if False, the first X_len[n] steps of sample n are stretched to
        T steps; if True, the T steps of sample n are resampled to X_len[n]
        steps and the remaining steps are left at zero
    interp_kind: can be 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'
    returns: array with the same shape as X; floating (or complex) inputs keep
        their dtype, any other dtype is returned as float64
    """

    X = np.asarray(X)
    [T, N, V] = X.shape

    # zeros_like alone would inherit an integer dtype from X and silently
    # truncate the interpolated values, so force a floating output in that case
    out_dtype = X.dtype if np.issubdtype(X.dtype, np.inexact) else np.float64
    X_new = np.zeros_like(X, dtype=out_dtype)

    for n in range(N):
        length = int(X_len[n])

        # Both directions share the same [0, length] time axis; only the
        # number of source and destination points is swapped
        if restore:
            src_steps, dst_steps = T, length
        else:
            src_steps, dst_steps = length, T

        t = np.linspace(start=0, stop=length, num=src_steps)
        t_new = np.linspace(start=0, stop=length, num=dst_steps)
        for v in range(V):
            x_n_v = X[:src_steps, n, v]
            f = interpolate.interp1d(t, x_n_v, kind=interp_kind)
            X_new[:dst_steps, n, v] = f(t_new)

    return X_new


def classify_with_knn(train_data, train_labels, test_data, test_labels, k=3, metric='minkowski'):
    """
    Perform classification with knn.

    train_data, train_labels: samples and labels used to fit the classifier
    test_data, test_labels: samples and labels used for evaluation
    k: number of neighbors
    metric: distance metric forwarded to KNeighborsClassifier
    returns: (accuracy, F1, AUC) on the test set; F1 uses the f1_score
        defaults and AUC is computed from the hard predicted labels, not
        from class probabilities
    """
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

    neigh = KNeighborsClassifier(n_neighbors=k, metric=metric)
    neigh.fit(train_data, train_labels)

    # Predict once and derive every metric from the same predictions;
    # neigh.score() would repeat the whole neighbor search internally
    pred_labels = neigh.predict(test_data)
    accuracy = accuracy_score(test_labels, pred_labels)
    F1 = f1_score(test_labels, pred_labels)
    AUC = roc_auc_score(test_labels, pred_labels)

    return accuracy, F1, AUC

def mse_and_corr(targets, preds, targets_len):
    """
    Average, over all samples, the mean squared error and the Pearson
    correlation between targets and preds, using only the first
    targets_len[i] time steps of sample i.

    targets and preds must have shape [time_steps, samples, variables]
    targets_len must have shape [samples,]
    returns: (mean of the per-sample MSE, mean of the per-sample correlation)
    """
    sample_mse = []
    sample_corr = []
    n_samples = targets.shape[1]

    for idx in range(n_samples):
        # Keep only the valid (non-padded) time steps of this sample
        valid = slice(0, targets_len[idx])
        truth = targets[valid, idx, :]
        estimate = preds[valid, idx, :]

        residual = truth - estimate
        sample_mse.append(np.mean(np.square(residual)))

        # Correlation is taken over all time steps and variables jointly
        corr_matrix = np.corrcoef(truth.ravel(), estimate.ravel())
        sample_corr.append(corr_matrix[0, 1])

    return np.mean(sample_mse), np.mean(sample_corr)