forked from Gpialla/DataAugForTSC
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_preprocessing.py
More file actions
90 lines (75 loc) · 2.77 KB
/
Copy pathdata_preprocessing.py
File metadata and controls
90 lines (75 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Imports
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
def z_norm(x_train, x_test):
    """
    Standardize the train and test arrays channel by channel.

    Each input must be 3-D; its shape is unpacked as (B, TS, C), presumably
    (batch, time steps, channels). The first two axes are flattened together
    and every entry of the last axis is shifted to zero mean and scaled to
    unit standard deviation.

    Note that each array is normalized with its OWN statistics: the test set
    does not reuse the mean/std computed on the training set.

    Args:
        x_train (np.array): Training data, shape (B, TS, C)
        x_test (np.array): Test data, shape (B, TS, C)
    Returns:
        (np.array, np.array): (x_train, x_test), both normalized
    """
    # TODO: randomize x_train

    def _standardize(data):
        # Collapse the first two axes so statistics are taken per last-axis entry.
        n_series, n_steps, n_channels = data.shape
        flat = data.reshape((n_series * n_steps, n_channels))
        mu = flat.mean(axis=0)
        sigma = flat.std(axis=0)
        # A constant channel has zero spread; dividing by 1 leaves it at 0
        # instead of producing NaN.
        sigma[sigma == 0] = 1.0
        scaled = (flat - mu) / sigma
        return scaled.reshape(n_series, n_steps, n_channels)

    normalized_train = _standardize(x_train)
    normalized_test = _standardize(x_test)
    return normalized_train, normalized_test
def feature_scaling(x_train, x_test):
    """
    Min-max scale both arrays to [-1, 1] using the training set's range.

    The minimum and maximum are taken over every element of ``x_train`` (one
    global pair, not one per channel) and reused for ``x_test``, because the
    test range is treated as unknown. Test values lying outside the training
    range therefore map outside [-1, 1].

    If ``x_train`` is constant (max == min), the range is replaced by 1 so no
    division by zero occurs; every training value then maps to -1.

    Args:
        x_train (np.array): Training data; defines the scaling range
        x_test (np.array): Test data, scaled with the training range
    Returns:
        x_train_norm, x_test_norm: x_train & x_test normalized btw -1 and 1
    """
    min_ = np.min(x_train)
    span = np.max(x_train) - min_
    # Guard against constant training data, which would otherwise yield
    # NaN/inf through a 0 / 0 or x / 0 division.
    if span == 0:
        span = 1.0

    x_train_norm = 2. * (x_train - min_) / span - 1.
    # x_test min and max are 'unknown', so the training range is reused.
    x_test_norm = 2. * (x_test - min_) / span - 1.
    return x_train_norm, x_test_norm
def labels_encoding(y_train, y_test, format=None):
    """
    Encoding for the labels

    Format:
        - None : Default encoding -> 0, 1, 2, ..., use with sparse categorical cross entropy
        - OHE : One Hot Encoding, use with categorical cross entropy

    The encoder is fitted on the concatenation of the train and test labels,
    so a class that appears in only one of the two splits is still encoded.

    Args:
        y_train (np.array): Train labels
        y_test (np.array): Test labels
        format (string): The format for the encoding (None or "OHE")
    Returns:
        (np.array, np.array, int, encoder): The encoded train labels, the
        encoded test labels, the number of classes, and the fitted
        scikit-learn encoder (LabelEncoder or OneHotEncoder)
    Raises:
        ValueError: If ``format`` is neither None nor "OHE"
    """
    # Init the encoder; an unknown format is rejected before any data is touched.
    if format is None:
        encoder = LabelEncoder()
    elif format == "OHE":
        try:
            # scikit-learn >= 1.2 names the dense-output switch ``sparse_output``;
            # the old ``sparse`` keyword was removed in 1.4.
            encoder = OneHotEncoder(sparse_output=False)
        except TypeError:
            # Older scikit-learn releases only accept ``sparse``.
            encoder = OneHotEncoder(sparse=False)
        # OneHotEncoder expects 2-D input: turn (n,) labels into (n, 1).
        y_train = np.expand_dims(y_train, axis=1)
        y_test = np.expand_dims(y_test, axis=1)
    else:
        raise ValueError("Error wrong parameter, either None or OHE expected!")

    # Concat train and test so both splits share one label mapping
    n_train = len(y_train)
    y_train_test = np.concatenate((y_train, y_test), axis=0)

    # Count num_classes
    num_classes = len(np.unique(y_train_test))

    # Fit the encoder & transform data
    new_y_train_test = encoder.fit_transform(y_train_test)

    # Resplit the train and test
    new_y_train = new_y_train_test[:n_train]
    new_y_test = new_y_train_test[n_train:]
    return new_y_train, new_y_test, num_classes, encoder