# model_utils.py
#
# Helper functions for building the convolutional encoder, decoder and output layer of a VAE,
# plus the shape arithmetic needed to keep the decoder consistent with the encoder.
import sys
sys.path.append("../utils")
import torch
import torch.nn as nn
from errors import InvalidArchitectureError
def compute_output_shape(current_shape, kernel_size, stride, padding):
    """
    Compute the spatial shape of the data after a convolution is applied to it.

    :param tuple current_shape: The current shape of the data before a convolution is applied.
    :param kernel_size: The kernel size of the current convolution operation. Either a tuple
                        with one entry per dimension, or a single int used for every dimension.
    :type kernel_size: tuple or int
    :param stride: The stride of the current convolution operation (tuple or single int).
    :type stride: tuple or int
    :param padding: The padding of the current convolution operation (tuple or single int).
    :type padding: tuple or int

    :return: The shape after a convolution operation with the above parameters is applied.
    :rtype: tuple

    The formula used to compute the final shape is

        component[i] = floor((N[i] - K[i] + 2 * P[i]) / S[i]) + 1

    where, N = current shape of the data
           K = kernel size
           P = padding
           S = stride

    The formula has no dilation term, i.e. it is only valid for a dilation of 1. The result is
    not validated here: a component can be 0 or negative if the kernel does not fit.
    """
    dimensions = len(current_shape)

    def per_dimension(value):
        # a single int is shorthand for "the same value in every dimension", which is the
        # convention torch.nn.Conv2d uses for its kernel_size / stride / padding arguments
        return (value,) * dimensions if isinstance(value, int) else value

    kernel = per_dimension(kernel_size)
    step = per_dimension(stride)
    pad = per_dimension(padding)

    # compute each component using the above formula (// is floor division)
    return tuple((current_shape[i] - kernel[i] + 2 * pad[i]) // step[i] + 1
                 for i in range(dimensions))
def compute_transpose_output_shape(current_shape, kernel_size, stride, padding):
    """
    Compute the spatial shape of the data after a transpose convolution is applied to it.

    :param tuple current_shape: The current shape of the data before a transpose convolution is
                                applied.
    :param kernel_size: The kernel size of the current transpose convolution operation. Either
                        a tuple with one entry per dimension, or a single int used for every
                        dimension.
    :type kernel_size: tuple or int
    :param stride: The stride of the current transpose convolution operation (tuple or single
                   int).
    :type stride: tuple or int
    :param padding: The padding of the current transpose convolution operation (tuple or single
                    int).
    :type padding: tuple or int

    :return: The shape after a transpose convolution operation with the above parameters is
             applied.
    :rtype: tuple

    The formula used to compute the final shape is

        component[i] = (N[i] - 1) * S[i] - 2 * P[i] + (K[i] - 1) + 1

    where, N = current shape of the data
           K = kernel size
           P = padding
           S = stride

    The formula has no dilation or output padding term, i.e. it gives the shape for a dilation
    of 1 and an output padding of 0.
    """
    dimensions = len(current_shape)

    def per_dimension(value):
        # a single int is shorthand for "the same value in every dimension", which is the
        # convention torch.nn.ConvTranspose2d uses for its kernel_size / stride / padding
        return (value,) * dimensions if isinstance(value, int) else value

    kernel = per_dimension(kernel_size)
    step = per_dimension(stride)
    pad = per_dimension(padding)

    # compute each component using the above formula
    return tuple((current_shape[i] - 1) * step[i] - 2 * pad[i] + (kernel[i] - 1) + 1
                 for i in range(dimensions))
def compute_output_padding(current_shape, target_shape):
    """
    Compute the output padding that closes the gap between two shapes.

    :param tuple current_shape: The shape of the data after a transpose convolution operation
                                takes place.
    :param tuple target_shape: The target shape that we would like our data to have after the
                               transpose convolution operation takes place.

    :return: The output padding needed so that the shape of the image after a transpose
             convolution is applied, is the same as the target shape.
    :rtype: tuple

    The result has one entry per component of current_shape. It is a plain difference and is
    not validated, so an entry is negative whenever the current shape exceeds the target.
    """
    # the missing size in every dimension is exactly the output padding
    return tuple(target_shape[axis] - size for axis, size in enumerate(current_shape))
def invalid_shape(current_shape):
    """
    Tell whether a shape has a component that cannot exist.

    :param tuple current_shape: The current shape of the data after a convolution is applied.

    :return: True if the shape is invalid, that is, a negative or 0 component exists. Else, it
             returns False (an empty shape has no offending component, so it is also False).
    :rtype: bool
    """
    # one non-positive component is enough to make the whole shape invalid
    return any(component <= 0 for component in current_shape)
def create_encoder(architecture, input_shape):
    """
    Build the encoder part of a VAE. It is common for all types of VAE.

    :param dict architecture: A dictionary containing the hyperparameters that define the
                              architecture of the model. The keys read here are "conv_layers"
                              (number of convolutional sets) and the per-layer sequences
                              "conv_channels", "conv_kernel_sizes", "conv_strides" and
                              "conv_paddings".
    :param tuple input_shape: A tuple that corresponds to the shape of the input. Index 0 is
                              used as the number of input channels, indices 1 and 2 as the
                              spatial shape.

    :return: A PyTorch Sequential model that represents the encoder part of a VAE, along with
             the list of spatial shapes of the data: the input shape first, followed by the
             shape after every convolutional set (conv_layers + 1 entries in total).
    :rtype: (torch.nn.Sequential, list)

    :raises InvalidArchitectureError: If the shape after some layer has a component that is 0
                                      or negative. The error carries that shape and the 1-based
                                      index of the layer.
    """
    # initialize useful variables
    in_channels = input_shape[0]
    current_shape = (input_shape[1], input_shape[2])

    # the shape produced by each layer; the decoder needs these to mirror the encoder
    shape_per_layer = [current_shape]

    # build the encoder part
    conv_sets = []

    # iterate through the lists that define the architecture of the encoder
    for layer in range(architecture["conv_layers"]):

        # name the hyperparameters of this layer for readability
        out_channels = architecture["conv_channels"][layer]
        kernel_size = architecture["conv_kernel_sizes"][layer]
        stride = architecture["conv_strides"][layer]
        padding = architecture["conv_paddings"][layer]

        # add a set of Convolutional - Leaky ReLU - Batch Normalization sequential layers
        conv_sets.append(
            nn.Sequential(
                nn.Conv2d(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=kernel_size,
                          stride=stride,
                          padding=padding),
                nn.LeakyReLU(negative_slope=0.15),
                nn.BatchNorm2d(out_channels))
        )

        # compute the new shape of the image
        current_shape = compute_output_shape(current_shape=current_shape,
                                             kernel_size=kernel_size,
                                             stride=stride,
                                             padding=padding)
        shape_per_layer.append(current_shape)

        # make sure that the shape is valid, and if not, raise an error (layers are reported
        # 1-based, hence the + 1)
        if invalid_shape(current_shape):
            raise InvalidArchitectureError(shape=current_shape, layer=layer + 1)

        # the output channels of the current layer becomes the input channels of the next layer
        in_channels = out_channels

    # create a Sequential model and return it (* asterisk is used to unpack the list)
    return nn.Sequential(*conv_sets), shape_per_layer
def create_decoder(architecture, encoder_shapes):
    """
    Build the decoder part of a VAE. It is common for all types of VAE.

    :param dict architecture: A dictionary containing the hyperparameters that define the
                              architecture of the model. The keys read here are "conv_layers",
                              "conv_channels", "conv_kernel_sizes", "conv_strides" and
                              "conv_paddings".
    :param list encoder_shapes: A list that contains the shape of the data after it is applied
                                to every set of convolutional layers. Entry i is the shape that
                                goes into encoder layer i and entry i + 1 the shape that comes
                                out of it, so conv_layers + 1 entries are needed.

    :return: A PyTorch Sequential model that represents the decoder part of a VAE.
    :rtype: torch.nn.Sequential

    The encoder layers are walked from last to first. Each decoder set reuses the kernel size,
    stride and padding of its encoder layer. The first set keeps the channel count of the last
    encoder layer; every later set maps to the channel count of the encoder layer it mirrors.
    """
    # now start building the decoder part
    conv_sets = []

    # the decoder starts from the number of channels the encoder ended with
    in_channels = architecture["conv_channels"][-1]

    # iterate through the lists that define the architecture, from the last layer to the first
    for layer in reversed(range(architecture["conv_layers"])):

        # name the hyperparameters of this layer for readability
        out_channels = architecture["conv_channels"][layer]
        kernel_size = architecture["conv_kernel_sizes"][layer]
        stride = architecture["conv_strides"][layer]
        padding = architecture["conv_paddings"][layer]

        # this set has to take the data from the output shape of encoder layer `layer` back to
        # its input shape
        current_shape = encoder_shapes[layer + 1]
        target_shape = encoder_shapes[layer]

        # the shape formula of a convolution floors its result, so the plain transpose
        # convolution can come up short of the target; the difference is the output padding
        output_shape = compute_transpose_output_shape(current_shape=current_shape,
                                                      kernel_size=kernel_size,
                                                      stride=stride,
                                                      padding=padding)
        output_padding = compute_output_padding(output_shape, target_shape)

        # add a set of ConvolutionalTranspose - Leaky ReLU - Batch Normalization layers
        conv_sets.append(
            nn.Sequential(
                nn.ConvTranspose2d(in_channels=in_channels,
                                   out_channels=out_channels,
                                   kernel_size=kernel_size,
                                   stride=stride,
                                   padding=padding,
                                   output_padding=output_padding),
                nn.LeakyReLU(negative_slope=0.15),
                nn.BatchNorm2d(out_channels))
        )

        # the output channels of the current layer becomes the input channels of the next layer
        in_channels = out_channels

    # create a Sequential model and return it (* asterisk is used to unpack the list)
    return nn.Sequential(*conv_sets)
def create_output_layer(architecture, input_shape):
    """
    Create the output layer of a VAE, that is, the layer where the data from the output of the
    decoder gets fed in order to be finally reconstructed.

    :param dict architecture: A dictionary containing the hyperparameters that define the
                              architecture of the model. Only the first entry (index 0) of
                              "conv_channels", "conv_kernel_sizes", "conv_strides" and
                              "conv_paddings" is read.
    :param tuple input_shape: A tuple that corresponds to the shape of the input. Only index 0
                              is read; it is used as the number of output channels.

    :return: A PyTorch Sequential model that represents the output layer of a VAE, made of
             ConvTranspose2d - SELU - BatchNorm2d - Conv2d - Sigmoid.
    :rtype: torch.nn.Sequential
    """
    # name the hyperparameters of the first encoder layer for readability
    in_channels = architecture["conv_channels"][0]
    kernel_size = architecture["conv_kernel_sizes"][0]
    stride = architecture["conv_strides"][0]
    padding = architecture["conv_paddings"][0]

    # the transpose convolution and the convolution share kernel size, stride and padding:
    # by the usual shape formulas (N -> (N - 1) * S - 2 * P + K -> N) the pair leaves the
    # spatial size unchanged, and only the channel count goes to input_shape[0]
    return nn.Sequential(nn.ConvTranspose2d(in_channels=in_channels,
                                            out_channels=in_channels,
                                            kernel_size=kernel_size,
                                            stride=stride,
                                            padding=padding),
                         nn.SELU(),
                         nn.BatchNorm2d(in_channels),
                         nn.Conv2d(in_channels=in_channels,
                                   out_channels=input_shape[0],
                                   kernel_size=kernel_size,
                                   stride=stride,
                                   padding=padding),
                         nn.Sigmoid())