Skip to content

Commit 1d2b2fe

Browse files
authored
Merge pull request #1173 from apache/dev-postgresql
Merge Dev
2 parents 7b87d0a + 3ac79d1 commit 1d2b2fe

7 files changed

Lines changed: 1231 additions & 0 deletions

File tree

examples/cnn_ms/README.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
Unless required by applicable law or agreed to in writing,
11+
software distributed under the License is distributed on an
12+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13+
KIND, either express or implied. See the License for the
14+
specific language governing permissions and limitations
15+
under the License.
16+
-->
17+
18+
# Image Classification using Convolutional Neural Networks
19+
20+
Examples inside this folder show how to train CNN models using
21+
SINGA for image classification.
22+
23+
* `data` includes the scripts for preprocessing image datasets.
24+
Currently, MNIST, CIFAR10 and CIFAR100 are included.
25+
26+
* `model` includes the CNN model construction code, written by creating
27+
a subclass of `Module` to wrap the neural network operations
28+
of each model. The computational graph is then enabled to optimize
29+
the memory and efficiency.
30+
31+
* `autograd` includes the code to train CNN models by calling the
32+
[neural network operations](../../python/singa/autograd.py) imperatively.
33+
The computational graph is not created.
34+
35+
* `train_cnn.py` is the training script, which controls the training flow by
36+
performing backpropagation and SGD updates.
37+
38+
* `train_multiprocess.py` is the script for distributed training on a single
39+
node with multiple GPUs; it uses Python's multiprocessing module and NCCL.
40+
41+
* `train_mpi.py` is the script for distributed training (among multiple nodes)
42+
using MPI and NCCL for communication.
43+
44+
* `benchmark.py` tests the training throughput using `ResNet50` as the workload.
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
from resnet_cifar10 import *
21+
import multiprocessing
22+
import sys
23+
24+
if __name__ == '__main__':
    # Launch one training process per GPU on this node.
    #
    # Usage: python <this script> <number of GPUs>

    # Validate the only command-line argument up front so that a missing or
    # malformed value yields a usage message instead of a raw traceback.
    usage = 'usage: python %s <number of GPUs>' % sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit(usage)
    try:
        # Configure the number of GPUs to be used
        world_size = int(sys.argv[1])
    except ValueError:
        sys.exit('%s\ninvalid number of GPUs: %r' % (usage, sys.argv[1]))
    if world_size < 1:
        # range(0, world_size) would be empty and nothing would be trained.
        sys.exit('%s\nthe number of GPUs must be at least 1' % usage)

    # Generate a NCCL ID to be used for collective communication; the same
    # object is handed to every worker process.
    nccl_id = singa.NcclIdHolder()

    # Testing the experimental partial-parameter update asynchronous training
    partial_update = True

    # NOTE(review): the leading True is passed through unchanged as the first
    # positional argument of train_cifar10 — presumably its distributed-mode
    # flag; confirm against resnet_cifar10.
    workers = [
        multiprocessing.Process(target=train_cifar10,
                                args=(True, local_rank, world_size, nccl_id,
                                      partial_update))
        for local_rank in range(world_size)
    ]

    for worker in workers:
        worker.start()

    # Wait for every worker explicitly instead of relying on the implicit
    # join of non-daemonic children at interpreter exit.
    for worker in workers:
        worker.join()
Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
# =============================================================================
17+
18+
from singa import autograd
19+
from singa import tensor
20+
from singa import device
21+
from singa import layer
22+
from singa import opt
23+
24+
import numpy as np
25+
from tqdm import trange
26+
27+
# the code is modified from
28+
# https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/xception.py
29+
30+
31+
class Block(layer.Layer):
    """Residual building block of Xception.

    The main path is a chain of ReLU -> 3x3 SeparableConv2d -> BatchNorm2d
    triples, followed by a max pooling when ``strides != 1``. Its output is
    added to a shortcut of the input: a 1x1 Conv2d plus BatchNorm2d when the
    filter count or the stride changes, the unchanged input otherwise.

    Args:
        in_filters: filter count on the input side of the block.
        out_filters: filter count on the output side of the block.
        reps: the main path gets one "growing" triple
            (in_filters -> out_filters) and ``reps - 1`` width-preserving
            triples.
        strides: stride of the shortcut convolution and of the trailing
            max pooling; the pooling is only added when it differs from 1.
        padding: padding of the shortcut convolution; the max pooling is
            created with ``padding + 1``.
        start_with_relu: when False the leading ReLU of the main path is
            dropped.
        grow_first: when True the growing triple comes first, otherwise
            it comes last.
    """

    def __init__(self,
                 in_filters,
                 out_filters,
                 reps,
                 strides=1,
                 padding=0,
                 start_with_relu=True,
                 grow_first=True):
        super(Block, self).__init__()

        def sep_unit(n_in, n_out):
            # One ReLU -> 3x3 separable conv -> batch norm triple.
            return [
                layer.ReLU(),
                layer.SeparableConv2d(n_in,
                                      n_out,
                                      3,
                                      stride=1,
                                      padding=1,
                                      bias=False),
                layer.BatchNorm2d(n_out),
            ]

        # Shortcut branch: a projection is only needed when the main path
        # changes the filter count or the spatial resolution.
        identity_shortcut = out_filters == in_filters and strides == 1
        if identity_shortcut:
            self.skip = None
        else:
            self.skip = layer.Conv2d(in_filters,
                                     out_filters,
                                     1,
                                     stride=strides,
                                     padding=padding,
                                     bias=False)
            self.skipbn = layer.BatchNorm2d(out_filters)

        # Main path, assembled in execution order.
        self.layers = []
        width = in_filters
        if grow_first:
            self.layers.extend(sep_unit(in_filters, out_filters))
            width = out_filters

        for _ in range(reps - 1):
            self.layers.extend(sep_unit(width, width))

        if not grow_first:
            self.layers.extend(sep_unit(in_filters, out_filters))

        if start_with_relu:
            # The path already starts with a ReLU; it is swapped for a fresh
            # instance, as in the implementation this code was ported from.
            self.layers[0] = layer.ReLU()
        else:
            self.layers = self.layers[1:]

        if strides != 1:
            self.layers.append(layer.MaxPool2d(3, strides, padding + 1))

        self.register_layers(*self.layers)

        self.add = layer.Add()

    def forward(self, x):
        """Run `x` through the main path and add the shortcut branch."""
        first_op, remaining_ops = self.layers[0], self.layers[1:]
        y = first_op(x)
        for op in remaining_ops:
            # When a layer hands back a tuple, only its first item is fed to
            # the next layer.
            y = op(y[0] if isinstance(y, tuple) else y)

        if self.skip is None:
            shortcut = x
        else:
            shortcut = self.skipbn(self.skip(x))
        return self.add(y, shortcut)
117+
118+
119+
# Names exported by a star import of this module; `Block` is deliberately
# not listed here.
__all__ = ['Xception']
120+
121+
122+
class Xception(layer.Layer):
    """
    Xception image classifier, following the architecture described in
    https://arxiv.org/pdf/1610.02357.pdf

    The network is two plain convolutions, twelve `Block` units
    (`block1` ... `block12`), two separable convolutions, and a
    pooling + linear classification head.
    """

    # Blocks 4 to 11 are eight identical 728 -> 728 units.
    _FIRST_REPEATED_BLOCK = 4
    _LAST_REPEATED_BLOCK = 11
    _NUM_BLOCKS = 12

    def __init__(self, num_classes=1000):
        """Create all layers of the network.

        Args:
            num_classes: size of the output of the final linear layer
        """
        super(Xception, self).__init__()
        self.num_classes = num_classes

        # Two plain convolutions, each followed by batch norm and ReLU.
        self.conv1 = layer.Conv2d(3, 32, 3, 2, 0, bias=False)
        self.bn1 = layer.BatchNorm2d(32)
        self.relu1 = layer.ReLU()

        self.conv2 = layer.Conv2d(32, 64, 3, 1, 1, bias=False)
        self.bn2 = layer.BatchNorm2d(64)
        self.relu2 = layer.ReLU()

        # Three strided blocks that widen 64 -> 128 -> 256 -> 728. block1
        # drops its leading ReLU because relu2 has just been applied.
        self.block1 = Block(64,
                            128,
                            2,
                            2,
                            padding=0,
                            start_with_relu=False,
                            grow_first=True)
        self.block2 = Block(128,
                            256,
                            2,
                            2,
                            padding=0,
                            start_with_relu=True,
                            grow_first=True)
        self.block3 = Block(256,
                            728,
                            2,
                            2,
                            padding=0,
                            start_with_relu=True,
                            grow_first=True)

        # Eight identical stride-1 blocks, stored as block4 ... block11.
        for idx in range(self._FIRST_REPEATED_BLOCK,
                         self._LAST_REPEATED_BLOCK + 1):
            setattr(
                self, 'block%d' % idx,
                Block(728,
                      728,
                      3,
                      1,
                      start_with_relu=True,
                      grow_first=True))

        # Last block: strided, and it grows 728 -> 1024 in its final triple.
        self.block12 = Block(728,
                             1024,
                             2,
                             2,
                             start_with_relu=True,
                             grow_first=False)

        self.conv3 = layer.SeparableConv2d(1024, 1536, 3, 1, 1)
        self.bn3 = layer.BatchNorm2d(1536)
        self.relu3 = layer.ReLU()

        self.conv4 = layer.SeparableConv2d(1536, 2048, 3, 1, 1)
        self.bn4 = layer.BatchNorm2d(2048)

        # Classification head; relu4 is applied in `logits`, not `features`.
        self.relu4 = layer.ReLU()
        # NOTE(review): the fixed 10 / stride 1 pooling presumably collapses
        # the final feature map of a 299x299 input to 1x1 — confirm before
        # feeding other input sizes.
        self.globalpooling = layer.MaxPool2d(10, 1)
        self.flatten = layer.Flatten()
        self.fc = layer.Linear(2048, num_classes)

    def features(self, input):
        """Apply every layer up to and including `bn4` to `input`."""
        x = self.relu1(self.bn1(self.conv1(input)))
        x = self.relu2(self.bn2(self.conv2(x)))

        # block1 ... block12, in order.
        for idx in range(1, self._NUM_BLOCKS + 1):
            x = getattr(self, 'block%d' % idx)(x)

        x = self.relu3(self.bn3(self.conv3(x)))

        # No ReLU after bn4 here; `logits` starts with relu4.
        return self.bn4(self.conv4(x))

    def logits(self, features):
        """Turn the output of `features` into class scores."""
        activated = self.relu4(features)
        pooled = self.globalpooling(activated)
        flat = self.flatten(pooled)
        return self.fc(flat)

    def forward(self, input):
        """Full forward pass: `features` followed by `logits`."""
        return self.logits(self.features(input))
278+
279+
280+
if __name__ == '__main__':
    # Smoke run: a fixed number of SGD steps of Xception on a single random
    # batch that is generated once and reused for every iteration.
    num_classes = 1000
    model = Xception(num_classes=num_classes)
    print('Start intialization............')
    # Runs on CUDA device 0; device.create_cuda_gpu() is the alternative
    # that does not pin the device id.
    dev = device.create_cuda_gpu_on(0)

    niters = 20
    batch_size = 16
    IMG_SIZE = 299
    sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)

    # Device-side placeholders for the images and their int32 labels.
    image_shape = (batch_size, 3, IMG_SIZE, IMG_SIZE)
    tx = tensor.Tensor(image_shape, dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    autograd.training = True

    # Random images and labels, drawn on the host and copied to the device.
    images = np.random.randn(*image_shape).astype(np.float32)
    labels = np.random.randint(0, num_classes, batch_size, dtype=np.int32)
    tx.copy_from_numpy(images)
    ty.copy_from_numpy(labels)

    with trange(niters) as progress:
        for _ in progress:
            scores = model(tx)
            loss = autograd.softmax_cross_entropy(scores, ty)
            # Calling the optimizer on the loss performs the update step.
            sgd(loss)

0 commit comments

Comments
 (0)