Skip to content

Commit 3791483

Browse files
ReduceLayer && Pooling Layer && parallelfor (#268)
1 parent: 1966fe8 · commit: 3791483

7 files changed

Lines changed: 802 additions & 376 deletions

File tree

include/layers/PoolingLayer.hpp

Lines changed: 105 additions & 211 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,15 @@
22
#include <algorithm>
33
#include <cmath>
44
#include <cstdlib>
5+
#include <limits>
56
#include <numeric>
67
#include <stdexcept>
78
#include <string>
89
#include <utility>
910
#include <vector>
1011

1112
#include "layers/Layer.hpp"
12-
#include "tbb/blocked_range2d.h"
13-
#include "tbb/parallel_for.h"
13+
#include "parallel/parallel.hpp"
1414

1515
namespace it_lab_ai {
1616

@@ -64,7 +64,6 @@ class PoolingLayer : public Layer {
6464
Shape dilations_;
6565
bool ceil_mode_;
6666
std::string poolingType_;
67-
ImplType implType_;
6867
};
6968

7069
inline size_t coord_size(size_t coord, const Shape& shape) {
@@ -98,12 +97,13 @@ class PoolingLayerImpl : public LayerImpl<ValueType> {
9897
PoolingLayerImpl(const Shape& input_shape, const Shape& pooling_shape,
9998
const std::string& pooling_type = "average")
10099
: PoolingLayerImpl(input_shape, pooling_shape, {2, 2}, {0, 0, 0, 0},
101-
{1, 1}, false, pooling_type) {}
100+
{1, 1}, false, pooling_type, ParBackend::kSeq) {}
102101
PoolingLayerImpl(const Shape& input_shape, const Shape& pooling_shape,
103102
const Shape& strides = {2, 2},
104103
const Shape& pads = {0, 0, 0, 0},
105104
const Shape& dilations = {1, 1}, bool ceil_mode = false,
106-
const std::string& pooling_type = "average");
105+
const std::string& pooling_type = "average",
106+
ParBackend parallel_backend = ParBackend::kSeq);
107107
PoolingLayerImpl(const PoolingLayerImpl& c) = default;
108108
PoolingLayerImpl& operator=(const PoolingLayerImpl& c) = default;
109109
std::vector<ValueType> run(
@@ -116,20 +116,22 @@ class PoolingLayerImpl : public LayerImpl<ValueType> {
116116
Shape dilations_;
117117
bool ceil_mode_;
118118
PoolingType poolingType_;
119+
ParBackend parallel_backend_;
119120
};
120121

121122
template <typename ValueType>
122123
PoolingLayerImpl<ValueType>::PoolingLayerImpl(
123124
const Shape& input_shape, const Shape& pooling_shape, const Shape& strides,
124125
const Shape& pads, const Shape& dilations, bool ceil_mode,
125-
const std::string& pooling_type)
126+
const std::string& pooling_type, ParBackend parallel_backend)
126127
: LayerImpl<ValueType>(input_shape, input_shape),
127128
poolingShape_(pooling_shape),
128129
strides_(strides),
129130
pads_(pads),
130131
dilations_(dilations),
131132
ceil_mode_(ceil_mode),
132-
poolingType_(kAverage) {
133+
poolingType_(kAverage),
134+
parallel_backend_(parallel_backend) {
133135
if (pooling_shape[0] == 0 && pooling_shape[1] == 0) {
134136
poolingShape_ = Shape({input_shape[input_shape.dims() - 2],
135137
input_shape[input_shape.dims() - 1]});
@@ -208,221 +210,113 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
208210
int batch_dim = this->inputShape_.dims() > spatial_dims ? 0 : -1;
209211
int channel_dim = this->inputShape_.dims() > spatial_dims + 1 ? 1 : -1;
210212

211-
for (size_t n = 0; n < (batch_dim >= 0 ? this->outputShape_[batch_dim] : 1);
212-
n++) {
213-
for (size_t c = 0;
214-
c < (channel_dim >= 0 ? this->outputShape_[channel_dim] : 1); c++) {
215-
for (size_t h = 0;
216-
h < this->outputShape_[this->outputShape_.dims() - spatial_dims];
217-
h++) {
218-
for (size_t w = 0;
219-
w < (spatial_dims > 1
220-
? this->outputShape_[this->outputShape_.dims() -
221-
spatial_dims + 1]
222-
: 1);
223-
w++) {
224-
std::vector<ValueType> pooling_buf;
225-
226-
int start_h =
227-
static_cast<int>(h * strides_[0]) - static_cast<int>(pads_[0]);
228-
int start_w = spatial_dims > 1 ? static_cast<int>(w * strides_[1]) -
229-
static_cast<int>(pads_[2])
230-
: 0;
231-
232-
for (size_t kh = 0; kh < poolingShape_[0]; kh++) {
233-
for (size_t kw = 0; kw < (spatial_dims > 1 ? poolingShape_[1] : 1);
234-
kw++) {
235-
int pos_h = start_h + static_cast<int>(kh * dilations_[0]);
236-
int pos_w = spatial_dims > 1
237-
? start_w + static_cast<int>(kw * dilations_[1])
238-
: 0;
239-
240-
if (pos_h >= 0 &&
241-
pos_h < static_cast<int>(
242-
this->inputShape_[this->inputShape_.dims() -
243-
spatial_dims]) &&
244-
(spatial_dims <= 1 ||
245-
(pos_w >= 0 &&
246-
pos_w < static_cast<int>(
247-
this->inputShape_[this->inputShape_.dims() -
248-
spatial_dims + 1])))) {
249-
std::vector<size_t> input_coords(this->inputShape_.dims(), 0);
250-
if (batch_dim >= 0) input_coords[batch_dim] = n;
251-
if (channel_dim >= 0) input_coords[channel_dim] = c;
252-
input_coords[this->inputShape_.dims() - spatial_dims] = pos_h;
253-
if (spatial_dims > 1) {
254-
input_coords[this->inputShape_.dims() - spatial_dims + 1] =
255-
pos_w;
256-
}
213+
size_t out_h = this->outputShape_[this->outputShape_.dims() - spatial_dims];
214+
size_t out_w =
215+
spatial_dims > 1
216+
? this->outputShape_[this->outputShape_.dims() - spatial_dims + 1]
217+
: 1;
218+
size_t out_n = batch_dim >= 0 ? this->outputShape_[batch_dim] : 1;
219+
size_t out_c = channel_dim >= 0 ? this->outputShape_[channel_dim] : 1;
220+
221+
size_t total_work = out_n * out_c * out_h * out_w;
222+
223+
parallel::Options options;
224+
options.backend = parallel_backend_;
225+
226+
parallel::parallel_for(
227+
total_work,
228+
[&](size_t idx) {
229+
size_t tmp = idx;
230+
size_t w = 0;
231+
if (spatial_dims > 1) {
232+
w = tmp % out_w;
233+
tmp /= out_w;
234+
}
235+
size_t h = tmp % out_h;
236+
tmp /= out_h;
257237

258-
size_t input_index = this->inputShape_.get_index(input_coords);
259-
pooling_buf.push_back(input[input_index]);
238+
size_t c = 0;
239+
if (channel_dim >= 0) {
240+
c = tmp % out_c;
241+
tmp /= out_c;
242+
}
243+
size_t n = batch_dim >= 0 ? tmp : 0;
244+
245+
int start_h =
246+
static_cast<int>(h * strides_[0]) - static_cast<int>(pads_[0]);
247+
int start_w = spatial_dims > 1 ? static_cast<int>(w * strides_[1]) -
248+
static_cast<int>(pads_[2])
249+
: 0;
250+
251+
auto sum = ValueType(0);
252+
ValueType max_val = std::numeric_limits<ValueType>::lowest();
253+
size_t count = 0;
254+
255+
for (size_t kh = 0; kh < poolingShape_[0]; kh++) {
256+
for (size_t kw = 0; kw < (spatial_dims > 1 ? poolingShape_[1] : 1);
257+
kw++) {
258+
int pos_h = start_h + static_cast<int>(kh * dilations_[0]);
259+
int pos_w = spatial_dims > 1
260+
? start_w + static_cast<int>(kw * dilations_[1])
261+
: 0;
262+
263+
if (pos_h >= 0 &&
264+
pos_h < static_cast<int>(
265+
this->inputShape_[this->inputShape_.dims() -
266+
spatial_dims]) &&
267+
(spatial_dims <= 1 ||
268+
(pos_w >= 0 &&
269+
pos_w < static_cast<int>(
270+
this->inputShape_[this->inputShape_.dims() -
271+
spatial_dims + 1])))) {
272+
std::vector<size_t> input_coords(this->inputShape_.dims(), 0);
273+
if (batch_dim >= 0) input_coords[batch_dim] = n;
274+
if (channel_dim >= 0) input_coords[channel_dim] = c;
275+
input_coords[this->inputShape_.dims() - spatial_dims] = pos_h;
276+
if (spatial_dims > 1) {
277+
input_coords[this->inputShape_.dims() - spatial_dims + 1] =
278+
pos_w;
260279
}
261-
}
262-
}
263280

264-
std::vector<size_t> output_coords(this->outputShape_.dims(), 0);
265-
if (batch_dim >= 0) output_coords[batch_dim] = n;
266-
if (channel_dim >= 0) output_coords[channel_dim] = c;
267-
output_coords[this->outputShape_.dims() - spatial_dims] = h;
268-
if (spatial_dims > 1) {
269-
output_coords[this->outputShape_.dims() - spatial_dims + 1] = w;
270-
}
281+
size_t input_index = this->inputShape_.get_index(input_coords);
282+
ValueType val = input[input_index];
271283

272-
size_t output_index = this->outputShape_.get_index(output_coords);
273-
274-
if (!pooling_buf.empty()) {
275-
switch (this->poolingType_) {
276-
case kAverage:
277-
res[output_index] = avg_pooling(pooling_buf);
278-
break;
279-
case kMax:
280-
res[output_index] = max_pooling(pooling_buf);
281-
break;
282-
default:
283-
throw std::runtime_error("Unknown pooling type");
284+
if (this->poolingType_ == kMax) {
285+
if (count == 0 || val > max_val) {
286+
max_val = val;
287+
}
288+
} else {
289+
sum += val;
290+
}
291+
++count;
284292
}
285293
}
286294
}
287-
}
288-
}
289-
}
290-
291-
return res;
292-
}
293-
294-
template <typename ValueType>
295-
class PoolingLayerImplTBB : public PoolingLayerImpl<ValueType> {
296-
public:
297-
PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape,
298-
const Shape& strides = {2, 2},
299-
const Shape& pads = {0, 0, 0, 0},
300-
const Shape& dilations = {1, 1}, bool ceil_mode = false,
301-
const std::string& pooling_type = "average")
302-
: PoolingLayerImpl<ValueType>(input_shape, pooling_shape, strides, pads,
303-
dilations, ceil_mode, pooling_type) {}
304-
std::vector<ValueType> run(
305-
const std::vector<ValueType>& input) const override;
306-
};
307-
308-
template <typename ValueType>
309-
std::vector<ValueType> PoolingLayerImplTBB<ValueType>::run(
310-
const std::vector<ValueType>& input) const {
311-
if (input.size() != this->inputShape_.count()) {
312-
throw std::invalid_argument("Input size doesn't fit pooling layer");
313-
}
314295

315-
std::vector<ValueType> res(this->outputShape_.count(), ValueType(0));
296+
if (count == 0) return;
316297

317-
size_t spatial_dims = this->poolingShape_.dims();
318-
int batch_dim = this->inputShape_.dims() > spatial_dims ? 0 : -1;
319-
int channel_dim = this->inputShape_.dims() > spatial_dims + 1 ? 1 : -1;
298+
std::vector<size_t> output_coords(this->outputShape_.dims(), 0);
299+
if (batch_dim >= 0) output_coords[batch_dim] = n;
300+
if (channel_dim >= 0) output_coords[channel_dim] = c;
301+
output_coords[this->outputShape_.dims() - spatial_dims] = h;
302+
if (spatial_dims > 1) {
303+
output_coords[this->outputShape_.dims() - spatial_dims + 1] = w;
304+
}
320305

321-
oneapi::tbb::parallel_for(
322-
oneapi::tbb::blocked_range<size_t>(
323-
0, batch_dim >= 0 ? this->outputShape_[batch_dim] : 1),
324-
[&](const oneapi::tbb::blocked_range<size_t>& r1) {
325-
for (size_t n = r1.begin(); n < r1.end(); n++) {
326-
oneapi::tbb::parallel_for(
327-
oneapi::tbb::blocked_range<size_t>(
328-
0, channel_dim >= 0 ? this->outputShape_[channel_dim] : 1),
329-
[&](const oneapi::tbb::blocked_range<size_t>& r2) {
330-
for (size_t c = r2.begin(); c < r2.end(); c++) {
331-
for (size_t h = 0;
332-
h < this->outputShape_[this->outputShape_.dims() -
333-
spatial_dims];
334-
h++) {
335-
for (size_t w = 0;
336-
w <
337-
(spatial_dims > 1
338-
? this->outputShape_[this->outputShape_.dims() -
339-
spatial_dims + 1]
340-
: 1);
341-
w++) {
342-
std::vector<ValueType> pooling_buf;
343-
344-
int start_h = static_cast<int>(h * this->strides_[0]) -
345-
static_cast<int>(this->pads_[0]);
346-
int start_w =
347-
spatial_dims > 1
348-
? static_cast<int>(w * this->strides_[1]) -
349-
static_cast<int>(this->pads_[2])
350-
: 0;
351-
352-
for (size_t kh = 0; kh < this->poolingShape_[0]; kh++) {
353-
for (size_t kw = 0;
354-
kw <
355-
(spatial_dims > 1 ? this->poolingShape_[1] : 1);
356-
kw++) {
357-
int pos_h = start_h + static_cast<int>(
358-
kh * this->dilations_[0]);
359-
int pos_w =
360-
spatial_dims > 1
361-
? start_w + static_cast<int>(
362-
kw * this->dilations_[1])
363-
: 0;
364-
365-
if (pos_h >= 0 &&
366-
pos_h < static_cast<int>(
367-
this->inputShape_[this->inputShape_
368-
.dims() -
369-
spatial_dims]) &&
370-
(spatial_dims <= 1 ||
371-
(pos_w >= 0 &&
372-
pos_w < static_cast<int>(
373-
this->inputShape_
374-
[this->inputShape_.dims() -
375-
spatial_dims + 1])))) {
376-
std::vector<size_t> input_coords(
377-
this->inputShape_.dims(), 0);
378-
if (batch_dim >= 0) input_coords[batch_dim] = n;
379-
if (channel_dim >= 0) input_coords[channel_dim] = c;
380-
input_coords[this->inputShape_.dims() -
381-
spatial_dims] = pos_h;
382-
if (spatial_dims > 1) {
383-
input_coords[this->inputShape_.dims() -
384-
spatial_dims + 1] = pos_w;
385-
}
386-
387-
size_t input_index =
388-
this->inputShape_.get_index(input_coords);
389-
pooling_buf.push_back(input[input_index]);
390-
}
391-
}
392-
}
393-
394-
std::vector<size_t> output_coords(
395-
this->outputShape_.dims(), 0);
396-
if (batch_dim >= 0) output_coords[batch_dim] = n;
397-
if (channel_dim >= 0) output_coords[channel_dim] = c;
398-
output_coords[this->outputShape_.dims() - spatial_dims] =
399-
h;
400-
if (spatial_dims > 1) {
401-
output_coords[this->outputShape_.dims() - spatial_dims +
402-
1] = w;
403-
}
404-
405-
size_t output_index =
406-
this->outputShape_.get_index(output_coords);
407-
408-
if (!pooling_buf.empty()) {
409-
switch (this->poolingType_) {
410-
case kAverage:
411-
res[output_index] = avg_pooling(pooling_buf);
412-
break;
413-
case kMax:
414-
res[output_index] = max_pooling(pooling_buf);
415-
break;
416-
default:
417-
throw std::runtime_error("Unknown pooling type");
418-
}
419-
}
420-
}
421-
}
422-
}
423-
});
306+
size_t output_index = this->outputShape_.get_index(output_coords);
307+
308+
switch (this->poolingType_) {
309+
case kAverage:
310+
res[output_index] = sum / static_cast<ValueType>(count);
311+
break;
312+
case kMax:
313+
res[output_index] = max_val;
314+
break;
315+
default:
316+
throw std::runtime_error("Unknown pooling type");
424317
}
425-
});
318+
},
319+
options);
426320

427321
return res;
428322
}

0 commit comments

Comments
 (0)