22#include < algorithm>
33#include < cmath>
44#include < cstdlib>
5+ #include < limits>
56#include < numeric>
67#include < stdexcept>
78#include < string>
89#include < utility>
910#include < vector>
1011
1112#include " layers/Layer.hpp"
12- #include " tbb/blocked_range2d.h"
13- #include " tbb/parallel_for.h"
13+ #include " parallel/parallel.hpp"
1414
1515namespace it_lab_ai {
1616
@@ -64,7 +64,6 @@ class PoolingLayer : public Layer {
6464 Shape dilations_;
6565 bool ceil_mode_;
6666 std::string poolingType_;
67- ImplType implType_;
6867};
6968
7069inline size_t coord_size (size_t coord, const Shape& shape) {
@@ -98,12 +97,13 @@ class PoolingLayerImpl : public LayerImpl<ValueType> {
9897 PoolingLayerImpl (const Shape& input_shape, const Shape& pooling_shape,
9998 const std::string& pooling_type = " average" )
10099 : PoolingLayerImpl(input_shape, pooling_shape, {2 , 2 }, {0 , 0 , 0 , 0 },
101- {1 , 1 }, false , pooling_type) {}
100+ {1 , 1 }, false , pooling_type, ParBackend:: kSeq ) {}
102101 PoolingLayerImpl (const Shape& input_shape, const Shape& pooling_shape,
103102 const Shape& strides = {2 , 2 },
104103 const Shape& pads = {0 , 0 , 0 , 0 },
105104 const Shape& dilations = {1 , 1 }, bool ceil_mode = false ,
106- const std::string& pooling_type = " average" );
105+ const std::string& pooling_type = " average" ,
106+ ParBackend parallel_backend = ParBackend::kSeq );
107107 PoolingLayerImpl (const PoolingLayerImpl& c) = default ;
108108 PoolingLayerImpl& operator =(const PoolingLayerImpl& c) = default ;
109109 std::vector<ValueType> run (
@@ -116,20 +116,22 @@ class PoolingLayerImpl : public LayerImpl<ValueType> {
116116 Shape dilations_;
117117 bool ceil_mode_;
118118 PoolingType poolingType_;
119+ ParBackend parallel_backend_;
119120};
120121
121122template <typename ValueType>
122123PoolingLayerImpl<ValueType>::PoolingLayerImpl(
123124 const Shape& input_shape, const Shape& pooling_shape, const Shape& strides,
124125 const Shape& pads, const Shape& dilations, bool ceil_mode,
125- const std::string& pooling_type)
126+ const std::string& pooling_type, ParBackend parallel_backend )
126127 : LayerImpl<ValueType>(input_shape, input_shape),
127128 poolingShape_ (pooling_shape),
128129 strides_(strides),
129130 pads_(pads),
130131 dilations_(dilations),
131132 ceil_mode_(ceil_mode),
132- poolingType_(kAverage ) {
133+ poolingType_(kAverage ),
134+ parallel_backend_(parallel_backend) {
133135 if (pooling_shape[0 ] == 0 && pooling_shape[1 ] == 0 ) {
134136 poolingShape_ = Shape ({input_shape[input_shape.dims () - 2 ],
135137 input_shape[input_shape.dims () - 1 ]});
@@ -208,221 +210,113 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
208210 int batch_dim = this ->inputShape_ .dims () > spatial_dims ? 0 : -1 ;
209211 int channel_dim = this ->inputShape_ .dims () > spatial_dims + 1 ? 1 : -1 ;
210212
211- for (size_t n = 0 ; n < (batch_dim >= 0 ? this ->outputShape_ [batch_dim] : 1 );
212- n++) {
213- for (size_t c = 0 ;
214- c < (channel_dim >= 0 ? this ->outputShape_ [channel_dim] : 1 ); c++) {
215- for (size_t h = 0 ;
216- h < this ->outputShape_ [this ->outputShape_ .dims () - spatial_dims];
217- h++) {
218- for (size_t w = 0 ;
219- w < (spatial_dims > 1
220- ? this ->outputShape_ [this ->outputShape_ .dims () -
221- spatial_dims + 1 ]
222- : 1 );
223- w++) {
224- std::vector<ValueType> pooling_buf;
225-
226- int start_h =
227- static_cast <int >(h * strides_[0 ]) - static_cast <int >(pads_[0 ]);
228- int start_w = spatial_dims > 1 ? static_cast <int >(w * strides_[1 ]) -
229- static_cast <int >(pads_[2 ])
230- : 0 ;
231-
232- for (size_t kh = 0 ; kh < poolingShape_[0 ]; kh++) {
233- for (size_t kw = 0 ; kw < (spatial_dims > 1 ? poolingShape_[1 ] : 1 );
234- kw++) {
235- int pos_h = start_h + static_cast <int >(kh * dilations_[0 ]);
236- int pos_w = spatial_dims > 1
237- ? start_w + static_cast <int >(kw * dilations_[1 ])
238- : 0 ;
239-
240- if (pos_h >= 0 &&
241- pos_h < static_cast <int >(
242- this ->inputShape_ [this ->inputShape_ .dims () -
243- spatial_dims]) &&
244- (spatial_dims <= 1 ||
245- (pos_w >= 0 &&
246- pos_w < static_cast <int >(
247- this ->inputShape_ [this ->inputShape_ .dims () -
248- spatial_dims + 1 ])))) {
249- std::vector<size_t > input_coords (this ->inputShape_ .dims (), 0 );
250- if (batch_dim >= 0 ) input_coords[batch_dim] = n;
251- if (channel_dim >= 0 ) input_coords[channel_dim] = c;
252- input_coords[this ->inputShape_ .dims () - spatial_dims] = pos_h;
253- if (spatial_dims > 1 ) {
254- input_coords[this ->inputShape_ .dims () - spatial_dims + 1 ] =
255- pos_w;
256- }
213+ size_t out_h = this ->outputShape_ [this ->outputShape_ .dims () - spatial_dims];
214+ size_t out_w =
215+ spatial_dims > 1
216+ ? this ->outputShape_ [this ->outputShape_ .dims () - spatial_dims + 1 ]
217+ : 1 ;
218+ size_t out_n = batch_dim >= 0 ? this ->outputShape_ [batch_dim] : 1 ;
219+ size_t out_c = channel_dim >= 0 ? this ->outputShape_ [channel_dim] : 1 ;
220+
221+ size_t total_work = out_n * out_c * out_h * out_w;
222+
223+ parallel::Options options;
224+ options.backend = parallel_backend_;
225+
226+ parallel::parallel_for (
227+ total_work,
228+ [&](size_t idx) {
229+ size_t tmp = idx;
230+ size_t w = 0 ;
231+ if (spatial_dims > 1 ) {
232+ w = tmp % out_w;
233+ tmp /= out_w;
234+ }
235+ size_t h = tmp % out_h;
236+ tmp /= out_h;
257237
258- size_t input_index = this ->inputShape_ .get_index (input_coords);
259- pooling_buf.push_back (input[input_index]);
238+ size_t c = 0 ;
239+ if (channel_dim >= 0 ) {
240+ c = tmp % out_c;
241+ tmp /= out_c;
242+ }
243+ size_t n = batch_dim >= 0 ? tmp : 0 ;
244+
245+ int start_h =
246+ static_cast <int >(h * strides_[0 ]) - static_cast <int >(pads_[0 ]);
247+ int start_w = spatial_dims > 1 ? static_cast <int >(w * strides_[1 ]) -
248+ static_cast <int >(pads_[2 ])
249+ : 0 ;
250+
251+ auto sum = ValueType (0 );
252+ ValueType max_val = std::numeric_limits<ValueType>::lowest ();
253+ size_t count = 0 ;
254+
255+ for (size_t kh = 0 ; kh < poolingShape_[0 ]; kh++) {
256+ for (size_t kw = 0 ; kw < (spatial_dims > 1 ? poolingShape_[1 ] : 1 );
257+ kw++) {
258+ int pos_h = start_h + static_cast <int >(kh * dilations_[0 ]);
259+ int pos_w = spatial_dims > 1
260+ ? start_w + static_cast <int >(kw * dilations_[1 ])
261+ : 0 ;
262+
263+ if (pos_h >= 0 &&
264+ pos_h < static_cast <int >(
265+ this ->inputShape_ [this ->inputShape_ .dims () -
266+ spatial_dims]) &&
267+ (spatial_dims <= 1 ||
268+ (pos_w >= 0 &&
269+ pos_w < static_cast <int >(
270+ this ->inputShape_ [this ->inputShape_ .dims () -
271+ spatial_dims + 1 ])))) {
272+ std::vector<size_t > input_coords (this ->inputShape_ .dims (), 0 );
273+ if (batch_dim >= 0 ) input_coords[batch_dim] = n;
274+ if (channel_dim >= 0 ) input_coords[channel_dim] = c;
275+ input_coords[this ->inputShape_ .dims () - spatial_dims] = pos_h;
276+ if (spatial_dims > 1 ) {
277+ input_coords[this ->inputShape_ .dims () - spatial_dims + 1 ] =
278+ pos_w;
260279 }
261- }
262- }
263280
264- std::vector<size_t > output_coords (this ->outputShape_ .dims (), 0 );
265- if (batch_dim >= 0 ) output_coords[batch_dim] = n;
266- if (channel_dim >= 0 ) output_coords[channel_dim] = c;
267- output_coords[this ->outputShape_ .dims () - spatial_dims] = h;
268- if (spatial_dims > 1 ) {
269- output_coords[this ->outputShape_ .dims () - spatial_dims + 1 ] = w;
270- }
281+ size_t input_index = this ->inputShape_ .get_index (input_coords);
282+ ValueType val = input[input_index];
271283
272- size_t output_index = this ->outputShape_ .get_index (output_coords);
273-
274- if (!pooling_buf.empty ()) {
275- switch (this ->poolingType_ ) {
276- case kAverage :
277- res[output_index] = avg_pooling (pooling_buf);
278- break ;
279- case kMax :
280- res[output_index] = max_pooling (pooling_buf);
281- break ;
282- default :
283- throw std::runtime_error (" Unknown pooling type" );
284+ if (this ->poolingType_ == kMax ) {
285+ if (count == 0 || val > max_val) {
286+ max_val = val;
287+ }
288+ } else {
289+ sum += val;
290+ }
291+ ++count;
284292 }
285293 }
286294 }
287- }
288- }
289- }
290-
291- return res;
292- }
293-
294- template <typename ValueType>
295- class PoolingLayerImplTBB : public PoolingLayerImpl <ValueType> {
296- public:
297- PoolingLayerImplTBB (const Shape& input_shape, const Shape& pooling_shape,
298- const Shape& strides = {2 , 2 },
299- const Shape& pads = {0 , 0 , 0 , 0 },
300- const Shape& dilations = {1 , 1 }, bool ceil_mode = false ,
301- const std::string& pooling_type = " average" )
302- : PoolingLayerImpl<ValueType>(input_shape, pooling_shape, strides, pads,
303- dilations, ceil_mode, pooling_type) {}
304- std::vector<ValueType> run (
305- const std::vector<ValueType>& input) const override ;
306- };
307-
308- template <typename ValueType>
309- std::vector<ValueType> PoolingLayerImplTBB<ValueType>::run(
310- const std::vector<ValueType>& input) const {
311- if (input.size () != this ->inputShape_ .count ()) {
312- throw std::invalid_argument (" Input size doesn't fit pooling layer" );
313- }
314295
315- std::vector<ValueType> res ( this -> outputShape_ . count (), ValueType ( 0 )) ;
296+ if ( count == 0 ) return ;
316297
317- size_t spatial_dims = this ->poolingShape_ .dims ();
318- int batch_dim = this ->inputShape_ .dims () > spatial_dims ? 0 : -1 ;
319- int channel_dim = this ->inputShape_ .dims () > spatial_dims + 1 ? 1 : -1 ;
298+ std::vector<size_t > output_coords (this ->outputShape_ .dims (), 0 );
299+ if (batch_dim >= 0 ) output_coords[batch_dim] = n;
300+ if (channel_dim >= 0 ) output_coords[channel_dim] = c;
301+ output_coords[this ->outputShape_ .dims () - spatial_dims] = h;
302+ if (spatial_dims > 1 ) {
303+ output_coords[this ->outputShape_ .dims () - spatial_dims + 1 ] = w;
304+ }
320305
321- oneapi::tbb::parallel_for (
322- oneapi::tbb::blocked_range<size_t >(
323- 0 , batch_dim >= 0 ? this ->outputShape_ [batch_dim] : 1 ),
324- [&](const oneapi::tbb::blocked_range<size_t >& r1) {
325- for (size_t n = r1.begin (); n < r1.end (); n++) {
326- oneapi::tbb::parallel_for (
327- oneapi::tbb::blocked_range<size_t >(
328- 0 , channel_dim >= 0 ? this ->outputShape_ [channel_dim] : 1 ),
329- [&](const oneapi::tbb::blocked_range<size_t >& r2) {
330- for (size_t c = r2.begin (); c < r2.end (); c++) {
331- for (size_t h = 0 ;
332- h < this ->outputShape_ [this ->outputShape_ .dims () -
333- spatial_dims];
334- h++) {
335- for (size_t w = 0 ;
336- w <
337- (spatial_dims > 1
338- ? this ->outputShape_ [this ->outputShape_ .dims () -
339- spatial_dims + 1 ]
340- : 1 );
341- w++) {
342- std::vector<ValueType> pooling_buf;
343-
344- int start_h = static_cast <int >(h * this ->strides_ [0 ]) -
345- static_cast <int >(this ->pads_ [0 ]);
346- int start_w =
347- spatial_dims > 1
348- ? static_cast <int >(w * this ->strides_ [1 ]) -
349- static_cast <int >(this ->pads_ [2 ])
350- : 0 ;
351-
352- for (size_t kh = 0 ; kh < this ->poolingShape_ [0 ]; kh++) {
353- for (size_t kw = 0 ;
354- kw <
355- (spatial_dims > 1 ? this ->poolingShape_ [1 ] : 1 );
356- kw++) {
357- int pos_h = start_h + static_cast <int >(
358- kh * this ->dilations_ [0 ]);
359- int pos_w =
360- spatial_dims > 1
361- ? start_w + static_cast <int >(
362- kw * this ->dilations_ [1 ])
363- : 0 ;
364-
365- if (pos_h >= 0 &&
366- pos_h < static_cast <int >(
367- this ->inputShape_ [this ->inputShape_
368- .dims () -
369- spatial_dims]) &&
370- (spatial_dims <= 1 ||
371- (pos_w >= 0 &&
372- pos_w < static_cast <int >(
373- this ->inputShape_
374- [this ->inputShape_ .dims () -
375- spatial_dims + 1 ])))) {
376- std::vector<size_t > input_coords (
377- this ->inputShape_ .dims (), 0 );
378- if (batch_dim >= 0 ) input_coords[batch_dim] = n;
379- if (channel_dim >= 0 ) input_coords[channel_dim] = c;
380- input_coords[this ->inputShape_ .dims () -
381- spatial_dims] = pos_h;
382- if (spatial_dims > 1 ) {
383- input_coords[this ->inputShape_ .dims () -
384- spatial_dims + 1 ] = pos_w;
385- }
386-
387- size_t input_index =
388- this ->inputShape_ .get_index (input_coords);
389- pooling_buf.push_back (input[input_index]);
390- }
391- }
392- }
393-
394- std::vector<size_t > output_coords (
395- this ->outputShape_ .dims (), 0 );
396- if (batch_dim >= 0 ) output_coords[batch_dim] = n;
397- if (channel_dim >= 0 ) output_coords[channel_dim] = c;
398- output_coords[this ->outputShape_ .dims () - spatial_dims] =
399- h;
400- if (spatial_dims > 1 ) {
401- output_coords[this ->outputShape_ .dims () - spatial_dims +
402- 1 ] = w;
403- }
404-
405- size_t output_index =
406- this ->outputShape_ .get_index (output_coords);
407-
408- if (!pooling_buf.empty ()) {
409- switch (this ->poolingType_ ) {
410- case kAverage :
411- res[output_index] = avg_pooling (pooling_buf);
412- break ;
413- case kMax :
414- res[output_index] = max_pooling (pooling_buf);
415- break ;
416- default :
417- throw std::runtime_error (" Unknown pooling type" );
418- }
419- }
420- }
421- }
422- }
423- });
306+ size_t output_index = this ->outputShape_ .get_index (output_coords);
307+
308+ switch (this ->poolingType_ ) {
309+ case kAverage :
310+ res[output_index] = sum / static_cast <ValueType>(count);
311+ break ;
312+ case kMax :
313+ res[output_index] = max_val;
314+ break ;
315+ default :
316+ throw std::runtime_error (" Unknown pooling type" );
424317 }
425- });
318+ },
319+ options);
426320
427321 return res;
428322}
0 commit comments