11#pragma once
2- #include < omp.h>
3-
42#include < algorithm>
53#include < execution>
64#include < functional>
1311
1412#include " layers/Shape.hpp"
1513#include " layers/Tensor.hpp"
16- #include " oneapi/tbb.h "
14+ #include " parallel/parallel.hpp "
1715
1816namespace it_lab_ai {
1917
@@ -39,6 +37,7 @@ enum LayerType : uint8_t {
3937};
4038
4139enum ImplType : uint8_t { kDefault , kTBB , kSTL };
40+ using ParBackend = parallel::Backend;
4241
4342class Layer ;
4443
@@ -55,7 +54,8 @@ class Layer {
5554 PostOperations postops;
5655 int getID () const { return id_; }
5756 void setID (int id) { id_ = id; }
58- void setTypeParall (int type) { type_parall_ = type; }
57+ void setParallelBackend (ParBackend backend) { parallel_backend_ = backend; }
58+ ParBackend getParallelBackend () const { return parallel_backend_; }
5959 LayerType getName () const { return type_; }
6060 virtual void run (const std::vector<Tensor>& input,
6161 std::vector<Tensor>& output) = 0;
@@ -66,7 +66,7 @@ class Layer {
6666 protected:
6767 int id_ = 0 ;
6868 LayerType type_;
69- int type_parall_ ;
69+ ParBackend parallel_backend_ = ParBackend::Seq ;
7070};
7171
7272template <typename ValueType>
@@ -90,126 +90,4 @@ class LayerImpl {
9090 Shape inputShape_;
9191 Shape outputShape_;
9292};
93-
94- template <typename Func>
95- inline void parallel_for (int count, Func func, int mode = 0 ) {
96- static bool stl_available = true ;
97- static bool tbb_available = true ;
98- static bool omp_available = true ;
99- const int MIN_CHUNK_SIZE = 1000 ;
100- if (count < MIN_CHUNK_SIZE ) {
101- mode = 0 ;
102- }
103-
104- switch (mode) {
105- case 0 : // Sequential
106- {
107- for (int i = 0 ; i < count; ++i) {
108- func (i);
109- }
110- break ;
111- }
112-
113- case 1 : // STL
114- {
115- if (stl_available) {
116- try {
117- int num_threads =
118- static_cast <int >(std::thread::hardware_concurrency ());
119- if (num_threads == 0 ) num_threads = 4 ;
120-
121- int min_chunk_size = std::max (1000 , count / (num_threads * 4 ));
122- if (count / num_threads < min_chunk_size) {
123- num_threads = std::max (1 , count / min_chunk_size);
124- }
125-
126- std::vector<std::thread> threads;
127- threads.reserve (num_threads);
128-
129- int chunk_size = count / num_threads;
130- int remainder = count % num_threads;
131-
132- int start = 0 ;
133- for (int t = 0 ; t < num_threads; ++t) {
134- int end = start + chunk_size + (t < remainder ? 1 : 0 );
135- if (start >= end) break ;
136-
137- threads.emplace_back ([start, end, &func]() {
138- for (int i = start; i < end; ++i) {
139- func (i);
140- }
141- });
142-
143- start = end;
144- }
145-
146- for (auto & thread : threads) {
147- thread.join ();
148- }
149-
150- } catch (const std::exception& e) {
151- std::cout << " Thread execution failed: " << e.what ()
152- << " . Falling back to sequential.\n " ;
153- stl_available = false ;
154- for (int i = 0 ; i < count; ++i) func (i);
155- }
156- } else {
157- for (int i = 0 ; i < count; ++i) func (i);
158- }
159- break ;
160- }
161-
162- case 2 : // Intel TBB
163- {
164- if (tbb_available) {
165- try {
166- oneapi::tbb::parallel_for (
167- oneapi::tbb::blocked_range<int >(0 , count),
168- [&](const oneapi::tbb::blocked_range<int >& range) {
169- for (int i = range.begin (); i < range.end (); ++i) {
170- func (i);
171- }
172- },
173- oneapi::tbb::auto_partitioner ());
174- } catch (const std::exception& e) {
175- std::cout << " TBB execution failed: " << e.what ()
176- << " . Falling back to sequential.\n " ;
177- tbb_available = false ;
178- for (int i = 0 ; i < count; ++i) func (i);
179- }
180- } else {
181- for (int i = 0 ; i < count; ++i) func (i);
182- }
183- break ;
184- }
185-
186- case 3 : // OpenMP
187- {
188- if (omp_available) {
189- try {
190- int num_threads = omp_get_max_threads ();
191-
192- int chunk_size = std::max (1000 , count / (num_threads * 8 ));
193-
194- #pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads)
195- for (int i = 0 ; i < count; ++i) {
196- func (i);
197- }
198-
199- } catch (...) {
200- std::cout << " OpenMP execution failed. Falling back to sequential.\n " ;
201- omp_available = false ;
202- for (int i = 0 ; i < count; ++i) func (i);
203- }
204- } else {
205- for (int i = 0 ; i < count; ++i) func (i);
206- }
207- break ;
208- }
209-
210- default :
211- for (int i = 0 ; i < count; ++i) func (i);
212- }
213- }
214-
21593} // namespace it_lab_ai
0 commit comments