Skip to content

Commit 3cb8263

Browse files
author
AndreySorokin7
committed
fix
1 parent 6f82796 commit 3cb8263

7 files changed

Lines changed: 302 additions & 202 deletions

File tree

include/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ set(PERF_HEADERS "${perf_headers}" PARENT_SCOPE)
1515

1616
file(GLOB_RECURSE reader_headers Weights_Reader/*.h Weights_Reader/*.hpp)
1717
set(READER_HEADERS "${reader_headers}" PARENT_SCOPE)
18+
19+
file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp)
20+
set(READER_HEADERS "${parallel_headers}" PARENT_SCOPE)

include/layers/EWLayer.hpp

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class EWLayerImpl : public LayerImpl<ValueType> {
4646
public:
4747
EWLayerImpl() = delete;
4848
EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F,
49-
float beta = 0.0F, int type_parall = 0);
49+
float beta = 0.0F, ParBackend parallel_backend = ParBackend::Seq);
5050
EWLayerImpl(const EWLayerImpl& c) = default;
5151
EWLayerImpl& operator=(const EWLayerImpl& c) = default;
5252
std::vector<ValueType> run(
@@ -56,72 +56,74 @@ class EWLayerImpl : public LayerImpl<ValueType> {
5656
std::string func_;
5757
float alpha_;
5858
float beta_;
59-
int type_parall_;
59+
ParBackend parallel_backend_;
6060
};
6161

6262
template <typename ValueType>
6363
EWLayerImpl<ValueType>::EWLayerImpl(const Shape& shape, std::string function,
64-
float alpha, float beta, int type_parall)
64+
float alpha, float beta,
65+
ParBackend parallel_backend)
6566
: LayerImpl<ValueType>(shape, shape),
6667
func_(std::move(function)),
6768
alpha_(alpha),
6869
beta_(beta),
69-
type_parall_(type_parall) {}
70+
parallel_backend_(parallel_backend) {}
7071

7172
template <typename ValueType>
7273
std::vector<ValueType> EWLayerImpl<ValueType>::run(
7374
const std::vector<ValueType>& input) const {
7475
std::vector<ValueType> res(this->outputShape_.count());
75-
int available_threads = -1;
76-
if (type_parall_ == 0) available_threads = 1;
77-
if (type_parall_ == 1)
78-
available_threads = std::thread::hardware_concurrency();
79-
if (type_parall_ == 2)
80-
available_threads = oneapi::tbb::info::default_concurrency();
81-
if (type_parall_ == 3) available_threads = omp_get_max_threads();
76+
77+
// Получаем настройки параллельности
78+
parallel::Options options;
79+
options.backend = parallel_backend_;
8280

8381
if (func_ == "relu") {
84-
parallel_for(
82+
parallel::parallel_for(
8583
input.size(),
86-
[&](int i) {
84+
[&](std::size_t i) {
8785
res[i] = input[i] > ValueType(0) ? input[i] : ValueType(0);
8886
},
89-
type_parall_);
87+
options);
9088
} else if (func_ == "tanh") {
91-
parallel_for(
89+
parallel::parallel_for(
9290
input.size(),
93-
[&](int i) { res[i] = static_cast<ValueType>(std::tanh(input[i])); },
94-
type_parall_);
91+
[&](std::size_t i) {
92+
res[i] = static_cast<ValueType>(std::tanh(input[i]));
93+
},
94+
options);
9595
} else if (func_ == "sin") {
96-
parallel_for(
96+
parallel::parallel_for(
9797
input.size(),
98-
[&](int i) { res[i] = static_cast<ValueType>(std::sin(input[i])); },
99-
type_parall_);
98+
[&](std::size_t i) {
99+
res[i] = static_cast<ValueType>(std::sin(input[i]));
100+
},
101+
options);
100102
} else if (func_ == "minus") {
101-
parallel_for(
102-
input.size(), [&](int i) { res[i] = -input[i]; }, type_parall_);
103+
parallel::parallel_for(
104+
input.size(), [&](std::size_t i) { res[i] = -input[i]; }, options);
103105
} else if (func_ == "linear") {
104-
parallel_for(
106+
parallel::parallel_for(
105107
input.size(),
106-
[&](int i) {
108+
[&](std::size_t i) {
107109
res[i] = input[i] * static_cast<ValueType>(alpha_) +
108110
static_cast<ValueType>(beta_);
109111
},
110-
type_parall_);
112+
options);
111113
} else if (func_ == "sigmoid") {
112114
if constexpr (std::is_integral_v<ValueType>) {
113-
parallel_for(
115+
parallel::parallel_for(
114116
input.size(),
115-
[&](int i) {
117+
[&](std::size_t i) {
116118
auto x_float = static_cast<float>(input[i]);
117119
float result = 1.0F / (1.0F + std::exp(-x_float));
118120
res[i] = static_cast<ValueType>(std::round(result));
119121
},
120-
type_parall_);
122+
options);
121123
} else {
122-
parallel_for(
124+
parallel::parallel_for(
123125
input.size(),
124-
[&](int i) {
126+
[&](std::size_t i) {
125127
ValueType x = input[i];
126128
if (x >= ValueType(0)) {
127129
ValueType z = std::exp(-x);
@@ -131,7 +133,7 @@ std::vector<ValueType> EWLayerImpl<ValueType>::run(
131133
res[i] = z / (ValueType(1) + z);
132134
}
133135
},
134-
type_parall_);
136+
options);
135137
}
136138
} else {
137139
throw std::invalid_argument("No such function for EWLayer");

include/layers/Layer.hpp

Lines changed: 5 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
#pragma once
2-
#include <omp.h>
3-
42
#include <algorithm>
53
#include <execution>
64
#include <functional>
@@ -13,7 +11,7 @@
1311

1412
#include "layers/Shape.hpp"
1513
#include "layers/Tensor.hpp"
16-
#include "oneapi/tbb.h"
14+
#include "parallel/parallel.hpp"
1715

1816
namespace it_lab_ai {
1917

@@ -39,6 +37,7 @@ enum LayerType : uint8_t {
3937
};
4038

4139
enum ImplType : uint8_t { kDefault, kTBB, kSTL };
40+
using ParBackend = parallel::Backend;
4241

4342
class Layer;
4443

@@ -55,7 +54,8 @@ class Layer {
5554
PostOperations postops;
5655
int getID() const { return id_; }
5756
void setID(int id) { id_ = id; }
58-
void setTypeParall(int type) { type_parall_ = type; }
57+
void setParallelBackend(ParBackend backend) { parallel_backend_ = backend; }
58+
ParBackend getParallelBackend() const { return parallel_backend_; }
5959
LayerType getName() const { return type_; }
6060
virtual void run(const std::vector<Tensor>& input,
6161
std::vector<Tensor>& output) = 0;
@@ -66,7 +66,7 @@ class Layer {
6666
protected:
6767
int id_ = 0;
6868
LayerType type_;
69-
int type_parall_;
69+
ParBackend parallel_backend_ = ParBackend::Seq;
7070
};
7171

7272
template <typename ValueType>
@@ -90,126 +90,4 @@ class LayerImpl {
9090
Shape inputShape_;
9191
Shape outputShape_;
9292
};
93-
94-
template <typename Func>
95-
inline void parallel_for(int count, Func func, int mode = 0) {
96-
static bool stl_available = true;
97-
static bool tbb_available = true;
98-
static bool omp_available = true;
99-
const int MIN_CHUNK_SIZE = 1000;
100-
if (count < MIN_CHUNK_SIZE) {
101-
mode = 0;
102-
}
103-
104-
switch (mode) {
105-
case 0: // Sequential
106-
{
107-
for (int i = 0; i < count; ++i) {
108-
func(i);
109-
}
110-
break;
111-
}
112-
113-
case 1: // STL
114-
{
115-
if (stl_available) {
116-
try {
117-
int num_threads =
118-
static_cast<int>(std::thread::hardware_concurrency());
119-
if (num_threads == 0) num_threads = 4;
120-
121-
int min_chunk_size = std::max(1000, count / (num_threads * 4));
122-
if (count / num_threads < min_chunk_size) {
123-
num_threads = std::max(1, count / min_chunk_size);
124-
}
125-
126-
std::vector<std::thread> threads;
127-
threads.reserve(num_threads);
128-
129-
int chunk_size = count / num_threads;
130-
int remainder = count % num_threads;
131-
132-
int start = 0;
133-
for (int t = 0; t < num_threads; ++t) {
134-
int end = start + chunk_size + (t < remainder ? 1 : 0);
135-
if (start >= end) break;
136-
137-
threads.emplace_back([start, end, &func]() {
138-
for (int i = start; i < end; ++i) {
139-
func(i);
140-
}
141-
});
142-
143-
start = end;
144-
}
145-
146-
for (auto& thread : threads) {
147-
thread.join();
148-
}
149-
150-
} catch (const std::exception& e) {
151-
std::cout << "Thread execution failed: " << e.what()
152-
<< ". Falling back to sequential.\n";
153-
stl_available = false;
154-
for (int i = 0; i < count; ++i) func(i);
155-
}
156-
} else {
157-
for (int i = 0; i < count; ++i) func(i);
158-
}
159-
break;
160-
}
161-
162-
case 2: // Intel TBB
163-
{
164-
if (tbb_available) {
165-
try {
166-
oneapi::tbb::parallel_for(
167-
oneapi::tbb::blocked_range<int>(0, count),
168-
[&](const oneapi::tbb::blocked_range<int>& range) {
169-
for (int i = range.begin(); i < range.end(); ++i) {
170-
func(i);
171-
}
172-
},
173-
oneapi::tbb::auto_partitioner());
174-
} catch (const std::exception& e) {
175-
std::cout << "TBB execution failed: " << e.what()
176-
<< ". Falling back to sequential.\n";
177-
tbb_available = false;
178-
for (int i = 0; i < count; ++i) func(i);
179-
}
180-
} else {
181-
for (int i = 0; i < count; ++i) func(i);
182-
}
183-
break;
184-
}
185-
186-
case 3: // OpenMP
187-
{
188-
if (omp_available) {
189-
try {
190-
int num_threads = omp_get_max_threads();
191-
192-
int chunk_size = std::max(1000, count / (num_threads * 8));
193-
194-
#pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads)
195-
for (int i = 0; i < count; ++i) {
196-
func(i);
197-
}
198-
199-
} catch (...) {
200-
std::cout << "OpenMP execution failed. Falling back to sequential.\n";
201-
omp_available = false;
202-
for (int i = 0; i < count; ++i) func(i);
203-
}
204-
} else {
205-
for (int i = 0; i < count; ++i) func(i);
206-
}
207-
break;
208-
}
209-
210-
default:
211-
for (int i = 0; i < count; ++i) func(i);
212-
}
213-
}
214-
21593
} // namespace it_lab_ai

0 commit comments

Comments
 (0)