Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
f14195d
1
Nov 4, 2025
b19191f
fix
Nov 5, 2025
9efc295
Merge branch 'main' into AndreySorokin7/Add_parall_ew_layer
AndreySorokin7 Nov 5, 2025
30a33ff
fix
Nov 5, 2025
4a3d16e
fix
Nov 5, 2025
406387d
fix
Nov 5, 2025
6f82796
fix
Nov 5, 2025
3cb8263
fix
Nov 12, 2025
2d369b6
fix
Nov 12, 2025
0412b6a
fix
Nov 12, 2025
5521152
fix
Nov 12, 2025
4293356
fix
Nov 12, 2025
0ba7e1b
fix
Nov 12, 2025
f5f0f14
fix
Nov 12, 2025
8222f23
fix
Nov 12, 2025
0bb0d02
fix
Nov 12, 2025
66b3b93
fix
Nov 12, 2025
04c3815
fix
Nov 12, 2025
7430245
fix
Nov 12, 2025
56c6d89
fix
Nov 17, 2025
46bfe0b
fix
Nov 20, 2025
16e39bf
fix
Nov 20, 2025
4eac55a
fix
Nov 20, 2025
bbaa426
fix
Nov 20, 2025
3d7d56c
fix
Nov 20, 2025
4684d9c
fix
Nov 20, 2025
0d7e4bf
fix
Nov 20, 2025
83732e7
fix
Nov 20, 2025
a9dc8c3
fix
Nov 20, 2025
02b39ab
fix
Nov 20, 2025
5f921f1
fix
Nov 20, 2025
4a8c2f5
fix
Nov 20, 2025
a453ed9
fix
Nov 20, 2025
40a343b
fix
Nov 20, 2025
e1a1825
fix
Nov 20, 2025
15ee554
fix
Nov 20, 2025
a19776c
fix
Nov 20, 2025
f15530c
fix
Nov 26, 2025
14ab537
Merge branch 'main' into AndreySorokin7/Add_parall_ew_layer
AndreySorokin7 Nov 28, 2025
2232cd6
Update test_ewlayer.cpp
AndreySorokin7 Dec 1, 2025
e5c56ac
Update backends.hpp
AndreySorokin7 Dec 1, 2025
2a3bd2a
Update backends.hpp
AndreySorokin7 Dec 1, 2025
971aac0
Update backends.hpp
AndreySorokin7 Dec 1, 2025
70c83f1
fix
Dec 2, 2025
ba9ea84
fix
Dec 2, 2025
23db74a
fix
Dec 3, 2025
c656a85
fix
Dec 3, 2025
8e1c6f1
fix
Dec 3, 2025
0be51fa
fix
Dec 3, 2025
7f88c7a
fix
Dec 3, 2025
d9f3d13
link_libraries
Dec 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ set(CMAKE_CXX_STANDARD 17)

enable_testing()

# Locate OpenMP and, when present, expose it to the build through the
# HAS_OPENMP compile definition and the OpenMP::OpenMP_CXX link dependency.
find_package(OpenMP REQUIRED)

# NOTE(review): find_package() is called with REQUIRED, so configuration is
# expected to stop before reaching this point when OpenMP is missing; the
# else() branch below therefore looks unreachable. Confirm whether OpenMP is
# meant to be optional (drop REQUIRED) or mandatory (drop the else() branch).
if(OpenMP_FOUND)
message(STATUS "OpenMP found - enabling parallel support")
# HAS_OPENMP selects the OpenMP implementation of impl_omp in
# include/parallel/backends.hpp.
add_definitions(-DHAS_OPENMP)
link_libraries(OpenMP::OpenMP_CXX)
else()
message(STATUS "OpenMP not found - parallel features disabled")
endif()

include_directories("include")

list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
Expand All @@ -34,8 +44,6 @@ add_subdirectory(3rdparty)

include(cmake/opencv_config.cmake)

find_package(OpenMP REQUIRED)

if (NOT WIN32)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror")
Expand Down
3 changes: 3 additions & 0 deletions include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ set(PERF_HEADERS "${perf_headers}" PARENT_SCOPE)

file(GLOB_RECURSE reader_headers Weights_Reader/*.h Weights_Reader/*.hpp)
set(READER_HEADERS "${reader_headers}" PARENT_SCOPE)

# Collect every header under parallel/ and export the list to the parent scope.
# NOTE(review): the list is exported as LAYERS_HEADERS, whereas the neighbouring
# groups export a variable named after their own directory (PERF_HEADERS,
# READER_HEADERS). If LAYERS_HEADERS is also set earlier in this file, its value
# is replaced here - confirm whether a dedicated PARALLEL_HEADERS variable (or
# appending to LAYERS_HEADERS) was intended.
file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp)
set(LAYERS_HEADERS "${parallel_headers}" PARENT_SCOPE)
96 changes: 62 additions & 34 deletions include/layers/EWLayer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ class EWLayerImpl : public LayerImpl<ValueType> {
public:
EWLayerImpl() = delete;
EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F,
float beta = 0.0F);
float beta = 0.0F,
ParBackend parallel_backend = ParBackend::kSeq);
EWLayerImpl(const EWLayerImpl& c) = default;
EWLayerImpl& operator=(const EWLayerImpl& c) = default;
std::vector<ValueType> run(
Expand All @@ -56,57 +57,84 @@ class EWLayerImpl : public LayerImpl<ValueType> {
std::string func_;
float alpha_;
float beta_;
ParBackend parallel_backend_;
};

template <typename ValueType>
EWLayerImpl<ValueType>::EWLayerImpl(const Shape& shape, std::string function,
float alpha, float beta)
float alpha, float beta,
ParBackend parallel_backend)
: LayerImpl<ValueType>(shape, shape),
func_(std::move(function)),
alpha_(alpha),
beta_(beta) {}
beta_(beta),
parallel_backend_(parallel_backend) {}

template <typename ValueType>
std::vector<ValueType> EWLayerImpl<ValueType>::run(
const std::vector<ValueType>& input) const {
std::vector<ValueType> res(this->outputShape_.count());

parallel::Options options;
options.backend = parallel_backend_;

if (func_ == "relu") {
std::transform(input.begin(), input.end(), res.begin(), relu<ValueType>);
parallel::parallel_for(
input.size(),
[&](std::size_t i) {
res[i] = input[i] > ValueType(0) ? input[i] : ValueType(0);
},
options);
} else if (func_ == "tanh") {
auto tanh = [&](const ValueType& value) -> ValueType {
return static_cast<ValueType>(std::tanh(value));
};
std::transform(input.begin(), input.end(), res.begin(), tanh);
parallel::parallel_for(
input.size(),
[&](std::size_t i) {
res[i] = static_cast<ValueType>(std::tanh(input[i]));
},
options);
} else if (func_ == "sin") {
auto sin = [&](const ValueType& value) -> ValueType {
return static_cast<ValueType>(std::sin(value));
};
std::transform(input.begin(), input.end(), res.begin(), sin);
parallel::parallel_for(
input.size(),
[&](std::size_t i) {
res[i] = static_cast<ValueType>(std::sin(input[i]));
},
options);
} else if (func_ == "minus") {
auto minus = [&](const ValueType& value) -> ValueType { return -value; };
std::transform(input.begin(), input.end(), res.begin(), minus);
parallel::parallel_for(
input.size(), [&](std::size_t i) { res[i] = -input[i]; }, options);
} else if (func_ == "linear") {
auto linear = [&](const ValueType& value) -> ValueType {
return value * static_cast<ValueType>(alpha_) +
static_cast<ValueType>(beta_);
};
std::transform(input.begin(), input.end(), res.begin(), linear);
parallel::parallel_for(
input.size(),
[&](std::size_t i) {
res[i] = input[i] * static_cast<ValueType>(alpha_) +
static_cast<ValueType>(beta_);
},
options);
} else if (func_ == "sigmoid") {
auto sigmoid = [](ValueType x) -> ValueType {
if constexpr (std::is_integral_v<ValueType>) {
auto x_float = static_cast<float>(x);
float result = 1.0F / (1.0F + std::exp(-x_float));
return static_cast<ValueType>(std::round(result));
} else {
if (x >= ValueType(0)) {
ValueType z = std::exp(-x);
return ValueType(1) / (ValueType(1) + z);
}
ValueType z = std::exp(x);
return z / (ValueType(1) + z);
}
};
std::transform(input.cbegin(), input.cend(), res.begin(), sigmoid);
if constexpr (std::is_integral_v<ValueType>) {
parallel::parallel_for(
input.size(),
[&](std::size_t i) {
auto x_float = static_cast<float>(input[i]);
float result = 1.0F / (1.0F + std::exp(-x_float));
res[i] = static_cast<ValueType>(std::round(result));
},
options);
} else {
parallel::parallel_for(
input.size(),
[&](std::size_t i) {
ValueType x = input[i];
if (x >= ValueType(0)) {
ValueType z = std::exp(-x);
res[i] = ValueType(1) / (ValueType(1) + z);
} else {
ValueType z = std::exp(x);
res[i] = z / (ValueType(1) + z);
}
},
options);
}
} else {
throw std::invalid_argument("No such function for EWLayer");
}
Expand Down
11 changes: 9 additions & 2 deletions include/layers/Layer.hpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
#pragma once
#include <algorithm>
#include <execution>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>

#include "layers/Shape.hpp"
#include "layers/Tensor.hpp"
#include "oneapi/tbb.h"
#include "parallel/parallel.hpp"

namespace it_lab_ai {

Expand All @@ -33,6 +37,7 @@ enum LayerType : uint8_t {
};

enum ImplType : uint8_t { kDefault, kTBB, kSTL };
using ParBackend = parallel::Backend;

class Layer;

Expand All @@ -49,6 +54,8 @@ class Layer {
PostOperations postops;
int getID() const { return id_; }
void setID(int id) { id_ = id; }
void setParallelBackend(ParBackend backend) { parallel_backend_ = backend; }
ParBackend getParallelBackend() const { return parallel_backend_; }
LayerType getName() const { return type_; }
virtual void run(const std::vector<Tensor>& input,
std::vector<Tensor>& output) = 0;
Expand All @@ -59,6 +66,7 @@ class Layer {
protected:
int id_ = 0;
LayerType type_;
ParBackend parallel_backend_ = ParBackend::kSeq;
};

template <typename ValueType>
Expand All @@ -82,5 +90,4 @@ class LayerImpl {
Shape inputShape_;
Shape outputShape_;
};

} // namespace it_lab_ai
120 changes: 120 additions & 0 deletions include/parallel/backends.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#pragma once
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/info.h>
#include <oneapi/tbb/parallel_for.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <functional>
#include <limits>
#include <thread>
#include <vector>

namespace it_lab_ai {
namespace parallel {

/// Identifies which execution backend a parallel loop should use.
///
/// The numeric values are fixed (0..3, in declaration order) and the
/// underlying type is a single byte.
enum class Backend : std::uint8_t {
  kSeq,      ///< 0: sequential execution (see impl_seq).
  kThreads,  ///< 1: std::thread based execution (see impl_threads).
  kTbb,      ///< 2: oneTBB based execution (see impl_tbb).
  kOmp,      ///< 3: OpenMP based execution (see impl_omp).
};

/// Tuning knobs passed to the backend implementations.
struct Options {
  /// Backend to run on; sequential unless the caller overrides it.
  Backend backend{Backend::kSeq};
  /// Upper bound on worker threads. Values <= 0 make impl_threads and
  /// impl_omp derive the count from std::thread::hardware_concurrency().
  int max_threads{0};
  /// NOTE(review): not read by any backend in this header; presumably the
  /// dispatcher uses it as the problem size below which work stays
  /// sequential - confirm against parallel.hpp.
  std::size_t min_parallel_n{1000};
  /// Work granularity: lower bound for the per-thread chunk in impl_threads
  /// and the blocked_range grainsize in impl_tbb.
  std::size_t grain{1024};
};

/// Sequential backend: calls `func(i)` for i = 0, 1, ..., count - 1, in
/// ascending order, on the calling thread.
///
/// @param count Number of iterations; nothing is invoked when it is 0.
/// @param func  Callback receiving the current index. Anything it throws
///              propagates to the caller and stops the iteration.
inline void impl_seq(std::size_t count,
                     const std::function<void(std::size_t)>& func) {
  std::size_t index = 0;
  while (index != count) {
    func(index);
    ++index;
  }
}

/// std::thread backend: splits [0, count) into contiguous chunks and runs
/// `func(i)` for every index, one chunk per thread.
///
/// The number of chunks is `opt.max_threads` (or hardware_concurrency(), or 4
/// when that is unknown), reduced so that no chunk is smaller than
/// max(opt.grain, count / (threads * 4)) and never larger than `count`.
/// The last chunk runs on the calling thread; the others run on freshly
/// created threads that are always joined before this function returns.
///
/// @param count Number of iterations; returns immediately when it is 0.
/// @param func  Callback receiving the index. It is invoked concurrently from
///              several threads and must be safe to call that way.
/// @param opt   Reads `max_threads` and `grain`.
/// @throws Rethrows the exception of the lowest-numbered failing chunk, or the
///         error raised while creating a thread (remaining chunks are then
///         not dispatched). Previously either case called std::terminate.
inline void impl_threads(std::size_t count,
                         const std::function<void(std::size_t)>& func,
                         const Options& opt) {
  if (count == 0) return;

  int requested = opt.max_threads > 0
                      ? opt.max_threads
                      : static_cast<int>(std::thread::hardware_concurrency());
  // hardware_concurrency() may report 0 when the value is not computable.
  if (requested <= 0) requested = 4;

  // Work in size_t from here on to avoid mixed signed/unsigned arithmetic.
  std::size_t workers = static_cast<std::size_t>(requested);
  const std::size_t min_chunk_size =
      std::max(opt.grain, count / (workers * 4));
  if (count / workers < min_chunk_size) {
    // The branch is only taken when min_chunk_size > 0, so the division is
    // safe.
    workers = std::max<std::size_t>(1, count / min_chunk_size);
  }
  // Never plan more chunks than there are iterations.
  workers = std::min(workers, count);

  const std::size_t chunk_size = count / workers;
  const std::size_t remainder = count % workers;

  // One slot per chunk: each chunk writes only its own slot, so no locking is
  // needed, and the slots are read only after every thread has been joined.
  std::vector<std::exception_ptr> errors(workers);
  const auto run_chunk = [&func, &errors](std::size_t slot, std::size_t begin,
                                          std::size_t end) noexcept {
    try {
      for (std::size_t i = begin; i < end; ++i) {
        func(i);
      }
    } catch (...) {
      // An exception must not leave a thread entry point; keep it for the
      // caller instead.
      errors[slot] = std::current_exception();
    }
  };

  std::vector<std::thread> threads;
  threads.reserve(workers - 1);
  std::exception_ptr spawn_error;

  std::size_t start = 0;
  for (std::size_t t = 0; t < workers; ++t) {
    // The first `remainder` chunks take one extra element.
    const std::size_t end = start + chunk_size + (t < remainder ? 1 : 0);

    if (t + 1 == workers) {
      // Run the final chunk here rather than leaving the caller idle.
      run_chunk(t, start, end);
    } else {
      try {
        threads.emplace_back(run_chunk, t, start, end);
      } catch (...) {
        // Destroying a joinable std::thread terminates the program, so stop
        // dispatching and fall through to the join loop before reporting.
        spawn_error = std::current_exception();
        break;
      }
    }
    start = end;
  }

  for (auto& thread : threads) {
    thread.join();
  }

  if (spawn_error) std::rethrow_exception(spawn_error);
  for (const auto& error : errors) {
    if (error) std::rethrow_exception(error);
  }
}

/// oneTBB backend: runs `func(i)` for every i in [0, count) through
/// oneapi::tbb::parallel_for over a blocked_range with auto_partitioner.
///
/// @param count Number of iterations.
/// @param func  Callback receiving the index; it may be invoked concurrently
///              from several TBB worker threads.
/// @param opt   Only `grain` is read (used as the range grainsize);
///              `max_threads` is not consulted by this backend.
inline void impl_tbb(std::size_t count,
                     const std::function<void(std::size_t)>& func,
                     const Options& opt) {
  // blocked_range requires a strictly positive grainsize; a caller-supplied
  // grain of 0 is clamped to 1 instead of violating that precondition.
  const std::size_t grain = opt.grain > 0 ? opt.grain : 1;

  oneapi::tbb::parallel_for(
      oneapi::tbb::blocked_range<std::size_t>(0, count, grain),
      [&func](const oneapi::tbb::blocked_range<std::size_t>& range) {
        for (std::size_t i = range.begin(); i < range.end(); ++i) {
          func(i);
        }
      },
      oneapi::tbb::auto_partitioner());
}

#ifdef HAS_OPENMP
/// OpenMP backend: runs `func(i)` for every i in [0, count) inside an
/// `omp parallel for schedule(static)` region.
///
/// @param count Number of iterations; returns immediately when it is 0.
///              Counts that do not fit in `int` (the loop index type used for
///              the OpenMP loop) are executed sequentially via impl_seq.
/// @param func  Callback receiving the index; it is invoked concurrently from
///              several OpenMP threads.
///              NOTE(review): exceptions thrown by `func` are not caught
///              inside the parallel region - callers should pass a
///              non-throwing callback.
/// @param opt   Only `max_threads` is read; `grain` is not used because the
///              static schedule is given no chunk size.
inline void impl_omp(std::size_t count,
                     const std::function<void(std::size_t)>& func,
                     const Options& opt) {
  if (count == 0) return;

  int num_threads = opt.max_threads > 0
                        ? opt.max_threads
                        : static_cast<int>(std::thread::hardware_concurrency());
  // hardware_concurrency() may report 0; the num_threads clause needs a
  // positive value, so use the same fallback as impl_threads.
  if (num_threads <= 0) num_threads = 4;

  // The loop below indexes with int; larger ranges fall back to the
  // sequential implementation instead of truncating.
  if (count > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
    impl_seq(count, func);
    return;
  }
  const int int_count = static_cast<int>(count);

#pragma omp parallel for schedule(static) num_threads(num_threads)
  for (int i = 0; i < int_count; ++i) {
    func(static_cast<std::size_t>(i));
  }
}
#else
/// Fallback used when the build does not define HAS_OPENMP: executes the
/// loop sequentially through impl_seq. `opt` is accepted only to keep the
/// signature identical to the OpenMP variant.
inline void impl_omp(std::size_t count,
                     const std::function<void(std::size_t)>& func,
                     [[maybe_unused]] const Options& opt) {
  impl_seq(count, func);
}
#endif

} // namespace parallel
} // namespace it_lab_ai
Loading
Loading