embedded-dev-research
diff --git a/‎include/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions b/‎include/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎include/layers/EWLayer.hpp‎
Lines changed: 33 additions & 31 deletions b/‎include/layers/EWLayer.hpp‎
Lines changed: 33 additions & 31 deletions
diff --git a/‎include/layers/Layer.hpp‎
Lines changed: 5 additions & 127 deletions b/‎include/layers/Layer.hpp‎
Lines changed: 5 additions & 127 deletions
@@ -15,3 +15,6 @@ set(PERF_HEADERS "${perf_headers}" PARENT_SCOPE)
 
 file(GLOB_RECURSE reader_headers Weights_Reader/*.h Weights_Reader/*.hpp)
 set(READER_HEADERS "${reader_headers}" PARENT_SCOPE)
+
+file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp)
+set(PARALLEL_HEADERS "${parallel_headers}" PARENT_SCOPE)  # own variable: must not overwrite READER_HEADERS
@@ -46,7 +46,7 @@ class EWLayerImpl : public LayerImpl<ValueType> {
  public:
   EWLayerImpl() = delete;
   EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F,
-              float beta = 0.0F, int type_parall = 0);
+              float beta = 0.0F, ParBackend parallel_backend = ParBackend::Seq);
   EWLayerImpl(const EWLayerImpl& c) = default;
   EWLayerImpl& operator=(const EWLayerImpl& c) = default;
   std::vector<ValueType> run(
@@ -56,72 +56,74 @@ class EWLayerImpl : public LayerImpl<ValueType> {
   std::string func_;
   float alpha_;
   float beta_;
-  int type_parall_;
+  ParBackend parallel_backend_;
 };
 
 template <typename ValueType>
 EWLayerImpl<ValueType>::EWLayerImpl(const Shape& shape, std::string function,
-                                    float alpha, float beta, int type_parall)
+                                    float alpha, float beta,
+                                    ParBackend parallel_backend)
     : LayerImpl<ValueType>(shape, shape),
       func_(std::move(function)),
       alpha_(alpha),
       beta_(beta),
-      type_parall_(type_parall) {}
+      parallel_backend_(parallel_backend) {}
 
 template <typename ValueType>
 std::vector<ValueType> EWLayerImpl<ValueType>::run(
     const std::vector<ValueType>& input) const {
   std::vector<ValueType> res(this->outputShape_.count());
-  int available_threads = -1;
-  if (type_parall_ == 0) available_threads = 1;
-  if (type_parall_ == 1)
-    available_threads = std::thread::hardware_concurrency();
-  if (type_parall_ == 2)
-    available_threads = oneapi::tbb::info::default_concurrency();
-  if (type_parall_ == 3) available_threads = omp_get_max_threads();
+
+  // Set up the parallelism options.
+  parallel::Options options;
+  options.backend = parallel_backend_;
 
   if (func_ == "relu") {
-    parallel_for(
+    parallel::parallel_for(
         input.size(),
-        [&](int i) {
+        [&](std::size_t i) {
           res[i] = input[i] > ValueType(0) ? input[i] : ValueType(0);
         },
-        type_parall_);
+        options);
   } else if (func_ == "tanh") {
-    parallel_for(
+    parallel::parallel_for(
         input.size(),
-        [&](int i) { res[i] = static_cast<ValueType>(std::tanh(input[i])); },
-        type_parall_);
+        [&](std::size_t i) {
+          res[i] = static_cast<ValueType>(std::tanh(input[i]));
+        },
+        options);
   } else if (func_ == "sin") {
-    parallel_for(
+    parallel::parallel_for(
         input.size(),
-        [&](int i) { res[i] = static_cast<ValueType>(std::sin(input[i])); },
-        type_parall_);
+        [&](std::size_t i) {
+          res[i] = static_cast<ValueType>(std::sin(input[i]));
+        },
+        options);
   } else if (func_ == "minus") {
-    parallel_for(
-        input.size(), [&](int i) { res[i] = -input[i]; }, type_parall_);
+    parallel::parallel_for(
+        input.size(), [&](std::size_t i) { res[i] = -input[i]; }, options);
   } else if (func_ == "linear") {
-    parallel_for(
+    parallel::parallel_for(
         input.size(),
-        [&](int i) {
+        [&](std::size_t i) {
           res[i] = input[i] * static_cast<ValueType>(alpha_) +
                    static_cast<ValueType>(beta_);
         },
-        type_parall_);
+        options);
   } else if (func_ == "sigmoid") {
     if constexpr (std::is_integral_v<ValueType>) {
-      parallel_for(
+      parallel::parallel_for(
           input.size(),
-          [&](int i) {
+          [&](std::size_t i) {
             auto x_float = static_cast<float>(input[i]);
             float result = 1.0F / (1.0F + std::exp(-x_float));
             res[i] = static_cast<ValueType>(std::round(result));
           },
-          type_parall_);
+          options);
     } else {
-      parallel_for(
+      parallel::parallel_for(
           input.size(),
-          [&](int i) {
+          [&](std::size_t i) {
             ValueType x = input[i];
             if (x >= ValueType(0)) {
               ValueType z = std::exp(-x);
@@ -131,7 +133,7 @@ std::vector<ValueType> EWLayerImpl<ValueType>::run(
               res[i] = z / (ValueType(1) + z);
             }
           },
-          type_parall_);
+          options);
     }
   } else {
     throw std::invalid_argument("No such function for EWLayer");
 
@@ -1,6 +1,4 @@
 #pragma once
-#include <omp.h>
-
 #include <algorithm>
 #include <execution>
 #include <functional>
@@ -13,7 +11,7 @@
 
 #include "layers/Shape.hpp"
 #include "layers/Tensor.hpp"
-#include "oneapi/tbb.h"
+#include "parallel/parallel.hpp"
 
 namespace it_lab_ai {
 
@@ -39,6 +37,7 @@ enum LayerType : uint8_t {
 };
 
 enum ImplType : uint8_t { kDefault, kTBB, kSTL };
+using ParBackend = parallel::Backend;
 
 class Layer;
 
@@ -55,7 +54,8 @@ class Layer {
   PostOperations postops;
   int getID() const { return id_; }
   void setID(int id) { id_ = id; }
-  void setTypeParall(int type) { type_parall_ = type; }
+  void setParallelBackend(ParBackend backend) { parallel_backend_ = backend; }
+  ParBackend getParallelBackend() const { return parallel_backend_; }
   LayerType getName() const { return type_; }
   virtual void run(const std::vector<Tensor>& input,
                    std::vector<Tensor>& output) = 0;
@@ -66,7 +66,7 @@ class Layer {
  protected:
   int id_ = 0;
   LayerType type_;
-  int type_parall_;
+  ParBackend parallel_backend_ = ParBackend::Seq;
 };
 
 template <typename ValueType>
@@ -90,126 +90,4 @@ class LayerImpl {
   Shape inputShape_;
   Shape outputShape_;
 };
-
-template <typename Func>
-inline void parallel_for(int count, Func func, int mode = 0) {
-  static bool stl_available = true;
-  static bool tbb_available = true;
-  static bool omp_available = true;
-  const int MIN_CHUNK_SIZE = 1000;
-  if (count < MIN_CHUNK_SIZE) {
-    mode = 0;
-  }
-
-  switch (mode) {
-    case 0:  // Sequential
-    {
-      for (int i = 0; i < count; ++i) {
-        func(i);
-      }
-      break;
-    }
-
-    case 1:  // STL
-    {
-      if (stl_available) {
-        try {
-          int num_threads =
-              static_cast<int>(std::thread::hardware_concurrency());
-          if (num_threads == 0) num_threads = 4;
-
-          int min_chunk_size = std::max(1000, count / (num_threads * 4));
-          if (count / num_threads < min_chunk_size) {
-            num_threads = std::max(1, count / min_chunk_size);
-          }
-
-          std::vector<std::thread> threads;
-          threads.reserve(num_threads);
-
-          int chunk_size = count / num_threads;
-          int remainder = count % num_threads;
-
-          int start = 0;
-          for (int t = 0; t < num_threads; ++t) {
-            int end = start + chunk_size + (t < remainder ? 1 : 0);
-            if (start >= end) break;
-
-            threads.emplace_back([start, end, &func]() {
-              for (int i = start; i < end; ++i) {
-                func(i);
-              }
-            });
-
-            start = end;
-          }
-
-          for (auto& thread : threads) {
-            thread.join();
-          }
-
-        } catch (const std::exception& e) {
-          std::cout << "Thread execution failed: " << e.what()
-                    << ". Falling back to sequential.\n";
-          stl_available = false;
-          for (int i = 0; i < count; ++i) func(i);
-        }
-      } else {
-        for (int i = 0; i < count; ++i) func(i);
-      }
-      break;
-    }
-
-    case 2:  // Intel TBB
-    {
-      if (tbb_available) {
-        try {
-          oneapi::tbb::parallel_for(
-              oneapi::tbb::blocked_range<int>(0, count),
-              [&](const oneapi::tbb::blocked_range<int>& range) {
-                for (int i = range.begin(); i < range.end(); ++i) {
-                  func(i);
-                }
-              },
-              oneapi::tbb::auto_partitioner());
-        } catch (const std::exception& e) {
-          std::cout << "TBB execution failed: " << e.what()
-                    << ". Falling back to sequential.\n";
-          tbb_available = false;
-          for (int i = 0; i < count; ++i) func(i);
-        }
-      } else {
-        for (int i = 0; i < count; ++i) func(i);
-      }
-      break;
-    }
-
-    case 3:  // OpenMP
-    {
-      if (omp_available) {
-        try {
-          int num_threads = omp_get_max_threads();
-
-          int chunk_size = std::max(1000, count / (num_threads * 8));
-
-#pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads)
-          for (int i = 0; i < count; ++i) {
-            func(i);
-          }
-
-        } catch (...) {
-          std::cout << "OpenMP execution failed. Falling back to sequential.\n";
-          omp_available = false;
-          for (int i = 0; i < count; ++i) func(i);
-        }
-      } else {
-        for (int i = 0; i < count; ++i) func(i);
-      }
-      break;
-    }
-
-    default:
-      for (int i = 0; i < count; ++i) func(i);
-  }
-}
-
 }  // namespace it_lab_ai