Changes

NeiroYT · NeiroYT · commit 06c0d3a5cc2f · 2026-02-27T17:57:30.000+03:00
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
@@ -5,16 +5,17 @@ file(GLOB_RECURSE graphT_headers graph_transformations/*.h graph_transformations
 set(GRAPHT_HEADERS "${graphT_headers}" PARENT_SCOPE)
 
 file(GLOB_RECURSE layers_headers layers/*.h layers/*.hpp)
-set(LAYERS_HEADERS "${layers_headers}" PARENT_SCOPE)
+file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp)
+set(LAYERS_HEADERS "${layers_headers}" "${parallel_headers}" PARENT_SCOPE)
 
 file(GLOB_RECURSE layers_oneDNN_headers layers_oneDNN/*.h layers_oneDNN/*.hpp)
 set(LAYERS_ONEDNN_HEADERS "${layers_oneDNN_headers}" PARENT_SCOPE)
 
+file(GLOB_RECURSE layers_fused_headers layers_fused/*.h layers_fused/*.hpp)
+set(LAYERS_FUSED_HEADERS "${layers_fused_headers}" PARENT_SCOPE)
+
 file(GLOB_RECURSE perf_headers perf/*.h perf/*.hpp)
 set(PERF_HEADERS "${perf_headers}" PARENT_SCOPE)
 
 file(GLOB_RECURSE reader_headers Weights_Reader/*.h Weights_Reader/*.hpp)
 set(READER_HEADERS "${reader_headers}" PARENT_SCOPE)
-
-file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp)
-set(LAYERS_HEADERS "${parallel_headers}" PARENT_SCOPE)
diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp
@@ -34,7 +34,8 @@ enum LayerType : uint8_t {
   kReshape,
   kSoftmax,
   kMatmul,
-  kBatchNormalization
+  kBatchNormalization,
+  kConvRelu
 };
 
 enum ImplType : uint8_t { kDefault, kTBB, kSTL };
diff --git a/include/layers_fused/ConvRelu.hpp b/include/layers_fused/ConvRelu.hpp
@@ -0,0 +1,93 @@
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "layers/Layer.hpp"
+#include "layers/Tensor.hpp"
+
+namespace it_lab_ai {
+
+// In-place ReLU: every element of `t` (accessed as T through Tensor::as<T>())
+// that is below zero is overwritten with zero; other elements are untouched.
+template <typename T>
+void relu(Tensor& t) {
+  Shape sh = t.get_shape();
+  const auto count = sh.count();
+  if (count == 0) {
+    return;  // nothing to clamp, same as the loop never running
+  }
+  // Resolve the typed buffer once instead of on every iteration.
+  auto& values = *t.as<T>();
+  for (size_t i = 0; i < count; i++) {
+    if (values[i] < 0) {
+      values[i] = 0;
+    }
+  }
+}
+
+// Fused convolution + ReLU layer: run() convolves the input with the stored
+// kernel/bias and then applies relu<T>() to the result.
+//
+// Inherits publicly from Layer so an instance can be used wherever a Layer is
+// expected (a bare `: Layer` on a class is private inheritance).
+class ConvReluLayer : public Layer {
+ private:
+  // Scalars carry default member initializers so that every constructor,
+  // including the default one, leaves the object fully initialized.
+  size_t stride_ = 0;
+  size_t pads_ = 0;
+  size_t dilations_ = 0;
+  std::shared_ptr<Tensor> kernel_;
+  std::shared_ptr<Tensor> bias_;
+  size_t group_ = 1;
+  bool useLegacyImpl_ = false;
+
+ public:
+  // Creates a layer without kernel or bias (both pointers are null).
+  ConvReluLayer() : Layer(kConvRelu) {}
+
+  // Builds the layer from tensors passed by reference; the layer keeps its own
+  // copies of `kernel` and `bias`. `step` is stored as the stride.
+  ConvReluLayer(size_t step, size_t pads, size_t dilations,
+                const Tensor& kernel, const Tensor& bias = Tensor(),
+                size_t group = 1, bool useLegacyImpl = false)
+      : Layer(kConvRelu),
+        stride_(step),
+        pads_(pads),
+        dilations_(dilations),
+        kernel_(std::make_shared<Tensor>(kernel)),
+        bias_(std::make_shared<Tensor>(bias)),
+        group_(group),
+        useLegacyImpl_(useLegacyImpl) {}
+
+  // Builds the layer from shared tensors; `kernel` and `bias` are shared with
+  // the caller instead of being copied.
+  ConvReluLayer(size_t step, size_t pads, size_t dilations,
+                std::shared_ptr<Tensor> kernel,
+                std::shared_ptr<Tensor> bias = std::make_shared<Tensor>(),
+                size_t group = 1, bool useLegacyImpl = false)
+      : Layer(kConvRelu),
+        stride_(step),
+        pads_(pads),
+        dilations_(dilations),
+        kernel_(std::move(kernel)),
+        bias_(std::move(bias)),
+        group_(group),
+        useLegacyImpl_(useLegacyImpl) {}
+
+  // Runs the layer with default-constructed RuntimeOptions.
+  void run(const std::vector<Tensor>& input,
+           std::vector<Tensor>& output) override;
+  // Runs convolution followed by ReLU, writing the result to output[0].
+  void run(const std::vector<Tensor>& input, std::vector<Tensor>& output,
+           const RuntimeOptions& options) override;
+#ifdef ENABLE_STATISTIC_WEIGHTS
+  // Returns a copy of the kernel, or an empty Tensor when none is set.
+  Tensor get_weights() override { return kernel_ ? *kernel_ : Tensor(); }
+#endif
+};
+}  // namespace it_lab_ai
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -2,5 +2,6 @@ add_subdirectory(graph)
 add_subdirectory(graph_transformations)
 add_subdirectory(perf)
 add_subdirectory(layers)
+add_subdirectory(layers_fused)
 add_subdirectory(layers_oneDNN)
 add_subdirectory(Weights_Reader)
diff --git a/src/layers_fused/CMakeLists.txt b/src/layers_fused/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Static library with the fused layer implementations (e.g. ConvRelu).
+# LAYERS_FUSED_HEADERS is exported by include/CMakeLists.txt via PARENT_SCOPE.
+file(GLOB_RECURSE layers_fused_src *.cpp)
+add_library(layers_fused_lib STATIC
+  "${LAYERS_FUSED_HEADERS}"
+  "${layers_fused_src}"
+)
+
+target_link_libraries(layers_fused_lib
+  PUBLIC
+    layers_lib
+    TBB_unified
+    OpenMP::OpenMP_CXX
+    dnnl
+    Kokkos_imported
+)
diff --git a/src/layers_fused/ConvRelu.cpp b/src/layers_fused/ConvRelu.cpp
@@ -0,0 +1,144 @@
+#include "layers_fused/ConvRelu.hpp"
+
+#include <cstddef>
+#include <stdexcept>
+#include <vector>
+
+#include "layers/ConvLayer.hpp"
+
+namespace it_lab_ai {
+
+namespace {
+
+// Runs the 2-D-kernel path through ConvImpl<T> and stores the result in
+// `output`. `dilation` is the value handed to ConvImpl, i.e. the layer's
+// dilation already reduced by one (and floored at zero) by the caller.
+template <typename T>
+void RunConvImpl2D(const Tensor& input, Tensor& kernel, Tensor& bias,
+                   size_t stride, size_t pads, size_t dilation,
+                   Tensor& output) {
+  Shape in_shape = input.get_shape();
+  Shape kernel_shape = kernel.get_shape();
+  const auto in_rank = in_shape.dims();
+  const auto dim_m1 = in_shape[in_rank - 1];  // last input dimension
+  const auto dim_m2 = in_shape[in_rank - 2];  // second-to-last
+  const auto dim_m3 = in_shape[in_rank - 3];  // third-to-last
+  const auto kernel_size = kernel_shape[kernel_shape.dims() - 1];
+  // Reused below for the output size and for two ConvImpl::run arguments.
+  const auto dilated_size = (1 + kernel_size) * dilation + kernel_size;
+
+  ConvImpl<T> impl(stride, pads, dilation, static_cast<int>(dim_m1),
+                   static_cast<int>(dim_m2), static_cast<int>(dim_m3),
+                   dim_m1 * dim_m2,
+                   bias.empty() ? std::vector<T>() : *bias.as<T>());
+
+  const auto out_side = static_cast<size_t>(
+      ((static_cast<int>(dim_m1) - 1 - static_cast<int>(dilated_size - 1)) /
+       static_cast<int>(stride)) +
+      1);
+  // NOTE(review): the second dimension of the result shape is hard-coded to 3
+  // and the padding does not enter out_side -- confirm both are intended.
+  Shape out_shape({1, 3, out_side, out_side});
+
+  output = make_tensor<T>(
+      impl.run(*input.as<T>(),
+               static_cast<int>(dim_m1) + 2 * static_cast<int>(pads),
+               static_cast<int>(dim_m2) + 2 * static_cast<int>(pads),
+               *kernel.as<T>(), kernel_size, dilated_size,
+               static_cast<int>((dilated_size - 1) / 2)),
+      out_shape);
+}
+
+}  // namespace
+
+// Convenience overload: runs the layer with default-constructed options.
+void ConvReluLayer::run(const std::vector<Tensor>& input,
+                        std::vector<Tensor>& output) {
+  RuntimeOptions default_options;
+  run(input, output, default_options);
+}
+
+// Convolves input[0] with the stored kernel/bias and applies ReLU to the
+// result, which is written to output[0].
+//
+// Throws std::runtime_error when kernel or bias is missing, when `input` does
+// not hold exactly one tensor, when `output` is empty or when the tensor type
+// is unsupported; throws std::out_of_range when input[0] is not 4-dimensional.
+void ConvReluLayer::run(const std::vector<Tensor>& input,
+                        std::vector<Tensor>& output,
+                        const RuntimeOptions& options) {
+  if (kernel_ == nullptr || bias_ == nullptr) {
+    throw std::runtime_error("ConvReluLayer: no weights or bias");
+  }
+  if (input.size() != 1) {
+    throw std::runtime_error("ConvReluLayer: Input tensors not 1");
+  }
+  if (input[0].get_shape().dims() != 4) {
+    throw std::out_of_range("input must be 4-dimensional");
+  }
+  // output[0] is written unconditionally below, so it has to exist.
+  if (output.empty()) {
+    throw std::runtime_error("ConvReluLayer: output vector is empty");
+  }
+
+  ParBackend backend = options.par_backend;
+
+  // Depthwise case: group count equals both input dim 1 and kernel dim 0.
+  if (group_ > 1 && group_ == input[0].get_shape()[1] &&
+      group_ == kernel_->get_shape()[0]) {
+    switch (input[0].get_type()) {
+      case Type::kFloat:
+        DepthwiseConv4D<float>(input[0], *kernel_, *bias_, output[0], stride_,
+                               pads_, dilations_, backend);
+        relu<float>(output[0]);
+        break;
+      case Type::kInt:
+        DepthwiseConv4D<int>(input[0], *kernel_, *bias_, output[0], stride_,
+                             pads_, dilations_, backend);
+        relu<int>(output[0]);
+        break;
+      default:
+        throw std::runtime_error(
+            "Unsupported type for depthwise convolution");
+    }
+    return;
+  }
+
+  const bool kernel_is_2d = kernel_->get_shape().dims() == 2;
+  // The ConvImpl path uses dilations_ reduced by one (floored at zero). Derive
+  // it into a local instead of decrementing the member on every run() call.
+  const size_t impl_dilation = dilations_ > 0 ? dilations_ - 1 : 0;
+
+  switch (input[0].get_type()) {
+    case Type::kInt: {
+      if (kernel_is_2d) {
+        RunConvImpl2D<int>(input[0], *kernel_, *bias_, stride_, pads_,
+                           impl_dilation, output[0]);
+      } else {
+        Conv4D<int>(input[0], *kernel_, *bias_, output[0], stride_, pads_,
+                    group_, dilations_, backend);
+      }
+      relu<int>(output[0]);
+      break;
+    }
+    case Type::kFloat: {
+      if (kernel_is_2d) {
+        RunConvImpl2D<float>(input[0], *kernel_, *bias_, stride_, pads_,
+                             impl_dilation, output[0]);
+      } else if (useLegacyImpl_) {
+        Conv4D_Legacy<float>(input[0], *kernel_, *bias_, output[0], stride_,
+                             pads_, dilations_, backend);
+      } else {
+        Conv4D<float>(input[0], *kernel_, *bias_, output[0], stride_, pads_,
+                      group_, dilations_, backend);
+      }
+      relu<float>(output[0]);
+      break;
+    }
+    default: {
+      throw std::runtime_error("Unsupported tensor type");
+    }
+  }
+}
+
+}  // namespace it_lab_ai
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -3,7 +3,7 @@ file(GLOB_RECURSE TEST_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
 add_executable(run_test ${TEST_SRC_FILES})
 
 target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX)
-target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib)
+target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib layers_fused_lib)
 target_link_libraries(run_test PUBLIC gtest)
 target_link_libraries(run_test PUBLIC ReadLib)
 target_link_libraries(run_test PUBLIC reader_lib)
diff --git a/test/single_layer_fused/test_convrelu.cpp b/test/single_layer_fused/test_convrelu.cpp