From 58ebd7eece5f2425f9c13294125e326716e30953 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Wed, 26 Nov 2025 16:58:22 +0300 Subject: [PATCH 01/24] sketch --- app/Graph/build.cpp | 2 -- app/Graph/build.hpp | 10 ++++------ app/Graph/graph_build.cpp | 22 ++++++++++++++-------- include/graph/graph.hpp | 8 +++++--- include/graph/runtime_options.hpp | 9 +++++++++ include/layers/Layer.hpp | 5 +++++ src/layers/CMakeLists.txt | 5 +++++ 7 files changed, 42 insertions(+), 19 deletions(-) create mode 100644 include/graph/runtime_options.hpp diff --git a/app/Graph/build.cpp b/app/Graph/build.cpp index a1cd4771a..1e3d5e34c 100644 --- a/app/Graph/build.cpp +++ b/app/Graph/build.cpp @@ -4,8 +4,6 @@ using namespace it_lab_ai; -bool LayerFactory::onednn_ = false; - std::unordered_map model_paths = { {"alexnet_mnist", MODEL_PATH_H5}, {"googlenet", MODEL_PATH_GOOGLENET_ONNX}, diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index 0eb2e2d52..d89bcb034 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -13,6 +13,7 @@ #include "Weights_Reader/reader_weights.hpp" #include "graph/graph.hpp" +#include "graph/runtime_options.hpp" #include "layers/BatchNormalizationLayer.hpp" #include "layers/BinaryOpLayer.hpp" #include "layers/ConcatLayer.hpp" @@ -72,16 +73,13 @@ void print_time_stats(it_lab_ai::Graph& graph); namespace it_lab_ai { class LayerFactory { - private: - static bool onednn_; - public: - static void configure(bool onednn) { onednn_ = onednn; } - static std::unique_ptr createEwLayer(const std::string& function, + const RuntimeOptions& options, float alpha = 1.0F, float beta = 0.0F) { - if (onednn_ && EwLayerOneDnn::is_function_supported(function)) { + if (options.backend == Backend::OneDnn && + EwLayerOneDnn::is_function_supported(function)) { return std::make_unique(function, alpha, beta); } return std::make_unique(function, alpha, beta); diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index e4c40e21c..555e68830 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -9,16 +9,22 @@ using namespace it_lab_ai; int main(int argc, char* argv[]) { std::string model_name = "alexnet_mnist"; - bool onednn = false; + RuntimeOptions options; + for (int i = 1; i < argc; ++i) { - if (std::string(argv[i]) == "--model" && i + 1 < argc) { - model_name = argv[++i]; - } else if (std::string(argv[i]) == "--onednn") { - onednn = true; + if (std::string(argv[i]) == "--onednn") { + options.backend = Backend::OneDnn; + } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { + options.threads = std::stoi(argv[++i]); + } else if (std::string(argv[i]) == "--parallel") { + options.parallel = true; } } - it_lab_ai::LayerFactory::configure(onednn); + RuntimeOptions options; + options.backend = Backend::OneDnn; + options.threads = 4; + options.parallel = true; std::string json_path = model_paths[model_name]; @@ -66,7 +72,7 @@ int main(int argc, char* argv[]) { std::cout << "Starting inference..." << std::endl; try { - graph.inference(); + graph.inference(options); std::cout << "Inference completed successfully." << std::endl; } catch (const std::exception& e) { std::cerr << "ERROR during inference: " << e.what() << std::endl; @@ -106,7 +112,7 @@ int main(int argc, char* argv[]) { std::cout << "Starting inference..." << std::endl; try { - graph.inference(); + graph.inference(options); std::cout << "Inference completed successfully." << std::endl; } catch (const std::exception& e) { std::cerr << "ERROR during inference: " << e.what() << std::endl; diff --git a/include/graph/graph.hpp b/include/graph/graph.hpp index ff27b0403..162eb78d5 100644 --- a/include/graph/graph.hpp +++ b/include/graph/graph.hpp @@ -11,6 +11,7 @@ #include #include "layers/Layer.hpp" +#include "runtime_options.hpp" namespace it_lab_ai { @@ -224,7 +225,7 @@ class Graph { return false; } - void inference() { + void inference(const RuntimeOptions& options) { std::vector> countinout = getInOutDegrees(); std::vector traversal = getTraversalOrder(); count_used_split_distribution_ = 0; @@ -262,7 +263,7 @@ class Graph { } } } - layers_[current_layer]->run(inten_, outten_); + layers_[current_layer]->run(inten_, outten_, options); #ifdef ENABLE_STATISTIC_TENSORS tensors_.push_back(inten_[0]); @@ -277,7 +278,8 @@ class Graph { if (layers_[current_layer]->postops.count > 0) { for (unsigned int j = 0; j < layers_[current_layer]->postops.count; j++) { - layers_[current_layer]->postops.layers[j]->run(inten_, outten_); + layers_[current_layer]->postops.layers[j]->run(inten_, outten_, + options); } inten_ = outten_; } diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp new file mode 100644 index 000000000..176d45f91 --- /dev/null +++ b/include/graph/runtime_options.hpp @@ -0,0 +1,9 @@ +#pragma once + +enum class Backend { Naive, OneDnn }; + +struct RuntimeOptions { + Backend backend{Backend::Naive}; + int threads{0}; + bool parallel{false}; +}; diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 2da4e0a51..28ee349fe 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -8,6 +8,7 @@ #include "layers/Shape.hpp" #include "layers/Tensor.hpp" #include "oneapi/tbb.h" +#include "graph/runtime_options.hpp" namespace it_lab_ai { @@ -52,6 +53,10 @@ class Layer { LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; + virtual void run(std::vector& input, std::vector& output, + const RuntimeOptions& options) { + run(input, output); + } #ifdef ENABLE_STATISTIC_WEIGHTS virtual Tensor get_weights() = 0; #endif diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index f8ac6d84b..8770ee911 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,4 +1,9 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") + +target_include_directories(layers_lib PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../include +) + target_link_libraries(layers_lib PUBLIC TBB_unified) target_link_libraries(layers_lib PUBLIC dnnl) From 2de413417fe23c554b1e2922121e94a347541744 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Wed, 26 Nov 2025 17:33:25 +0300 Subject: [PATCH 02/24] change structure --- app/Accuracy/accuracy_check.cpp | 6 ++- app/Graph/acc_check.cpp | 25 ++++++----- app/Graph/build.cpp | 25 ++++++----- app/Graph/build.hpp | 8 ++-- app/Graph/graph_build.cpp | 13 ++---- include/layers/Layer.hpp | 2 +- test/inference/test_inference.cpp | 70 ++++++++++++++++++++++++++----- 7 files changed, 104 insertions(+), 45 deletions(-) diff --git a/app/Accuracy/accuracy_check.cpp b/app/Accuracy/accuracy_check.cpp index f6eb2519e..4fbb1735c 100644 --- a/app/Accuracy/accuracy_check.cpp +++ b/app/Accuracy/accuracy_check.cpp @@ -10,6 +10,10 @@ using namespace it_lab_ai; int main() { + RuntimeOptions options; + options.backend = Backend::Naive; + options.threads = 4; + options.parallel = true; std::string image_path = IMAGE1_PATH; cv::Mat image = cv::imread(image_path); if (image.empty()) { @@ -66,7 +70,7 @@ int main() { graph.makeConnection(a4_ptr, a5_ptr); graph.makeConnection(a5_ptr, a6_ptr); graph.setOutput(a5_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector tmp_output = softmax(*output.as()); for (float i : tmp) { diff --git a/app/Graph/acc_check.cpp b/app/Graph/acc_check.cpp index b3e6cbece..e8b0289be 100644 --- a/app/Graph/acc_check.cpp +++ b/app/Graph/acc_check.cpp @@ -12,15 +12,18 @@ using namespace it_lab_ai; int main(int argc, char* argv[]) { std::string model_name = "alexnet_mnist"; - bool onednn = false; + RuntimeOptions options; + for (int i = 1; i < argc; ++i) { - if (std::string(argv[i]) == "--model" && i + 1 < argc) { - model_name = argv[++i]; - } else if (std::string(argv[i]) == "--onednn") { - onednn = true; + if (std::string(argv[i]) == "--onednn") { + options.backend = Backend::OneDnn; + } else if (std::string(argv[i]) == "--parallel") { + options.parallel = true; + } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { + options.threads = std::stoi(argv[++i]); } } - it_lab_ai::LayerFactory::configure(onednn); + std::string dataset_path; if (model_name == "alexnet_mnist") { dataset_path = MNIST_PATH; @@ -68,7 +71,7 @@ int main(int argc, char* argv[]) { for (int j = 0; j < 28; ++j) { size_t a = ind; for (size_t n = 0; n < name; n++) a += counts[n] + 1; - res[(a) * 28 * 28 + i * 28 + j] = channels[0].at(j, i); + res[(a)*28 * 28 + i * 28 + j] = channels[0].at(j, i); } } } @@ -77,8 +80,8 @@ int main(int argc, char* argv[]) { Tensor t = make_tensor(res, sh); input = t; Graph graph; - build_graph_linear(graph, input, output, false); - graph.inference(); + build_graph_linear(graph, input, output, options, false); + graph.inference(options); print_time_stats(graph); std::vector> tmp_output = softmax(*output.as(), 10); @@ -187,8 +190,8 @@ int main(int argc, char* argv[]) { it_lab_ai::Tensor(output_shape, it_lab_ai::Type::kFloat); Graph graph; - build_graph(graph, input, output, json_path, false); - graph.inference(); + build_graph(graph, input, output, json_path, options, false); + graph.inference(options); print_time_stats(graph); std::vector> processed_outputs; const std::vector& raw_output = *output.as(); diff --git a/app/Graph/build.cpp b/app/Graph/build.cpp index 1e3d5e34c..934b52778 100644 --- a/app/Graph/build.cpp +++ b/app/Graph/build.cpp @@ -12,7 +12,8 @@ std::unordered_map model_paths = { {"yolo", MODEL_PATH_YOLO11NET_ONNX}}; void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, - it_lab_ai::Tensor& output, bool comments) { + it_lab_ai::Tensor& output, RuntimeOptions options, + bool comments) { if (comments) { for (size_t i = 0; i < input.get_shape().dims(); i++) { std::cout << input.get_shape()[i] << ' '; @@ -87,7 +88,7 @@ void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, if (comments) std::cout << "ConvLayer added to layers." << std::endl; } if (layer_type.find("relu") != std::string::npos) { - auto ew_layer = LayerFactory::createEwLayer("relu"); + auto ew_layer = LayerFactory::createEwLayer("relu", options); layer_ptrs.push_back(ew_layer.get()); layers.push_back(std::move(ew_layer)); layerpostop.push_back(true); @@ -222,8 +223,8 @@ std::string layerTypeToString(it_lab_ai::LayerType type) { void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, const std::string& json_path, - bool comments) { - auto parse_result = parse_json_model(json_path, comments); + RuntimeOptions options, bool comments) { + auto parse_result = parse_json_model(options, json_path, comments); auto& layers = parse_result.layers; auto& name_to_layer_ptr = parse_result.name_to_layer_ptr; @@ -327,7 +328,8 @@ void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, } } -ParseResult parse_json_model(const std::string& json_path, bool comments) { +ParseResult parse_json_model(RuntimeOptions options, + const std::string& json_path, bool comments) { ParseResult result; auto& layers = result.layers; @@ -442,9 +444,9 @@ ParseResult parse_json_model(const std::string& json_path, bool comments) { layer = std::move(conv_layer); } else if (layer_type.find("Relu") != std::string::npos || layer_type.find("relu") != std::string::npos) { - layer = LayerFactory::createEwLayer("relu"); + layer = LayerFactory::createEwLayer("relu", options); } else if (layer_type.find("Sigmoid") != std::string::npos) { - layer = LayerFactory::createEwLayer("sigmoid"); + layer = LayerFactory::createEwLayer("sigmoid", options); } else if (layer_type.find("Dense") != std::string::npos || layer_type.find("FullyConnected") != std::string::npos) { it_lab_ai::Tensor tensor = it_lab_ai::create_tensor_from_json( @@ -663,13 +665,16 @@ ParseResult parse_json_model(const std::string& json_path, bool comments) { if (layer_type == "Mul") { ew_operation = "linear"; - layer = LayerFactory::createEwLayer(ew_operation, value, 0.0F); + layer = + LayerFactory::createEwLayer(ew_operation, options, value, 0.0F); } else if (layer_type == "Add") { ew_operation = "linear"; - layer = LayerFactory::createEwLayer(ew_operation, 1.0F, value); + layer = + LayerFactory::createEwLayer(ew_operation, options, 1.0F, value); } else if (layer_type == "Sub") { ew_operation = "linear"; - layer = LayerFactory::createEwLayer(ew_operation, 1.0F, -value); + layer = LayerFactory::createEwLayer(ew_operation, options, 1.0F, + -value); } else { continue; } diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index d89bcb034..d25811e0d 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -53,13 +53,15 @@ struct ParseResult { void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, const std::string& json_path, - bool comments); + RuntimeOptions options, bool comments); void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, - it_lab_ai::Tensor& output, bool comments); + it_lab_ai::Tensor& output, RuntimeOptions options, + bool comments); std::unordered_map load_class_names( const std::string& filename); -ParseResult parse_json_model(const std::string& json_path, bool comments); +ParseResult parse_json_model(RuntimeOptions options, + const std::string& json_path, bool comments); std::vector get_input_shape_from_json(const std::string& json_path); std::vector process_model_output(const std::vector& output, diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index 555e68830..e90dfb24c 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -14,18 +14,13 @@ int main(int argc, char* argv[]) { for (int i = 1; i < argc; ++i) { if (std::string(argv[i]) == "--onednn") { options.backend = Backend::OneDnn; - } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { - options.threads = std::stoi(argv[++i]); } else if (std::string(argv[i]) == "--parallel") { options.parallel = true; + } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { + options.threads = std::stoi(argv[++i]); } } - RuntimeOptions options; - options.backend = Backend::OneDnn; - options.threads = 4; - options.parallel = true; - std::string json_path = model_paths[model_name]; std::vector input_shape; @@ -68,7 +63,7 @@ int main(int argc, char* argv[]) { std::vector vec(75, 3); it_lab_ai::Tensor output = it_lab_ai::make_tensor(vec, sh1); Graph graph; - build_graph_linear(graph, input, output, true); + build_graph_linear(graph, input, output, options, true); std::cout << "Starting inference..." << std::endl; try { @@ -108,7 +103,7 @@ int main(int argc, char* argv[]) { it_lab_ai::Tensor output({1, output_classes}, it_lab_ai::Type::kFloat); Graph graph; - build_graph(graph, input, output, json_path, false); + build_graph(graph, input, output, json_path, options, false); std::cout << "Starting inference..." << std::endl; try { diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 28ee349fe..f3effae9c 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -54,7 +54,7 @@ class Layer { virtual void run(const std::vector& input, std::vector& output) = 0; virtual void run(std::vector& input, std::vector& output, - const RuntimeOptions& options) { + [[maybe_unused]] const RuntimeOptions& options) { run(input, output); } #ifdef ENABLE_STATISTIC_WEIGHTS diff --git a/test/inference/test_inference.cpp b/test/inference/test_inference.cpp index 6ed2aa0d5..799cd48ba 100644 --- a/test/inference/test_inference.cpp +++ b/test/inference/test_inference.cpp @@ -18,6 +18,11 @@ using namespace it_lab_ai; TEST(bfs, check_struct_graph) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -95,13 +100,18 @@ TEST(bfs, check_struct_graph) { graph.makeConnection(a10_ptr, a12_ptr); graph.setOutput(a12_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(36, 81); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_not_used_yolo) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 4, 2, 2}); std::vector vec; @@ -156,13 +166,18 @@ TEST(bfs, check_struct_graph_not_used_yolo) { graph.makeConnection(a3_3_4_ptr, a3_2_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(16, 3); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_resnet1) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 2, 2, 2}); std::vector vec; @@ -209,13 +224,18 @@ TEST(bfs, check_struct_graph_resnet1) { graph.makeConnection(a3_ptr, a4_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(4, 12); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_resnet2) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 2, 2, 2}); std::vector vec; @@ -259,13 +279,18 @@ TEST(bfs, check_struct_graph_resnet2) { graph.makeConnection(a3_ptr, a4_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(4, 12); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_google1) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 2, 2, 2}); std::vector vec; @@ -307,7 +332,7 @@ TEST(bfs, check_struct_graph_google1) { graph.makeConnection(a2_1_ptr, a3_ptr); graph.setOutput(a3_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(32, 3); for (int c = 8; c < 24; c++) { @@ -317,6 +342,11 @@ TEST(bfs, check_struct_graph_google1) { } TEST(bfs, check_result_vec) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -344,7 +374,7 @@ TEST(bfs, check_result_vec) { graph.makeConnection(a2_ptr, a4_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res = {81, 81, 81}; #ifdef ENABLE_STATISTIC_TENSORS @@ -396,6 +426,11 @@ TEST(bfs, check_result_vec) { } TEST(bfs, check_end_to_end) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -434,7 +469,7 @@ TEST(bfs, check_end_to_end) { graph.makeConnection(a4_ptr, a5_ptr); graph.setOutput(a5_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); ASSERT_GT(tmp.size(), 0); @@ -444,6 +479,11 @@ TEST(bfs, check_end_to_end) { } TEST(bfs, check_struct_layer) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -470,13 +510,18 @@ TEST(bfs, check_struct_layer) { graph.makeConnection(a2_ptr, a3_ptr); graph.setOutput(a3_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res = {81, 81, 81}; ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_layer_added) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -508,13 +553,18 @@ TEST(bfs, check_struct_layer_added) { graph.makeConnection(a2_ptr, a3_ptr); graph.setOutput(a3_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res = {189, 189, 189}; ASSERT_EQ(tmp, res); } FLAKY_TEST(bfs, check_struct_graph_split) { + RuntimeOptions options; + options.backend = Backend::Naive; + options.parallel = false; + options.threads = 4; + std::vector>> split = { {{12, 0}, {13, 0}, {14, 0}}}; Graph graph(151, split); @@ -594,7 +644,7 @@ FLAKY_TEST(bfs, check_struct_graph_split) { graph.makeConnection(a10_ptr, a12_ptr); graph.setOutput(a12_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(36, 81); ASSERT_EQ(tmp, res); From c4aba54c10bc32a26e2594f312180438960667c0 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Wed, 26 Nov 2025 17:37:19 +0300 Subject: [PATCH 03/24] clang --- app/Graph/acc_check.cpp | 2 +- include/layers/Layer.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/Graph/acc_check.cpp b/app/Graph/acc_check.cpp index e8b0289be..d8ae284c5 100644 --- a/app/Graph/acc_check.cpp +++ b/app/Graph/acc_check.cpp @@ -71,7 +71,7 @@ int main(int argc, char* argv[]) { for (int j = 0; j < 28; ++j) { size_t a = ind; for (size_t n = 0; n < name; n++) a += counts[n] + 1; - res[(a)*28 * 28 + i * 28 + j] = channels[0].at(j, i); + res[(a) * 28 * 28 + i * 28 + j] = channels[0].at(j, i); } } } diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index f3effae9c..8359f291a 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -5,10 +5,10 @@ #include #include +#include "graph/runtime_options.hpp" #include "layers/Shape.hpp" #include "layers/Tensor.hpp" #include "oneapi/tbb.h" -#include "graph/runtime_options.hpp" namespace it_lab_ai { From 6acc10c0151393b31508bc3a095cb914c28ccfff Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Wed, 26 Nov 2025 19:05:00 +0300 Subject: [PATCH 04/24] fix --- app/Accuracy/accuracy_check.cpp | 2 +- app/Graph/acc_check.cpp | 6 ++++-- app/Graph/build.hpp | 2 +- app/Graph/graph_build.cpp | 6 ++++-- include/graph/runtime_options.hpp | 4 ++-- test/inference/test_inference.cpp | 20 ++++++++++---------- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/app/Accuracy/accuracy_check.cpp b/app/Accuracy/accuracy_check.cpp index 4fbb1735c..4a3c3333a 100644 --- a/app/Accuracy/accuracy_check.cpp +++ b/app/Accuracy/accuracy_check.cpp @@ -11,7 +11,7 @@ using namespace it_lab_ai; int main() { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.threads = 4; options.parallel = true; std::string image_path = IMAGE1_PATH; diff --git a/app/Graph/acc_check.cpp b/app/Graph/acc_check.cpp index d8ae284c5..e51829b9a 100644 --- a/app/Graph/acc_check.cpp +++ b/app/Graph/acc_check.cpp @@ -15,8 +15,10 @@ int main(int argc, char* argv[]) { RuntimeOptions options; for (int i = 1; i < argc; ++i) { - if (std::string(argv[i]) == "--onednn") { - options.backend = Backend::OneDnn; + if (std::string(argv[i]) == "--model" && i + 1 < argc) { + model_name = argv[++i]; + } else if (std::string(argv[i]) == "--onednn") { + options.backend = Backend::kOneDnn; } else if (std::string(argv[i]) == "--parallel") { options.parallel = true; } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index d25811e0d..20632ecd5 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -80,7 +80,7 @@ class LayerFactory { const RuntimeOptions& options, float alpha = 1.0F, float beta = 0.0F) { - if (options.backend == Backend::OneDnn && + if (options.backend == Backend::kOneDnn && EwLayerOneDnn::is_function_supported(function)) { return std::make_unique(function, alpha, beta); } diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index e90dfb24c..e5ed606b1 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -12,8 +12,10 @@ int main(int argc, char* argv[]) { RuntimeOptions options; for (int i = 1; i < argc; ++i) { - if (std::string(argv[i]) == "--onednn") { - options.backend = Backend::OneDnn; + if (std::string(argv[i]) == "--model" && i + 1 < argc) { + model_name = argv[++i]; + } else if (std::string(argv[i]) == "--onednn") { + options.backend = Backend::kOneDnn; } else if (std::string(argv[i]) == "--parallel") { options.parallel = true; } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp index 176d45f91..da97452da 100644 --- a/include/graph/runtime_options.hpp +++ b/include/graph/runtime_options.hpp @@ -1,9 +1,9 @@ #pragma once -enum class Backend { Naive, OneDnn }; +enum class Backend { kNaive, kOneDnn }; struct RuntimeOptions { - Backend backend{Backend::Naive}; + Backend backend{Backend::kNaive}; int threads{0}; bool parallel{false}; }; diff --git a/test/inference/test_inference.cpp b/test/inference/test_inference.cpp index 799cd48ba..f6b201a25 100644 --- a/test/inference/test_inference.cpp +++ b/test/inference/test_inference.cpp @@ -19,7 +19,7 @@ using namespace it_lab_ai; TEST(bfs, check_struct_graph) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -108,7 +108,7 @@ TEST(bfs, check_struct_graph) { TEST(bfs, check_struct_graph_not_used_yolo) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -174,7 +174,7 @@ TEST(bfs, check_struct_graph_not_used_yolo) { TEST(bfs, check_struct_graph_resnet1) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -232,7 +232,7 @@ TEST(bfs, check_struct_graph_resnet1) { TEST(bfs, check_struct_graph_resnet2) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -287,7 +287,7 @@ TEST(bfs, check_struct_graph_resnet2) { TEST(bfs, check_struct_graph_google1) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -343,7 +343,7 @@ TEST(bfs, check_struct_graph_google1) { TEST(bfs, check_result_vec) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -427,7 +427,7 @@ TEST(bfs, check_result_vec) { TEST(bfs, check_end_to_end) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -480,7 +480,7 @@ TEST(bfs, check_end_to_end) { TEST(bfs, check_struct_layer) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -518,7 +518,7 @@ TEST(bfs, check_struct_layer) { TEST(bfs, check_struct_layer_added) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; @@ -561,7 +561,7 @@ TEST(bfs, check_struct_layer_added) { FLAKY_TEST(bfs, check_struct_graph_split) { RuntimeOptions options; - options.backend = Backend::Naive; + options.backend = Backend::kNaive; options.parallel = false; options.threads = 4; From 8e654f8d7c69107014093622b96a91e06c1fddd1 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Thu, 27 Nov 2025 18:03:18 +0300 Subject: [PATCH 05/24] uint --- include/graph/runtime_options.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp index da97452da..5cfcf2883 100644 --- a/include/graph/runtime_options.hpp +++ b/include/graph/runtime_options.hpp @@ -1,6 +1,6 @@ #pragma once -enum class Backend { kNaive, kOneDnn }; +enum class Backend : uint8_t { kNaive, kOneDnn }; struct RuntimeOptions { Backend backend{Backend::kNaive}; From d52b1b147d87203cf9af17081f0fb8d2def7baf9 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Thu, 27 Nov 2025 18:09:50 +0300 Subject: [PATCH 06/24] uint --- include/graph/runtime_options.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp index 5cfcf2883..933457438 100644 --- a/include/graph/runtime_options.hpp +++ b/include/graph/runtime_options.hpp @@ -1,4 +1,5 @@ #pragma once +#include enum class Backend : uint8_t { kNaive, kOneDnn }; From 65d2cfd2291bc6caecc1f2e75f6b2280d4029c06 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Thu, 27 Nov 2025 18:11:27 +0300 Subject: [PATCH 07/24] clang --- include/graph/runtime_options.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp index 933457438..3d2786447 100644 --- a/include/graph/runtime_options.hpp +++ b/include/graph/runtime_options.hpp @@ -1,5 +1,5 @@ #pragma once -#include +#include enum class Backend : uint8_t { kNaive, kOneDnn }; From 8b613b8a372d900bdb1aa33a76008e23e32dc679 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Sat, 29 Nov 2025 15:04:39 +0300 Subject: [PATCH 08/24] add parallel kinds --- app/Graph/graph_build.cpp | 33 +++++++++++++++++++++++++- include/graph/runtime_options.hpp | 9 +++++++ include/layers/ConvLayer.hpp | 3 ++- src/layers/ConvLayer.cpp | 39 +++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index e5ed606b1..825110153 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -16,8 +16,39 @@ int main(int argc, char* argv[]) { model_name = argv[++i]; } else if (std::string(argv[i]) == "--onednn") { options.backend = Backend::kOneDnn; - } else if (std::string(argv[i]) == "--parallel") { + if (options.parallel) { + std::cout << "Warning: oneDNN backend is not compatible with parallel " + "execution. Disabling parallelism." + << std::endl; + options.parallel = false; + options.parallel_backend = ParallelBackend::kNone; + } + } else if (std::string(argv[i]) == "--parallel" && i + 1 < argc) { + if (options.backend == Backend::kOneDnn) { + std::cout << "Warning: Parallel execution is not compatible with " + "oneDNN backend. Ignoring --parallel option." + << std::endl; + i++; + continue; + } + options.parallel = true; + std::string backend_str = argv[++i]; + if (backend_str == "tbb") { + options.parallel_backend = ParallelBackend::kTBB; + } else if (backend_str == "stl") { + options.parallel_backend = ParallelBackend::kSTL; + } else if (backend_str == "omp") { + options.parallel_backend = ParallelBackend::kOMP; + } else if (backend_str == "kokkos") { + options.parallel_backend = ParallelBackend::kKokkos; + } else if (backend_str == "sycl") { + options.parallel_backend = ParallelBackend::kSycl; + } else { + std::cerr << "Unknown parallel backend: " << backend_str + << ". Using default (TBB)." << std::endl; + options.parallel_backend = ParallelBackend::kTBB; + } } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { options.threads = std::stoi(argv[++i]); } diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp index 3d2786447..d6b0370aa 100644 --- a/include/graph/runtime_options.hpp +++ b/include/graph/runtime_options.hpp @@ -2,9 +2,18 @@ #include enum class Backend : uint8_t { kNaive, kOneDnn }; +enum class ParallelBackend : uint8_t { + kNone, + kTBB, + kSTL, + kOMP, + kKokkos, + kSycl +}; struct RuntimeOptions { Backend backend{Backend::kNaive}; + ParallelBackend parallel_backend{ParallelBackend::kNone}; int threads{0}; bool parallel{false}; }; diff --git a/include/layers/ConvLayer.hpp b/include/layers/ConvLayer.hpp index 8ce581317..256c28dab 100644 --- a/include/layers/ConvLayer.hpp +++ b/include/layers/ConvLayer.hpp @@ -50,9 +50,10 @@ class ConvolutionalLayer : public Layer { implType_ = implType; useLegacyImpl_ = useLegacyImpl; } - void run(const std::vector& input, std::vector& output) override; + void run(const std::vector& input, std::vector& output, + const RuntimeOptions& options); #ifdef ENABLE_STATISTIC_WEIGHTS Tensor get_weights() override { return kernel_; } #endif diff --git a/src/layers/ConvLayer.cpp b/src/layers/ConvLayer.cpp index 28c45e555..38b09d62f 100644 --- a/src/layers/ConvLayer.cpp +++ b/src/layers/ConvLayer.cpp @@ -4,6 +4,13 @@ namespace it_lab_ai { void ConvolutionalLayer::run(const std::vector& input, std::vector& output) { + RuntimeOptions default_options; + run(input, output, default_options); +} + +void ConvolutionalLayer::run(const std::vector& input, + std::vector& output, + const RuntimeOptions& options) { if (input.size() != 1) { throw std::runtime_error("ConvolutionalLayer: Input tensors not 1"); } @@ -28,6 +35,29 @@ void ConvolutionalLayer::run(const std::vector& input, return; } } + if (options.parallel) { + switch (options.parallel_backend) { + case ParallelBackend::kTBB: + implType_ = kTBB; + break; + case ParallelBackend::kSTL: + implType_ = kSTL; + break; + // case ParallelBackend::kOMP: + // implType = kOMP; + // break; + // case ParallelBackend::kKokkos: + // implType = kKokkos; + // break; + // case ParallelBackend::kSycl: + // implType = kSycl; + // break; + case ParallelBackend::kNone: + default: + implType_ = kDefault; + break; + } + } switch (input[0].get_type()) { case Type::kInt: { if (kernel_.get_shape().dims() == 2) { @@ -84,6 +114,11 @@ void ConvolutionalLayer::run(const std::vector& input, group_, dilations_); break; } + /*case kTBB: { + Conv4DTBB(input[0], kernel_, bias_, output[0], stride_, pads_, + group_, dilations_); + break; + }*/ default: { Conv4D(input[0], kernel_, bias_, output[0], stride_, pads_, group_, dilations_); @@ -152,6 +187,10 @@ void ConvolutionalLayer::run(const std::vector& input, pads_, group_, dilations_); break; } + /*case kTBB: { + Conv4DTBB(input[0], kernel_, bias_, output[0], stride_, + pads_, group_, dilations_); break; + }*/ default: { Conv4D(input[0], kernel_, bias_, output[0], stride_, pads_, group_, dilations_); From 92160c0eba23cb1beb6b59b09598ab5f44497f86 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Sat, 29 Nov 2025 15:22:48 +0300 Subject: [PATCH 09/24] fix --- app/Graph/graph_build.cpp | 4 ++-- include/layers/Layer.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index 825110153..af2090390 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -46,8 +46,8 @@ int main(int argc, char* argv[]) { options.parallel_backend = ParallelBackend::kSycl; } else { std::cerr << "Unknown parallel backend: " << backend_str - << ". Using default (TBB)." << std::endl; - options.parallel_backend = ParallelBackend::kTBB; + << ". Using default (STL)." << std::endl; + options.parallel_backend = ParallelBackend::kSTL; } } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { options.threads = std::stoi(argv[++i]); diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 8359f291a..de1a32f1c 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -53,7 +53,7 @@ class Layer { LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; - virtual void run(std::vector& input, std::vector& output, + virtual void run(const std::vector& input, std::vector& output, [[maybe_unused]] const RuntimeOptions& options) { run(input, output); } From 4951cdea2c7af84e14b0c8f73f87a45062800ac9 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Sat, 29 Nov 2025 15:34:04 +0300 Subject: [PATCH 10/24] fix --- include/layers/ConvLayer.hpp | 2 +- include/layers/Layer.hpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/layers/ConvLayer.hpp b/include/layers/ConvLayer.hpp index 256c28dab..aa7ab3b46 100644 --- a/include/layers/ConvLayer.hpp +++ b/include/layers/ConvLayer.hpp @@ -53,7 +53,7 @@ class ConvolutionalLayer : public Layer { void run(const std::vector& input, std::vector& output) override; void run(const std::vector& input, std::vector& output, - const RuntimeOptions& options); + const RuntimeOptions& options) override; #ifdef ENABLE_STATISTIC_WEIGHTS Tensor get_weights() override { return kernel_; } #endif diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index de1a32f1c..3f173b067 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -53,7 +53,8 @@ class Layer { LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; - virtual void run(const std::vector& input, std::vector& output, + virtual void run(const std::vector& input, + std::vector& output, [[maybe_unused]] const RuntimeOptions& options) { run(input, output); } From ab918c81bc2a9930c7acb518160a07b8cf34ec0a Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Mon, 1 Dec 2025 20:21:47 +0300 Subject: [PATCH 11/24] pooling, tests, constructors --- app/Graph/build.cpp | 12 +++--- include/layers/ConvLayer.hpp | 4 +- include/layers/PoolingLayer.hpp | 14 +++---- src/layers/PoolingLayer.cpp | 49 ++++++++++++++++++++++++- test/benchmarking/test_layers_time.cpp | 46 ++++++++++++++++------- test/single_layer/test_convlayer.cpp | 23 +++++++----- test/single_layer/test_poolinglayer.cpp | 17 ++++++--- 7 files changed, 118 insertions(+), 47 deletions(-) diff --git a/app/Graph/build.cpp b/app/Graph/build.cpp index 934b52778..76315ae13 100644 --- a/app/Graph/build.cpp +++ b/app/Graph/build.cpp @@ -81,7 +81,7 @@ void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor tmp_values = tensor; it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); auto conv_layer = std::make_unique( - 1, pads, 1, tmp_values, tmp_bias, kDefault, 1, true); + 1, pads, 1, tmp_values, tmp_bias, 1, true); layer_ptrs.push_back(conv_layer.get()); layers.push_back(std::move(conv_layer)); layerpostop.push_back(false); @@ -116,7 +116,7 @@ void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, std::cout << "PoolingLayer shape: " << shape[0] << "x" << shape[1] << std::endl; auto pool_layer = - std::make_unique(shape, pooltype, kDefault); + std::make_unique(shape, pooltype); layer_ptrs.push_back(pool_layer.get()); layers.push_back(std::move(pool_layer)); layerpostop.push_back(false); @@ -440,7 +440,7 @@ ParseResult parse_json_model(RuntimeOptions options, it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); auto conv_layer = std::make_unique( - stride, pads, dilations, tmp_tensor, tmp_bias, kDefault, group); + stride, pads, dilations, tmp_tensor, tmp_bias, group); layer = std::move(conv_layer); } else if (layer_type.find("Relu") != std::string::npos || layer_type.find("relu") != std::string::npos) { @@ -476,7 +476,7 @@ ParseResult parse_json_model(RuntimeOptions options, << std::endl; } else if (layer_type == "GlobalAveragePool") { auto pool_layer = std::make_unique( - it_lab_ai::Shape({0, 0}), "average", kDefault); + it_lab_ai::Shape({0, 0}), "average"); layer = std::move(pool_layer); if (comments) { std::cout << "GlobalAveragePool layer added (will use input spatial " @@ -537,8 +537,8 @@ ParseResult parse_json_model(RuntimeOptions options, } } - auto pool_layer = std::make_unique( - shape, pooltype, kDefault); + auto pool_layer = + std::make_unique(shape, pooltype); try { if (strides[0] != 2 || strides[1] != 2) { diff --git a/include/layers/ConvLayer.hpp b/include/layers/ConvLayer.hpp index aa7ab3b46..22f24ebdf 100644 --- a/include/layers/ConvLayer.hpp +++ b/include/layers/ConvLayer.hpp @@ -38,8 +38,7 @@ class ConvolutionalLayer : public Layer { } ConvolutionalLayer(size_t step, size_t pads, size_t dilations, const Tensor& kernel, const Tensor& bias = Tensor(), - ImplType implType = kDefault, size_t group = 1, - bool useLegacyImpl = false) + size_t group = 1, bool useLegacyImpl = false) : Layer(kConvolution) { stride_ = step; pads_ = pads; @@ -47,7 +46,6 @@ class ConvolutionalLayer : public Layer { dilations_ = dilations; kernel_ = kernel; bias_ = bias; - implType_ = implType; useLegacyImpl_ = useLegacyImpl; } void run(const std::vector& input, diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index 2998324a6..b46b62dea 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -21,26 +21,22 @@ class PoolingLayer : public Layer { PoolingLayer(const Shape& pooling_shape, const Shape& strides = {2, 2}, const Shape& pads = {0, 0, 0, 0}, const Shape& dilations = {1, 1}, bool ceil_mode = false, - std::string pooling_type = "average", - ImplType implType = kDefault) + std::string pooling_type = "average") : Layer(kPooling), poolingShape_(pooling_shape), strides_(strides), pads_(pads), dilations_(dilations), ceil_mode_(ceil_mode), - poolingType_(std::move(pooling_type)), - implType_(implType) {} - PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average", - ImplType implType = kDefault) + poolingType_(std::move(pooling_type)) {} + PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average") : Layer(kPooling), poolingShape_(pooling_shape), strides_({2, 2}), pads_({0, 0, 0, 0}), dilations_({1, 1}), ceil_mode_(false), - poolingType_(std::move(pooling_type)), - implType_(implType) {} + poolingType_(std::move(pooling_type)) {} void setStrides(size_t h, size_t w) { strides_ = {h, w}; } void setPads(size_t top, size_t bottom, size_t left, size_t right) { pads_ = {top, bottom, left, right}; @@ -49,6 +45,8 @@ class PoolingLayer : public Layer { void setCeilMode(bool ceil_mode) { ceil_mode_ = ceil_mode; } void run(const std::vector& input, std::vector& output) override; + void run(const std::vector& input, std::vector& output, + const RuntimeOptions& options) override; #ifdef ENABLE_STATISTIC_WEIGHTS Tensor get_weights() override { std::vector v = {0}; diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index 749fdadfd..2c1f7ce21 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -3,11 +3,40 @@ namespace it_lab_ai { void PoolingLayer::run(const std::vector& input, - std::vector& output) { + std::vector& output) { + RuntimeOptions default_options; + run(input, output, default_options); +} + +void PoolingLayer::run(const std::vector& input, + std::vector& output, + const RuntimeOptions& options) { if (input.size() != 1) { throw std::runtime_error("PoolingLayer: Input tensors not 1"); } - + if (options.parallel) { + switch (options.parallel_backend) { + case ParallelBackend::kTBB: + implType_ = kTBB; + break; + case ParallelBackend::kSTL: + implType_ = kSTL; + break; + // case ParallelBackend::kOMP: + // implType = kOMP; + // break; + // case ParallelBackend::kKokkos: + // implType = kKokkos; + // break; + // case ParallelBackend::kSycl: + // implType = kSycl; + // break; + case ParallelBackend::kNone: + default: + implType_ = kDefault; + break; + } + } switch (input[0].get_type()) { case Type::kInt: { switch (implType_) { @@ -19,6 +48,14 @@ void PoolingLayer::run(const std::vector& input, used_impl.get_output_shape()); break; } + /*case kSTL: { + PoolingLayerImplSTL used_impl( + input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, + ceil_mode_, poolingType_); + output[0] = make_tensor(used_impl.run(*input[0].as()), + used_impl.get_output_shape()); + break; + }*/ default: { PoolingLayerImpl used_impl(input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, @@ -40,6 +77,14 @@ void PoolingLayer::run(const std::vector& input, used_impl.get_output_shape()); break; } + /*case kSTL: { + PoolingLayerImplSTL used_impl( + input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, + ceil_mode_, poolingType_); + output[0] = make_tensor(used_impl.run(*input[0].as()), + used_impl.get_output_shape()); + break; + }*/ default: { PoolingLayerImpl used_impl(input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index ad2d8dc3a..1b89b6c01 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -10,10 +10,11 @@ using namespace it_lab_ai; -void test_func(Layer& p, const Tensor& input, Tensor& output) { +void test_func(Layer& p, const Tensor& input, Tensor& output, + const RuntimeOptions& options) { std::vector in{input}; std::vector out{output}; - p.run(in, out); + p.run(in, out, options); } TEST(pooling_test, is_pooling_tbb_ok) { @@ -30,12 +31,20 @@ TEST(pooling_test, is_pooling_tbb_ok) { } Tensor input = make_tensor(a1, test_shape); Tensor output; - PoolingLayer p1(Shape({2, 2}), "max", kDefault); - PoolingLayer p2(Shape({2, 2}), "max", kTBB); - double count1 = - elapsed_time(test_func, p1, input, output); - double count2 = - elapsed_time(test_func, p2, input, output); + RuntimeOptions options_seq; + options_seq.parallel = false; + options_seq.parallel_backend = ParallelBackend::kNone; + + RuntimeOptions options_tbb; + options_tbb.parallel = true; + options_tbb.parallel_backend = ParallelBackend::kTBB; + + PoolingLayer p1(Shape({2, 2}), "max"); + PoolingLayer p2(Shape({2, 2}), "max"); + double count1 = elapsed_time(test_func, p1, input, output, + options_seq); + double count2 = elapsed_time(test_func, p2, input, output, + options_tbb); std::cout << count1 << " vs. " << count2 << " (parallel)\n"; } @@ -58,11 +67,20 @@ TEST(conv_test, is_conv_stl_ok) { Tensor input = make_tensor(a1, test_shape); Tensor kernel = make_tensor(a2, Shape({5, 5, 3, 16})); Tensor output; - ConvolutionalLayer p1(1, 1, 2, kernel, Tensor(), kDefault); - ConvolutionalLayer p2(1, 1, 2, kernel, Tensor(), kSTL); - double count1 = - elapsed_time(test_func, p1, input, output); - double count2 = - elapsed_time(test_func, p2, input, output); + + RuntimeOptions options_seq; + options_seq.parallel = false; + options_seq.parallel_backend = ParallelBackend::kNone; + + RuntimeOptions options_stl; + options_stl.parallel = true; + options_stl.parallel_backend = ParallelBackend::kTBB; + + ConvolutionalLayer p1(1, 1, 2, kernel, Tensor()); + ConvolutionalLayer p2(1, 1, 2, kernel, Tensor()); + double count1 = elapsed_time(test_func, p1, input, output, + options_seq); + double count2 = elapsed_time(test_func, p2, input, output, + options_stl); std::cout << count1 << " vs. " << count2 << " (parallel)\n"; } diff --git a/test/single_layer/test_convlayer.cpp b/test/single_layer/test_convlayer.cpp index 41b4400fd..8ab977a94 100644 --- a/test/single_layer/test_convlayer.cpp +++ b/test/single_layer/test_convlayer.cpp @@ -445,7 +445,7 @@ TEST(ConvolutionalLayerTest, DepthwiseIntegration) { std::vector output_vec(32, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 1, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer layer(1, 1, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; @@ -569,7 +569,7 @@ TEST(ConvolutionalLayerTest, DepthwiseViaConvolutionalLayer) { std::vector output_vec(8, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; layer.run(in, out); @@ -583,6 +583,11 @@ TEST(ConvolutionalLayerTest, DepthwiseViaConvolutionalLayer) { } TEST(ConvolutionalLayerTest, Conv4DSTLViaConvolutionalLayer) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.parallel_backend = ParallelBackend::kSTL; + std::vector image(48, 1.0f); Shape input_shape({1, 3, 4, 4}); Tensor input = make_tensor(image, input_shape); @@ -595,10 +600,10 @@ TEST(ConvolutionalLayerTest, Conv4DSTLViaConvolutionalLayer) { std::vector output_vec(8, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kSTL); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); std::vector in{input}; std::vector out{output}; - layer.run(in, out); + layer.run(in, out, options); std::vector result = *out[0].as(); @@ -684,7 +689,7 @@ TEST(ConvolutionalLayerTest, Conv4DLegacyViaConvolutionalLayer) { std::vector output_vec(8, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 1, true); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), 1, true); std::vector in{input}; std::vector out{output}; @@ -747,7 +752,7 @@ TEST(ConvolutionalLayerTest, DepthwiseConv4DIntPathCoverage) { std::vector output_vec(6, 0); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, bias, kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, bias, 2); std::vector in{input}; std::vector out{output}; @@ -776,7 +781,7 @@ TEST(ConvolutionalLayerTest, DepthwiseConv4DFloatPathCoverage) { std::vector output_vec(2, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, bias, kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, bias, 2); std::vector in{input}; std::vector out{output}; @@ -801,7 +806,7 @@ TEST(ConvolutionalLayerTest, DepthwiseConv4DNoBiasIntPathCoverage) { std::vector output_vec(2, 0); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; @@ -827,7 +832,7 @@ TEST(ConvolutionalLayerTest, DepthwiseConv4DNoBiasFloatPathCoverage) { std::vector output_vec(2, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; diff --git a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp index ff8ccc826..98b3a8951 100644 --- a/test/single_layer/test_poolinglayer.cpp +++ b/test/single_layer/test_poolinglayer.cpp @@ -284,10 +284,13 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg) { } TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.parallel_backend = ParallelBackend::kTBB; Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; - PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average", - it_lab_ai::kTBB); + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); PoolingLayerImplTBB impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, @@ -300,7 +303,7 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) { std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; - a.run(in, out); + a.run(in, out, options); std::vector true_output = {6, 4, 4, 6}; for (size_t i = 0; i < true_output.size(); i++) { @@ -324,14 +327,18 @@ TEST(poolinglayer, new_pooling_layer_can_run_1d_pooling_float) { } TEST(poolinglayer, new_pooling_layer_tbb_can_run_1d_pooling_float) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.parallel_backend = ParallelBackend::kTBB; Shape inpshape = {8}; Shape poolshape = {3}; - PoolingLayer a(poolshape, "average", it_lab_ai::kTBB); + PoolingLayer a(poolshape, "average"); std::vector input({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F}); Tensor output = make_tensor({0}); std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; - a.run(in, out); + a.run(in, out, options); std::vector true_output = {8.0F, 6.0F, 4.0F}; for (size_t i = 0; i < true_output.size(); i++) { EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); From 1b11751f4b258c05c28cd46df17e4effb6bcb758 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Mon, 1 Dec 2025 20:23:59 +0300 Subject: [PATCH 12/24] cl --- src/layers/PoolingLayer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index 2c1f7ce21..4a544fa57 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -3,7 +3,7 @@ namespace it_lab_ai { void PoolingLayer::run(const std::vector& input, - std::vector& output) { + std::vector& output) { RuntimeOptions default_options; run(input, output, default_options); } From e1e5067b5cd667adac0564fe8aa50c749b956bce Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Tue, 2 Dec 2025 12:41:05 +0300 Subject: [PATCH 13/24] codecov --- src/layers/ConvLayer.cpp | 4 +- src/layers/PoolingLayer.cpp | 4 +- test/single_layer/test_convlayer.cpp | 135 +++++++++++++++++++++++- test/single_layer/test_poolinglayer.cpp | 76 +++++++++++++ 4 files changed, 214 insertions(+), 5 deletions(-) diff --git a/src/layers/ConvLayer.cpp b/src/layers/ConvLayer.cpp index 38b09d62f..6b759c52d 100644 --- a/src/layers/ConvLayer.cpp +++ b/src/layers/ConvLayer.cpp @@ -37,9 +37,9 @@ void ConvolutionalLayer::run(const std::vector& input, } if (options.parallel) { switch (options.parallel_backend) { - case ParallelBackend::kTBB: + /*case ParallelBackend::kTBB: implType_ = kTBB; - break; + break;*/ case ParallelBackend::kSTL: implType_ = kSTL; break; diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index 4a544fa57..2b238cb22 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -19,9 +19,9 @@ void PoolingLayer::run(const std::vector& input, case ParallelBackend::kTBB: implType_ = kTBB; break; - case ParallelBackend::kSTL: + /*case ParallelBackend::kSTL: implType_ = kSTL; - break; + break;*/ // case ParallelBackend::kOMP: // implType = kOMP; // break; diff --git a/test/single_layer/test_convlayer.cpp b/test/single_layer/test_convlayer.cpp index 8ab977a94..57be4854d 100644 --- a/test/single_layer/test_convlayer.cpp +++ b/test/single_layer/test_convlayer.cpp @@ -1038,4 +1038,137 @@ TEST(ConvolutionalLayerTest, Float4DKernelWorking) { std::vector result = *out[0].as(); ASSERT_EQ(result.size(), 4); -} \ No newline at end of file +} + +TEST(ConvolutionalLayerTest, Conv4DWithParallelNoneBackend) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.parallel_backend = ParallelBackend::kNone; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DWithParallelDefaultFallback) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DWithoutParallelFlag) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.parallel_backend = ParallelBackend::kSTL; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DLegacyFloatWithParallelNone) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.parallel_backend = ParallelBackend::kNone; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({3, 3, 3, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.5f, 1.0f}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + ConvolutionalLayer layer(1, 0, 1, kernel, bias, 1, true); + std::vector in{input}; + + size_t out_height = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + std::vector out{output}; + + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value_ch1 = 27.0f + 0.5f; + float expected_value_ch2 = 27.0f + 1.0f; + + ASSERT_EQ(result.size(), 8); + ASSERT_NEAR(result[0], expected_value_ch1, 1e-5f); + ASSERT_NEAR(result[1], expected_value_ch1, 1e-5f); + ASSERT_NEAR(result[4], expected_value_ch2, 1e-5f); + ASSERT_NEAR(result[5], expected_value_ch2, 1e-5f); +} diff --git a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp index 98b3a8951..bc178b007 100644 --- a/test/single_layer/test_poolinglayer.cpp +++ b/test/single_layer/test_poolinglayer.cpp @@ -439,3 +439,79 @@ TEST(poolinglayer, maxpool_onnx_with_pooling_layer) { EXPECT_LE(val, 10.0f); } } + +TEST(poolinglayer, new_pooling_layer_with_parallel_none) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.parallel_backend = ParallelBackend::kNone; + + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, {1, 1}, {1, 1, 1, 1}, {1, 1}, false, "average"); + std::vector input({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F, + 2.0F, 3.0F, 4.0F, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F}); + Tensor output = make_tensor({0}); + std::vector in{make_tensor(input, inpshape)}; + std::vector out{output}; + a.run(in, out, options); + EXPECT_EQ(out[0].get_shape().count(), 25); +} + +TEST(poolinglayer, new_pooling_layer_int_avg_with_parallel_none) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.parallel_backend = ParallelBackend::kNone; + + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); + std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); + + PoolingLayerImpl impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, + false, "average"); + Shape output_shape = impl.get_output_shape(); + + std::vector zeros(output_shape.count(), 0); + Tensor output = make_tensor(zeros, output_shape); + + std::vector in{make_tensor(input, inpshape)}; + std::vector out{output}; + + a.run(in, out, options); + + std::vector true_output = {6, 4, 4, 6}; + for (size_t i = 0; i < true_output.size(); i++) { + EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); + } +} + +TEST(poolinglayer, new_pooling_layer_int_avg_without_parallel_flag) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.parallel_backend = ParallelBackend::kTBB; + + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); + std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); + + PoolingLayerImpl impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, + false, "average"); + Shape output_shape = impl.get_output_shape(); + + std::vector zeros(output_shape.count(), 0); + Tensor output = make_tensor(zeros, output_shape); + + std::vector in{make_tensor(input, inpshape)}; + std::vector out{output}; + + a.run(in, out, options); + + std::vector true_output = {6, 4, 4, 6}; + for (size_t i = 0; i < true_output.size(); i++) { + EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); + } +} From fc2950b82cd9a5fcf168b62aa9b00338d66bef3f Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Tue, 2 Dec 2025 18:44:03 +0300 Subject: [PATCH 14/24] fix From 50a0285831ef6607afbab262251682d3d93c2052 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Sun, 14 Dec 2025 18:42:56 +0300 Subject: [PATCH 15/24] add ParBackend --- 3rdparty/opencv | 2 +- app/Graph/build.hpp | 3 +- app/Graph/graph_build.cpp | 23 ++--- include/graph/runtime_options.hpp | 30 ++++-- include/layers/EWLayer.hpp | 2 + include/layers/Layer.hpp | 3 - include/layers/PoolingLayer.hpp | 12 +-- src/layers/ConvLayer.cpp | 6 +- src/layers/EWLayer.cpp | 12 ++- src/layers/PoolingLayer.cpp | 6 +- test/benchmarking/test_layers_time.cpp | 8 +- test/single_layer/test_convlayer.cpp | 8 +- test/single_layer/test_ewlayer.cpp | 130 +++++++++++------------- test/single_layer/test_poolinglayer.cpp | 10 +- 14 files changed, 125 insertions(+), 130 deletions(-) diff --git a/3rdparty/opencv b/3rdparty/opencv index 49486f61f..2e3ccb4e8 160000 --- a/3rdparty/opencv +++ b/3rdparty/opencv @@ -1 +1 @@ -Subproject commit 49486f61fb25722cbcf586b7f4320921d46fb38e +Subproject commit 2e3ccb4e8e098cf481c938998ff941e320fa4f85 diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index 20632ecd5..8ff85d725 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -34,6 +34,8 @@ #include "layers/TransposeLayer.hpp" #include "layers_oneDNN/EWLayer.hpp" +using namespace it_lab_ai; + extern std::unordered_map model_paths; struct ParseResult { @@ -72,7 +74,6 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image, it_lab_ai::Tensor prepare_mnist_image(const cv::Mat& image); void print_time_stats(it_lab_ai::Graph& graph); - namespace it_lab_ai { class LayerFactory { public: diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index af2090390..1d2e5bed1 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -16,12 +16,12 @@ int main(int argc, char* argv[]) { model_name = argv[++i]; } else if (std::string(argv[i]) == "--onednn") { options.backend = Backend::kOneDnn; - if (options.parallel) { + if (options.isParallel()) { std::cout << "Warning: oneDNN backend is not compatible with parallel " "execution. Disabling parallelism." << std::endl; - options.parallel = false; - options.parallel_backend = ParallelBackend::kNone; + options.setParallelBackend( + ParBackend::kSeq); // Автоматически sets parallel=false } } else if (std::string(argv[i]) == "--parallel" && i + 1 < argc) { if (options.backend == Backend::kOneDnn) { @@ -32,22 +32,17 @@ int main(int argc, char* argv[]) { continue; } - options.parallel = true; std::string backend_str = argv[++i]; if (backend_str == "tbb") { - options.parallel_backend = ParallelBackend::kTBB; - } else if (backend_str == "stl") { - options.parallel_backend = ParallelBackend::kSTL; + options.setParallelBackend(ParBackend::kTbb); + } else if (backend_str == "threads" || backend_str == "stl") { + options.setParallelBackend(ParBackend::kThreads); } else if (backend_str == "omp") { - options.parallel_backend = ParallelBackend::kOMP; - } else if (backend_str == "kokkos") { - options.parallel_backend = ParallelBackend::kKokkos; - } else if (backend_str == "sycl") { - options.parallel_backend = ParallelBackend::kSycl; + options.setParallelBackend(ParBackend::kOmp); } else { std::cerr << "Unknown parallel backend: " << backend_str - << ". Using default (STL)." << std::endl; - options.parallel_backend = ParallelBackend::kSTL; + << ". Using default (Threads)." << std::endl; + options.setParallelBackend(ParBackend::kThreads); } } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { options.threads = std::stoi(argv[++i]); diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp index d6b0370aa..435248457 100644 --- a/include/graph/runtime_options.hpp +++ b/include/graph/runtime_options.hpp @@ -1,19 +1,31 @@ #pragma once #include +#include "parallel/parallel.hpp" + +namespace it_lab_ai { + enum class Backend : uint8_t { kNaive, kOneDnn }; -enum class ParallelBackend : uint8_t { - kNone, - kTBB, - kSTL, - kOMP, - kKokkos, - kSycl -}; +using ParBackend = parallel::Backend; struct RuntimeOptions { Backend backend{Backend::kNaive}; - ParallelBackend parallel_backend{ParallelBackend::kNone}; + ParBackend par_backend{ParBackend::kSeq}; int threads{0}; bool parallel{false}; + + ParBackend getEffectiveParBackend() const { + return parallel ? par_backend : ParBackend::kSeq; + } + + void setParallelBackend(ParBackend p) { + par_backend = p; + parallel = (p != ParBackend::kSeq); + } + + bool isParallel() const { + return parallel && (par_backend != ParBackend::kSeq); + } }; + +} // namespace it_lab_ai diff --git a/include/layers/EWLayer.hpp b/include/layers/EWLayer.hpp index ebac14f7c..cf9dbda85 100644 --- a/include/layers/EWLayer.hpp +++ b/include/layers/EWLayer.hpp @@ -28,6 +28,8 @@ class EWLayer : public Layer { void run(const std::vector& input, std::vector& output) override; + void run(const std::vector& input, std::vector& output, + const RuntimeOptions& options) override; #ifdef ENABLE_STATISTIC_WEIGHTS Tensor get_weights() override { std::vector v = {0}; diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 84645620c..00655e6c2 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -55,8 +55,6 @@ class Layer { PostOperations postops; int getID() const { return id_; } void setID(int id) { id_ = id; } - void setParallelBackend(ParBackend backend) { parallel_backend_ = backend; } - ParBackend getParallelBackend() const { return parallel_backend_; } LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; @@ -72,7 +70,6 @@ class Layer { protected: int id_ = 0; LayerType type_; - ParBackend parallel_backend_ = ParBackend::kSeq; }; template diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index 7f5d5b601..a11ed8463 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -22,27 +22,23 @@ class PoolingLayer : public Layer { const Shape& strides = {2, 2}, const Shape& pads = {0, 0, 0, 0}, const Shape& dilations = {1, 1}, bool ceil_mode = false, - std::string pooling_type = "average", - ImplType implType = kDefault) + std::string pooling_type = "average") : Layer(kPooling), poolingShape_(pooling_shape), strides_(strides), pads_(pads), dilations_(dilations), ceil_mode_(ceil_mode), - poolingType_(std::move(pooling_type)), - implType_(implType) {} + poolingType_(std::move(pooling_type)) {} explicit PoolingLayer(const Shape& pooling_shape, - std::string pooling_type = "average", - ImplType implType = kDefault) + std::string pooling_type = "average") : Layer(kPooling), poolingShape_(pooling_shape), strides_({2, 2}), pads_({0, 0, 0, 0}), dilations_({1, 1}), ceil_mode_(false), - poolingType_(std::move(pooling_type)), - implType_(implType) {} + poolingType_(std::move(pooling_type)) {} void setStrides(size_t h, size_t w) { strides_ = {h, w}; } void setPads(size_t top, size_t bottom, size_t left, size_t right) { pads_ = {top, bottom, left, right}; diff --git a/src/layers/ConvLayer.cpp b/src/layers/ConvLayer.cpp index 6b759c52d..ba8fdd078 100644 --- a/src/layers/ConvLayer.cpp +++ b/src/layers/ConvLayer.cpp @@ -36,11 +36,11 @@ void ConvolutionalLayer::run(const std::vector& input, } } if (options.parallel) { - switch (options.parallel_backend) { + switch (options.par_backend) { /*case ParallelBackend::kTBB: implType_ = kTBB; break;*/ - case ParallelBackend::kSTL: + case ParBackend::kThreads: implType_ = kSTL; break; // case ParallelBackend::kOMP: @@ -52,7 +52,7 @@ void ConvolutionalLayer::run(const std::vector& input, // case ParallelBackend::kSycl: // implType = kSycl; // break; - case ParallelBackend::kNone: + case ParBackend::kSeq: default: implType_ = kDefault; break; diff --git a/src/layers/EWLayer.cpp b/src/layers/EWLayer.cpp index 6a7da06c0..be6d104a3 100644 --- a/src/layers/EWLayer.cpp +++ b/src/layers/EWLayer.cpp @@ -5,13 +5,20 @@ namespace it_lab_ai { -void EWLayer::run(const std::vector& input, + void EWLayer::run(const std::vector& input, std::vector& output) { + RuntimeOptions default_options; + run(input, output, default_options); +} + +void EWLayer::run(const std::vector& input, + std::vector& output, + const RuntimeOptions& options) { if (input.size() != 1) { throw std::runtime_error("EWLayer: Input tensors not 1"); } - ParBackend backend = getParallelBackend(); + ParBackend backend = options.getEffectiveParBackend(); switch (input[0].get_type()) { case Type::kInt: { @@ -34,4 +41,5 @@ void EWLayer::run(const std::vector& input, } } + } // namespace it_lab_ai diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index 2b238cb22..be2a34323 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -15,8 +15,8 @@ void PoolingLayer::run(const std::vector& input, throw std::runtime_error("PoolingLayer: Input tensors not 1"); } if (options.parallel) { - switch (options.parallel_backend) { - case ParallelBackend::kTBB: + switch (options.par_backend) { + case ParBackend::kTbb: implType_ = kTBB; break; /*case ParallelBackend::kSTL: @@ -31,7 +31,7 @@ void PoolingLayer::run(const std::vector& input, // case ParallelBackend::kSycl: // implType = kSycl; // break; - case ParallelBackend::kNone: + case ParBackend::kSeq: default: implType_ = kDefault; break; diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index 1b89b6c01..cbdfa5054 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -33,11 +33,11 @@ TEST(pooling_test, is_pooling_tbb_ok) { Tensor output; RuntimeOptions options_seq; options_seq.parallel = false; - options_seq.parallel_backend = ParallelBackend::kNone; + options_seq.par_backend = ParBackend::kSeq; RuntimeOptions options_tbb; options_tbb.parallel = true; - options_tbb.parallel_backend = ParallelBackend::kTBB; + options_tbb.par_backend = ParBackend::kTbb; PoolingLayer p1(Shape({2, 2}), "max"); PoolingLayer p2(Shape({2, 2}), "max"); @@ -70,11 +70,11 @@ TEST(conv_test, is_conv_stl_ok) { RuntimeOptions options_seq; options_seq.parallel = false; - options_seq.parallel_backend = ParallelBackend::kNone; + options_seq.par_backend = ParBackend::kSeq; RuntimeOptions options_stl; options_stl.parallel = true; - options_stl.parallel_backend = ParallelBackend::kTBB; + options_stl.par_backend = ParBackend::kTbb; ConvolutionalLayer p1(1, 1, 2, kernel, Tensor()); ConvolutionalLayer p2(1, 1, 2, kernel, Tensor()); diff --git a/test/single_layer/test_convlayer.cpp b/test/single_layer/test_convlayer.cpp index 57be4854d..664ac313d 100644 --- a/test/single_layer/test_convlayer.cpp +++ b/test/single_layer/test_convlayer.cpp @@ -586,7 +586,7 @@ TEST(ConvolutionalLayerTest, Conv4DSTLViaConvolutionalLayer) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = true; - options.parallel_backend = ParallelBackend::kSTL; + options.par_backend = ParBackend::kThreads; std::vector image(48, 1.0f); Shape input_shape({1, 3, 4, 4}); @@ -1044,7 +1044,7 @@ TEST(ConvolutionalLayerTest, Conv4DWithParallelNoneBackend) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = true; - options.parallel_backend = ParallelBackend::kNone; + options.par_backend = ParBackend::kSeq; std::vector image(48, 1.0f); Shape input_shape({1, 3, 4, 4}); @@ -1105,7 +1105,7 @@ TEST(ConvolutionalLayerTest, Conv4DWithoutParallelFlag) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = false; - options.parallel_backend = ParallelBackend::kSTL; + options.par_backend = ParBackend::kThreads; std::vector image(48, 1.0f); Shape input_shape({1, 3, 4, 4}); @@ -1136,7 +1136,7 @@ TEST(ConvolutionalLayerTest, Conv4DLegacyFloatWithParallelNone) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = true; - options.parallel_backend = ParallelBackend::kNone; + options.par_backend = ParBackend::kSeq; std::vector image(48, 1.0f); Shape input_shape({1, 3, 4, 4}); diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index 7ed0e7434..930d7608c 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -11,7 +11,7 @@ using namespace it_lab_ai; class EWTestsParameterized : public ::testing::TestWithParam< std::tuple, EWLayerImpl, - std::vector, std::function > > {}; + std::vector, std::function>> {}; // 1) input; 2) constructed ewlayerimpl; 3) expected_output; 4) lambda_expr. TEST_P(EWTestsParameterized, element_wise_works_correctly) { @@ -218,14 +218,7 @@ TEST(ewlayer, new_ewlayer_can_sigmoid_float_extreme_values) { } TEST(ewlayer, parallel_for_ew) { - EWLayer layer0("relu"); - layer0.setParallelBackend(ParBackend::kSeq); - EWLayer layer1("relu"); - layer1.setParallelBackend(ParBackend::kThreads); - EWLayer layer2("relu"); - layer2.setParallelBackend(ParBackend::kTbb); - EWLayer layer3("relu"); - layer3.setParallelBackend(ParBackend::kOmp); + EWLayer layer("relu"); std::vector vec(8000000, -1); Tensor input = make_tensor(vec); @@ -233,52 +226,27 @@ TEST(ewlayer, parallel_for_ew) { std::vector in{input}; std::vector out{output}; - auto start = std::chrono::high_resolution_clock::now(); - layer0.run(in, out); - auto end = std::chrono::high_resolution_clock::now(); - auto total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); - } + std::vector backends = {ParBackend::kSeq, ParBackend::kThreads, + ParBackend::kTbb, ParBackend::kOmp}; - start = std::chrono::high_resolution_clock::now(); - layer1.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); - } + for (auto backend : backends) { + RuntimeOptions options; + options.setParallelBackend(backend); - start = std::chrono::high_resolution_clock::now(); - layer2.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); - } + auto start = std::chrono::high_resolution_clock::now(); + layer.run(in, out, options); + auto end = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end - start); - start = std::chrono::high_resolution_clock::now(); - layer3.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); + for (size_t i = 0; i < 8000000; i++) { + EXPECT_EQ((*out[0].as())[i], 0); + } } } -TEST(ewlayer, parallel_for_ew_sigmoid) { - EWLayer layer0("sigmoid"); - layer0.setParallelBackend(ParBackend::kSeq); - EWLayer layer1("sigmoid"); - layer1.setParallelBackend(ParBackend::kThreads); - EWLayer layer2("sigmoid"); - layer2.setParallelBackend(ParBackend::kTbb); - EWLayer layer3("sigmoid"); - layer3.setParallelBackend(ParBackend::kOmp); +TEST(ewlayer, parallel_for_ew_sigmoid_compact) { + EWLayer layer("sigmoid"); std::vector vec(8000000, -1); Tensor input = make_tensor(vec); @@ -286,31 +254,47 @@ TEST(ewlayer, parallel_for_ew_sigmoid) { std::vector in{input}; std::vector out{output}; - auto start = std::chrono::high_resolution_clock::now(); - layer0.run(in, out); - auto end = std::chrono::high_resolution_clock::now(); - auto total_duration = - std::chrono::duration_cast(end - start); - - start = std::chrono::high_resolution_clock::now(); - layer1.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - - start = std::chrono::high_resolution_clock::now(); - layer2.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - - start = std::chrono::high_resolution_clock::now(); - layer3.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); + std::vector> backends = { + {ParBackend::kSeq, "Sequential"}, + {ParBackend::kThreads, "Threads"}, + {ParBackend::kTbb, "TBB"}, + {ParBackend::kOmp, "OpenMP"}}; + + std::vector reference_result; + bool first = true; + + for (const auto& [backend, name] : backends) { + RuntimeOptions options; + options.parallel = (backend != ParBackend::kSeq); + options.par_backend = backend; + if (backend == ParBackend::kThreads) { + options.threads = 4; + } - EXPECT_EQ(0, 0); + auto start = std::chrono::high_resolution_clock::now(); + layer.run(in, out, options); + auto end = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end - start); + + std::cout << "Sigmoid " << name << " time: " << duration.count() << " ms" + << std::endl; + + auto current_result = *out[0].as(); + if (first) { + reference_result = current_result; + first = false; + for (size_t i = 0; i < 100; i++) { + EXPECT_EQ(current_result[i], 0) + << "Invalid sigmoid result at index " << i; + } + } else { + for (size_t i = 0; i < reference_result.size(); i++) { + EXPECT_EQ(current_result[i], reference_result[i]) + << "Mismatch with " << name << " at index " << i; + } + } + } } TEST(ewlayer, parallel_for_direct) { diff --git a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp index bc178b007..b1bfd798a 100644 --- a/test/single_layer/test_poolinglayer.cpp +++ b/test/single_layer/test_poolinglayer.cpp @@ -287,7 +287,7 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = true; - options.parallel_backend = ParallelBackend::kTBB; + options.par_backend = ParBackend::kTbb; Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); @@ -330,7 +330,7 @@ TEST(poolinglayer, new_pooling_layer_tbb_can_run_1d_pooling_float) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = true; - options.parallel_backend = ParallelBackend::kTBB; + options.par_backend = ParBackend::kTbb; Shape inpshape = {8}; Shape poolshape = {3}; PoolingLayer a(poolshape, "average"); @@ -444,7 +444,7 @@ TEST(poolinglayer, new_pooling_layer_with_parallel_none) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = true; - options.parallel_backend = ParallelBackend::kNone; + options.par_backend = ParBackend::kSeq; Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; @@ -462,7 +462,7 @@ TEST(poolinglayer, new_pooling_layer_int_avg_with_parallel_none) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = true; - options.parallel_backend = ParallelBackend::kNone; + options.par_backend = ParBackend::kSeq; Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; @@ -491,7 +491,7 @@ TEST(poolinglayer, new_pooling_layer_int_avg_without_parallel_flag) { RuntimeOptions options; options.backend = Backend::kNaive; options.parallel = false; - options.parallel_backend = ParallelBackend::kTBB; + options.par_backend = ParBackend::kTbb; Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; From 1b9e0bbd24a39796d66f372bcbb024d220071e20 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Sun, 14 Dec 2025 18:46:18 +0300 Subject: [PATCH 16/24] clang --- src/layers/EWLayer.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/layers/EWLayer.cpp b/src/layers/EWLayer.cpp index be6d104a3..dc159f1f6 100644 --- a/src/layers/EWLayer.cpp +++ b/src/layers/EWLayer.cpp @@ -5,15 +5,14 @@ namespace it_lab_ai { - void EWLayer::run(const std::vector& input, +void EWLayer::run(const std::vector& input, std::vector& output) { RuntimeOptions default_options; run(input, output, default_options); } -void EWLayer::run(const std::vector& input, - std::vector& output, - const RuntimeOptions& options) { +void EWLayer::run(const std::vector& input, std::vector& output, + const RuntimeOptions& options) { if (input.size() != 1) { throw std::runtime_error("EWLayer: Input tensors not 1"); } @@ -41,5 +40,4 @@ void EWLayer::run(const std::vector& input, } } - } // namespace it_lab_ai From babc6a59d8981af03267fa56d20c6f1f06c29433 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Mon, 15 Dec 2025 09:58:00 +0300 Subject: [PATCH 17/24] fix --- test/single_layer/test_ewlayer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index 930d7608c..0174fbf98 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -238,7 +238,7 @@ TEST(ewlayer, parallel_for_ew) { auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start); - + std::cout << " time: " << duration.count() << " ms" << std::endl; for (size_t i = 0; i < 8000000; i++) { EXPECT_EQ((*out[0].as())[i], 0); } From 9f41ec6218b230f385d36ebd59ff62492e6bf787 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Mon, 15 Dec 2025 15:07:04 +0300 Subject: [PATCH 18/24] endl --- app/Graph/graph_build.cpp | 16 ++++++++-------- src/layers_oneDNN/EWLayer.cpp | 12 ++++++++---- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index 1d2e5bed1..9e4f0093d 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -19,7 +19,7 @@ int main(int argc, char* argv[]) { if (options.isParallel()) { std::cout << "Warning: oneDNN backend is not compatible with parallel " "execution. Disabling parallelism." - << std::endl; + << '\n'; options.setParallelBackend( ParBackend::kSeq); // Автоматически sets parallel=false } @@ -27,7 +27,7 @@ int main(int argc, char* argv[]) { if (options.backend == Backend::kOneDnn) { std::cout << "Warning: Parallel execution is not compatible with " "oneDNN backend. Ignoring --parallel option." - << std::endl; + << '\n'; i++; continue; } @@ -41,7 +41,7 @@ int main(int argc, char* argv[]) { options.setParallelBackend(ParBackend::kOmp); } else { std::cerr << "Unknown parallel backend: " << backend_str - << ". Using default (Threads)." << std::endl; + << ". Using default (Threads)." << '\n'; options.setParallelBackend(ParBackend::kThreads); } } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { @@ -93,12 +93,12 @@ int main(int argc, char* argv[]) { Graph graph; build_graph_linear(graph, input, output, options, true); - std::cout << "Starting inference..." << std::endl; + std::cout << "Starting inference..." << '\n'; try { graph.inference(options); - std::cout << "Inference completed successfully." << std::endl; + std::cout << "Inference completed successfully." << '\n'; } catch (const std::exception& e) { - std::cerr << "ERROR during inference: " << e.what() << std::endl; + std::cerr << "ERROR during inference: " << e.what() << '\n'; } print_time_stats(graph); std::vector tmp_output = softmax(*output.as()); @@ -136,9 +136,9 @@ int main(int argc, char* argv[]) { std::cout << "Starting inference..." << std::endl; try { graph.inference(options); - std::cout << "Inference completed successfully." << std::endl; + std::cout << "Inference completed successfully." << '\n'; } catch (const std::exception& e) { - std::cerr << "ERROR during inference: " << e.what() << std::endl; + std::cerr << "ERROR during inference: " << e.what() << '\n'; } print_time_stats(graph); std::vector tmp_output = diff --git a/src/layers_oneDNN/EWLayer.cpp b/src/layers_oneDNN/EWLayer.cpp index b93bb45d8..319999b03 100644 --- a/src/layers_oneDNN/EWLayer.cpp +++ b/src/layers_oneDNN/EWLayer.cpp @@ -113,10 +113,14 @@ void EwLayerOneDnn::initialize_onednn(const Shape& shape, Type data_type) { } dnnl::memory::data_type dnnl_data_type; - if (data_type == Type::kFloat) { - dnnl_data_type = dnnl::memory::data_type::f32; - } else { - throw std::invalid_argument("Unsupported data type for oneDNN EW layer"); + + switch (data_type) { + case Type::kFloat: + dnnl_data_type = dnnl::memory::data_type::f32; + break; + default: + throw std::invalid_argument( + "Unsupported data type for oneDNN EW layer"); } memory_desc_ = dnnl::memory::desc(dims, dnnl_data_type, format); From 01756e7ffef9e2a60139069cb1a001e9350887ad Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Mon, 15 Dec 2025 15:31:37 +0300 Subject: [PATCH 19/24] no_discard --- include/graph/runtime_options.hpp | 4 ++-- include/layers/Layer.hpp | 2 +- src/layers_oneDNN/EWLayer.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp index 435248457..3f8301cbb 100644 --- a/include/graph/runtime_options.hpp +++ b/include/graph/runtime_options.hpp @@ -14,7 +14,7 @@ struct RuntimeOptions { int threads{0}; bool parallel{false}; - ParBackend getEffectiveParBackend() const { + [[nodiscard]] ParBackend getEffectiveParBackend() const { return parallel ? par_backend : ParBackend::kSeq; } @@ -23,7 +23,7 @@ struct RuntimeOptions { parallel = (p != ParBackend::kSeq); } - bool isParallel() const { + [[nodiscard]] bool isParallel() const { return parallel && (par_backend != ParBackend::kSeq); } }; diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 39c2b3edb..00ddeb7cc 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -55,7 +55,7 @@ class Layer { PostOperations postops; [[nodiscard]] int getID() const { return id_; } void setID(int id) { id_ = id; } - LayerType getName() const { return type_; } + [[nodiscard]] LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; virtual void run(const std::vector& input, diff --git a/src/layers_oneDNN/EWLayer.cpp b/src/layers_oneDNN/EWLayer.cpp index b93bb45d8..2fdf38904 100644 --- a/src/layers_oneDNN/EWLayer.cpp +++ b/src/layers_oneDNN/EWLayer.cpp @@ -59,7 +59,7 @@ void EwLayerOneDnn::run(const std::vector& input, } } catch (const std::exception& e) { - std::cerr << "oneDNN execution failed: " << e.what() << std::endl; + std::cerr << "oneDNN execution failed: " << e.what() << '\n'; throw; } } @@ -143,7 +143,7 @@ void EwLayerOneDnn::initialize_onednn(const Shape& shape, Type data_type) { } catch (const std::exception& e) { std::cerr << "oneDNN initialization failed for function '" << func_ - << "': " << e.what() << std::endl; + << "': " << e.what() << '\n'; throw; } } From 1adbb7db5848a57c33c5bf0f9b12e42138655572 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Mon, 15 Dec 2025 17:49:14 +0300 Subject: [PATCH 20/24] Revert opencv submodule to 4.x branch --- 3rdparty/opencv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/opencv b/3rdparty/opencv index 2e3ccb4e8..cd0699a33 160000 --- a/3rdparty/opencv +++ b/3rdparty/opencv @@ -1 +1 @@ -Subproject commit 2e3ccb4e8e098cf481c938998ff941e320fa4f85 +Subproject commit cd0699a338d421634a6e3da51130fa60c0908d73 From a4da6329eaed6620ccbc854b64d740d82e9b0958 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Tue, 16 Dec 2025 11:53:47 +0300 Subject: [PATCH 21/24] version opencv --- 3rdparty/opencv | 2 +- app/Graph/graph_build.cpp | 3 +-- src/layers/ConvLayer.cpp | 21 --------------------- src/layers/PoolingLayer.cpp | 28 ---------------------------- 4 files changed, 2 insertions(+), 52 deletions(-) diff --git a/3rdparty/opencv b/3rdparty/opencv index cd0699a33..49486f61f 160000 --- a/3rdparty/opencv +++ b/3rdparty/opencv @@ -1 +1 @@ -Subproject commit cd0699a338d421634a6e3da51130fa60c0908d73 +Subproject commit 49486f61fb25722cbcf586b7f4320921d46fb38e diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index d38030f6f..a12e01a8c 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -20,8 +20,7 @@ int main(int argc, char* argv[]) { std::cout << "Warning: oneDNN backend is not compatible with parallel " "execution. Disabling parallelism." << '\n'; - options.setParallelBackend( - ParBackend::kSeq); // Автоматически sets parallel=false + options.setParallelBackend(ParBackend::kSeq); } } else if (std::string(argv[i]) == "--parallel" && i + 1 < argc) { if (options.backend == Backend::kOneDnn) { diff --git a/src/layers/ConvLayer.cpp b/src/layers/ConvLayer.cpp index ba8fdd078..60bce7187 100644 --- a/src/layers/ConvLayer.cpp +++ b/src/layers/ConvLayer.cpp @@ -37,21 +37,9 @@ void ConvolutionalLayer::run(const std::vector& input, } if (options.parallel) { switch (options.par_backend) { - /*case ParallelBackend::kTBB: - implType_ = kTBB; - break;*/ case ParBackend::kThreads: implType_ = kSTL; break; - // case ParallelBackend::kOMP: - // implType = kOMP; - // break; - // case ParallelBackend::kKokkos: - // implType = kKokkos; - // break; - // case ParallelBackend::kSycl: - // implType = kSycl; - // break; case ParBackend::kSeq: default: implType_ = kDefault; @@ -114,11 +102,6 @@ void ConvolutionalLayer::run(const std::vector& input, group_, dilations_); break; } - /*case kTBB: { - Conv4DTBB(input[0], kernel_, bias_, output[0], stride_, pads_, - group_, dilations_); - break; - }*/ default: { Conv4D(input[0], kernel_, bias_, output[0], stride_, pads_, group_, dilations_); @@ -187,10 +170,6 @@ void ConvolutionalLayer::run(const std::vector& input, pads_, group_, dilations_); break; } - /*case kTBB: { - Conv4DTBB(input[0], kernel_, bias_, output[0], stride_, - pads_, group_, dilations_); break; - }*/ default: { Conv4D(input[0], kernel_, bias_, output[0], stride_, pads_, group_, dilations_); diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index be2a34323..b672510b0 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -19,18 +19,6 @@ void PoolingLayer::run(const std::vector& input, case ParBackend::kTbb: implType_ = kTBB; break; - /*case ParallelBackend::kSTL: - implType_ = kSTL; - break;*/ - // case ParallelBackend::kOMP: - // implType = kOMP; - // break; - // case ParallelBackend::kKokkos: - // implType = kKokkos; - // break; - // case ParallelBackend::kSycl: - // implType = kSycl; - // break; case ParBackend::kSeq: default: implType_ = kDefault; @@ -48,14 +36,6 @@ void PoolingLayer::run(const std::vector& input, used_impl.get_output_shape()); break; } - /*case kSTL: { - PoolingLayerImplSTL used_impl( - input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, - ceil_mode_, poolingType_); - output[0] = make_tensor(used_impl.run(*input[0].as()), - used_impl.get_output_shape()); - break; - }*/ default: { PoolingLayerImpl used_impl(input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, @@ -77,14 +57,6 @@ void PoolingLayer::run(const std::vector& input, used_impl.get_output_shape()); break; } - /*case kSTL: { - PoolingLayerImplSTL used_impl( - input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, - ceil_mode_, poolingType_); - output[0] = make_tensor(used_impl.run(*input[0].as()), - used_impl.get_output_shape()); - break; - }*/ default: { PoolingLayerImpl used_impl(input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, From 6a64c3830ab79e687cb39b7f112df58c738c53f2 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Tue, 16 Dec 2025 12:57:24 +0300 Subject: [PATCH 22/24] fix using, acc_check --- app/Graph/acc_check.cpp | 29 +++++++++++++++++++++++++++-- app/Graph/build.hpp | 8 +++----- src/layers/CMakeLists.txt | 4 ---- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/app/Graph/acc_check.cpp b/app/Graph/acc_check.cpp index d1aac7035..438b0cb3a 100644 --- a/app/Graph/acc_check.cpp +++ b/app/Graph/acc_check.cpp @@ -19,8 +19,33 @@ int main(int argc, char* argv[]) { model_name = argv[++i]; } else if (std::string(argv[i]) == "--onednn") { options.backend = Backend::kOneDnn; - } else if (std::string(argv[i]) == "--parallel") { - options.parallel = true; + if (options.isParallel()) { + std::cout << "Warning: oneDNN backend is not compatible with parallel " + "execution. Disabling parallelism." + << '\n'; + options.setParallelBackend(ParBackend::kSeq); + } + } else if (std::string(argv[i]) == "--parallel" && i + 1 < argc) { + if (options.backend == Backend::kOneDnn) { + std::cout << "Warning: Parallel execution is not compatible with " + "oneDNN backend. Ignoring --parallel option." + << '\n'; + i++; + continue; + } + + std::string backend_str = argv[++i]; + if (backend_str == "tbb") { + options.setParallelBackend(ParBackend::kTbb); + } else if (backend_str == "threads" || backend_str == "stl") { + options.setParallelBackend(ParBackend::kThreads); + } else if (backend_str == "omp") { + options.setParallelBackend(ParBackend::kOmp); + } else { + std::cerr << "Unknown parallel backend: " << backend_str + << ". Using default (Threads)." << '\n'; + options.setParallelBackend(ParBackend::kThreads); + } } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { options.threads = std::stoi(argv[++i]); } diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index 8ff85d725..0e7c627e3 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -34,8 +34,6 @@ #include "layers/TransposeLayer.hpp" #include "layers_oneDNN/EWLayer.hpp" -using namespace it_lab_ai; - extern std::unordered_map model_paths; struct ParseResult { @@ -55,14 +53,14 @@ struct ParseResult { void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, const std::string& json_path, - RuntimeOptions options, bool comments); + it_lab_ai::RuntimeOptions options, bool comments); void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, - it_lab_ai::Tensor& output, RuntimeOptions options, + it_lab_ai::Tensor& output, it_lab_ai::RuntimeOptions options, bool comments); std::unordered_map load_class_names( const std::string& filename); -ParseResult parse_json_model(RuntimeOptions options, +ParseResult parse_json_model(it_lab_ai::RuntimeOptions options, const std::string& json_path, bool comments); std::vector get_input_shape_from_json(const std::string& json_path); diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index 8362a1223..e441507fd 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,10 +1,6 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") -target_include_directories(layers_lib PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/../include -) - target_link_libraries(layers_lib PUBLIC TBB_unified) target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(layers_lib PUBLIC dnnl) From 241f1a9a7d4073ab80a834694529e00fde1ba796 Mon Sep 17 00:00:00 2001 From: Semyon1104 Date: Tue, 16 Dec 2025 13:12:15 +0300 Subject: [PATCH 23/24] copy in build-onGit --- app/Graph/CMakeLists.txt | 2 +- app/Graph/build.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/Graph/CMakeLists.txt b/app/Graph/CMakeLists.txt index 69e934d36..b1d39dce0 100644 --- a/app/Graph/CMakeLists.txt +++ b/app/Graph/CMakeLists.txt @@ -22,7 +22,7 @@ target_link_libraries(Graph_Build BuildGraph) add_executable(ACC acc_check.cpp) target_link_libraries(ACC BuildGraph) -if (WIN32) +if (WIN32 AND EXISTS "${OPENCV_BUILD_DIR}/bin") add_custom_command(TARGET Graph_Build POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${OPENCV_BUILD_DIR}/bin/." diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index 0e7c627e3..bbba3318f 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -55,8 +55,8 @@ void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, const std::string& json_path, it_lab_ai::RuntimeOptions options, bool comments); void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, - it_lab_ai::Tensor& output, it_lab_ai::RuntimeOptions options, - bool comments); + it_lab_ai::Tensor& output, + it_lab_ai::RuntimeOptions options, bool comments); std::unordered_map load_class_names( const std::string& filename); From 88e0e6f49f01752adaf941cc291c7694677c83a2 Mon Sep 17 00:00:00 2001 From: Semyon1104 <129722895+Semyon1104@users.noreply.github.com> Date: Tue, 16 Dec 2025 13:57:40 +0300 Subject: [PATCH 24/24] Update app/Graph/CMakeLists.txt Co-authored-by: Arseniy Obolenskiy --- app/Graph/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Graph/CMakeLists.txt b/app/Graph/CMakeLists.txt index b1d39dce0..69e934d36 100644 --- a/app/Graph/CMakeLists.txt +++ b/app/Graph/CMakeLists.txt @@ -22,7 +22,7 @@ target_link_libraries(Graph_Build BuildGraph) add_executable(ACC acc_check.cpp) target_link_libraries(ACC BuildGraph) -if (WIN32 AND EXISTS "${OPENCV_BUILD_DIR}/bin") +if (WIN32) add_custom_command(TARGET Graph_Build POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${OPENCV_BUILD_DIR}/bin/."