diff --git a/app/Accuracy/accuracy_check.cpp b/app/Accuracy/accuracy_check.cpp index f6eb2519e..4a3c3333a 100644 --- a/app/Accuracy/accuracy_check.cpp +++ b/app/Accuracy/accuracy_check.cpp @@ -10,6 +10,10 @@ using namespace it_lab_ai; int main() { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.threads = 4; + options.parallel = true; std::string image_path = IMAGE1_PATH; cv::Mat image = cv::imread(image_path); if (image.empty()) { @@ -66,7 +70,7 @@ int main() { graph.makeConnection(a4_ptr, a5_ptr); graph.makeConnection(a5_ptr, a6_ptr); graph.setOutput(a5_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector tmp_output = softmax(*output.as()); for (float i : tmp) { diff --git a/app/Graph/acc_check.cpp b/app/Graph/acc_check.cpp index 02451b763..438b0cb3a 100644 --- a/app/Graph/acc_check.cpp +++ b/app/Graph/acc_check.cpp @@ -12,15 +12,45 @@ using namespace it_lab_ai; int main(int argc, char* argv[]) { std::string model_name = "alexnet_mnist"; - bool onednn = false; + RuntimeOptions options; + for (int i = 1; i < argc; ++i) { if (std::string(argv[i]) == "--model" && i + 1 < argc) { model_name = argv[++i]; } else if (std::string(argv[i]) == "--onednn") { - onednn = true; + options.backend = Backend::kOneDnn; + if (options.isParallel()) { + std::cout << "Warning: oneDNN backend is not compatible with parallel " + "execution. Disabling parallelism." + << '\n'; + options.setParallelBackend(ParBackend::kSeq); + } + } else if (std::string(argv[i]) == "--parallel" && i + 1 < argc) { + if (options.backend == Backend::kOneDnn) { + std::cout << "Warning: Parallel execution is not compatible with " + "oneDNN backend. Ignoring --parallel option." 
+ << '\n'; + i++; + continue; + } + + std::string backend_str = argv[++i]; + if (backend_str == "tbb") { + options.setParallelBackend(ParBackend::kTbb); + } else if (backend_str == "threads" || backend_str == "stl") { + options.setParallelBackend(ParBackend::kThreads); + } else if (backend_str == "omp") { + options.setParallelBackend(ParBackend::kOmp); + } else { + std::cerr << "Unknown parallel backend: " << backend_str + << ". Using default (Threads)." << '\n'; + options.setParallelBackend(ParBackend::kThreads); + } + } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { + options.threads = std::stoi(argv[++i]); } } - it_lab_ai::LayerFactory::configure(onednn); + std::string dataset_path; if (model_name == "alexnet_mnist") { dataset_path = MNIST_PATH; @@ -77,8 +107,8 @@ int main(int argc, char* argv[]) { Tensor t = make_tensor(res, sh); input = t; Graph graph; - build_graph_linear(graph, input, output, false); - graph.inference(); + build_graph_linear(graph, input, output, options, false); + graph.inference(options); print_time_stats(graph); std::vector> tmp_output = softmax(*output.as(), 10); @@ -186,8 +216,8 @@ int main(int argc, char* argv[]) { it_lab_ai::Tensor(output_shape, it_lab_ai::Type::kFloat); Graph graph; - build_graph(graph, input, output, json_path, false); - graph.inference(); + build_graph(graph, input, output, json_path, options, false); + graph.inference(options); print_time_stats(graph); std::vector> processed_outputs; const std::vector& raw_output = *output.as(); diff --git a/app/Graph/build.cpp b/app/Graph/build.cpp index b16ded830..f6b33ef07 100644 --- a/app/Graph/build.cpp +++ b/app/Graph/build.cpp @@ -4,8 +4,6 @@ using namespace it_lab_ai; -bool LayerFactory::onednn_ = false; - std::unordered_map model_paths = { {"alexnet_mnist", MODEL_PATH_H5}, {"googlenet", MODEL_PATH_GOOGLENET_ONNX}, @@ -14,7 +12,8 @@ std::unordered_map model_paths = { {"yolo", MODEL_PATH_YOLO11NET_ONNX}}; void build_graph_linear(it_lab_ai::Graph& graph, 
it_lab_ai::Tensor& input, - it_lab_ai::Tensor& output, bool comments) { + it_lab_ai::Tensor& output, RuntimeOptions options, + bool comments) { if (comments) { for (size_t i = 0; i < input.get_shape().dims(); i++) { std::cout << input.get_shape()[i] << ' '; @@ -83,14 +82,14 @@ void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor tmp_values = tensor; it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); auto conv_layer = std::make_unique( - 1, pads, 1, tmp_values, tmp_bias, kDefault, 1, true); + 1, pads, 1, tmp_values, tmp_bias, 1, true); layer_ptrs.push_back(conv_layer.get()); layers.push_back(std::move(conv_layer)); layerpostop.push_back(false); if (comments) std::cout << "ConvLayer added to layers." << '\n'; } if (layer_type.find("relu") != std::string::npos) { - auto ew_layer = LayerFactory::createEwLayer("relu"); + auto ew_layer = LayerFactory::createEwLayer("relu", options); layer_ptrs.push_back(ew_layer.get()); layers.push_back(std::move(ew_layer)); layerpostop.push_back(true); @@ -120,7 +119,7 @@ void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, << '\n'; } auto pool_layer = - std::make_unique(shape, pooltype, kDefault); + std::make_unique(shape, pooltype); layer_ptrs.push_back(pool_layer.get()); layers.push_back(std::move(pool_layer)); layerpostop.push_back(false); @@ -195,8 +194,8 @@ std::string get_base_layer_name(const std::string& tensor_name) { void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, const std::string& json_path, - bool comments) { - auto parse_result = parse_json_model(json_path, comments); + RuntimeOptions options, bool comments) { + auto parse_result = parse_json_model(options, json_path, comments); auto& layers = parse_result.layers; auto& name_to_layer_ptr = parse_result.name_to_layer_ptr; @@ -300,7 +299,8 @@ void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, } } -ParseResult 
parse_json_model(const std::string& json_path, bool comments) { +ParseResult parse_json_model(RuntimeOptions options, + const std::string& json_path, bool comments) { ParseResult result; auto& layers = result.layers; @@ -411,13 +411,13 @@ ParseResult parse_json_model(const std::string& json_path, bool comments) { it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); auto conv_layer = std::make_unique( - stride, pads, dilations, tmp_tensor, tmp_bias, kDefault, group); + stride, pads, dilations, tmp_tensor, tmp_bias, group); layer = std::move(conv_layer); } else if (layer_type.find("Relu") != std::string::npos || layer_type.find("relu") != std::string::npos) { - layer = LayerFactory::createEwLayer("relu"); + layer = LayerFactory::createEwLayer("relu", options); } else if (layer_type.find("Sigmoid") != std::string::npos) { - layer = LayerFactory::createEwLayer("sigmoid"); + layer = LayerFactory::createEwLayer("sigmoid", options); } else if (layer_type.find("Dense") != std::string::npos || layer_type.find("FullyConnected") != std::string::npos) { it_lab_ai::Tensor tensor = it_lab_ai::create_tensor_from_json( @@ -448,7 +448,7 @@ ParseResult parse_json_model(const std::string& json_path, bool comments) { } } else if (layer_type == "GlobalAveragePool") { auto pool_layer = std::make_unique( - it_lab_ai::Shape({0, 0}), "average", kDefault); + it_lab_ai::Shape({0, 0}), "average"); layer = std::move(pool_layer); if (comments) { std::cout << "GlobalAveragePool layer added (will use input spatial " @@ -509,8 +509,8 @@ ParseResult parse_json_model(const std::string& json_path, bool comments) { } } - auto pool_layer = std::make_unique( - shape, pooltype, kDefault); + auto pool_layer = + std::make_unique(shape, pooltype); try { if (strides[0] != 2 || strides[1] != 2) { @@ -637,13 +637,16 @@ ParseResult parse_json_model(const std::string& json_path, bool comments) { if (layer_type == "Mul") { ew_operation = "linear"; - layer = 
LayerFactory::createEwLayer(ew_operation, value, 0.0F); + layer = + LayerFactory::createEwLayer(ew_operation, options, value, 0.0F); } else if (layer_type == "Add") { ew_operation = "linear"; - layer = LayerFactory::createEwLayer(ew_operation, 1.0F, value); + layer = + LayerFactory::createEwLayer(ew_operation, options, 1.0F, value); } else if (layer_type == "Sub") { ew_operation = "linear"; - layer = LayerFactory::createEwLayer(ew_operation, 1.0F, -value); + layer = LayerFactory::createEwLayer(ew_operation, options, 1.0F, + -value); } else { continue; } diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index 0eb2e2d52..bbba3318f 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -13,6 +13,7 @@ #include "Weights_Reader/reader_weights.hpp" #include "graph/graph.hpp" +#include "graph/runtime_options.hpp" #include "layers/BatchNormalizationLayer.hpp" #include "layers/BinaryOpLayer.hpp" #include "layers/ConcatLayer.hpp" @@ -52,13 +53,15 @@ struct ParseResult { void build_graph(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, const std::string& json_path, - bool comments); + it_lab_ai::RuntimeOptions options, bool comments); void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, - it_lab_ai::Tensor& output, bool comments); + it_lab_ai::Tensor& output, + it_lab_ai::RuntimeOptions options, bool comments); std::unordered_map load_class_names( const std::string& filename); -ParseResult parse_json_model(const std::string& json_path, bool comments); +ParseResult parse_json_model(it_lab_ai::RuntimeOptions options, + const std::string& json_path, bool comments); std::vector get_input_shape_from_json(const std::string& json_path); std::vector process_model_output(const std::vector& output, @@ -69,19 +72,15 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image, it_lab_ai::Tensor prepare_mnist_image(const cv::Mat& image); void print_time_stats(it_lab_ai::Graph& graph); - namespace it_lab_ai { class LayerFactory { - 
private: - static bool onednn_; - public: - static void configure(bool onednn) { onednn_ = onednn; } - static std::unique_ptr createEwLayer(const std::string& function, + const RuntimeOptions& options, float alpha = 1.0F, float beta = 0.0F) { - if (onednn_ && EwLayerOneDnn::is_function_supported(function)) { + if (options.backend == Backend::kOneDnn && + EwLayerOneDnn::is_function_supported(function)) { return std::make_unique(function, alpha, beta); } return std::make_unique(function, alpha, beta); diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index c6918b8e2..a12e01a8c 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -9,17 +9,45 @@ using namespace it_lab_ai; int main(int argc, char* argv[]) { std::string model_name = "alexnet_mnist"; - bool onednn = false; + RuntimeOptions options; + for (int i = 1; i < argc; ++i) { if (std::string(argv[i]) == "--model" && i + 1 < argc) { model_name = argv[++i]; } else if (std::string(argv[i]) == "--onednn") { - onednn = true; + options.backend = Backend::kOneDnn; + if (options.isParallel()) { + std::cout << "Warning: oneDNN backend is not compatible with parallel " + "execution. Disabling parallelism." + << '\n'; + options.setParallelBackend(ParBackend::kSeq); + } + } else if (std::string(argv[i]) == "--parallel" && i + 1 < argc) { + if (options.backend == Backend::kOneDnn) { + std::cout << "Warning: Parallel execution is not compatible with " + "oneDNN backend. Ignoring --parallel option." + << '\n'; + i++; + continue; + } + + std::string backend_str = argv[++i]; + if (backend_str == "tbb") { + options.setParallelBackend(ParBackend::kTbb); + } else if (backend_str == "threads" || backend_str == "stl") { + options.setParallelBackend(ParBackend::kThreads); + } else if (backend_str == "omp") { + options.setParallelBackend(ParBackend::kOmp); + } else { + std::cerr << "Unknown parallel backend: " << backend_str + << ". Using default (Threads)." 
<< '\n'; + options.setParallelBackend(ParBackend::kThreads); + } + } else if (std::string(argv[i]) == "--threads" && i + 1 < argc) { + options.threads = std::stoi(argv[++i]); } } - it_lab_ai::LayerFactory::configure(onednn); - std::string json_path = model_paths[model_name]; std::vector input_shape; @@ -62,11 +90,11 @@ int main(int argc, char* argv[]) { std::vector vec(75, 3); it_lab_ai::Tensor output = it_lab_ai::make_tensor(vec, sh1); Graph graph; - build_graph_linear(graph, input, output, true); + build_graph_linear(graph, input, output, options, true); std::cout << "Starting inference..." << '\n'; try { - graph.inference(); + graph.inference(options); std::cout << "Inference completed successfully." << '\n'; } catch (const std::exception& e) { std::cerr << "ERROR during inference: " << e.what() << '\n'; @@ -102,11 +130,11 @@ int main(int argc, char* argv[]) { it_lab_ai::Tensor output({1, output_classes}, it_lab_ai::Type::kFloat); Graph graph; - build_graph(graph, input, output, json_path, false); + build_graph(graph, input, output, json_path, options, false); std::cout << "Starting inference..." << '\n'; try { - graph.inference(); + graph.inference(options); std::cout << "Inference completed successfully." 
<< '\n'; } catch (const std::exception& e) { std::cerr << "ERROR during inference: " << e.what() << '\n'; diff --git a/include/graph/graph.hpp b/include/graph/graph.hpp index 93114b768..9b6825163 100644 --- a/include/graph/graph.hpp +++ b/include/graph/graph.hpp @@ -11,6 +11,7 @@ #include #include "layers/Layer.hpp" +#include "runtime_options.hpp" namespace it_lab_ai { @@ -224,7 +225,7 @@ class Graph { return false; } - void inference() { + void inference(const RuntimeOptions& options) { std::vector> countinout = getInOutDegrees(); std::vector traversal = getTraversalOrder(); count_used_split_distribution_ = 0; @@ -262,7 +263,7 @@ class Graph { } } } - layers_[current_layer]->run(inten_, outten_); + layers_[current_layer]->run(inten_, outten_, options); #ifdef ENABLE_STATISTIC_TENSORS tensors_.push_back(inten_[0]); @@ -277,7 +278,8 @@ class Graph { if (layers_[current_layer]->postops.count > 0) { for (unsigned int j = 0; j < layers_[current_layer]->postops.count; j++) { - layers_[current_layer]->postops.layers[j]->run(inten_, outten_); + layers_[current_layer]->postops.layers[j]->run(inten_, outten_, + options); } inten_ = outten_; } diff --git a/include/graph/runtime_options.hpp b/include/graph/runtime_options.hpp new file mode 100644 index 000000000..3f8301cbb --- /dev/null +++ b/include/graph/runtime_options.hpp @@ -0,0 +1,31 @@ +#pragma once +#include + +#include "parallel/parallel.hpp" + +namespace it_lab_ai { + +enum class Backend : uint8_t { kNaive, kOneDnn }; +using ParBackend = parallel::Backend; + +struct RuntimeOptions { + Backend backend{Backend::kNaive}; + ParBackend par_backend{ParBackend::kSeq}; + int threads{0}; + bool parallel{false}; + + [[nodiscard]] ParBackend getEffectiveParBackend() const { + return parallel ? 
par_backend : ParBackend::kSeq; + } + + void setParallelBackend(ParBackend p) { + par_backend = p; + parallel = (p != ParBackend::kSeq); + } + + [[nodiscard]] bool isParallel() const { + return parallel && (par_backend != ParBackend::kSeq); + } +}; + +} // namespace it_lab_ai diff --git a/include/layers/ConvLayer.hpp b/include/layers/ConvLayer.hpp index 0408b0117..b7fb56c01 100644 --- a/include/layers/ConvLayer.hpp +++ b/include/layers/ConvLayer.hpp @@ -38,8 +38,7 @@ class ConvolutionalLayer : public Layer { } ConvolutionalLayer(size_t step, size_t pads, size_t dilations, const Tensor& kernel, const Tensor& bias = Tensor(), - ImplType implType = kDefault, size_t group = 1, - bool useLegacyImpl = false) + size_t group = 1, bool useLegacyImpl = false) : Layer(kConvolution) { stride_ = step; pads_ = pads; @@ -47,12 +46,12 @@ class ConvolutionalLayer : public Layer { dilations_ = dilations; kernel_ = kernel; bias_ = bias; - implType_ = implType; useLegacyImpl_ = useLegacyImpl; } - void run(const std::vector& input, std::vector& output) override; + void run(const std::vector& input, std::vector& output, + const RuntimeOptions& options) override; #ifdef ENABLE_STATISTIC_WEIGHTS Tensor get_weights() override { return kernel_; } #endif diff --git a/include/layers/EWLayer.hpp b/include/layers/EWLayer.hpp index ebac14f7c..cf9dbda85 100644 --- a/include/layers/EWLayer.hpp +++ b/include/layers/EWLayer.hpp @@ -28,6 +28,8 @@ class EWLayer : public Layer { void run(const std::vector& input, std::vector& output) override; + void run(const std::vector& input, std::vector& output, + const RuntimeOptions& options) override; #ifdef ENABLE_STATISTIC_WEIGHTS Tensor get_weights() override { std::vector v = {0}; diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 5f00d5f62..00ddeb7cc 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -9,6 +9,7 @@ #include #include +#include "graph/runtime_options.hpp" #include "layers/Shape.hpp" #include 
"layers/Tensor.hpp" #include "parallel/parallel.hpp" @@ -54,13 +55,14 @@ class Layer { PostOperations postops; [[nodiscard]] int getID() const { return id_; } void setID(int id) { id_ = id; } - void setParallelBackend(ParBackend backend) { parallel_backend_ = backend; } - [[nodiscard]] ParBackend getParallelBackend() const { - return parallel_backend_; - } [[nodiscard]] LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; + virtual void run(const std::vector& input, + std::vector& output, + [[maybe_unused]] const RuntimeOptions& options) { + run(input, output); + } #ifdef ENABLE_STATISTIC_WEIGHTS virtual Tensor get_weights() = 0; #endif @@ -68,7 +70,6 @@ class Layer { protected: int id_ = 0; LayerType type_; - ParBackend parallel_backend_ = ParBackend::kSeq; }; template diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index 40705fa7d..f4924b5de 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -22,27 +22,23 @@ class PoolingLayer : public Layer { const Shape& strides = {2, 2}, const Shape& pads = {0, 0, 0, 0}, const Shape& dilations = {1, 1}, bool ceil_mode = false, - std::string pooling_type = "average", - ImplType implType = kDefault) + std::string pooling_type = "average") : Layer(kPooling), poolingShape_(pooling_shape), strides_(strides), pads_(pads), dilations_(dilations), ceil_mode_(ceil_mode), - poolingType_(std::move(pooling_type)), - implType_(implType) {} + poolingType_(std::move(pooling_type)) {} explicit PoolingLayer(const Shape& pooling_shape, - std::string pooling_type = "average", - ImplType implType = kDefault) + std::string pooling_type = "average") : Layer(kPooling), poolingShape_(pooling_shape), strides_({2, 2}), pads_({0, 0, 0, 0}), dilations_({1, 1}), ceil_mode_(false), - poolingType_(std::move(pooling_type)), - implType_(implType) {} + poolingType_(std::move(pooling_type)) {} void setStrides(size_t h, size_t w) { 
strides_ = {h, w}; } void setPads(size_t top, size_t bottom, size_t left, size_t right) { pads_ = {top, bottom, left, right}; @@ -51,6 +47,8 @@ class PoolingLayer : public Layer { void setCeilMode(bool ceil_mode) { ceil_mode_ = ceil_mode; } void run(const std::vector& input, std::vector& output) override; + void run(const std::vector& input, std::vector& output, + const RuntimeOptions& options) override; #ifdef ENABLE_STATISTIC_WEIGHTS Tensor get_weights() override { std::vector v = {0}; diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index 50f14846e..e441507fd 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,5 +1,6 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") + target_link_libraries(layers_lib PUBLIC TBB_unified) target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(layers_lib PUBLIC dnnl) diff --git a/src/layers/ConvLayer.cpp b/src/layers/ConvLayer.cpp index 28c45e555..60bce7187 100644 --- a/src/layers/ConvLayer.cpp +++ b/src/layers/ConvLayer.cpp @@ -4,6 +4,13 @@ namespace it_lab_ai { void ConvolutionalLayer::run(const std::vector& input, std::vector& output) { + RuntimeOptions default_options; + run(input, output, default_options); +} + +void ConvolutionalLayer::run(const std::vector& input, + std::vector& output, + const RuntimeOptions& options) { if (input.size() != 1) { throw std::runtime_error("ConvolutionalLayer: Input tensors not 1"); } @@ -28,6 +35,17 @@ void ConvolutionalLayer::run(const std::vector& input, return; } } + /* Derive the implementation from this call's options every time, so a */ + /* parallel choice from an earlier run never leaks into a sequential one. */ + switch (options.getEffectiveParBackend()) { + case ParBackend::kThreads: + implType_ = kSTL; + break; + case ParBackend::kSeq: + default: + implType_ = kDefault; + break; + } switch (input[0].get_type()) { case Type::kInt: { if (kernel_.get_shape().dims() == 2) { diff --git a/src/layers/EWLayer.cpp b/src/layers/EWLayer.cpp index 6a7da06c0..dc159f1f6 100644 --- a/src/layers/EWLayer.cpp
+++ b/src/layers/EWLayer.cpp @@ -7,11 +7,17 @@ namespace it_lab_ai { void EWLayer::run(const std::vector& input, std::vector& output) { + RuntimeOptions default_options; + run(input, output, default_options); +} + +void EWLayer::run(const std::vector& input, std::vector& output, + const RuntimeOptions& options) { if (input.size() != 1) { throw std::runtime_error("EWLayer: Input tensors not 1"); } - ParBackend backend = getParallelBackend(); + ParBackend backend = options.getEffectiveParBackend(); switch (input[0].get_type()) { case Type::kInt: { diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index 749fdadfd..b672510b0 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -4,10 +4,27 @@ namespace it_lab_ai { void PoolingLayer::run(const std::vector& input, std::vector& output) { + RuntimeOptions default_options; + run(input, output, default_options); +} + +void PoolingLayer::run(const std::vector& input, + std::vector& output, + const RuntimeOptions& options) { if (input.size() != 1) { throw std::runtime_error("PoolingLayer: Input tensors not 1"); } - + /* Derive the implementation from this call's options every time, so a */ + /* parallel choice from an earlier run never leaks into a sequential one. */ + switch (options.getEffectiveParBackend()) { + case ParBackend::kTbb: + implType_ = kTBB; + break; + case ParBackend::kSeq: + default: + implType_ = kDefault; + break; + } switch (input[0].get_type()) { case Type::kInt: { switch (implType_) { diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index ad2d8dc3a..cbdfa5054 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -10,10 +10,11 @@ using namespace it_lab_ai; -void test_func(Layer& p, const Tensor& input, Tensor& output) { +void test_func(Layer& p, const Tensor& input, Tensor& output, + const RuntimeOptions& options) { std::vector in{input}; std::vector out{output}; - p.run(in, out); + p.run(in, out, options); } TEST(pooling_test, is_pooling_tbb_ok) { @@ -30,12 +31,20 @@ TEST(pooling_test, is_pooling_tbb_ok) { }
Tensor input = make_tensor(a1, test_shape); Tensor output; - PoolingLayer p1(Shape({2, 2}), "max", kDefault); - PoolingLayer p2(Shape({2, 2}), "max", kTBB); - double count1 = - elapsed_time(test_func, p1, input, output); - double count2 = - elapsed_time(test_func, p2, input, output); + RuntimeOptions options_seq; + options_seq.parallel = false; + options_seq.par_backend = ParBackend::kSeq; + + RuntimeOptions options_tbb; + options_tbb.parallel = true; + options_tbb.par_backend = ParBackend::kTbb; + + PoolingLayer p1(Shape({2, 2}), "max"); + PoolingLayer p2(Shape({2, 2}), "max"); + double count1 = elapsed_time(test_func, p1, input, output, + options_seq); + double count2 = elapsed_time(test_func, p2, input, output, + options_tbb); std::cout << count1 << " vs. " << count2 << " (parallel)\n"; } @@ -58,11 +67,20 @@ TEST(conv_test, is_conv_stl_ok) { Tensor input = make_tensor(a1, test_shape); Tensor kernel = make_tensor(a2, Shape({5, 5, 3, 16})); Tensor output; - ConvolutionalLayer p1(1, 1, 2, kernel, Tensor(), kDefault); - ConvolutionalLayer p2(1, 1, 2, kernel, Tensor(), kSTL); - double count1 = - elapsed_time(test_func, p1, input, output); - double count2 = - elapsed_time(test_func, p2, input, output); + + RuntimeOptions options_seq; + options_seq.parallel = false; + options_seq.par_backend = ParBackend::kSeq; + + RuntimeOptions options_stl; + options_stl.parallel = true; + options_stl.par_backend = ParBackend::kThreads; + + ConvolutionalLayer p1(1, 1, 2, kernel, Tensor()); + ConvolutionalLayer p2(1, 1, 2, kernel, Tensor()); + double count1 = elapsed_time(test_func, p1, input, output, + options_seq); + double count2 = elapsed_time(test_func, p2, input, output, + options_stl); std::cout << count1 << " vs.
" << count2 << " (parallel)\n"; } diff --git a/test/inference/test_inference.cpp b/test/inference/test_inference.cpp index 6ed2aa0d5..f6b201a25 100644 --- a/test/inference/test_inference.cpp +++ b/test/inference/test_inference.cpp @@ -18,6 +18,11 @@ using namespace it_lab_ai; TEST(bfs, check_struct_graph) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -95,13 +100,18 @@ TEST(bfs, check_struct_graph) { graph.makeConnection(a10_ptr, a12_ptr); graph.setOutput(a12_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(36, 81); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_not_used_yolo) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 4, 2, 2}); std::vector vec; @@ -156,13 +166,18 @@ TEST(bfs, check_struct_graph_not_used_yolo) { graph.makeConnection(a3_3_4_ptr, a3_2_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(16, 3); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_resnet1) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 2, 2, 2}); std::vector vec; @@ -209,13 +224,18 @@ TEST(bfs, check_struct_graph_resnet1) { graph.makeConnection(a3_ptr, a4_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(4, 12); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_resnet2) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 2, 2, 2}); std::vector vec; @@ -259,13 +279,18 @@ TEST(bfs, check_struct_graph_resnet2) { graph.makeConnection(a3_ptr, 
a4_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(4, 12); ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_graph_google1) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 2, 2, 2}); std::vector vec; @@ -307,7 +332,7 @@ TEST(bfs, check_struct_graph_google1) { graph.makeConnection(a2_1_ptr, a3_ptr); graph.setOutput(a3_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(32, 3); for (int c = 8; c < 24; c++) { @@ -317,6 +342,11 @@ TEST(bfs, check_struct_graph_google1) { } TEST(bfs, check_result_vec) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -344,7 +374,7 @@ TEST(bfs, check_result_vec) { graph.makeConnection(a2_ptr, a4_ptr); graph.setOutput(a4_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res = {81, 81, 81}; #ifdef ENABLE_STATISTIC_TENSORS @@ -396,6 +426,11 @@ TEST(bfs, check_result_vec) { } TEST(bfs, check_end_to_end) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -434,7 +469,7 @@ TEST(bfs, check_end_to_end) { graph.makeConnection(a4_ptr, a5_ptr); graph.setOutput(a5_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); ASSERT_GT(tmp.size(), 0); @@ -444,6 +479,11 @@ TEST(bfs, check_end_to_end) { } TEST(bfs, check_struct_layer) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -470,13 +510,18 @@ TEST(bfs, check_struct_layer) { 
graph.makeConnection(a2_ptr, a3_ptr); graph.setOutput(a3_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res = {81, 81, 81}; ASSERT_EQ(tmp, res); } TEST(bfs, check_struct_layer_added) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + Graph graph; Shape sh1({1, 5, 5, 3}); std::vector vec; @@ -508,13 +553,18 @@ TEST(bfs, check_struct_layer_added) { graph.makeConnection(a2_ptr, a3_ptr); graph.setOutput(a3_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res = {189, 189, 189}; ASSERT_EQ(tmp, res); } FLAKY_TEST(bfs, check_struct_graph_split) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.threads = 4; + std::vector>> split = { {{12, 0}, {13, 0}, {14, 0}}}; Graph graph(151, split); @@ -594,7 +644,7 @@ FLAKY_TEST(bfs, check_struct_graph_split) { graph.makeConnection(a10_ptr, a12_ptr); graph.setOutput(a12_ptr, output); - graph.inference(); + graph.inference(options); std::vector tmp = *output.as(); std::vector res(36, 81); ASSERT_EQ(tmp, res); diff --git a/test/single_layer/test_convlayer.cpp b/test/single_layer/test_convlayer.cpp index 41b4400fd..664ac313d 100644 --- a/test/single_layer/test_convlayer.cpp +++ b/test/single_layer/test_convlayer.cpp @@ -445,7 +445,7 @@ TEST(ConvolutionalLayerTest, DepthwiseIntegration) { std::vector output_vec(32, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 1, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer layer(1, 1, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; @@ -569,7 +569,7 @@ TEST(ConvolutionalLayerTest, DepthwiseViaConvolutionalLayer) { std::vector output_vec(8, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer 
layer(1, 0, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; layer.run(in, out); @@ -583,6 +583,11 @@ TEST(ConvolutionalLayerTest, DepthwiseViaConvolutionalLayer) { } TEST(ConvolutionalLayerTest, Conv4DSTLViaConvolutionalLayer) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.par_backend = ParBackend::kThreads; + std::vector image(48, 1.0f); Shape input_shape({1, 3, 4, 4}); Tensor input = make_tensor(image, input_shape); @@ -595,10 +600,10 @@ TEST(ConvolutionalLayerTest, Conv4DSTLViaConvolutionalLayer) { std::vector output_vec(8, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kSTL); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); std::vector in{input}; std::vector out{output}; - layer.run(in, out); + layer.run(in, out, options); std::vector result = *out[0].as(); @@ -684,7 +689,7 @@ TEST(ConvolutionalLayerTest, Conv4DLegacyViaConvolutionalLayer) { std::vector output_vec(8, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 1, true); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), 1, true); std::vector in{input}; std::vector out{output}; @@ -747,7 +752,7 @@ TEST(ConvolutionalLayerTest, DepthwiseConv4DIntPathCoverage) { std::vector output_vec(6, 0); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, bias, kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, bias, 2); std::vector in{input}; std::vector out{output}; @@ -776,7 +781,7 @@ TEST(ConvolutionalLayerTest, DepthwiseConv4DFloatPathCoverage) { std::vector output_vec(2, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, bias, kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, bias, 2); std::vector in{input}; std::vector out{output}; @@ -801,7 +806,7 @@ 
TEST(ConvolutionalLayerTest, DepthwiseConv4DNoBiasIntPathCoverage) { std::vector output_vec(2, 0); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; @@ -827,7 +832,7 @@ TEST(ConvolutionalLayerTest, DepthwiseConv4DNoBiasFloatPathCoverage) { std::vector output_vec(2, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), 2); std::vector in{input}; std::vector out{output}; @@ -1033,4 +1038,137 @@ TEST(ConvolutionalLayerTest, Float4DKernelWorking) { std::vector result = *out[0].as(); ASSERT_EQ(result.size(), 4); -} \ No newline at end of file +} + +TEST(ConvolutionalLayerTest, Conv4DWithParallelNoneBackend) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.par_backend = ParBackend::kSeq; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DWithParallelDefaultFallback) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = 
make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DWithoutParallelFlag) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.par_backend = ParBackend::kThreads; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor()); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DLegacyFloatWithParallelNone) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.par_backend = ParBackend::kSeq; + + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({3, 3, 3, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.5f, 1.0f}; + Tensor 
bias = make_tensor(biasvec, Shape({2})); + + ConvolutionalLayer layer(1, 0, 1, kernel, bias, 1, true); + std::vector in{input}; + + size_t out_height = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + std::vector out{output}; + + layer.run(in, out, options); + + std::vector result = *out[0].as(); + + float expected_value_ch1 = 27.0f + 0.5f; + float expected_value_ch2 = 27.0f + 1.0f; + + ASSERT_EQ(result.size(), 8); + ASSERT_NEAR(result[0], expected_value_ch1, 1e-5f); + ASSERT_NEAR(result[1], expected_value_ch1, 1e-5f); + ASSERT_NEAR(result[4], expected_value_ch2, 1e-5f); + ASSERT_NEAR(result[5], expected_value_ch2, 1e-5f); +} diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index 7ed0e7434..0174fbf98 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -11,7 +11,7 @@ using namespace it_lab_ai; class EWTestsParameterized : public ::testing::TestWithParam< std::tuple, EWLayerImpl, - std::vector, std::function > > {}; + std::vector, std::function>> {}; // 1) input; 2) constructed ewlayerimpl; 3) expected_output; 4) lambda_expr. 
TEST_P(EWTestsParameterized, element_wise_works_correctly) { @@ -218,14 +218,7 @@ TEST(ewlayer, new_ewlayer_can_sigmoid_float_extreme_values) { } TEST(ewlayer, parallel_for_ew) { - EWLayer layer0("relu"); - layer0.setParallelBackend(ParBackend::kSeq); - EWLayer layer1("relu"); - layer1.setParallelBackend(ParBackend::kThreads); - EWLayer layer2("relu"); - layer2.setParallelBackend(ParBackend::kTbb); - EWLayer layer3("relu"); - layer3.setParallelBackend(ParBackend::kOmp); + EWLayer layer("relu"); std::vector vec(8000000, -1); Tensor input = make_tensor(vec); @@ -233,52 +226,27 @@ TEST(ewlayer, parallel_for_ew) { std::vector in{input}; std::vector out{output}; - auto start = std::chrono::high_resolution_clock::now(); - layer0.run(in, out); - auto end = std::chrono::high_resolution_clock::now(); - auto total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); - } - - start = std::chrono::high_resolution_clock::now(); - layer1.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); - } - - start = std::chrono::high_resolution_clock::now(); - layer2.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); - } - - start = std::chrono::high_resolution_clock::now(); - layer3.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - for (size_t i = 0; i < 8000000; i++) { - EXPECT_EQ((*out[0].as())[i], 0); + std::vector backends = {ParBackend::kSeq, ParBackend::kThreads, + ParBackend::kTbb, ParBackend::kOmp}; + + for (auto backend : backends) { + RuntimeOptions options; + options.setParallelBackend(backend); + + auto start = 
std::chrono::high_resolution_clock::now(); + layer.run(in, out, options); + auto end = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end - start); + std::cout << " time: " << duration.count() << " ms" << std::endl; + for (size_t i = 0; i < 8000000; i++) { + EXPECT_EQ((*out[0].as())[i], 0); + } } } -TEST(ewlayer, parallel_for_ew_sigmoid) { - EWLayer layer0("sigmoid"); - layer0.setParallelBackend(ParBackend::kSeq); - EWLayer layer1("sigmoid"); - layer1.setParallelBackend(ParBackend::kThreads); - EWLayer layer2("sigmoid"); - layer2.setParallelBackend(ParBackend::kTbb); - EWLayer layer3("sigmoid"); - layer3.setParallelBackend(ParBackend::kOmp); +TEST(ewlayer, parallel_for_ew_sigmoid_compact) { + EWLayer layer("sigmoid"); std::vector vec(8000000, -1); Tensor input = make_tensor(vec); @@ -286,31 +254,47 @@ TEST(ewlayer, parallel_for_ew_sigmoid) { std::vector in{input}; std::vector out{output}; - auto start = std::chrono::high_resolution_clock::now(); - layer0.run(in, out); - auto end = std::chrono::high_resolution_clock::now(); - auto total_duration = - std::chrono::duration_cast(end - start); - - start = std::chrono::high_resolution_clock::now(); - layer1.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - - start = std::chrono::high_resolution_clock::now(); - layer2.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); - - start = std::chrono::high_resolution_clock::now(); - layer3.run(in, out); - end = std::chrono::high_resolution_clock::now(); - total_duration = - std::chrono::duration_cast(end - start); + std::vector> backends = { + {ParBackend::kSeq, "Sequential"}, + {ParBackend::kThreads, "Threads"}, + {ParBackend::kTbb, "TBB"}, + {ParBackend::kOmp, "OpenMP"}}; + + std::vector reference_result; + bool first = true; + + for (const auto& [backend, name] : backends) { + 
RuntimeOptions options; + options.parallel = (backend != ParBackend::kSeq); + options.par_backend = backend; + if (backend == ParBackend::kThreads) { + options.threads = 4; + } - EXPECT_EQ(0, 0); + auto start = std::chrono::high_resolution_clock::now(); + layer.run(in, out, options); + auto end = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end - start); + + std::cout << "Sigmoid " << name << " time: " << duration.count() << " ms" + << std::endl; + + auto current_result = *out[0].as(); + if (first) { + reference_result = current_result; + first = false; + for (size_t i = 0; i < 100; i++) { + EXPECT_EQ(current_result[i], 0) + << "Invalid sigmoid result at index " << i; + } + } else { + for (size_t i = 0; i < reference_result.size(); i++) { + EXPECT_EQ(current_result[i], reference_result[i]) + << "Mismatch with " << name << " at index " << i; + } + } + } } TEST(ewlayer, parallel_for_direct) { diff --git a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp index ff8ccc826..b1bfd798a 100644 --- a/test/single_layer/test_poolinglayer.cpp +++ b/test/single_layer/test_poolinglayer.cpp @@ -284,10 +284,13 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg) { } TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.par_backend = ParBackend::kTbb; Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; - PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average", - it_lab_ai::kTBB); + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); PoolingLayerImplTBB impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, @@ -300,7 +303,7 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) { std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; - a.run(in, out); + a.run(in, out, options); 
std::vector true_output = {6, 4, 4, 6}; for (size_t i = 0; i < true_output.size(); i++) { @@ -324,14 +327,18 @@ TEST(poolinglayer, new_pooling_layer_can_run_1d_pooling_float) { } TEST(poolinglayer, new_pooling_layer_tbb_can_run_1d_pooling_float) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.par_backend = ParBackend::kTbb; Shape inpshape = {8}; Shape poolshape = {3}; - PoolingLayer a(poolshape, "average", it_lab_ai::kTBB); + PoolingLayer a(poolshape, "average"); std::vector input({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F}); Tensor output = make_tensor({0}); std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; - a.run(in, out); + a.run(in, out, options); std::vector true_output = {8.0F, 6.0F, 4.0F}; for (size_t i = 0; i < true_output.size(); i++) { EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); @@ -432,3 +439,79 @@ TEST(poolinglayer, maxpool_onnx_with_pooling_layer) { EXPECT_LE(val, 10.0f); } } + +TEST(poolinglayer, new_pooling_layer_with_parallel_none) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.par_backend = ParBackend::kSeq; + + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, {1, 1}, {1, 1, 1, 1}, {1, 1}, false, "average"); + std::vector input({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F, + 2.0F, 3.0F, 4.0F, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F}); + Tensor output = make_tensor({0}); + std::vector in{make_tensor(input, inpshape)}; + std::vector out{output}; + a.run(in, out, options); + EXPECT_EQ(out[0].get_shape().count(), 25); +} + +TEST(poolinglayer, new_pooling_layer_int_avg_with_parallel_none) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = true; + options.par_backend = ParBackend::kSeq; + + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); + std::vector input({9, 8, 7, 6, 5, 
4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); + + PoolingLayerImpl impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, + false, "average"); + Shape output_shape = impl.get_output_shape(); + + std::vector zeros(output_shape.count(), 0); + Tensor output = make_tensor(zeros, output_shape); + + std::vector in{make_tensor(input, inpshape)}; + std::vector out{output}; + + a.run(in, out, options); + + std::vector true_output = {6, 4, 4, 6}; + for (size_t i = 0; i < true_output.size(); i++) { + EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); + } +} + +TEST(poolinglayer, new_pooling_layer_int_avg_without_parallel_flag) { + RuntimeOptions options; + options.backend = Backend::kNaive; + options.parallel = false; + options.par_backend = ParBackend::kTbb; + + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); + std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); + + PoolingLayerImpl impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, + false, "average"); + Shape output_shape = impl.get_output_shape(); + + std::vector zeros(output_shape.count(), 0); + Tensor output = make_tensor(zeros, output_shape); + + std::vector in{make_tensor(input, inpshape)}; + std::vector out{output}; + + a.run(in, out, options); + + std::vector true_output = {6, 4, 4, 6}; + for (size_t i = 0; i < true_output.size(); i++) { + EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); + } +}