diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b08551e57..79ec09682 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,7 +40,7 @@ jobs: max-size: 2G - name: Build run: | - cmake -S . -B build \ + cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ @@ -51,19 +51,18 @@ jobs: cmake --build build --parallel env: CTEST_OUTPUT_ON_FAILURE: 1 - - name: Prepare OpenCV libs + - name: Prepare ALL libs run: | - mkdir -p build/bin/opencv_libs - cp -a build/3rdparty/opencv_build/lib/libopencv_* build/bin/opencv_libs/ - echo "Library permissions after copy:" - stat -c "%A %n" build/bin/opencv_libs/libopencv_imgcodecs.so* - + mkdir -p build/bin/all_libs + cp -a build/3rdparty/opencv_build/lib/* build/bin/all_libs/ 2>/dev/null || true + ldd build/bin/ACC | grep "=> /" | awk '{print $3}' | xargs -I {} cp {} build/bin/all_libs/ 2>/dev/null || true - name: Upload artifacts uses: actions/upload-artifact@v4 with: name: mnist-${{ matrix.build_type }}${{ matrix.stats && '-stats' || '' }} path: | ${{ steps.set_binaries.outputs.ACC_BINARY }} + build/bin/all_libs/* build/bin/opencv_libs/* build/setenv.sh - name: Test @@ -92,7 +91,7 @@ jobs: key: ccache-${{ github.job }} - name: Build run: | - cmake -S . -B build \ + cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER=clang \ @@ -117,7 +116,7 @@ jobs: submodules: true - name: Install prerequisites run: | - brew install libomp + brew install libomp ninja brew link libomp --overwrite --force - name: Setup ccache uses: hendrikmuhs/ccache-action@v1.2 @@ -125,12 +124,25 @@ jobs: key: ccache-${{ github.job }} - name: Build run: | - cmake -S . -B build \ + OPENMP_PATH=$(brew --prefix libomp) + echo "OpenMP path: $OPENMP_PATH" + cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_C_FLAGS="-I$(brew --prefix libomp)/include" \ - -DCMAKE_CXX_FLAGS="-I$(brew --prefix libomp)/include" + -DCMAKE_PREFIX_PATH=$OPENMP_PATH \ + -DCMAKE_INCLUDE_PATH=$OPENMP_PATH/include \ + -DCMAKE_LIBRARY_PATH=$OPENMP_PATH/lib \ + -DOpenMP_C_FLAGS="-Xclang -fopenmp -I$OPENMP_PATH/include" \ + -DOpenMP_CXX_FLAGS="-Xclang -fopenmp -I$OPENMP_PATH/include" \ + -DOpenMP_C_LIB_NAMES="omp" \ + -DOpenMP_CXX_LIB_NAMES="omp" \ + -DOpenMP_omp_LIBRARY="$OPENMP_PATH/lib/libomp.dylib" \ + -DCMAKE_EXE_LINKER_FLAGS="-L$OPENMP_PATH/lib -lomp" \ + -DCMAKE_SHARED_LINKER_FLAGS="-L$OPENMP_PATH/lib -lomp" cmake --build build --parallel + env: + LDFLAGS: "-L$(brew --prefix libomp)/lib -lomp" + CPPFLAGS: "-I$(brew --prefix libomp)/include" - name: Test run: cmake --build build -t test env: @@ -175,7 +187,7 @@ jobs: key: ccache-${{ github.job }} - name: Build and Test run: | - cmake -S . -B build \ + cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=Release @@ -201,7 +213,7 @@ jobs: max-size: 2G - name: Build run: | - cmake -S . -B build \ + cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_FLAGS="--coverage" \ @@ -222,32 +234,36 @@ jobs: slug: embedded-dev-research/ITLabAI evaluate-model: - runs-on: ubuntu-latest - needs: [build-linux] - permissions: - contents: write + runs-on: ubuntu-latest + needs: [build-linux] + permissions: + contents: write - steps: + steps: - uses: actions/checkout@v4 with: fetch-depth: 0 + + - name: Download binary and libs + uses: actions/download-artifact@v4 + with: + name: mnist-RELEASE + path: build/ + - name: Set binary path id: set_eval_binary run: | echo "EVAL_BINARY=build/bin/ACC" >> $GITHUB_OUTPUT + - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y \ - libgtk-3-0 \ - libtbb12 \ - libjpeg-dev \ - libpng-dev \ - libtiff-dev + sudo apt-get install -y libgtk-3-0 libtbb12 libjpeg-dev libpng-dev libtiff-dev libopenjp2-7 libdnnl3 sudo ldconfig + - name: Generate model JSON run: | - cd docs && mkdir jsons + cd docs && mkdir -p jsons cd .. cd app/Converters pip install -r requirements.txt @@ -264,76 +280,46 @@ jobs: - name: Download MNIST test dataset if: steps.cache-mnist.outputs.cache-hit != 'true' run: | - set -e mkdir -p docs/mnist/mnist/test - echo "Downloading test images..." wget -q https://github.com/DeepTrackAI/MNIST_dataset/archive/main.zip -O main.zip unzip -q main.zip cp MNIST_dataset-main/mnist/test/*.png docs/mnist/mnist/test/ rm -rf main.zip MNIST_dataset-main - echo "Downloaded $(ls docs/mnist/mnist/test | wc -l) images" - - - name: Download binary and libs - uses: actions/download-artifact@v4 - with: - name: mnist-RELEASE - path: build/ - name: Prepare environment run: | chmod +x "${{ steps.set_eval_binary.outputs.EVAL_BINARY }}" + export LD_LIBRARY_PATH=$PWD/build/bin/all_libs:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH - export LD_LIBRARY_PATH=$PWD/build/bin/opencv_libs:/usr/lib/x86_64-linux-gnu - echo "Final LD_LIBRARY_PATH: $LD_LIBRARY_PATH" - - - name: Verify library integrity - run: | - echo "### Library verification ###" - file build/bin/opencv_libs/libopencv_imgcodecs.so.4.12.0 | grep "shared object" - - name: Run evaluation run: | - echo "### Pre-run checks ###" - export LD_LIBRARY_PATH=$PWD/build/bin/opencv_libs:/usr/lib/x86_64-linux-gnu - echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" - - LD_DEBUG=files "${{ steps.set_eval_binary.outputs.EVAL_BINARY }}" --model alexnet_mnist 2> ld_debug.log - echo "### Library loading debug ###" - grep -i "opencv_imgcodecs" ld_debug.log - - "${{ steps.set_eval_binary.outputs.EVAL_BINARY }}" > accuracy.txt - echo "Accuracy: $(cat accuracy.txt)" + export LD_LIBRARY_PATH=$PWD/build/bin/all_libs:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH + "${{ steps.set_eval_binary.outputs.EVAL_BINARY }}" --model alexnet_mnist > accuracy.txt 2>&1 + if [ $? -ne 0 ]; then + exit 1 + fi + - name: Extract accuracy value + run: | + ACCURACY=$(grep -oE '[0-9]+\.?[0-9]*%' accuracy.txt | head -1 || echo "0%") + echo "$ACCURACY" > accuracy_value.txt + - name: Update README (master only) if: github.ref == 'refs/heads/master' run: | - ACCURACY=$(cat accuracy.txt | sed 's/%//g') + ACCURACY=$(cat accuracy_value.txt | sed 's/%//g') DATE=$(date '+%Y-%m-%d') - - echo "Updating README with:" - echo "Accuracy: $ACCURACY" - echo "Date: $DATE" - - echo "Current README content:" - grep -A 2 -B 2 "ACCURACY_PLACEHOLDER" README.md || echo "Placeholder not found" - sed -i "s/.*/Accuracy: ${ACCURACY}% (updated: ${DATE})/" README.md - - echo "Updated README content:" - grep -A 2 -B 2 "ACCURACY_PLACEHOLDER" README.md - name: Commit and push changes (master only) if: github.ref == 'refs/heads/master' run: | git config --global user.name "GitHub Actions" git config --global user.email "actions@github.com" - git add README.md - if git diff-index --quiet HEAD --; then echo "No changes to commit" else - git commit -m "[CI] Update accuracy: $(cat accuracy.txt)%" + git commit -m "[CI] Update accuracy: $(cat accuracy_value.txt)" git push origin master - echo "Changes pushed to master branch" fi diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml index 3eb5eff68..e37195a72 100644 --- a/.github/workflows/static-analysis.yml +++ b/.github/workflows/static-analysis.yml @@ -32,7 +32,7 @@ jobs: key: ccache-${{ github.job }} - name: Build run: | - cmake -S . -B build \ + cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER=clang \ diff --git a/.gitmodules b/.gitmodules index 1495d0c1d..f19f40a5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "3rdparty/Json"] path = 3rdparty/Json url = https://github.com/nlohmann/json +[submodule "3rdparty/oneDNN"] + path = 3rdparty/oneDNN + url = https://github.com/uxlfoundation/oneDNN diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 02559b9fd..5319b259d 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -1,5 +1,7 @@ add_subdirectory(googletest) +add_subdirectory(oneDNN) + # Unified TBB Configuration option(TBB_TEST "Build TBB tests" OFF) option(TBB_EXAMPLES "Build TBB examples" OFF) diff --git a/3rdparty/oneDNN b/3rdparty/oneDNN new file mode 160000 index 000000000..ceb0d6c8c --- /dev/null +++ b/3rdparty/oneDNN @@ -0,0 +1 @@ +Subproject commit ceb0d6c8c5685ee9af1eb6c07a3508248e886673 diff --git a/app/Graph/CMakeLists.txt b/app/Graph/CMakeLists.txt index f953547a4..15f16e4dd 100644 --- a/app/Graph/CMakeLists.txt +++ b/app/Graph/CMakeLists.txt @@ -11,6 +11,7 @@ target_link_libraries(BuildGraph PUBLIC ${OpenCV_LIBS}) target_link_libraries(BuildGraph PUBLIC reader_lib) target_link_libraries(BuildGraph PUBLIC TBB_unified) target_link_libraries(BuildGraph PUBLIC layers_lib) +target_link_libraries(BuildGraph PUBLIC layers_oneDNN_lib) target_link_libraries(BuildGraph PUBLIC gtest_main) target_include_directories(BuildGraph PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/Json/include) diff --git a/app/Graph/acc_check.cpp b/app/Graph/acc_check.cpp index 673b40130..dbd3d16b0 100644 --- a/app/Graph/acc_check.cpp +++ b/app/Graph/acc_check.cpp @@ -14,12 +14,14 @@ using namespace it_lab_ai; int main(int argc, char* argv[]) { std::string model_name = "alexnet_mnist"; bool parallel = false; - + bool onednn = false; for (int i = 1; i < argc; ++i) { if (std::string(argv[i]) == "--parallel") { parallel = true; } else if (std::string(argv[i]) == "--model" && i + 1 < argc) { model_name = argv[++i]; + } else if (std::string(argv[i]) == "--onednn") { + onednn = true; } } @@ -78,7 +80,7 @@ int main(int argc, char* argv[]) { Shape sh({static_cast(count_pic), 1, 28, 28}); Tensor t = make_tensor(res, sh); input = t; - build_graph_linear(input, output, false, parallel); + build_graph_linear(input, output, false, parallel, onednn); std::vector> tmp_output = softmax(*output.as(), 10); std::vector indices; @@ -185,7 +187,7 @@ int main(int argc, char* argv[]) { it_lab_ai::Tensor output = it_lab_ai::Tensor(output_shape, it_lab_ai::Type::kFloat); - build_graph(input, output, json_path, false, parallel); + build_graph(input, output, json_path, false, parallel, onednn); std::vector> processed_outputs; const std::vector& raw_output = *output.as(); diff --git a/app/Graph/build.cpp b/app/Graph/build.cpp index 6a1f51f7f..addd71f27 100644 --- a/app/Graph/build.cpp +++ b/app/Graph/build.cpp @@ -5,7 +5,7 @@ using namespace it_lab_ai; void build_graph_linear(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, - bool comments, bool parallel) { + bool comments, bool parallel, bool onednn) { if (comments) { for (size_t i = 0; i < input.get_shape().dims(); i++) { std::cout << input.get_shape()[i] << ' '; @@ -80,7 +80,12 @@ void build_graph_linear(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, if (comments) std::cout << "ConvLayer added to layers." << std::endl; } if (layer_type.find("relu") != std::string::npos) { - auto ew_layer = std::make_shared("relu"); + std::shared_ptr ew_layer; + if (onednn) { + ew_layer = std::make_shared("relu"); + } else { + ew_layer = std::make_shared("relu"); + } layers.push_back(ew_layer); layerpostop.push_back(true); if (comments) @@ -230,7 +235,8 @@ std::string layerTypeToString(it_lab_ai::LayerType type) { } void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, - const std::string& json_path, bool comments, bool parallel) { + const std::string& json_path, bool comments, bool parallel, + bool onednn) { if (comments) { for (size_t i = 0; i < input.get_shape().dims(); i++) { std::cout << input.get_shape()[i] << ' '; @@ -251,7 +257,7 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, } } - auto parse_result = parse_json_model(json_path, comments, parallel); + auto parse_result = parse_json_model(json_path, comments, parallel, onednn); auto& layers = parse_result.layers; auto& name_to_layer = parse_result.name_to_layer; @@ -371,7 +377,7 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, } ParseResult parse_json_model(const std::string& json_path, bool comments, - bool parallel) { + bool parallel, bool onednn) { ParseResult result; auto& layers = result.layers; @@ -488,12 +494,21 @@ ParseResult parse_json_model(const std::string& json_path, bool comments, layer = conv_layer; } else if (layer_type.find("Relu") != std::string::npos || layer_type.find("relu") != std::string::npos) { - auto ew_layer = std::make_shared("relu"); + std::shared_ptr ew_layer; + if (onednn) { + ew_layer = std::make_shared("relu"); + } else { + ew_layer = std::make_shared("relu"); + } layer = ew_layer; } else if (layer_type.find("Sigmoid") != std::string::npos) { - auto ew_layer = std::make_shared("sigmoid"); + std::shared_ptr ew_layer; + if (onednn) { + ew_layer = std::make_shared("sigmoid"); + } else { + ew_layer = std::make_shared("sigmoid"); + } layer = ew_layer; - } else if (layer_type.find("Dense") != std::string::npos || layer_type.find("FullyConnected") != std::string::npos) { it_lab_ai::Tensor tensor = it_lab_ai::create_tensor_from_json( @@ -717,22 +732,38 @@ ParseResult parse_json_model(const std::string& json_path, bool comments, if (layer_type == "Mul") { ew_operation = "linear"; - auto ew_layer = - std::make_shared(ew_operation, value, 0.0F); - layer = ew_layer; - if (comments) { - std::cout << "Created binary " << layer_type << " operation with " - << value << "scalar" << std::endl; + std::shared_ptr ew_layer; + if (onednn) { + ew_layer = std::make_shared( + ew_operation, value, 0.0F); + } else { + ew_layer = std::make_shared(ew_operation, + value, 0.0F); } + layer = ew_layer; } else if (layer_type == "Add") { ew_operation = "linear"; - auto ew_layer = - std::make_shared(ew_operation, 1.0F, value); + std::shared_ptr ew_layer; + if (onednn && + it_lab_ai::EwLayerOneDnn::is_function_supported("linear")) { + ew_layer = std::make_shared( + ew_operation, 1.0F, value); + } else { + ew_layer = std::make_shared(ew_operation, + 1.0F, value); + } layer = ew_layer; } else if (layer_type == "Sub") { ew_operation = "linear"; - auto ew_layer = std::make_shared(ew_operation, - 1.0F, -value); + std::shared_ptr ew_layer; + if (onednn && + it_lab_ai::EwLayerOneDnn::is_function_supported("linear")) { + ew_layer = std::make_shared( + ew_operation, 1.0F, -value); + } else { + ew_layer = std::make_shared(ew_operation, + 1.0F, -value); + } layer = ew_layer; } else { continue; diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index 76b5d1df8..5d4d651e3 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -31,6 +31,7 @@ #include "layers/SplitLayer.hpp" #include "layers/Tensor.hpp" #include "layers/TransposeLayer.hpp" +#include "layers_oneDNN/EWLayer.hpp" std::unordered_map model_paths = { {"alexnet_mnist", MODEL_PATH_H5}, @@ -57,14 +58,15 @@ struct ParseResult { void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, const std::string& json_path, bool comments, - bool parallel = false); + bool parallel = false, bool onednn = false); void build_graph_linear(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, - bool comments, bool parallel = false); + bool comments, bool parallel = false, + bool onednn = false); std::unordered_map load_class_names( const std::string& filename); ParseResult parse_json_model(const std::string& json_path, bool comments, - bool parallel); + bool parallel, bool onednn); std::vector get_input_shape_from_json(const std::string& json_path); std::vector process_model_output(const std::vector& output, diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index 3a7330c60..d2a50d3eb 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -11,12 +11,14 @@ using namespace it_lab_ai; int main(int argc, char* argv[]) { std::string model_name = "alexnet_mnist"; bool parallel = false; - + bool onednn = false; for (int i = 1; i < argc; ++i) { if (std::string(argv[i]) == "--parallel") { parallel = true; } else if (std::string(argv[i]) == "--model" && i + 1 < argc) { model_name = argv[++i]; + } else if (std::string(argv[i]) == "--onednn") { + onednn = true; } } @@ -62,7 +64,7 @@ int main(int argc, char* argv[]) { std::vector vec(75, 3); it_lab_ai::Tensor output = it_lab_ai::make_tensor(vec, sh1); - build_graph_linear(input, output, true, parallel); + build_graph_linear(input, output, true, parallel, onednn); std::vector tmp_output = softmax(*output.as()); int top_n = std::min(3, static_cast(tmp_output.size())); std::vector indices(tmp_output.size()); @@ -92,7 +94,7 @@ int main(int argc, char* argv[]) { size_t output_classes = 1000; it_lab_ai::Tensor output({1, output_classes}, it_lab_ai::Type::kFloat); - build_graph(input, output, json_path, false, parallel); + build_graph(input, output, json_path, false, parallel, onednn); std::vector tmp_output = process_model_output(*output.as(), model_name); diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 418c50061..760af1d8b 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -7,6 +7,9 @@ set(GRAPHT_HEADERS "${graphT_headers}" PARENT_SCOPE) file(GLOB_RECURSE layers_headers layers/*.h layers/*.hpp) set(LAYERS_HEADERS "${layers_headers}" PARENT_SCOPE) +file(GLOB_RECURSE layers_oneDNN_headers layers_oneDNN/*.h layers_oneDNN/*.hpp) +set(LAYERS_ONEDNN_HEADERS "${layers_oneDNN_headers}" PARENT_SCOPE) + file(GLOB_RECURSE perf_headers perf/*.h perf/*.hpp) set(PERF_HEADERS "${perf_headers}" PARENT_SCOPE) diff --git a/include/layers_oneDNN/EWLayer.hpp b/include/layers_oneDNN/EWLayer.hpp new file mode 100644 index 000000000..6a04f8dd0 --- /dev/null +++ b/include/layers_oneDNN/EWLayer.hpp @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include + +#include "layers/Layer.hpp" + +namespace it_lab_ai { + +class EwLayerOneDnn : public Layer { + public: + EwLayerOneDnn() + : Layer(kElementWise), func_("none"), alpha_(0.0F), beta_(0.0F) {} + + EwLayerOneDnn(std::string function, float alpha = 0.0F, float beta = 0.0F) + : Layer(kElementWise), + func_(std::move(function)), + alpha_(alpha), + beta_(beta) {} + + void run(const std::vector& input, + std::vector& output) override; + static bool is_function_supported(const std::string& function); + +#ifdef ENABLE_STATISTIC_WEIGHTS + Tensor get_weights() override { + std::vector v = {0}; + Tensor a = make_tensor(v); + return a; + } +#endif + + private: + void initialize_onednn(const Shape& shape, Type data_type); + dnnl::algorithm get_algorithm() const; + void validate_input(const std::vector& input) const; + + std::string func_; + float alpha_; + float beta_; + + std::unique_ptr engine_; + std::unique_ptr stream_; + std::unique_ptr eltwise_prim_; + dnnl::memory::desc memory_desc_; + bool initialized_ = false; +}; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ba0d466da..ab526f5b8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,4 +2,5 @@ add_subdirectory(graph) add_subdirectory(graph_transformations) add_subdirectory(perf) add_subdirectory(layers) -add_subdirectory(Weights_Reader) \ No newline at end of file +add_subdirectory(layers_oneDNN) +add_subdirectory(Weights_Reader) diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index d3426e04a..f8ac6d84b 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,3 +1,4 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") target_link_libraries(layers_lib PUBLIC TBB_unified) +target_link_libraries(layers_lib PUBLIC dnnl) diff --git a/src/layers_oneDNN/CMakeLists.txt b/src/layers_oneDNN/CMakeLists.txt new file mode 100644 index 000000000..e4ee067e1 --- /dev/null +++ b/src/layers_oneDNN/CMakeLists.txt @@ -0,0 +1,9 @@ +file(GLOB_RECURSE layers_oneDNN_src *.cpp) +add_library(layers_oneDNN_lib STATIC "${LAYERS_ONEDNN_HEADERS}" "${layers_oneDNN_src}") +target_link_libraries(layers_oneDNN_lib PUBLIC dnnl TBB_unified) +target_include_directories(layers_oneDNN_lib PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../../include +) +target_compile_definitions(layers_oneDNN_lib PRIVATE + DNNL_ENABLE_CONCURRENT_EXEC=1 +) diff --git a/src/layers_oneDNN/EWLayer.cpp b/src/layers_oneDNN/EWLayer.cpp new file mode 100644 index 000000000..fc838705b --- /dev/null +++ b/src/layers_oneDNN/EWLayer.cpp @@ -0,0 +1,171 @@ +#include "layers_oneDNN/EWLayer.hpp" + +#include +#include + +namespace it_lab_ai { + +void EwLayerOneDnn::run(const std::vector& input, + std::vector& output) { + validate_input(input); + + const Tensor& input_tensor = input[0]; + Type data_type = input_tensor.get_type(); + + if (!initialized_) { + initialize_onednn(input_tensor.get_shape(), data_type); + } + + try { + if (data_type == Type::kFloat) { + const std::vector& input_data = *input_tensor.as(); + std::vector output_data(input_data.size()); + dnnl::memory src_mem = dnnl::memory( + memory_desc_, *engine_, const_cast(input_data.data())); + dnnl::memory dst_mem = + dnnl::memory(memory_desc_, *engine_, output_data.data()); + eltwise_prim_->execute( + *stream_, {{DNNL_ARG_SRC, src_mem}, {DNNL_ARG_DST, dst_mem}}); + stream_->wait(); + output[0] = make_tensor(output_data, input_tensor.get_shape()); + } else if (data_type == Type::kInt) { + const std::vector& input_data = *input_tensor.as(); + std::vector output_data(input_data.size()); + + std::vector float_input; + float_input.reserve(input_data.size()); + for (int val : input_data) { + float_input.push_back(static_cast(val)); + } + + std::vector float_output(input_data.size()); + + dnnl::memory src_mem = + dnnl::memory(memory_desc_, *engine_, float_input.data()); + dnnl::memory dst_mem = + dnnl::memory(memory_desc_, *engine_, float_output.data()); + eltwise_prim_->execute( + *stream_, {{DNNL_ARG_SRC, src_mem}, {DNNL_ARG_DST, dst_mem}}); + stream_->wait(); + + for (size_t i = 0; i < float_output.size(); ++i) { + output_data[i] = static_cast(std::round(float_output[i])); + } + output[0] = make_tensor(output_data, input_tensor.get_shape()); + } else { + throw std::runtime_error("EwLayerOneDnn: Unsupported data type"); + } + + } catch (const std::exception& e) { + std::cerr << "oneDNN execution failed: " << e.what() << std::endl; + throw; + } +} + +void EwLayerOneDnn::validate_input(const std::vector& input) const { + if (input.size() != 1) { + throw std::runtime_error("EwLayerOneDnn: Expected exactly 1 input tensor"); + } + + if (!is_function_supported(func_)) { + throw std::invalid_argument("Unsupported function for oneDNN: " + func_); + } + + Type data_type = input[0].get_type(); + if (data_type != Type::kFloat && data_type != Type::kInt) { + throw std::runtime_error( + "EwLayerOneDnn supports only float and int tensors"); + } +} + +void EwLayerOneDnn::initialize_onednn(const Shape& shape, Type data_type) { + try { + engine_ = std::make_unique(dnnl::engine::kind::cpu, 0); + stream_ = std::make_unique(*engine_); + + std::vector dims; + for (size_t i = 0; i < shape.dims(); i++) { + dims.push_back(static_cast(shape.at(i))); + } + + dnnl::memory::format_tag format; + switch (dims.size()) { + case 1: + format = dnnl::memory::format_tag::a; + break; + case 2: + format = dnnl::memory::format_tag::ab; + break; + case 3: + format = dnnl::memory::format_tag::abc; + break; + case 4: + format = dnnl::memory::format_tag::abcd; + break; + case 5: + format = dnnl::memory::format_tag::abcde; + break; + default: + throw std::invalid_argument("Unsupported tensor dimensionality: " + + std::to_string(dims.size())); + } + + dnnl::memory::data_type dnnl_data_type; + if (data_type == Type::kFloat) { + dnnl_data_type = dnnl::memory::data_type::f32; + } else { + dnnl_data_type = dnnl::memory::data_type::f32; + } + + memory_desc_ = dnnl::memory::desc(dims, dnnl_data_type, format); + + dnnl::algorithm algo = get_algorithm(); + + float primitive_alpha = 0.0F; + float primitive_beta = 0.0F; + + if (func_ == "relu") { + primitive_alpha = 0.0F; + } else if (func_ == "linear") { + primitive_alpha = alpha_; + primitive_beta = beta_; + } + + auto eltwise_pd = dnnl::eltwise_forward::primitive_desc( + *engine_, dnnl::prop_kind::forward_inference, algo, memory_desc_, + memory_desc_, primitive_alpha, primitive_beta); + + eltwise_prim_ = std::make_unique(eltwise_pd); + + initialized_ = true; + + } catch (const std::exception& e) { + std::cerr << "oneDNN initialization failed for function '" << func_ + << "': " << e.what() << std::endl; + throw; + } +} + +dnnl::algorithm EwLayerOneDnn::get_algorithm() const { + if (func_ == "relu") { + return dnnl::algorithm::eltwise_relu; + } + if (func_ == "tanh") { + return dnnl::algorithm::eltwise_tanh; + } + if (func_ == "sigmoid") { + return dnnl::algorithm::eltwise_logistic; + } + if (func_ == "linear") { + return dnnl::algorithm::eltwise_linear; + } + + throw std::invalid_argument("Unsupported function for oneDNN: " + func_); +} + +bool EwLayerOneDnn::is_function_supported(const std::string& function) { + return (function == "relu" || function == "tanh" || function == "sigmoid" || + function == "linear"); +} + +} // namespace it_lab_ai \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 318354de2..fc95325c7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -4,7 +4,7 @@ add_executable(run_test ${TEST_SRC_FILES}) if (NOT WIN32) target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) endif() -target_link_libraries(run_test PUBLIC perf_lib layers_lib) +target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib) target_link_libraries(run_test PUBLIC gtest) target_link_libraries(run_test PUBLIC ReadLib) target_link_libraries(run_test PUBLIC reader_lib) @@ -26,6 +26,14 @@ if (WIN32) "${CMAKE_BINARY_DIR}/bin/") endif() +if (WIN32) + add_custom_command(TARGET run_test POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + $ + ) +endif() + add_test(UnitTests ${CMAKE_BINARY_DIR}/bin/run_test) file(DOWNLOAD @@ -47,4 +55,3 @@ target_compile_definitions(run_test PRIVATE TEST_DATA_PATH="${CMAKE_SOURCE_DIR}/ target_compile_definitions(run_test PUBLIC TESTS_BINARY_PATH="${CMAKE_CURRENT_BINARY_DIR}") - diff --git a/test/single_layer/test_ewlayer_onednn.cpp b/test/single_layer/test_ewlayer_onednn.cpp new file mode 100644 index 000000000..3b6ff3afb --- /dev/null +++ b/test/single_layer/test_ewlayer_onednn.cpp @@ -0,0 +1,306 @@ +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "layers/EWLayer.hpp" +#include "layers_oneDNN/EWLayer.hpp" + +using namespace it_lab_ai; + +TEST(ewlayer_onednn, supported_functions_check) { + EXPECT_TRUE(EwLayerOneDnn::is_function_supported("relu")); + EXPECT_TRUE(EwLayerOneDnn::is_function_supported("tanh")); + EXPECT_TRUE(EwLayerOneDnn::is_function_supported("sigmoid")); + EXPECT_TRUE(EwLayerOneDnn::is_function_supported("linear")); + + EXPECT_FALSE(EwLayerOneDnn::is_function_supported("sin")); + EXPECT_FALSE(EwLayerOneDnn::is_function_supported("minus")); + EXPECT_FALSE(EwLayerOneDnn::is_function_supported("nonexistent")); +} + +TEST(ewlayer_onednn, relu_float) { + EwLayerOneDnn layer("relu"); + + Tensor input = make_tensor({1.0F, -1.0F, 2.0F, -2.0F}); + Tensor output; + std::vector expected = {1.0F, 0.0F, 2.0F, 0.0F}; + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_NEAR(output_data[i], expected[i], 1e-5); + } +} + +TEST(ewlayer_onednn, relu_int) { + EwLayerOneDnn layer("relu"); + + Tensor input = make_tensor({1, -1, 2, -2, 0, -5}); + Tensor output; + std::vector expected = {1, 0, 2, 0, 0, 0}; + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_EQ(output_data[i], expected[i]); + } +} + +TEST(ewlayer_onednn, linear_float) { + EwLayerOneDnn layer("linear", 2.0f, 0.0f); + + Tensor input = make_tensor({1.0F, -1.0F, 2.0F, -5.0F}); + Tensor output; + std::vector expected = {2.0F, -2.0F, 4.0F, -10.0F}; + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_NEAR(output_data[i], expected[i], 1e-5); + } +} + +TEST(ewlayer_onednn, linear_int) { + EwLayerOneDnn layer("linear", 2.0f, 1.0f); + + Tensor input = make_tensor({1, -1, 2, -5, 0}); + Tensor output; + std::vector expected = {3, -1, 5, -9, 1}; + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_EQ(output_data[i], expected[i]); + } +} + +TEST(ewlayer_onednn, linear_with_bias_float) { + EwLayerOneDnn layer("linear", 1.0f, -1.0f); + + Tensor input = make_tensor({1.0F, -1.0F, 2.0F, -5.0F}); + Tensor output; + std::vector expected = {0.0F, -2.0F, 1.0F, -6.0F}; + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_NEAR(output_data[i], expected[i], 1e-5); + } +} + +TEST(ewlayer_onednn, tanh_float) { + EwLayerOneDnn layer("tanh"); + + Tensor input = make_tensor({0.0F, 1.0F, -1.0F, 2.0F}); + Tensor output; + std::vector expected; + + std::vector input_data = *input.as(); + for (auto val : input_data) { + expected.push_back(std::tanh(val)); + } + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_NEAR(output_data[i], expected[i], 1e-5); + } +} + +TEST(ewlayer_onednn, sigmoid_float) { + EwLayerOneDnn layer("sigmoid"); + + Tensor input = make_tensor({0.0F, 1.0F, -1.0F, 2.0F}); + Tensor output; + std::vector expected; + + std::vector input_data = *input.as(); + for (auto val : input_data) { + expected.push_back(1.0f / (1.0f + std::exp(-val))); + } + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_NEAR(output_data[i], expected[i], 1e-5); + } +} + +TEST(ewlayer_onednn, multidim_tensor_relu) { + Shape shape({1, 3, 2, 2}); + + EwLayerOneDnn layer("relu"); + + std::vector input_data(1 * 3 * 2 * 2); + for (size_t i = 0; i < input_data.size(); i++) { + input_data[i] = static_cast(i) - 2.0f; + } + + Tensor input = make_tensor(input_data, shape); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), input_data.size()); + + for (size_t i = 0; i < output_data.size(); i++) { + float expected = std::max(0.0f, input_data[i]); + EXPECT_NEAR(output_data[i], expected, 1e-5); + } +} + +TEST(ewlayer_onednn, multidim_tensor_relu_int) { + Shape shape({2, 2, 2}); + + EwLayerOneDnn layer("relu"); + + std::vector input_data = {1, -1, 2, -2, 0, -3, 4, -4}; + Tensor input = make_tensor(input_data, shape); + Tensor output; + std::vector expected = {1, 0, 2, 0, 0, 0, 4, 0}; + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + auto output_data = *out[0].as(); + ASSERT_EQ(output_data.size(), expected.size()); + for (size_t i = 0; i < output_data.size(); i++) { + EXPECT_EQ(output_data[i], expected[i]); + } +} + +TEST(ewlayer_onednn, compare_with_naive_relu) { + EwLayerOneDnn onednn_layer("relu"); + + EWLayer naive_layer("relu"); + + std::vector input_data(100); + for (size_t i = 0; i < input_data.size(); i++) { + input_data[i] = static_cast(i) - 50.0f; + } + + Tensor input_tensor = make_tensor(input_data); + + Tensor onednn_output; + std::vector onednn_in{input_tensor}; + std::vector onednn_out{onednn_output}; + onednn_layer.run(onednn_in, onednn_out); + auto onednn_result = *onednn_out[0].as(); + + Tensor naive_output; + std::vector naive_in{input_tensor}; + std::vector naive_out{naive_output}; + naive_layer.run(naive_in, naive_out); + auto naive_result = *naive_out[0].as(); + + ASSERT_EQ(onednn_result.size(), naive_result.size()); + for (size_t i = 0; i < onednn_result.size(); i++) { + EXPECT_NEAR(onednn_result[i], naive_result[i], 1e-5); + } +} + +TEST(ewlayer_onednn, multiple_input_tensors) { + EwLayerOneDnn layer("relu"); + + Tensor input1 = make_tensor({1.0F, 2.0F}); + Tensor input2 = make_tensor({3.0F, 4.0F}); + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + + EXPECT_THROW({ layer.run(in, out); }, std::runtime_error); +} + +TEST(ewlayer_onednn, unsupported_tensor_dimensionality) { + EwLayerOneDnn layer("relu"); + + Shape shape_6d({2, 3, 4, 5, 6, 7}); + std::vector data_6d(2 * 3 * 4 * 5 * 6 * 7, 1.0f); + Tensor input = make_tensor(data_6d, shape_6d); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW({ layer.run(in, out); }, std::invalid_argument); +} + +TEST(ewlayer_onednn, empty_input_tensor) { + EwLayerOneDnn layer("relu"); + + Tensor input = make_tensor({}); + Tensor output; + std::vector in{input}; + std::vector out{output}; + EXPECT_NO_THROW({ layer.run(in, out); }); +} + +TEST(ewlayer_onednn, invalid_function_algorithm_mapping) { + EwLayerOneDnn layer("relu"); + EXPECT_THROW( + { + EwLayerOneDnn invalid_layer("invalid_function_123"); + Tensor input = make_tensor({1.0F}); + Tensor output; + std::vector in{input}; + std::vector out{output}; + invalid_layer.run(in, out); + }, + std::invalid_argument); +} + +TEST(ewlayer_onednn, initialization_failure_propagation) { + EwLayerOneDnn layer("relu"); + + Shape shape_7d({2, 2, 2, 2, 2, 2, 2}); + std::vector data_7d(128, 1.0f); + Tensor input = make_tensor(data_7d, shape_7d); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + try { + layer.run(in, out); + FAIL() << "Expected std::invalid_argument exception"; + } catch (const std::invalid_argument& e) { + EXPECT_NE(std::string(e.what()).find("dimensionality"), std::string::npos); + } catch (...) { + FAIL() << "Expected std::invalid_argument exception"; + } +}