diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e83614e7f..b08551e57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,10 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + - name: Set binary paths + id: set_binaries + run: | + echo "ACC_BINARY=build/bin/ACC" >> $GITHUB_OUTPUT - name: Setup ccache uses: hendrikmuhs/ccache-action@v1.2 with: @@ -59,7 +63,7 @@ jobs: with: name: mnist-${{ matrix.build_type }}${{ matrix.stats && '-stats' || '' }} path: | - build/bin/ACC_MNIST* + ${{ steps.set_binaries.outputs.ACC_BINARY }} build/bin/opencv_libs/* build/setenv.sh - name: Test @@ -227,7 +231,10 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - + - name: Set binary path + id: set_eval_binary + run: | + echo "EVAL_BINARY=build/bin/ACC" >> $GITHUB_OUTPUT - name: Install system dependencies run: | sudo apt-get update @@ -274,7 +281,7 @@ jobs: - name: Prepare environment run: | - chmod +x build/bin/ACC_MNIST* + chmod +x "${{ steps.set_eval_binary.outputs.EVAL_BINARY }}" export LD_LIBRARY_PATH=$PWD/build/bin/opencv_libs:/usr/lib/x86_64-linux-gnu echo "Final LD_LIBRARY_PATH: $LD_LIBRARY_PATH" @@ -290,12 +297,12 @@ jobs: export LD_LIBRARY_PATH=$PWD/build/bin/opencv_libs:/usr/lib/x86_64-linux-gnu echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" - LD_DEBUG=files ./build/bin/ACC_MNIST* 2> ld_debug.log + LD_DEBUG=files "${{ steps.set_eval_binary.outputs.EVAL_BINARY }}" --model alexnet_mnist 2> ld_debug.log echo "### Library loading debug ###" grep -i "opencv_imgcodecs" ld_debug.log - ./build/bin/ACC_MNIST* > accuracy.txt - echo "Accuracy: $(cat accuracy.txt)" + "${{ steps.set_eval_binary.outputs.EVAL_BINARY }}" > accuracy.txt + echo "Accuracy: $(cat accuracy.txt)" - name: Update README (master only) if: github.ref == 'refs/heads/master' diff --git a/README.md b/README.md index e02cb8ad8..4c74808e6 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,18 @@ Accuracy: Stat: 98.01% (updated: 2025-04-28) ## Short 
description -A lightweight C++ library for performing high-performance inference on MNIST handwritten digits using a modified AlexNet architecture. Designed for efficiency and educational purposes, this project demonstrates how classic CNNs can be optimized for small-scale tasks in native environments. +A lightweight C++ library for performing high-performance inference on MNIST and ImageNet using a modified AlexNet, different ONNX and Yolo architectures. Designed for efficiency and educational purposes, this project demonstrates how classic CNNs can be optimized for small-scale tasks in native environments. ### Key Features: * C++17 implementation for bare-metal performance * Simplified AlexNet for 28×28 grayscale images +* Googlenet, Densenet, Resnet and Yolo11x-cls for images of any size + * Parallel computing via Intel OneTBB (Threading Building Blocks) -* Pre-trained model: AlexNet-model.h5 included +* Pre-trained model: AlexNet-model.h5, Googlenet included ## **Some files used to create the library** ### Neural network models You need to download [Alexnet-model.h5](https://github.com/moizahmed97/Convolutional-Neural-Net-Designer/blob/master/AlexNet-model.h5) to the folder *docs* @@ -30,9 +32,9 @@ Other models:
## **How do I launch the inference?** * Make sure you install the project dependencies by running: *pip install -r requirements.txt* -* You need to run the script *parser.py* that is located in app/AlexNet to read weights from a model *Alexnet-model.h5* and the json file with the weights will be stored in the *docs* folder. +* You need to run the script *parser.py* that is located in app/converters to read weights from a model *Alexnet-model.h5* or *parser_onnx.py* to read weights from a models ONNX or YOLO and the json file with the weights will be stored in the *docs* folder. * Then put the test images in png format in the folder *docs/input* -* After building the project, which is described below, run Graph_build in folder *build/bin* +* After building the project, which is described below, run Graph_build with the parameter --model (alexnet_mnist or googlenet or densenet or resnet or yolo) and the parameter --parallel if you need. App Graph_build is located in folder *build/bin* ## **Building a Project** ### *Windows* @@ -69,7 +71,7 @@ To build and run this project locally on Windows, follow these steps: ``` and run the file ```bash - Graph_Build.exe + Graph_Build.exe --model alexnet_mnist ``` ### *Linux/macOS* To build and run this project locally on Linux or macOS, follow these steps: @@ -116,7 +118,7 @@ To build and run this project locally on Windows, follow these steps: ``` and run the file ```bash - ./Graph_Build + ./Graph_Build --model alexnet_mnist ``` ## Test Process @@ -147,10 +149,14 @@ To start the testing process locally, you need to go to the directory ./run_test ``` -## **Accuracy validation** +## **Accuracy validation for Alexnet on MNIST** To run accuracy validation you need to use the MNIST dataset, which you can download [here](https://github.com/DeepTrackAI/MNIST_dataset/tree/main/mnist/test) and put it in a folder *docs/mnist/mnist/test* -Now you can run accuracy check - *build\bin\ACC_MNIST.exe* -* **The accuracy should be 98.02%** +Now 
you can run accuracy check - *build\bin\ACC.exe --model alexnet_mnist* +* **The accuracy should be 98.01%** + +## **Accuracy validation for ONNX or YOLO models on ImageNet** +To run accuracy validation you need to use the ImageNet dataset, which you can download [here](https://www.kaggle.com/datasets/sautkin/imagenet1kvalid) and put it in a folder *docs/Imagenet/* +Now you can run accuracy check - *build\bin\ACC.exe --model googlenet* ## **Documentation of project** https://github.com/embedded-dev-research/ITLabAI/blob/Semyon1104/Final_documentation/docs/IT_Lab_2023.pdf diff --git a/app/Converters/parser_onnx.py b/app/Converters/parser_onnx.py index 60aa9b66b..33b20fef4 100644 --- a/app/Converters/parser_onnx.py +++ b/app/Converters/parser_onnx.py @@ -5,15 +5,17 @@ from onnx import helper, numpy_helper from ultralytics import YOLO + def convert_pt_to_onnx(pt_model_path, onnx_model_path=None): if onnx_model_path is None: onnx_model_path = pt_model_path.replace('.pt', '.onnx') model = YOLO(pt_model_path) - model.export(format="onnx", dynamic=False, simplify=True) + model.export(format="onnx", dynamic=False, simplify=False) return onnx_model_path + def onnx_to_json(model_path, output_json_path): if model_path.endswith('.pt'): model_path = convert_pt_to_onnx(model_path) @@ -31,12 +33,40 @@ def onnx_to_json(model_path, output_json_path): } layer_info = [] + + input_info = {} + for input in model.graph.input: + if input.name in initializers_dict: + continue + + shape = [] + for dim in input.type.tensor_type.shape.dim: + if dim.HasField('dim_value'): + # 0 означает динамическую размерность в ONNX + shape.append(dim.dim_value if dim.dim_value != 0 else -1) + elif dim.HasField('dim_param'): + # Обрабатываем именованные параметры размерностей + shape.append(-1) # или можно сохранить как строку: dim.dim_param + else: + shape.append(-1) # неизвестная размерность + + input_info = { + "name": input.name, + "shape": shape, + "data_type": input.type.tensor_type.elem_type + } + 
break + input_layer = { "index": 0, - "name": "input_1", + "name": input_info.get("name", "input_1"), "type": "InputLayer", "weights": [], - "attributes": {} + "bias": [], + "attributes": { + "shape": input_info.get("shape", []), + "data_type": input_info.get("data_type", 1) + } } layer_info.append(input_layer) @@ -45,9 +75,14 @@ def onnx_to_json(model_path, output_json_path): "index": len(layer_info), "name": node.name.replace('/', '_'), "type": node.op_type, - "attributes": {} + "attributes": {}, + "inputs": [] } + for input_name in node.input: + if input_name not in initializers_dict: + layer_data["inputs"].append(input_name.replace('/', '_')) + for attr in node.attribute: attr_value = helper.get_attribute_value(attr) if isinstance(attr_value, TensorProto): @@ -67,29 +102,44 @@ def onnx_to_json(model_path, output_json_path): elif attr.name == "strides": layer_data["strides"] = attr_value - node_init = [] - for input_name in node.input: - if input_name in initializers_dict: - node_init.append(initializers_dict[input_name]) - - if len(node_init) == 1: - init = node_init[0] - if len(init["dims"]) == 0 or (len(init["dims"]) == 1 and init["dims"][0] == 1): - layer_data["value"] = init["values"] if len(init["dims"]) == 0 else init["values"][0] - else: - layer_data["weights"] = init["values"] - elif len(node_init) > 1: - weights = [] - for init in node_init[:-1]: - if len(init["dims"]) > 0: - weights.extend(init["values"]) if isinstance(init["values"][0], list) else weights.append( - init["values"]) - - if weights: - layer_data["weights"] = weights - - if len(node_init[-1]["dims"]) == 1: - layer_data["bias"] = node_init[-1]["values"] + if node.op_type == "BatchNormalization": + bn_params = [] + for input_name in node.input: + if input_name in initializers_dict: + bn_params.append(initializers_dict[input_name]) + + if len(bn_params) >= 4: + layer_data["scale"] = bn_params[0]["values"] + layer_data["bias"] = bn_params[1]["values"] + layer_data["mean"] = 
bn_params[2]["values"] + layer_data["var"] = bn_params[3]["values"] + + layer_data["weights"] = [] + + else: + node_init = [] + for input_name in node.input: + if input_name in initializers_dict: + node_init.append(initializers_dict[input_name]) + + if len(node_init) == 1: + init = node_init[0] + if len(init["dims"]) == 0 or (len(init["dims"]) == 1 and init["dims"][0] == 1): + layer_data["value"] = init["values"] if len(init["dims"]) == 0 else init["values"][0] + else: + layer_data["weights"] = init["values"] + elif len(node_init) > 1: + weights = [] + for init in node_init[:-1]: + if len(init["dims"]) > 0: + weights.extend(init["values"]) if isinstance(init["values"][0], list) else weights.append( + init["values"]) + + if weights: + layer_data["weights"] = weights + + if len(node_init[-1]["dims"]) == 1: + layer_data["bias"] = node_init[-1]["values"] layer_info.append(layer_data) @@ -116,7 +166,7 @@ def default(self, obj): BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -MODEL_PATH = os.path.join(BASE_DIR, 'docs\\models', 'yolo11x-cls.pt') -MODEL_DATA_PATH = os.path.join(BASE_DIR, 'docs\\jsons', 'yolo11x-cls_onnx_model.json') +MODEL_PATH = os.path.join(BASE_DIR, 'docs\\models', 'resnest101e_Opset16.onnx') +MODEL_DATA_PATH = os.path.join(BASE_DIR, 'docs\\jsons', 'resnest101e_Opset16_onnx_model.json') onnx_to_json(MODEL_PATH, MODEL_DATA_PATH) \ No newline at end of file diff --git a/app/Graph/CMakeLists.txt b/app/Graph/CMakeLists.txt index 17389c88a..f953547a4 100644 --- a/app/Graph/CMakeLists.txt +++ b/app/Graph/CMakeLists.txt @@ -18,8 +18,8 @@ target_include_directories(BuildGraph PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/Json/i add_executable(Graph_Build graph_build.cpp) target_link_libraries(Graph_Build BuildGraph) -add_executable(ACC_MNIST acc_check_mnist.cpp) -target_link_libraries(ACC_MNIST BuildGraph) +add_executable(ACC acc_check.cpp) +target_link_libraries(ACC BuildGraph) if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE 
"Debug") @@ -33,7 +33,13 @@ if (WIN32) endif() if (WIN32) - add_custom_command(TARGET ACC_MNIST POST_BUILD + if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") + set(CMAKE_BUILD_TYPE "Debug") + endif() + if ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") + set(CMAKE_BUILD_TYPE "Release") + endif() + add_custom_command(TARGET ACC POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${OPENCV_BUILD_DIR}/bin/." "${CMAKE_BINARY_DIR}/bin/") @@ -41,16 +47,27 @@ endif() file(DOWNLOAD "https://raw.githubusercontent.com/DeepTrackAI/MNIST_dataset/main/mnist/test/1_000008.png" - "${CMAKE_SOURCE_DIR}/docs/input/test1.png" + "${CMAKE_SOURCE_DIR}/docs/input/28/test1.png" SHOW_PROGRESS STATUS status_code LOG log_file ) -add_definitions(-DIMAGE1_PATH="${CMAKE_SOURCE_DIR}/docs/input/") +file(DOWNLOAD + "blob:https://ru.pinterest.com/63b88674-b4a6-4ef3-85b2-ab57ef7bb8e7" + "${CMAKE_SOURCE_DIR}/docs/input/Imagenet_test/tench.png" + SHOW_PROGRESS + STATUS status_code + LOG log_file +) + +add_definitions(-DIMAGE28_PATH="${CMAKE_SOURCE_DIR}/docs/input/28/") +add_definitions(-DIMAGENET_ACC="${CMAKE_SOURCE_DIR}/docs/ImageNet/test/") +add_definitions(-DIMAGENET_PATH="${CMAKE_SOURCE_DIR}/docs/input/Imagenet_test/") add_definitions(-DMODEL_PATH_H5="${CMAKE_SOURCE_DIR}/docs/jsons/model_data_alexnet_1.json") add_definitions(-DMODEL_PATH_GOOGLENET_ONNX="${CMAKE_SOURCE_DIR}/docs/jsons/googlenet_onnx_model.json") add_definitions(-DMODEL_PATH_DENSENET_ONNX="${CMAKE_SOURCE_DIR}/docs/jsons/densenet121_Opset16_onnx_model.json") add_definitions(-DMODEL_PATH_RESNET_ONNX="${CMAKE_SOURCE_DIR}/docs/jsons/resnest101e_Opset16_onnx_model.json") add_definitions(-DMODEL_PATH_YOLO11NET_ONNX="${CMAKE_SOURCE_DIR}/docs/jsons/yolo11x-cls_onnx_model.json") +add_definitions(-DIMAGENET_LABELS="${CMAKE_SOURCE_DIR}/docs/imagenet1000_clsidx_to_labels.json") add_definitions(-DMNIST_PATH="${CMAKE_SOURCE_DIR}/docs/mnist/mnist/test") diff --git a/app/Graph/acc_check.cpp b/app/Graph/acc_check.cpp new file mode 100644 index 
000000000..673b40130 --- /dev/null +++ b/app/Graph/acc_check.cpp @@ -0,0 +1,250 @@ +#include +#include +#include +#include +#include +#include + +#include "build.cpp" +#include "build.hpp" + +namespace fs = std::filesystem; +using namespace it_lab_ai; + +int main(int argc, char* argv[]) { + std::string model_name = "alexnet_mnist"; + bool parallel = false; + + for (int i = 1; i < argc; ++i) { + if (std::string(argv[i]) == "--parallel") { + parallel = true; + } else if (std::string(argv[i]) == "--model" && i + 1 < argc) { + model_name = argv[++i]; + } + } + + std::string dataset_path; + if (model_name == "alexnet_mnist") { + dataset_path = MNIST_PATH; + } else { + dataset_path = IMAGENET_ACC; + } + + std::string json_path = model_paths[model_name]; + std::vector input_shape = get_input_shape_from_json(json_path); + + std::cout << std::endl; + + if (model_name == "alexnet_mnist") { + std::vector counts = {979, 1134, 1031, 1009, 981, + 891, 957, 1027, 973, 1008}; + int stat = 0; + size_t sum = std::accumulate(counts.begin(), counts.end(), size_t{0}); + int count_pic = static_cast(sum) + 10; + std::vector res(count_pic * 28 * 28); + Tensor input; + Shape sh1({1, 5, 5, 3}); + std::vector vec; + vec.reserve(75); + for (int i = 0; i < 75; ++i) { + vec.push_back(3); + } + Tensor output = make_tensor(vec, sh1); + + for (size_t name = 0; name < 10; name++) { + for (size_t ind = 0; ind < counts[name] + 1; ind++) { + std::ostringstream oss; + oss << "/" << name << "_" << std::setw(6) << std::setfill('0') << ind + << ".png"; + std::string png = oss.str(); + std::string image_path = MNIST_PATH + png; + + cv::Mat image = cv::imread(image_path); + if (image.empty()) { + throw std::runtime_error("Failed to load image"); + } + cv::cvtColor(image, image, cv::COLOR_BGR2GRAY); + std::vector channels; + cv::split(image, channels); + for (int i = 0; i < 28; ++i) { + for (int j = 0; j < 28; ++j) { + size_t a = ind; + for (size_t n = 0; n < name; n++) a += counts[n] + 1; + res[(a) * 28 * 
28 + i * 28 + j] = channels[0].at(j, i); + } + } + } + } + Shape sh({static_cast(count_pic), 1, 28, 28}); + Tensor t = make_tensor(res, sh); + input = t; + build_graph_linear(input, output, false, parallel); + std::vector> tmp_output = + softmax(*output.as(), 10); + std::vector indices; + for (const auto& row : tmp_output) { + for (size_t j = 0; j < row.size(); ++j) { + if (row[j] >= 1e-6) { + indices.push_back(j); + break; + } + } + } + for (size_t name = 0; name < 10; name++) { + for (size_t ind = 0; ind < counts[name] + 1; ind++) { + size_t a = ind; + for (size_t n = 0; n < name; n++) a += counts[n] + 1; + if (name == indices[a]) stat++; + } + } + double percentage = + (static_cast(stat) / static_cast(sum + 10)) * 100; + std::cout << "Stat: " << std::fixed << std::setprecision(2) << percentage + << "%" << std::endl; + return 0; + } + std::vector counts; + std::vector image_paths; + std::vector true_labels; + std::vector all_image_data; + size_t total_images = 0; + + counts.resize(1000, 0); + + for (int class_id = 0; class_id < 1000; ++class_id) { + std::ostringstream folder_oss; + folder_oss << std::setw(5) << std::setfill('0') << class_id; + std::string class_folder_path = dataset_path + "/" + folder_oss.str(); + + if (fs::exists(class_folder_path)) { + for (const auto& entry : fs::directory_iterator(class_folder_path)) { + if (entry.path().extension() == ".png" || + entry.path().extension() == ".jpg" || + entry.path().extension() == ".jpeg") { + counts[class_id]++; + total_images++; + } + } + } + } + + if (total_images == 0) { + std::cerr << "No images found in dataset path: " << dataset_path + << std::endl; + return 1; + } + + int channels = input_shape[1]; + int height = input_shape[2]; + int width = input_shape[3]; + size_t image_size = channels * height * width; + + all_image_data.resize(total_images * image_size); + + size_t current_index = 0; + for (int class_id = 0; class_id < 1000; ++class_id) { + std::ostringstream folder_oss; + folder_oss << 
std::setw(5) << std::setfill('0') << class_id; + std::string class_folder_path = dataset_path + "/" + folder_oss.str(); + + if (!fs::exists(class_folder_path)) continue; + + for (const auto& entry : fs::directory_iterator(class_folder_path)) { + if (entry.path().extension() == ".png" || + entry.path().extension() == ".jpg" || + entry.path().extension() == ".jpeg") { + cv::Mat image = cv::imread(entry.path().string()); + if (image.empty()) { + std::cerr << "Failed to load image: " << entry.path().string() + << std::endl; + continue; + } + + it_lab_ai::Tensor prepared_tensor = + prepare_image(image, input_shape, model_name); + const std::vector& image_data = *prepared_tensor.as(); + + std::copy(image_data.begin(), image_data.end(), + all_image_data.begin() + current_index * image_size); + + image_paths.push_back(entry.path().string()); + true_labels.push_back(class_id); + current_index++; + } + } + } + + it_lab_ai::Shape input_shape_imagenet( + {total_images, static_cast(channels), static_cast(height), + static_cast(width)}); + it_lab_ai::Tensor input = + it_lab_ai::make_tensor(all_image_data, input_shape_imagenet); + + size_t output_classes = 1000; + it_lab_ai::Shape output_shape({total_images, output_classes}); + it_lab_ai::Tensor output = + it_lab_ai::Tensor(output_shape, it_lab_ai::Type::kFloat); + + build_graph(input, output, json_path, false, parallel); + std::vector> processed_outputs; + const std::vector& raw_output = *output.as(); + + for (size_t i = 0; i < total_images; ++i) { + std::vector single_output( + raw_output.begin() + i * output_classes, + raw_output.begin() + (i + 1) * output_classes); + std::vector processed_output = + process_model_output(single_output, model_name); + processed_outputs.push_back(processed_output); + } + + int correct_predictions_top1 = 0; + int correct_predictions_top5 = 0; + for (size_t i = 0; i < processed_outputs.size(); ++i) { + int true_label = true_labels[i]; + const std::vector& probabilities = processed_outputs[i]; + + 
std::vector indices(probabilities.size()); + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), indices.end(), [&](size_t a, size_t b) { + return probabilities[a] > probabilities[b]; + }); + + size_t predicted_class_top1 = indices[0]; + if (predicted_class_top1 == static_cast(true_label)) { + correct_predictions_top1++; + } + + bool found_in_top5 = false; + for (int top_k = 0; top_k < std::min(5, static_cast(indices.size())); + ++top_k) { + if (indices[top_k] == static_cast(true_label)) { + found_in_top5 = true; + break; + } + } + if (found_in_top5) { + correct_predictions_top5++; + } + } + + double final_accuracy_top1 = + (static_cast(correct_predictions_top1) / total_images) * 100; + double final_accuracy_top5 = + (static_cast(correct_predictions_top5) / total_images) * 100; + + std::cout << "\nFinal Results:" << std::endl; + std::cout << "Model: " << model_name << std::endl; + std::cout << "Dataset: " << dataset_path << std::endl; + std::cout << "Total images: " << total_images << std::endl; + std::cout << "Correct predictions (Top-1): " << correct_predictions_top1 + << std::endl; + std::cout << "Correct predictions (Top-5): " << correct_predictions_top5 + << std::endl; + std::cout << "Top-1 Accuracy: " << std::fixed << std::setprecision(2) + << final_accuracy_top1 << "%" << std::endl; + std::cout << "Top-5 Accuracy: " << std::fixed << std::setprecision(2) + << final_accuracy_top5 << "%" << std::endl; + + return 0; +} \ No newline at end of file diff --git a/app/Graph/acc_check_mnist.cpp b/app/Graph/acc_check_mnist.cpp deleted file mode 100644 index f2cf5ef4d..000000000 --- a/app/Graph/acc_check_mnist.cpp +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include -#include - -#include "build.cpp" -#include "build.hpp" - -using namespace it_lab_ai; - -int main(int argc, char* argv[]) { - bool parallel = false; - if (argc > 1 && std::string(argv[1]) == "--parallel") { - std::cout << "Parallel mode" << std::endl; - parallel = true; - } - std::vector 
counts = {979, 1134, 1031, 1009, 981, - 891, 957, 1027, 973, 1008}; - int stat = 0; - size_t sum = std::accumulate(counts.begin(), counts.end(), size_t{0}); - int count_pic = static_cast(sum) + 10; - std::vector res(count_pic * 28 * 28); - Tensor input; - Shape sh1({1, 5, 5, 3}); - std::vector vec; - vec.reserve(75); - for (int i = 0; i < 75; ++i) { - vec.push_back(3); - } - Tensor output = make_tensor(vec, sh1); - - for (size_t name = 0; name < 10; name++) { - for (size_t ind = 0; ind < counts[name] + 1; ind++) { - std::ostringstream oss; - oss << "/" << name << "_" << std::setw(6) << std::setfill('0') << ind - << ".png"; - std::string png = oss.str(); - std::string image_path = MNIST_PATH + png; - - cv::Mat image = cv::imread(image_path); - if (image.empty()) { - throw std::runtime_error("Failed to load image"); - } - cv::cvtColor(image, image, cv::COLOR_BGR2GRAY); - std::vector channels; - cv::split(image, channels); - for (int i = 0; i < 28; ++i) { - for (int j = 0; j < 28; ++j) { - size_t a = ind; - for (size_t n = 0; n < name; n++) a += counts[n] + 1; - res[(a) * 28 * 28 + i * 28 + j] = channels[0].at(j, i); - } - } - } - } - Shape sh({static_cast(count_pic), 1, 28, 28}); - Tensor t = make_tensor(res, sh); - input = t; - build_graph(input, output, false, parallel); - std::vector> tmp_output = - softmax(*output.as(), 10); - std::vector indices; - for (const auto& row : tmp_output) { - for (size_t j = 0; j < row.size(); ++j) { - if (row[j] >= 1e-6) { - indices.push_back(j); - break; - } - } - } - for (size_t name = 0; name < 10; name++) { - for (size_t ind = 0; ind < counts[name] + 1; ind++) { - size_t a = ind; - for (size_t n = 0; n < name; n++) a += counts[n] + 1; - if (name == indices[a]) stat++; - } - } - double percentage = - (static_cast(stat) / static_cast(sum + 10)) * 100; - std::cout << "Stat: " << std::fixed << std::setprecision(2) << percentage - << "%" << std::endl; -} diff --git a/app/Graph/build.cpp b/app/Graph/build.cpp index 7974db61c..fcd058857 
100644 --- a/app/Graph/build.cpp +++ b/app/Graph/build.cpp @@ -1,7 +1,14 @@ -#include "build.hpp" +#include "build.hpp" -void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, - bool comments, bool parallel = false) { +#include +#include +#include +#include + +using namespace it_lab_ai; + +void build_graph_linear(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, + bool comments, bool parallel) { if (comments) { for (size_t i = 0; i < input.get_shape().dims(); i++) { std::cout << input.get_shape()[i] << ' '; @@ -41,7 +48,6 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, if (layer_type.find("Conv") != std::string::npos) { it_lab_ai::Tensor tmp_tensor = tensor; - // kernel is always transposed ? for (size_t n = 0; n < tensor.get_shape()[2]; n++) { for (size_t c = 0; c < tensor.get_shape()[3]; c++) { for (size_t h = 0; h < tensor.get_shape()[0]; h++) { @@ -52,7 +58,6 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, } } } - // tensor = tmp_tensor; it_lab_ai::Shape shape = tensor.get_shape(); size_t pads = (tensor.get_shape()[0] - 1) / 2; @@ -72,7 +77,7 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, it_lab_ai::Tensor tmp_values = tensor; it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); auto conv_layer = std::make_shared( - 1, pads, 1, tmp_values, tmp_bias, impl2); + 1, pads, 1, tmp_values, tmp_bias, impl2, 1, true); layers.push_back(conv_layer); layerpostop.push_back(false); if (comments) std::cout << "ConvLayer added to layers." << std::endl; @@ -86,18 +91,6 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, } if (layer_type.find("Dense") != std::string::npos) { it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); - it_lab_ai::Tensor tmp_tensor = it_lab_ai::Tensor( - it_lab_ai::Shape({tensor.get_shape()[1], tensor.get_shape()[0]}), - it_lab_ai::Type::kFloat); - // kernel is always transposed ? 
- for (size_t h = 0; h < tensor.get_shape()[0]; h++) { - for (size_t w = 0; w < tensor.get_shape()[1]; w++) { - tmp_tensor.set(std::vector({w, h}), - tensor.get({h, w})); - } - } - // - tensor = tmp_tensor; auto fc_layer = std::make_shared(tensor, tmp_bias); layers.push_back(fc_layer); layerpostop.push_back(false); @@ -193,4 +186,1064 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, } } } +} + +std::string get_base_layer_name(const std::string& tensor_name) { + static const auto kPattern = std::regex("(_output|_out|:)[_\\d]*$"); + return std::regex_replace(tensor_name, kPattern, ""); +} + +std::string layerTypeToString(it_lab_ai::LayerType type) { + switch (type) { + case it_lab_ai::kInput: + return "Input"; + case it_lab_ai::kPooling: + return "Pooling"; + case it_lab_ai::kElementWise: + return "ElementWise"; + case it_lab_ai::kConvolution: + return "Convolution"; + case it_lab_ai::kFullyConnected: + return "FullyConnected"; + case it_lab_ai::kFlatten: + return "Flatten"; + case it_lab_ai::kConcat: + return "Concat"; + case it_lab_ai::kDropout: + return "Dropout"; + case it_lab_ai::kSplit: + return "Split"; + case it_lab_ai::kBinaryOp: + return "BinaryOp"; + case it_lab_ai::kTranspose: + return "Transpose"; + case it_lab_ai::kMatmul: + return "MatMul"; + case it_lab_ai::kReshape: + return "Reshape"; + case it_lab_ai::kSoftmax: + return "Softmax"; + case it_lab_ai::kReduce: + return "Reduce"; + case it_lab_ai::kBatchNormalization: + return "BatchNormalization"; + default: + return "Unknown"; + } +} + +void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, + const std::string& json_path, bool comments, bool parallel) { + if (comments) { + for (size_t i = 0; i < input.get_shape().dims(); i++) { + std::cout << input.get_shape()[i] << ' '; + } + std::cout << std::endl; + if (input.get_shape().dims() == 4) { + for (size_t n = 0; n < input.get_shape()[0]; n++) { + for (size_t h = 0; h < input.get_shape()[2]; h++) { + for (size_t w = 
0; w < input.get_shape()[3]; w++) { + for (size_t c = 0; c < input.get_shape()[1]; c++) { + std::cout << input.get({n, c, h, w}) << ' '; + } + } + std::cerr << std::endl; + } + } + std::cout << std::endl << std::endl; + } + } + + it_lab_ai::ImplType impl1 = parallel ? it_lab_ai::kTBB : it_lab_ai::kDefault; + it_lab_ai::ImplType impl2 = parallel ? it_lab_ai::kSTL : it_lab_ai::kDefault; + + std::unordered_map> concat_connections; + std::unordered_map> concat_orders; + std::unordered_map> + concat_connected_inputs; + + std::unordered_map> layer_parameters; + std::unordered_map float_parameters; + std::string last_constant_name; + std::vector last_constant_value; + + std::unordered_map> + split_layers; + std::unordered_map split_output_mapping; + std::vector>> split_distribution; + std::unordered_map split_name_to_index; + std::unordered_map original_ids; + + std::vector> layers; + std::unordered_map> + name_to_layer; + std::unordered_map> connections; + + std::vector> connection_list; + const std::string& json_file = json_path; + + it_lab_ai::json model_data = it_lab_ai::read_json(json_file); + std::string input_layer_name = "images"; + for (const auto& layer_data : model_data) { + std::string layer_type = layer_data["type"]; + if (layer_type == "InputLayer") { + if (layer_data.contains("name")) { + input_layer_name = layer_data["name"]; + } + break; + } + } + + if (comments) std::cout << "Loaded model data from JSON." 
<< std::endl; + + auto input_layer = std::make_shared(it_lab_ai::kNchw, + it_lab_ai::kNchw); + layers.push_back(input_layer); + name_to_layer[input_layer_name] = input_layer; + int current_id = 0; + input_layer->setID(current_id++); + for (const auto& layer_data : model_data) { + try { + std::string layer_type = layer_data["type"]; + + if (layer_type == "InputLayer") continue; + std::string layer_name = layer_data["name"]; + int layer_index = layer_data["index"]; + if (comments) { + std::cout << "Processing layer " << layer_index << ": " << layer_name + << " (" << layer_type << ")" << std::endl; + } + + std::shared_ptr layer; + + if (layer_type.find("Conv") != std::string::npos) { + it_lab_ai::Tensor tensor = it_lab_ai::create_tensor_from_json( + layer_data, it_lab_ai::Type::kFloat); + + size_t stride = 1; + size_t pads = 0; + size_t group = 1; + size_t dilations = 1; + std::vector pads_vec = {0, 0, 0, 0}; + + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + + if (attributes.contains("strides") && + attributes["strides"].is_array()) { + auto strides = attributes["strides"]; + if (strides.size() >= 2) { + stride = strides[0].get(); + } + } + + if (attributes.contains("pads") && attributes["pads"].is_array()) { + auto pads_array = attributes["pads"]; + if (pads_array.size() >= 4) { + pads_vec = { + pads_array[0].get(), pads_array[1].get(), + pads_array[2].get(), pads_array[3].get()}; + pads = pads_vec[0]; + } + } else if (layer_data.contains("padding") && + layer_data["padding"] == "valid") { + pads = 0; + } else if (layer_data.contains("padding") && + layer_data["padding"] == "same") { + size_t kernel_size = tensor.get_shape()[0]; + pads = (kernel_size - 1) / 2; + } + + if (attributes.contains("group")) { + group = attributes["group"].get(); + } + + if (attributes.contains("dilations") && + attributes["dilations"].is_array()) { + auto dilations_array = attributes["dilations"]; + if (dilations_array.size() >= 2) { + 
dilations = dilations_array[0].get(); + } + } + } + + it_lab_ai::Tensor tmp_tensor = tensor; + + it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); + + auto conv_layer = std::make_shared( + stride, pads, dilations, tmp_tensor, tmp_bias, impl2, group); + layer = conv_layer; + } else if (layer_type.find("Relu") != std::string::npos || + layer_type.find("relu") != std::string::npos) { + auto ew_layer = std::make_shared("relu"); + layer = ew_layer; + } else if (layer_type.find("Sigmoid") != std::string::npos) { + auto ew_layer = std::make_shared("sigmoid"); + layer = ew_layer; + + } else if (layer_type.find("Dense") != std::string::npos || + layer_type.find("FullyConnected") != std::string::npos) { + it_lab_ai::Tensor tensor = it_lab_ai::create_tensor_from_json( + layer_data, it_lab_ai::Type::kFloat); + + it_lab_ai::Tensor tmp_tensor = it_lab_ai::Tensor( + it_lab_ai::Shape({tensor.get_shape()[1], tensor.get_shape()[0]}), + it_lab_ai::Type::kFloat); + + for (size_t h = 0; h < tensor.get_shape()[0]; h++) { + for (size_t w = 0; w < tensor.get_shape()[1]; w++) { + tmp_tensor.set({w, h}, tensor.get({h, w})); + } + } + + it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); + auto fc_layer = + std::make_shared(tmp_tensor, tmp_bias); + layer = fc_layer; + } else if (layer_type.find("Dropout") != std::string::npos) { + auto dropout_layer = std::make_shared(0.0); + layer = dropout_layer; + if (comments) + std::cout + << "DropOutLayer added to layers with probability 0.4 (turned " + "off for inference)." 
+ << std::endl; + } else if (layer_type == "GlobalAveragePool") { + auto pool_layer = std::make_shared( + it_lab_ai::Shape({0, 0}), "average", impl1); + layer = pool_layer; + if (comments) { + std::cout << "GlobalAveragePool layer added (will use input spatial " + "dimensions as kernel)" + << std::endl; + } + } else if ((layer_type == "MaxPool" || layer_type == "AveragePool")) { + std::string pooltype = + (layer_type.find("Max") != std::string::npos) ? "max" : "average"; + + it_lab_ai::Shape shape = {2, 2}; + it_lab_ai::Shape strides = {2, 2}; + it_lab_ai::Shape pads = {0, 0, 0, 0}; + it_lab_ai::Shape dilations = {1, 1}; + bool ceil_mode = false; + + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + + if (attributes.contains("kernel_shape") && + attributes["kernel_shape"].is_array()) { + auto kernel_shape = attributes["kernel_shape"]; + if (kernel_shape.size() >= 2) { + shape = it_lab_ai::Shape({kernel_shape[0].get(), + kernel_shape[1].get()}); + } + } + + if (attributes.contains("strides") && + attributes["strides"].is_array()) { + auto strides_array = attributes["strides"]; + if (strides_array.size() >= 2) { + strides = it_lab_ai::Shape({strides_array[0].get(), + strides_array[1].get()}); + } + } + + if (attributes.contains("pads") && attributes["pads"].is_array()) { + auto pads_array = attributes["pads"]; + if (pads_array.size() >= 4) { + pads = it_lab_ai::Shape( + {pads_array[0].get(), pads_array[1].get(), + pads_array[2].get(), pads_array[3].get()}); + } + } + + if (attributes.contains("dilations") && + attributes["dilations"].is_array()) { + auto dilations_array = attributes["dilations"]; + if (dilations_array.size() >= 2) { + dilations = it_lab_ai::Shape({dilations_array[0].get(), + dilations_array[1].get()}); + } + } + + if (attributes.contains("ceil_mode")) { + ceil_mode = attributes["ceil_mode"].get() != 0; + } + } + + auto pool_layer = + std::make_shared(shape, pooltype, impl1); + + try { + if (strides[0] != 
2 || strides[1] != 2) { + pool_layer->setStrides(strides[0], strides[1]); + } + + if (pads[0] != 0 || pads[1] != 0 || pads[2] != 0 || pads[3] != 0) { + pool_layer->setPads(pads[0], pads[1], pads[2], pads[3]); + } + + if (dilations[0] != 1 || dilations[1] != 1) { + pool_layer->setDilations(dilations[0], dilations[1]); + } + + pool_layer->setCeilMode(ceil_mode); + + } catch (const std::exception& e) { + if (comments) { + std::cout << "Warning: Some pooling parameters not supported: " + << e.what() << std::endl; + } + } + layer = pool_layer; + } else if (layer_type.find("Flatten") != std::string::npos) { + int axis = 1; + + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + if (attributes.contains("axis")) { + axis = attributes["axis"].get(); + } + } + auto flatten_layer = std::make_shared(axis); + layer = flatten_layer; + } else if (layer_type == "Concat") { + int axis = 0; + if (layer_data["attributes"].contains("axis")) { + axis = layer_data["attributes"]["axis"]; + } + if (layer_data.contains("inputs")) { + for (const auto& input_name : layer_data["inputs"]) { + std::string input_tensor = input_name.get(); + std::string base_input_name = get_base_layer_name(input_tensor); + concat_connections[layer_name].push_back(base_input_name); + } + } + auto concat_layer = std::make_shared(axis); + layer = concat_layer; + concat_connected_inputs[layer_name] = std::unordered_set(); + } else if (layer_type == "Split") { + int axis = 0; + std::vector splits; + + if (layer_data["attributes"].contains("axis")) { + axis = layer_data["attributes"]["axis"]; + } + if (layer_data.contains("inputs") && layer_data["inputs"].is_array()) { + auto inputs = layer_data["inputs"]; + if (inputs.size() >= 2) { + std::string constant_name = inputs[1].get(); + constant_name = get_base_layer_name(constant_name); + + if (layer_parameters.count(constant_name)) { + splits = layer_parameters[constant_name]; + } else if (constant_name.find("onnx::") != 
std::string::npos) { + splits = last_constant_value; + layer_parameters[constant_name] = last_constant_value; + } + } + } + if (layer_data.contains("weights") && + layer_data["weights"].is_array()) { + for (const auto& s : layer_data["weights"]) { + splits.push_back(s.get()); + } + } + + auto split_layer = + std::make_shared(axis, splits); + layer = split_layer; + + split_layers[layer_name] = split_layer; + split_name_to_index[layer_name] = + static_cast(split_distribution.size()); + split_distribution.emplace_back(); + } else if (layer_type == "Add" || layer_type == "Mul" || + layer_type == "Sub" || layer_type == "Div") { + bool has_scalar_constant = false; + float scalar_value = 0.0F; + + if (layer_data.contains("inputs") && layer_data["inputs"].is_array()) { + auto inputs = layer_data["inputs"]; + for (const auto& input_name : inputs) { + std::string input_tensor = input_name.get(); + std::string base_name = get_base_layer_name(input_tensor); + + if (float_parameters.find(base_name) != float_parameters.end()) { + scalar_value = float_parameters[base_name]; + has_scalar_constant = true; + break; + } + if (layer_parameters.find(base_name) != layer_parameters.end() && + !layer_parameters[base_name].empty()) { + scalar_value = static_cast(layer_parameters[base_name][0]); + has_scalar_constant = true; + break; + } + } + } + + bool has_direct_value = layer_data.contains("value"); + float direct_value = 0.0F; + + if (has_direct_value) { + if (layer_data["value"].is_string()) { + try { + direct_value = std::stof(layer_data["value"].get()); + } catch (...) { + direct_value = 0.0F; + } + } else if (layer_data["value"].is_number()) { + direct_value = layer_data["value"].get(); + } + } + + if (has_direct_value || has_scalar_constant) { + float value = has_direct_value ? 
direct_value : scalar_value; + std::string ew_operation; + + if (layer_type == "Mul") { + ew_operation = "linear"; + auto ew_layer = + std::make_shared(ew_operation, value, 0.0F); + layer = ew_layer; + if (comments) { + std::cout << "Created binary " << layer_type << " operation with " + << value << "scalar" << std::endl; + } + } else if (layer_type == "Add") { + ew_operation = "linear"; + auto ew_layer = + std::make_shared(ew_operation, 1.0F, value); + layer = ew_layer; + } else if (layer_type == "Sub") { + ew_operation = "linear"; + auto ew_layer = std::make_shared(ew_operation, + 1.0F, -value); + layer = ew_layer; + } else { + continue; + } + } else { + it_lab_ai::BinaryOpLayer::Operation op; + if (layer_type == "Add") + op = it_lab_ai::BinaryOpLayer::Operation::kAdd; + else if (layer_type == "Sub") + op = it_lab_ai::BinaryOpLayer::Operation::kSub; + else if (layer_type == "Mul") + op = it_lab_ai::BinaryOpLayer::Operation::kMul; + else if (layer_type == "Div") + op = it_lab_ai::BinaryOpLayer::Operation::kDiv; + + auto bin_layer = std::make_shared(op); + layer = bin_layer; + } + } else if (layer_type == "Gemm") { + it_lab_ai::Tensor tensor = it_lab_ai::create_tensor_from_json( + layer_data, it_lab_ai::Type::kFloat); + + float alpha = 1.0F; + float beta = 1.0F; + bool trans_b = true; + + if (layer_data.contains("alpha")) { + alpha = layer_data["alpha"].get(); + } + if (layer_data.contains("beta")) { + beta = layer_data["beta"].get(); + } + if (layer_data.contains("transB")) { + trans_b = layer_data["transB"].get() != 0; + } + + it_lab_ai::Tensor tmp_tensor = tensor; + it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); + if (trans_b) { + it_lab_ai::Shape transposed_shape( + {tensor.get_shape()[1], tensor.get_shape()[0]}); + it_lab_ai::Tensor transposed_tensor(transposed_shape, + it_lab_ai::Type::kFloat); + + for (size_t i = 0; i < tensor.get_shape()[0]; ++i) { + for (size_t j = 0; j < tensor.get_shape()[1]; ++j) { + auto value = tensor.get({i, 
j}); + transposed_tensor.set({j, i}, value); + } + } + + tmp_tensor = transposed_tensor; + + if (comments) { + std::cout << "Weights transposed from [" << tensor.get_shape()[0] + << ", " << tensor.get_shape()[1] << "] to [" + << transposed_shape[0] << ", " << transposed_shape[1] + << "]" << std::endl; + } + } + + if (alpha != 1.0F) { + auto weights_data = *tmp_tensor.as(); + for (auto& val : weights_data) { + val *= alpha; + } + tmp_tensor = make_tensor(weights_data, tmp_tensor.get_shape()); + } + + if (beta != 1.0F) { + auto bias_data = *tmp_bias.as(); + for (auto& val : bias_data) { + val *= beta; + } + tmp_bias = make_tensor(bias_data, tmp_bias.get_shape()); + } + + auto fc_layer = + std::make_shared(tmp_tensor, tmp_bias); + layer = fc_layer; + } else if (layer_type == "Transpose" || + layer_type.find("transpose") != std::string::npos) { + std::vector perm; + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + if (attributes.contains("perm") && attributes["perm"].is_array()) { + auto perm_array = attributes["perm"]; + for (const auto& p : perm_array) { + perm.push_back(p.get()); + } + } + } + + auto transpose_layer = + std::make_shared(perm); + layer = transpose_layer; + + if (comments) { + std::cout << "TransposeLayer added with perm: ["; + for (size_t i = 0; i < perm.size(); ++i) { + std::cout << perm[i]; + if (i < perm.size() - 1) std::cout << ", "; + } + std::cout << "]" << std::endl; + } + } else if (layer_type == "Reshape") { + bool allowzero = false; + std::vector shape; + + if (layer_data.contains("inputs") && layer_data["inputs"].is_array()) { + auto inputs = layer_data["inputs"]; + if (inputs.size() >= 2) { + std::string constant_name = inputs[1].get(); + constant_name = get_base_layer_name(constant_name); + + if (layer_parameters.count(constant_name)) { + shape = layer_parameters[constant_name]; + } + } + } + + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + if 
(attributes.contains("allowzero")) { + allowzero = attributes["allowzero"].get() != 0; + } + } + + if (layer_data.contains("weights") && + layer_data["weights"].is_array()) { + auto weights = layer_data["weights"]; + for (const auto& weight : weights) { + if (weight.is_number()) { + shape.push_back(weight.get()); + } + } + } + + auto reshape_layer = + std::make_shared(allowzero, shape); + layer = reshape_layer; + + } else if (layer_type == "ReduceMean") { + std::vector axes; + int64_t keepdims = 1; + + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + if (attributes.contains("axes") && attributes["axes"].is_array()) { + auto axes_array = attributes["axes"]; + for (const auto& axis : axes_array) { + axes.push_back(axis.get()); + } + } + if (attributes.contains("keepdims")) { + keepdims = attributes["keepdims"].get(); + } + } + auto reduce_layer = std::make_shared( + it_lab_ai::ReduceLayer::Operation::kMean, keepdims, axes); + layer = reduce_layer; + } else if (layer_type == "ReduceSum") { + int64_t keepdims = 0; + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + if (attributes.contains("keepdims")) { + keepdims = attributes["keepdims"].get(); + } + } + + std::vector axes; + if (layer_data.contains("inputs") && layer_data["inputs"].is_array()) { + auto inputs = layer_data["inputs"]; + if (inputs.size() >= 2) { + std::string constant_name = inputs[1].get(); + constant_name = get_base_layer_name(constant_name); + + if (layer_parameters.count(constant_name)) { + axes = layer_parameters[constant_name]; + } else if (constant_name.find("onnx::") != std::string::npos) { + axes = last_constant_value; + layer_parameters[constant_name] = last_constant_value; + } + } + } + auto reduce_layer = std::make_shared( + it_lab_ai::ReduceLayer::Operation::kSum, keepdims, axes); + layer = reduce_layer; + } else if (layer_type == "Constant") { + if (layer_data.contains("attributes")) { + const 
auto& attributes = layer_data["attributes"]; + if (attributes.contains("value") && attributes["value"].is_array()) { + auto values = attributes["value"]; + std::vector data; + for (const auto& val : values) { + data.push_back(val.get()); + } + layer_parameters[layer_name] = data; + last_constant_name = layer_name; + last_constant_value = data; + } + if (attributes.contains("value") && attributes["value"].is_number()) { + float value = attributes["value"].get(); + float_parameters[layer_name] = value; + } + } + + continue; + } else if (layer_type == "MatMul") { + auto matmul_layer = std::make_shared(); + layer = matmul_layer; + + } else if (layer_type == "Softmax") { + int axis = -1; + + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + if (attributes.contains("axis")) { + axis = attributes["axis"].get(); + } + } + auto softmax_layer = std::make_shared(axis); + layer = softmax_layer; + + } else if (layer_type == "BatchNormalization") { + float epsilon = 1e-5F; + float momentum = 0.9F; + bool training_mode = false; + + if (layer_data.contains("attributes")) { + const auto& attributes = layer_data["attributes"]; + if (attributes.contains("epsilon")) { + epsilon = attributes["epsilon"].get(); + } + if (attributes.contains("momentum")) { + momentum = attributes["momentum"].get(); + } + if (attributes.contains("training_mode")) { + training_mode = attributes["training_mode"].get() != 0; + } + } + + std::vector scale_data; + std::vector bias_data; + std::vector mean_data; + std::vector var_data; + + if (layer_data.contains("scale") && layer_data["scale"].is_array()) { + const auto& scale_array = layer_data["scale"]; + for (const auto& value : scale_array) { + scale_data.push_back(value.get()); + } + } + + if (layer_data.contains("bias") && layer_data["bias"].is_array()) { + const auto& bias_array = layer_data["bias"]; + for (const auto& value : bias_array) { + bias_data.push_back(value.get()); + } + } + + if 
(layer_data.contains("mean") && layer_data["mean"].is_array()) { + const auto& mean_array = layer_data["mean"]; + for (const auto& value : mean_array) { + mean_data.push_back(value.get()); + } + } + + if (layer_data.contains("var") && layer_data["var"].is_array()) { + const auto& var_array = layer_data["var"]; + for (const auto& value : var_array) { + var_data.push_back(value.get()); + } + } + + size_t num_channels = scale_data.size(); + + it_lab_ai::Tensor scale = it_lab_ai::make_tensor( + scale_data, it_lab_ai::Shape({num_channels})); + it_lab_ai::Tensor bias = + it_lab_ai::make_tensor(bias_data, it_lab_ai::Shape({num_channels})); + it_lab_ai::Tensor mean = + it_lab_ai::make_tensor(mean_data, it_lab_ai::Shape({num_channels})); + it_lab_ai::Tensor var = + it_lab_ai::make_tensor(var_data, it_lab_ai::Shape({num_channels})); + + auto bn_layer = std::make_shared( + scale, bias, mean, var, epsilon, momentum, training_mode); + layer = bn_layer; + } else { + continue; + } + if (layer) { + int original_id = current_id; + layer->setID(current_id++); + layers.push_back(layer); + name_to_layer[layer_name] = layer; + original_ids[layer_name] = original_id; + if (layer_data.contains("inputs")) { + for (const auto& input_name : layer_data["inputs"]) { + std::string input_tensor = input_name.get(); + + std::regex split_output_pattern("(.+)_output_(\\d+)$"); + std::smatch matches; + + if (std::regex_search(input_tensor, matches, + split_output_pattern)) { + std::string split_layer_name = matches[1].str(); + int output_index = std::stoi(matches[2].str()); + + if (split_layers.find(split_layer_name) != split_layers.end()) { + int target_layer_id = layer->getID(); + + int split_index = split_name_to_index[split_layer_name]; + + bool connection_exists = false; + for (const auto& existing_conn : + split_distribution[split_index]) { + if (existing_conn.first == target_layer_id && + existing_conn.second == output_index) { + connection_exists = true; + break; + } + } + + if 
(!connection_exists) { + split_distribution[split_index].emplace_back(target_layer_id, + output_index); + } + bool connection_in_list = false; + for (const auto& existing_target : + connections[split_layer_name]) { + if (existing_target == layer_name) { + connection_in_list = true; + break; + } + } + + if (!connection_in_list) { + connections[split_layer_name].push_back(layer_name); + } + continue; + } + } + + if (input_tensor.find("Constant") != std::string::npos || + input_tensor.find("onnx::") != std::string::npos || + input_tensor.find("_Constant") != std::string::npos) { + continue; + } + connections[input_tensor].push_back(layer_name); + } + } + } + } catch (const std::exception& e) { + std::cerr << "Error processing layer " << layer_data["index"] << " (" + << layer_data["name"] << "): " << e.what() << std::endl; + throw; + } + } + + it_lab_ai::Graph graph(static_cast(layers.size())); + + graph.setInput(*input_layer, input); + + for (const auto& [source_tensor, target_layers] : connections) { + std::string source_layer_name = get_base_layer_name(source_tensor); + + for (const auto& target_layer_name : target_layers) { + connection_list.emplace_back(source_layer_name, target_layer_name); + } + } + + try { + std::sort( + connection_list.begin(), connection_list.end(), + [&](const auto& a, const auto& b) { + if (!name_to_layer.count(a.first) || !name_to_layer.count(b.first)) { + return false; + } + return name_to_layer[a.first]->getID() < + name_to_layer[b.first]->getID(); + }); + } catch (const std::exception& e) { + std::cerr << "ERROR during sorting: " << e.what() << std::endl; + } + + std::vector order = {}; + + for (const auto& [source_name, target_name] : connection_list) { + if (name_to_layer.count(source_name) && name_to_layer.count(target_name)) { + if (target_name.find("Concat") != std::string::npos || + name_to_layer[target_name]->getName() == it_lab_ai::kConcat) { + if (concat_connections.find(target_name) != concat_connections.end()) { + const auto& 
expected_inputs = concat_connections[target_name]; + auto it = std::find(expected_inputs.begin(), expected_inputs.end(), + source_name); + + if (it != expected_inputs.end()) { + int input_index = + static_cast(std::distance(expected_inputs.begin(), it)); + concat_orders[target_name].push_back(input_index); + concat_connected_inputs[target_name].insert(source_name); + + if (concat_connected_inputs[target_name].size() == + concat_connections[target_name].size()) { + auto concat_layer = + std::dynamic_pointer_cast( + name_to_layer[target_name]); + if (concat_layer) { + concat_layer->setInputOrder(concat_orders[target_name]); + } + } + } + } + } + + try { + graph.makeConnection(*name_to_layer[source_name], + *name_to_layer[target_name]); + + } catch (const std::exception& e) { + std::cerr << "Failed: " << source_name << " -> " << target_name << " : " + << e.what() << std::endl; + } + } + } + for (auto& split_dist : split_distribution) { + for (auto& connection : split_dist) { + for (const auto& [name, layer] : name_to_layer) { + if (original_ids[name] == connection.first) { + connection.first = layer->getID(); + break; + } + } + } + } + graph.setSplitDistribution(split_distribution); + auto output_layer = layers.back(); + graph.setOutput(*output_layer, output); + auto in_out_degrees = graph.getInOutDegrees(); + auto traversal_order = graph.getTraversalOrder(); + + if (comments) std::cout << "Starting inference..." << std::endl; + try { + graph.inference(); + if (comments) std::cout << "Inference completed successfully." << std::endl; + } catch (const std::exception& e) { + std::cerr << "ERROR during inference: " << e.what() << std::endl; + } + +#ifdef ENABLE_STATISTIC_TIME + std::vector times = graph.getTimeInfo(); + std::cout << "!INFERENCE TIME INFO START!" 
<< std::endl; + for (size_t i = 0; i < times.size(); i++) { + std::cout << times[i] << std::endl; + } + std::vector elps_time = graph.getTime(); + int sum = std::accumulate(elps_time.begin(), elps_time.end(), 0); + std::cout << "Elapsed inference time:" << sum << std::endl; + std::cout << "!INFERENCE TIME INFO END!" << std::endl; +#endif +} +

// Loads a class-id -> label table from a JSON file.
//
// The file must contain a single JSON object whose keys are decimal class ids
// and whose values are label strings, e.g. {"0": "tench", "1": "goldfish"}.
//
// filename: path of the JSON file.
// Returns:  map from numeric class id to its label.
// Throws:   std::runtime_error when the file cannot be opened. Errors raised
//           by json::parse, std::stoi (non-numeric key) or get<std::string>
//           (non-string value) propagate unchanged.
std::unordered_map<int, std::string> load_class_names(
    const std::string& filename) {
  std::ifstream file(filename);
  if (!file.is_open()) {
    throw std::runtime_error("Cannot open class names file: " + filename);
  }
  const json json_data = json::parse(file);

  std::unordered_map<int, std::string> class_names;
  class_names.reserve(json_data.size());
  for (const auto& [key, value] : json_data.items()) {
    class_names[std::stoi(key)] = value.get<std::string>();
  }
  return class_names;
}

// Extracts the network input shape from a model description JSON.
//
// Scans the layer list for an "InputLayer" entry carrying attributes.shape:
//   * a 4-element shape is returned as is;
//   * a 2-element shape whose second entry is 784 (flattened 28x28) is
//     expanded to {shape[0], 1, 28, 28}.
// Any other shape is ignored and the scan continues.
//
// json_path: path of the model JSON, read through it_lab_ai::read_json.
// Returns:   the shape found, or the single-element fallback {28} when no
//            usable InputLayer shape exists (callers that need four
//            dimensions must check the size).
std::vector<int> get_input_shape_from_json(const std::string& json_path) {
  const it_lab_ai::json model_data = it_lab_ai::read_json(json_path);

  for (const auto& layer_data : model_data) {
    // contains() before operator[]: indexing a const json with a missing key
    // is not a checked lookup.
    if (!layer_data.contains("type") ||
        !(layer_data["type"] == "InputLayer") ||
        !layer_data.contains("attributes")) {
      continue;
    }
    const auto& attributes = layer_data["attributes"];
    if (!attributes.contains("shape")) {
      continue;
    }

    const auto shape = attributes["shape"].get<std::vector<int>>();
    if (shape.size() == 4) {
      return shape;
    }
    if (shape.size() == 2 && shape[1] == 784) {
      return {shape[0], 1, 28, 28};
    }
  }
  return {28};
}

// Converts raw network output into per-class scores.
//
// For models whose name contains "yolo" the output is returned untouched when
// it already sums to 1 within 0.01 (it is treated as probabilities). In every
// other case the values are passed through softmax().
//
// output:     raw values produced by the graph.
// model_name: model identifier; only the "yolo" substring is inspected.
// Returns:    the score vector, same length as `output`.
std::vector<float> process_model_output(const std::vector<float>& output,
                                        const std::string& model_name) {
  const bool is_yolo = model_name.find("yolo") != std::string::npos;
  if (is_yolo) {
    const float total = std::accumulate(output.begin(), output.end(), 0.0F);
    if (std::abs(total - 1.0F) < 0.01F) {
      return output;
    }
  }
  return softmax(output);
}

// Turns an OpenCV image into a planar float tensor of the requested shape.
//
// Steps:
//   1. Fit the image to (height, width). Models whose name contains "yolo" or
//      "google" are letterboxed (aspect ratio kept, zero padding, centred);
//      all other models are resized directly, with the interpolation chosen
//      from the scale direction.
//   2. Convert to float and scale to [0, 1].
//   3. For the non-letterboxed models: 3-channel input is reordered BGR -> RGB
//      and normalised with the ImageNet mean/std; 1-channel input is converted
//      to grayscale. Letterboxed models keep the planes in OpenCV order.
//   4. Copy the first `channels` planes, plane by plane, row by row.
//
// image:       source image; assumed to be in OpenCV BGR order as produced by
//              cv::imread - NOTE(review): confirm against callers.
// input_shape: {batch, channels, height, width}.
// model_name:  model identifier, used only for the substring checks above.
// Returns:     tensor with the shape given by `input_shape`.
// Throws:      std::runtime_error when `input_shape` does not have four
//              entries, when the image is empty, when a spatial/channel
//              dimension is not positive, or when the image has fewer planes
//              than `channels`.
//
// NOTE(review): exactly one image is written into the buffer while the tensor
// shape carries input_shape[0] as batch dimension; this is only consistent
// for a batch of 1 - confirm no caller passes another batch size.
it_lab_ai::Tensor prepare_image(const cv::Mat& image,
                                const std::vector<int>& input_shape,
                                const std::string& model_name) {
  if (input_shape.size() != 4) {
    throw std::runtime_error("Input shape must have 4 dimensions");
  }
  if (image.empty()) {
    throw std::runtime_error("Input image is empty");
  }

  const int batch_size = input_shape[0];
  const int channels = input_shape[1];
  const int height = input_shape[2];
  const int width = input_shape[3];
  if (channels <= 0 || height <= 0 || width <= 0) {
    throw std::runtime_error("Input shape dimensions must be positive");
  }

  // These models are letterboxed and fed plain [0, 1] values; the rest get a
  // direct resize plus ImageNet normalisation.
  const bool letterbox_model =
      model_name.find("yolo") != std::string::npos ||
      model_name.find("google") != std::string::npos;

  cv::Mat fitted;
  if (image.rows == height && image.cols == width) {
    fitted = image.clone();
  } else if (letterbox_model) {
    const double scale = std::min(static_cast<double>(width) / image.cols,
                                  static_cast<double>(height) / image.rows);
    // Clamp to one pixel so extreme aspect ratios cannot request an empty
    // resize target.
    const int new_width = std::max(1, static_cast<int>(image.cols * scale));
    const int new_height = std::max(1, static_cast<int>(image.rows * scale));

    cv::Mat resized;
    cv::resize(image, resized, cv::Size(new_width, new_height), 0, 0,
               cv::INTER_LINEAR);

    fitted = cv::Mat::zeros(height, width, image.type());
    const int x_offset = (width - new_width) / 2;
    const int y_offset = (height - new_height) / 2;
    resized.copyTo(
        fitted(cv::Rect(x_offset, y_offset, new_width, new_height)));
  } else {
    int interpolation = cv::INTER_LINEAR;
    if (image.rows < height || image.cols < width) {
      interpolation = cv::INTER_CUBIC;  // upscaling
    } else if (image.rows > height * 2 || image.cols > width * 2) {
      interpolation = cv::INTER_AREA;  // strong downscaling
    }
    cv::resize(image, fitted, cv::Size(width, height), 0, 0, interpolation);
  }

  cv::Mat float_image;
  fitted.convertTo(float_image, CV_32FC3);
  float_image /= 255.0;

  if (!letterbox_model && channels == 1 && float_image.channels() > 1) {
    cv::cvtColor(float_image, float_image, cv::COLOR_BGR2GRAY);
  }

  std::vector<cv::Mat> planes;
  cv::split(float_image, planes);
  if (planes.size() < static_cast<size_t>(channels)) {
    throw std::runtime_error(
        "Image has fewer channels than the model input requires");
  }

  if (!letterbox_model && channels == 3) {
    // Reorder to RGB first: the ImageNet statistics below are listed in RGB
    // order, so they must be applied after the swap, not to the BGR planes.
    std::swap(planes[0], planes[2]);
    const double mean[3] = {0.485, 0.456, 0.406};
    const double stddev[3] = {0.229, 0.224, 0.225};
    for (int c = 0; c < 3; ++c) {
      planes[c] = (planes[c] - mean[c]) / stddev[c];
    }
  }

  std::vector<float> data;
  data.reserve(static_cast<size_t>(channels) * static_cast<size_t>(height) *
               static_cast<size_t>(width));
  for (int c = 0; c < channels; ++c) {
    for (int h = 0; h < height; ++h) {
      const float* row = planes[c].ptr<float>(h);
      data.insert(data.end(), row, row + width);
    }
  }

  it_lab_ai::Shape shape(
      {static_cast<size_t>(batch_size), static_cast<size_t>(channels),
       static_cast<size_t>(height), static_cast<size_t>(width)});

  return it_lab_ai::make_tensor(data, shape);
}

// Builds the {1, 1, 28, 28} input tensor for the MNIST model.
//
// The image is reduced to a single grayscale plane, brought to 28x28 when it
// has another size, and copied with rows and columns exchanged: element
// (i, j) of the tensor takes pixel (row j, column i).
//
// image:   source image; multi-channel input is assumed to be BGR(A) as
//          produced by cv::imread, with 8-bit samples - NOTE(review): confirm.
// Returns: float tensor holding the raw 0..255 pixel values.
// Throws:  std::runtime_error when the image is empty.
it_lab_ai::Tensor prepare_mnist_image(const cv::Mat& image) {
  if (image.empty()) {
    throw std::runtime_error("Input image is empty");
  }

  cv::Mat gray_image;
  if (image.channels() == 1) {
    gray_image = image;  // already a single plane, header copy only
  } else {
    cv::cvtColor(image, gray_image, cv::COLOR_BGR2GRAY);
  }

  if (gray_image.rows != 28 || gray_image.cols != 28) {
    cv::Mat resized;
    cv::resize(gray_image, resized, cv::Size(28, 28), 0, 0, cv::INTER_AREA);
    gray_image = resized;
  }

  // Read from the grayscale plane (not from the first colour channel of the
  // source image), transposing while copying.
  std::vector<float> res(28 * 28);
  for (int i = 0; i < 28; ++i) {
    for (int j = 0; j < 28; ++j) {
      res[i * 28 + j] = gray_image.at<uchar>(j, i);
    }
  }

  it_lab_ai::Shape sh({1, 1, 28, 28});
  return it_lab_ai::make_tensor(res, sh);
}
\ No newline at end of file diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index 788637abf..3b964bee6 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -1,4 +1,7 @@ +#pragma once #include +#include +#include #include #include #include @@ -7,14 +10,42 @@ #include "Weights_Reader/reader_weights.hpp" #include "graph/graph.hpp" +#include "layers/BatchNormalizationLayer.hpp" +#include "layers/BinaryOpLayer.hpp" +#include "layers/ConcatLayer.hpp" #include "layers/ConvLayer.hpp" #include "layers/DropOutLayer.hpp" #include "layers/EWLayer.hpp" #include "layers/FCLayer.hpp" #include "layers/FlattenLayer.hpp" #include "layers/InputLayer.hpp" +#include "layers/MatmulLayer.hpp" #include "layers/OutputLayer.hpp" #include "layers/PoolingLayer.hpp" +#include "layers/ReduceLayer.hpp" +#include "layers/ReshapeLayer.hpp" +#include "layers/SoftmaxLayer.hpp" +#include "layers/SplitLayer.hpp" +#include "layers/Tensor.hpp" +#include "layers/TransposeLayer.hpp" void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, - bool
comments, bool parallel); + const std::string& json_path, bool comments, + bool parallel = false); +void build_graph_linear(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output, + bool comments, bool parallel = false); +std::unordered_map load_class_names( + const std::string& filename); +std::unordered_map model_paths = { + {"alexnet_mnist", MODEL_PATH_H5}, + {"googlenet", MODEL_PATH_GOOGLENET_ONNX}, + {"resnet", MODEL_PATH_RESNET_ONNX}, + {"densenet", MODEL_PATH_DENSENET_ONNX}, + {"yolo", MODEL_PATH_YOLO11NET_ONNX}}; +std::vector get_input_shape_from_json(const std::string& json_path); +std::vector process_model_output(const std::vector& output, + const std::string& model_name); +it_lab_ai::Tensor prepare_image(const cv::Mat& image, + const std::vector& input_shape, + const std::string& model_name = ""); +it_lab_ai::Tensor prepare_mnist_image(const cv::Mat& image); \ No newline at end of file diff --git a/app/Graph/graph_build.cpp b/app/Graph/graph_build.cpp index 309e944ce..3a7330c60 100644 --- a/app/Graph/graph_build.cpp +++ b/app/Graph/graph_build.cpp @@ -1,3 +1,7 @@ +#include +#include +#include + #include "build.cpp" #include "build.hpp" @@ -5,22 +9,43 @@ namespace fs = std::filesystem; using namespace it_lab_ai; int main(int argc, char* argv[]) { - std::string image_folder = IMAGE1_PATH; - std::vector image_paths; + std::string model_name = "alexnet_mnist"; bool parallel = false; - if (argc > 1 && std::string(argv[1]) == "--parallel") { - std::cout << "Parallel mode" << std::endl; - parallel = true; + + for (int i = 1; i < argc; ++i) { + if (std::string(argv[i]) == "--parallel") { + parallel = true; + } else if (std::string(argv[i]) == "--model" && i + 1 < argc) { + model_name = argv[++i]; + } + } + + std::string json_path = model_paths[model_name]; + + std::vector input_shape; + input_shape = get_input_shape_from_json(json_path); + + std::string image_folder; + if (model_name == "alexnet_mnist") { + image_folder = IMAGE28_PATH; + } else { + image_folder = 
IMAGENET_PATH; } + std::vector image_paths; for (const auto& entry : fs::directory_iterator(image_folder)) { - if (entry.path().extension() == ".png") { + if (entry.path().extension() == ".png" || + entry.path().extension() == ".jpg" || + entry.path().extension() == ".jpeg") { image_paths.push_back(entry.path().string()); } } - if (image_paths.empty()) { - throw std::runtime_error("No PNG images found in the folder"); + std::unordered_map class_names; + try { + class_names = load_class_names(IMAGENET_LABELS); + } catch (const std::exception& e) { + std::cerr << "Warning: " << e.what() << std::endl; } for (const auto& image_path : image_paths) { @@ -30,32 +55,81 @@ int main(int argc, char* argv[]) { continue; } - cv::cvtColor(image, image, cv::COLOR_BGR2GRAY); - std::vector channels; - cv::split(image, channels); + try { + if (model_name == "alexnet_mnist") { + it_lab_ai::Tensor input = prepare_mnist_image(image); + it_lab_ai::Shape sh1({1, 5, 5, 3}); + std::vector vec(75, 3); + it_lab_ai::Tensor output = it_lab_ai::make_tensor(vec, sh1); - std::vector res(28 * 28); - for (int i = 0; i < 28; ++i) { - for (int j = 0; j < 28; ++j) { - res[i * 28 + j] = channels[0].at(j, i); - } - } + build_graph_linear(input, output, true, parallel); + std::vector tmp_output = softmax(*output.as()); + int top_n = std::min(3, static_cast(tmp_output.size())); + std::vector indices(tmp_output.size()); + std::iota(indices.begin(), indices.end(), 0); + std::partial_sort( + indices.begin(), indices.begin() + top_n, indices.end(), + [&](int a, int b) { return tmp_output[a] > tmp_output[b]; }); + + std::cout << "Top " << top_n << " predictions for MNIST:" << std::endl; + for (int i = 0; i < top_n; i++) { + int idx = indices[i]; + std::cout << " " << (i + 1) << ". 
Class " << idx << ": " + << std::fixed << std::setprecision(6) + << tmp_output[idx] * 100 << "%" << std::endl; + } + + int max_class = indices[0]; + float max_prob = tmp_output[max_class]; + std::cout << "Image: " << fs::path(image_path).filename().string() + << " -> Predicted digit: " << max_class + << " (probability: " << std::fixed << std::setprecision(6) + << max_prob * 100 << "%)" << std::endl; + + } else { + it_lab_ai::Tensor input = prepare_image(image, input_shape, model_name); + + size_t output_classes = 1000; + it_lab_ai::Tensor output({1, output_classes}, it_lab_ai::Type::kFloat); + + build_graph(input, output, json_path, false, parallel); + std::vector tmp_output = + process_model_output(*output.as(), model_name); - Shape sh({1, 1, 28, 28}); - Tensor input = make_tensor(res, sh); + int top_n = std::min(5, static_cast(tmp_output.size())); + std::vector indices(tmp_output.size()); + std::iota(indices.begin(), indices.end(), 0); + std::partial_sort( + indices.begin(), indices.begin() + top_n, indices.end(), + [&](int a, int b) { return tmp_output[a] > tmp_output[b]; }); - Shape sh1({1, 5, 5, 3}); - std::vector vec(75, 3); - Tensor output = make_tensor(vec, sh1); + std::cout << "Top " << top_n << " predictions:" << std::endl; + for (int i = 0; i < top_n; i++) { + int idx = indices[i]; + std::cout << " " << (i + 1) << ". 
Class " << idx << ": " + << std::fixed << std::setprecision(6) << tmp_output[idx]; - build_graph(input, output, true, parallel); + if (class_names.find(idx) != class_names.end()) { + std::cout << " (" << class_names[idx] << ")"; + } + std::cout << std::endl; + } - std::vector tmp_output = softmax(*output.as()); - for (size_t i = 0; i < tmp_output.size(); i++) { - if (tmp_output[i] >= 1e-6) { - std::cout << "Image: " << image_path << " -> Class: " << i << std::endl; + int max_class = indices[0]; + float max_prob = tmp_output[max_class]; + std::cout << "Image: " << fs::path(image_path).filename().string() + << " -> Predicted class: " << max_class; + if (class_names.find(max_class) != class_names.end()) { + std::cout << " (" << class_names[max_class] << ")"; + } + std::cout << " (probability: " << std::fixed << std::setprecision(6) + << max_prob << ")" << std::endl; } + std::cout << "----------------------------------------" << std::endl; + } catch (const std::exception& e) { + std::cerr << "Error processing image " << image_path << ": " << e.what() + << std::endl; } } return 0; -} +} \ No newline at end of file diff --git a/docs/imagenet1000_clsidx_to_labels.json b/docs/imagenet1000_clsidx_to_labels.json new file mode 100644 index 000000000..4cd13ab04 --- /dev/null +++ b/docs/imagenet1000_clsidx_to_labels.json @@ -0,0 +1,1000 @@ +{"0": "tench, Tinca tinca", + "1": "goldfish, Carassius auratus", + "2": "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias", + "3": "tiger shark, Galeocerdo cuvieri", + "4": "hammerhead, hammerhead shark", + "5": "electric ray, crampfish, numbfish, torpedo", + "6": "stingray", + "7": "cock", + "8": "hen", + "9": "ostrich, Struthio camelus", + "10": "brambling, Fringilla montifringilla", + "11": "goldfinch, Carduelis carduelis", + "12": "house finch, linnet, Carpodacus mexicanus", + "13": "junco, snowbird", + "14": "indigo bunting, indigo finch, indigo bird, Passerina cyanea", + "15": "robin, American robin, 
Turdus migratorius", + "16": "bulbul", + "17": "jay", + "18": "magpie", + "19": "chickadee", + "20": "water ouzel, dipper", + "21": "kite", + "22": "bald eagle, American eagle, Haliaeetus leucocephalus", + "23": "vulture", + "24": "great grey owl, great gray owl, Strix nebulosa", + "25": "European fire salamander, Salamandra salamandra", + "26": "common newt, Triturus vulgaris", + "27": "eft", + "28": "spotted salamander, Ambystoma maculatum", + "29": "axolotl, mud puppy, Ambystoma mexicanum", + "30": "bullfrog, Rana catesbeiana", + "31": "tree frog, tree-frog", + "32": "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", + "33": "loggerhead, loggerhead turtle, Caretta caretta", + "34": "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", + "35": "mud turtle", + "36": "terrapin", + "37": "box turtle, box tortoise", + "38": "banded gecko", + "39": "common iguana, iguana, Iguana iguana", + "40": "American chameleon, anole, Anolis carolinensis", + "41": "whiptail, whiptail lizard", + "42": "agama", + "43": "frilled lizard, Chlamydosaurus kingi", + "44": "alligator lizard", + "45": "Gila monster, Heloderma suspectum", + "46": "green lizard, Lacerta viridis", + "47": "African chameleon, Chamaeleo chamaeleon", + "48": "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis", + "49": "African crocodile, Nile crocodile, Crocodylus niloticus", + "50": "American alligator, Alligator mississipiensis", + "51": "triceratops", + "52": "thunder snake, worm snake, Carphophis amoenus", + "53": "ringneck snake, ring-necked snake, ring snake", + "54": "hognose snake, puff adder, sand viper", + "55": "green snake, grass snake", + "56": "king snake, kingsnake", + "57": "garter snake, grass snake", + "58": "water snake", + "59": "vine snake", + "60": "night snake, Hypsiglena torquata", + "61": "boa constrictor, Constrictor constrictor", + "62": "rock python, rock snake, Python sebae", + "63": "Indian cobra, Naja naja", + "64": 
"green mamba", + "65": "sea snake", + "66": "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", + "67": "diamondback, diamondback rattlesnake, Crotalus adamanteus", + "68": "sidewinder, horned rattlesnake, Crotalus cerastes", + "69": "trilobite", + "70": "harvestman, daddy longlegs, Phalangium opilio", + "71": "scorpion", + "72": "black and gold garden spider, Argiope aurantia", + "73": "barn spider, Araneus cavaticus", + "74": "garden spider, Aranea diademata", + "75": "black widow, Latrodectus mactans", + "76": "tarantula", + "77": "wolf spider, hunting spider", + "78": "tick", + "79": "centipede", + "80": "black grouse", + "81": "ptarmigan", + "82": "ruffed grouse, partridge, Bonasa umbellus", + "83": "prairie chicken, prairie grouse, prairie fowl", + "84": "peacock", + "85": "quail", + "86": "partridge", + "87": "African grey, African gray, Psittacus erithacus", + "88": "macaw", + "89": "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", + "90": "lorikeet", + "91": "coucal", + "92": "bee eater", + "93": "hornbill", + "94": "hummingbird", + "95": "jacamar", + "96": "toucan", + "97": "drake", + "98": "red-breasted merganser, Mergus serrator", + "99": "goose", + "100": "black swan, Cygnus atratus", + "101": "tusker", + "102": "echidna, spiny anteater, anteater", + "103": "platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus", + "104": "wallaby, brush kangaroo", + "105": "koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus", + "106": "wombat", + "107": "jellyfish", + "108": "sea anemone, anemone", + "109": "brain coral", + "110": "flatworm, platyhelminth", + "111": "nematode, nematode worm, roundworm", + "112": "conch", + "113": "snail", + "114": "slug", + "115": "sea slug, nudibranch", + "116": "chiton, coat-of-mail shell, sea cradle, polyplacophore", + "117": "chambered nautilus, pearly nautilus, nautilus", + "118": "Dungeness crab, Cancer magister", + "119": "rock crab, Cancer 
irroratus", + "120": "fiddler crab", + "121": "king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica", + "122": "American lobster, Northern lobster, Maine lobster, Homarus americanus", + "123": "spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish", + "124": "crayfish, crawfish, crawdad, crawdaddy", + "125": "hermit crab", + "126": "isopod", + "127": "white stork, Ciconia ciconia", + "128": "black stork, Ciconia nigra", + "129": "spoonbill", + "130": "flamingo", + "131": "little blue heron, Egretta caerulea", + "132": "American egret, great white heron, Egretta albus", + "133": "bittern", + "134": "crane", + "135": "limpkin, Aramus pictus", + "136": "European gallinule, Porphyrio porphyrio", + "137": "American coot, marsh hen, mud hen, water hen, Fulica americana", + "138": "bustard", + "139": "ruddy turnstone, Arenaria interpres", + "140": "red-backed sandpiper, dunlin, Erolia alpina", + "141": "redshank, Tringa totanus", + "142": "dowitcher", + "143": "oystercatcher, oyster catcher", + "144": "pelican", + "145": "king penguin, Aptenodytes patagonica", + "146": "albatross, mollymawk", + "147": "grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus", + "148": "killer whale, killer, orca, grampus, sea wolf, Orcinus orca", + "149": "dugong, Dugong dugon", + "150": "sea lion", + "151": "Chihuahua", + "152": "Japanese spaniel", + "153": "Maltese dog, Maltese terrier, Maltese", + "154": "Pekinese, Pekingese, Peke", + "155": "Shih-Tzu", + "156": "Blenheim spaniel", + "157": "papillon", + "158": "toy terrier", + "159": "Rhodesian ridgeback", + "160": "Afghan hound, Afghan", + "161": "basset, basset hound", + "162": "beagle", + "163": "bloodhound, sleuthhound", + "164": "bluetick", + "165": "black-and-tan coonhound", + "166": "Walker hound, Walker foxhound", + "167": "English foxhound", + "168": "redbone", + "169": "borzoi, Russian wolfhound", + "170": "Irish wolfhound", + "171": "Italian 
greyhound", + "172": "whippet", + "173": "Ibizan hound, Ibizan Podenco", + "174": "Norwegian elkhound, elkhound", + "175": "otterhound, otter hound", + "176": "Saluki, gazelle hound", + "177": "Scottish deerhound, deerhound", + "178": "Weimaraner", + "179": "Staffordshire bullterrier, Staffordshire bull terrier", + "180": "American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier", + "181": "Bedlington terrier", + "182": "Border terrier", + "183": "Kerry blue terrier", + "184": "Irish terrier", + "185": "Norfolk terrier", + "186": "Norwich terrier", + "187": "Yorkshire terrier", + "188": "wire-haired fox terrier", + "189": "Lakeland terrier", + "190": "Sealyham terrier, Sealyham", + "191": "Airedale, Airedale terrier", + "192": "cairn, cairn terrier", + "193": "Australian terrier", + "194": "Dandie Dinmont, Dandie Dinmont terrier", + "195": "Boston bull, Boston terrier", + "196": "miniature schnauzer", + "197": "giant schnauzer", + "198": "standard schnauzer", + "199": "Scotch terrier, Scottish terrier, Scottie", + "200": "Tibetan terrier, chrysanthemum dog", + "201": "silky terrier, Sydney silky", + "202": "soft-coated wheaten terrier", + "203": "West Highland white terrier", + "204": "Lhasa, Lhasa apso", + "205": "flat-coated retriever", + "206": "curly-coated retriever", + "207": "golden retriever", + "208": "Labrador retriever", + "209": "Chesapeake Bay retriever", + "210": "German short-haired pointer", + "211": "vizsla, Hungarian pointer", + "212": "English setter", + "213": "Irish setter, red setter", + "214": "Gordon setter", + "215": "Brittany spaniel", + "216": "clumber, clumber spaniel", + "217": "English springer, English springer spaniel", + "218": "Welsh springer spaniel", + "219": "cocker spaniel, English cocker spaniel, cocker", + "220": "Sussex spaniel", + "221": "Irish water spaniel", + "222": "kuvasz", + "223": "schipperke", + "224": "groenendael", + "225": "malinois", + "226": "briard", + "227": "kelpie", 
+ "228": "komondor", + "229": "Old English sheepdog, bobtail", + "230": "Shetland sheepdog, Shetland sheep dog, Shetland", + "231": "collie", + "232": "Border collie", + "233": "Bouvier des Flandres, Bouviers des Flandres", + "234": "Rottweiler", + "235": "German shepherd, German shepherd dog, German police dog, alsatian", + "236": "Doberman, Doberman pinscher", + "237": "miniature pinscher", + "238": "Greater Swiss Mountain dog", + "239": "Bernese mountain dog", + "240": "Appenzeller", + "241": "EntleBucher", + "242": "boxer", + "243": "bull mastiff", + "244": "Tibetan mastiff", + "245": "French bulldog", + "246": "Great Dane", + "247": "Saint Bernard, St Bernard", + "248": "Eskimo dog, husky", + "249": "malamute, malemute, Alaskan malamute", + "250": "Siberian husky", + "251": "dalmatian, coach dog, carriage dog", + "252": "affenpinscher, monkey pinscher, monkey dog", + "253": "basenji", + "254": "pug, pug-dog", + "255": "Leonberg", + "256": "Newfoundland, Newfoundland dog", + "257": "Great Pyrenees", + "258": "Samoyed, Samoyede", + "259": "Pomeranian", + "260": "chow, chow chow", + "261": "keeshond", + "262": "Brabancon griffon", + "263": "Pembroke, Pembroke Welsh corgi", + "264": "Cardigan, Cardigan Welsh corgi", + "265": "toy poodle", + "266": "miniature poodle", + "267": "standard poodle", + "268": "Mexican hairless", + "269": "timber wolf, grey wolf, gray wolf, Canis lupus", + "270": "white wolf, Arctic wolf, Canis lupus tundrarum", + "271": "red wolf, maned wolf, Canis rufus, Canis niger", + "272": "coyote, prairie wolf, brush wolf, Canis latrans", + "273": "dingo, warrigal, warragal, Canis dingo", + "274": "dhole, Cuon alpinus", + "275": "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", + "276": "hyena, hyaena", + "277": "red fox, Vulpes vulpes", + "278": "kit fox, Vulpes macrotis", + "279": "Arctic fox, white fox, Alopex lagopus", + "280": "grey fox, gray fox, Urocyon cinereoargenteus", + "281": "tabby, tabby cat", + "282": "tiger cat", + 
"283": "Persian cat", + "284": "Siamese cat, Siamese", + "285": "Egyptian cat", + "286": "cougar, puma, catamount, mountain lion, painter, panther, Felis concolor", + "287": "lynx, catamount", + "288": "leopard, Panthera pardus", + "289": "snow leopard, ounce, Panthera uncia", + "290": "jaguar, panther, Panthera onca, Felis onca", + "291": "lion, king of beasts, Panthera leo", + "292": "tiger, Panthera tigris", + "293": "cheetah, chetah, Acinonyx jubatus", + "294": "brown bear, bruin, Ursus arctos", + "295": "American black bear, black bear, Ursus americanus, Euarctos americanus", + "296": "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", + "297": "sloth bear, Melursus ursinus, Ursus ursinus", + "298": "mongoose", + "299": "meerkat, mierkat", + "300": "tiger beetle", + "301": "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", + "302": "ground beetle, carabid beetle", + "303": "long-horned beetle, longicorn, longicorn beetle", + "304": "leaf beetle, chrysomelid", + "305": "dung beetle", + "306": "rhinoceros beetle", + "307": "weevil", + "308": "fly", + "309": "bee", + "310": "ant, emmet, pismire", + "311": "grasshopper, hopper", + "312": "cricket", + "313": "walking stick, walkingstick, stick insect", + "314": "cockroach, roach", + "315": "mantis, mantid", + "316": "cicada, cicala", + "317": "leafhopper", + "318": "lacewing, lacewing fly", + "319": "dragonfly, darning needle, devil\"s darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", + "320": "damselfly", + "321": "admiral", + "322": "ringlet, ringlet butterfly", + "323": "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", + "324": "cabbage butterfly", + "325": "sulphur butterfly, sulfur butterfly", + "326": "lycaenid, lycaenid butterfly", + "327": "starfish, sea star", + "328": "sea urchin", + "329": "sea cucumber, holothurian", + "330": "wood rabbit, cottontail, cottontail rabbit", + "331": "hare", + "332": "Angora, Angora rabbit", + 
"333": "hamster", + "334": "porcupine, hedgehog", + "335": "fox squirrel, eastern fox squirrel, Sciurus niger", + "336": "marmot", + "337": "beaver", + "338": "guinea pig, Cavia cobaya", + "339": "sorrel", + "340": "zebra", + "341": "hog, pig, grunter, squealer, Sus scrofa", + "342": "wild boar, boar, Sus scrofa", + "343": "warthog", + "344": "hippopotamus, hippo, river horse, Hippopotamus amphibius", + "345": "ox", + "346": "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", + "347": "bison", + "348": "ram, tup", + "349": "bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis", + "350": "ibex, Capra ibex", + "351": "hartebeest", + "352": "impala, Aepyceros melampus", + "353": "gazelle", + "354": "Arabian camel, dromedary, Camelus dromedarius", + "355": "llama", + "356": "weasel", + "357": "mink", + "358": "polecat, fitch, foulmart, foumart, Mustela putorius", + "359": "black-footed ferret, ferret, Mustela nigripes", + "360": "otter", + "361": "skunk, polecat, wood pussy", + "362": "badger", + "363": "armadillo", + "364": "three-toed sloth, ai, Bradypus tridactylus", + "365": "orangutan, orang, orangutang, Pongo pygmaeus", + "366": "gorilla, Gorilla gorilla", + "367": "chimpanzee, chimp, Pan troglodytes", + "368": "gibbon, Hylobates lar", + "369": "siamang, Hylobates syndactylus, Symphalangus syndactylus", + "370": "guenon, guenon monkey", + "371": "patas, hussar monkey, Erythrocebus patas", + "372": "baboon", + "373": "macaque", + "374": "langur", + "375": "colobus, colobus monkey", + "376": "proboscis monkey, Nasalis larvatus", + "377": "marmoset", + "378": "capuchin, ringtail, Cebus capucinus", + "379": "howler monkey, howler", + "380": "titi, titi monkey", + "381": "spider monkey, Ateles geoffroyi", + "382": "squirrel monkey, Saimiri sciureus", + "383": "Madagascar cat, ring-tailed lemur, Lemur catta", + "384": "indri, indris, Indri indri, Indri brevicaudatus", + "385": "Indian elephant, Elephas maximus", + 
"386": "African elephant, Loxodonta africana", + "387": "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", + "388": "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", + "389": "barracouta, snoek", + "390": "eel", + "391": "coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch", + "392": "rock beauty, Holocanthus tricolor", + "393": "anemone fish", + "394": "sturgeon", + "395": "gar, garfish, garpike, billfish, Lepisosteus osseus", + "396": "lionfish", + "397": "puffer, pufferfish, blowfish, globefish", + "398": "abacus", + "399": "abaya", + "400": "academic gown, academic robe, judge\"s robe", + "401": "accordion, piano accordion, squeeze box", + "402": "acoustic guitar", + "403": "aircraft carrier, carrier, flattop, attack aircraft carrier", + "404": "airliner", + "405": "airship, dirigible", + "406": "altar", + "407": "ambulance", + "408": "amphibian, amphibious vehicle", + "409": "analog clock", + "410": "apiary, bee house", + "411": "apron", + "412": "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", + "413": "assault rifle, assault gun", + "414": "backpack, back pack, knapsack, packsack, rucksack, haversack", + "415": "bakery, bakeshop, bakehouse", + "416": "balance beam, beam", + "417": "balloon", + "418": "ballpoint, ballpoint pen, ballpen, Biro", + "419": "Band Aid", + "420": "banjo", + "421": "bannister, banister, balustrade, balusters, handrail", + "422": "barbell", + "423": "barber chair", + "424": "barbershop", + "425": "barn", + "426": "barometer", + "427": "barrel, cask", + "428": "barrow, garden cart, lawn cart, wheelbarrow", + "429": "baseball", + "430": "basketball", + "431": "bassinet", + "432": "bassoon", + "433": "bathing cap, swimming cap", + "434": "bath towel", + "435": "bathtub, bathing tub, bath, tub", + "436": "beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon", + "437": "beacon, lighthouse, beacon 
light, pharos", + "438": "beaker", + "439": "bearskin, busby, shako", + "440": "beer bottle", + "441": "beer glass", + "442": "bell cote, bell cot", + "443": "bib", + "444": "bicycle-built-for-two, tandem bicycle, tandem", + "445": "bikini, two-piece", + "446": "binder, ring-binder", + "447": "binoculars, field glasses, opera glasses", + "448": "birdhouse", + "449": "boathouse", + "450": "bobsled, bobsleigh, bob", + "451": "bolo tie, bolo, bola tie, bola", + "452": "bonnet, poke bonnet", + "453": "bookcase", + "454": "bookshop, bookstore, bookstall", + "455": "bottlecap", + "456": "bow", + "457": "bow tie, bow-tie, bowtie", + "458": "brass, memorial tablet, plaque", + "459": "brassiere, bra, bandeau", + "460": "breakwater, groin, groyne, mole, bulwark, seawall, jetty", + "461": "breastplate, aegis, egis", + "462": "broom", + "463": "bucket, pail", + "464": "buckle", + "465": "bulletproof vest", + "466": "bullet train, bullet", + "467": "butcher shop, meat market", + "468": "cab, hack, taxi, taxicab", + "469": "caldron, cauldron", + "470": "candle, taper, wax light", + "471": "cannon", + "472": "canoe", + "473": "can opener, tin opener", + "474": "cardigan", + "475": "car mirror", + "476": "carousel, carrousel, merry-go-round, roundabout, whirligig", + "477": "carpenter\"s kit, tool kit", + "478": "carton", + "479": "car wheel", + "480": "cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM", + "481": "cassette", + "482": "cassette player", + "483": "castle", + "484": "catamaran", + "485": "CD player", + "486": "cello, violoncello", + "487": "cellular telephone, cellular phone, cellphone, cell, mobile phone", + "488": "chain", + "489": "chainlink fence", + "490": "chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour", + "491": "chain saw, chainsaw", + "492": "chest", + "493": "chiffonier, commode", + "494": "chime, bell, gong", + "495": "china cabinet, china closet", + 
"496": "Christmas stocking", + "497": "church, church building", + "498": "cinema, movie theater, movie theatre, movie house, picture palace", + "499": "cleaver, meat cleaver, chopper", + "500": "cliff dwelling", + "501": "cloak", + "502": "clog, geta, patten, sabot", + "503": "cocktail shaker", + "504": "coffee mug", + "505": "coffeepot", + "506": "coil, spiral, volute, whorl, helix", + "507": "combination lock", + "508": "computer keyboard, keypad", + "509": "confectionery, confectionary, candy store", + "510": "container ship, containership, container vessel", + "511": "convertible", + "512": "corkscrew, bottle screw", + "513": "cornet, horn, trumpet, trump", + "514": "cowboy boot", + "515": "cowboy hat, ten-gallon hat", + "516": "cradle", + "517": "crane", + "518": "crash helmet", + "519": "crate", + "520": "crib, cot", + "521": "Crock Pot", + "522": "croquet ball", + "523": "crutch", + "524": "cuirass", + "525": "dam, dike, dyke", + "526": "desk", + "527": "desktop computer", + "528": "dial telephone, dial phone", + "529": "diaper, nappy, napkin", + "530": "digital clock", + "531": "digital watch", + "532": "dining table, board", + "533": "dishrag, dishcloth", + "534": "dishwasher, dish washer, dishwashing machine", + "535": "disk brake, disc brake", + "536": "dock, dockage, docking facility", + "537": "dogsled, dog sled, dog sleigh", + "538": "dome", + "539": "doormat, welcome mat", + "540": "drilling platform, offshore rig", + "541": "drum, membranophone, tympan", + "542": "drumstick", + "543": "dumbbell", + "544": "Dutch oven", + "545": "electric fan, blower", + "546": "electric guitar", + "547": "electric locomotive", + "548": "entertainment center", + "549": "envelope", + "550": "espresso maker", + "551": "face powder", + "552": "feather boa, boa", + "553": "file, file cabinet, filing cabinet", + "554": "fireboat", + "555": "fire engine, fire truck", + "556": "fire screen, fireguard", + "557": "flagpole, flagstaff", + "558": "flute, transverse flute", + 
"559": "folding chair", + "560": "football helmet", + "561": "forklift", + "562": "fountain", + "563": "fountain pen", + "564": "four-poster", + "565": "freight car", + "566": "French horn, horn", + "567": "frying pan, frypan, skillet", + "568": "fur coat", + "569": "garbage truck, dustcart", + "570": "gasmask, respirator, gas helmet", + "571": "gas pump, gasoline pump, petrol pump, island dispenser", + "572": "goblet", + "573": "go-kart", + "574": "golf ball", + "575": "golfcart, golf cart", + "576": "gondola", + "577": "gong, tam-tam", + "578": "gown", + "579": "grand piano, grand", + "580": "greenhouse, nursery, glasshouse", + "581": "grille, radiator grille", + "582": "grocery store, grocery, food market, market", + "583": "guillotine", + "584": "hair slide", + "585": "hair spray", + "586": "half track", + "587": "hammer", + "588": "hamper", + "589": "hand blower, blow dryer, blow drier, hair dryer, hair drier", + "590": "hand-held computer, hand-held microcomputer", + "591": "handkerchief, hankie, hanky, hankey", + "592": "hard disc, hard disk, fixed disk", + "593": "harmonica, mouth organ, harp, mouth harp", + "594": "harp", + "595": "harvester, reaper", + "596": "hatchet", + "597": "holster", + "598": "home theater, home theatre", + "599": "honeycomb", + "600": "hook, claw", + "601": "hoopskirt, crinoline", + "602": "horizontal bar, high bar", + "603": "horse cart, horse-cart", + "604": "hourglass", + "605": "iPod", + "606": "iron, smoothing iron", + "607": "jack-o\"-lantern", + "608": "jean, blue jean, denim", + "609": "jeep, landrover", + "610": "jersey, T-shirt, tee shirt", + "611": "jigsaw puzzle", + "612": "jinrikisha, ricksha, rickshaw", + "613": "joystick", + "614": "kimono", + "615": "knee pad", + "616": "knot", + "617": "lab coat, laboratory coat", + "618": "ladle", + "619": "lampshade, lamp shade", + "620": "laptop, laptop computer", + "621": "lawn mower, mower", + "622": "lens cap, lens cover", + "623": "letter opener, paper knife, paperknife", + 
"624": "library", + "625": "lifeboat", + "626": "lighter, light, igniter, ignitor", + "627": "limousine, limo", + "628": "liner, ocean liner", + "629": "lipstick, lip rouge", + "630": "Loafer", + "631": "lotion", + "632": "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", + "633": "loupe, jeweler\"s loupe", + "634": "lumbermill, sawmill", + "635": "magnetic compass", + "636": "mailbag, postbag", + "637": "mailbox, letter box", + "638": "maillot", + "639": "maillot, tank suit", + "640": "manhole cover", + "641": "maraca", + "642": "marimba, xylophone", + "643": "mask", + "644": "matchstick", + "645": "maypole", + "646": "maze, labyrinth", + "647": "measuring cup", + "648": "medicine chest, medicine cabinet", + "649": "megalith, megalithic structure", + "650": "microphone, mike", + "651": "microwave, microwave oven", + "652": "military uniform", + "653": "milk can", + "654": "minibus", + "655": "miniskirt, mini", + "656": "minivan", + "657": "missile", + "658": "mitten", + "659": "mixing bowl", + "660": "mobile home, manufactured home", + "661": "Model T", + "662": "modem", + "663": "monastery", + "664": "monitor", + "665": "moped", + "666": "mortar", + "667": "mortarboard", + "668": "mosque", + "669": "mosquito net", + "670": "motor scooter, scooter", + "671": "mountain bike, all-terrain bike, off-roader", + "672": "mountain tent", + "673": "mouse, computer mouse", + "674": "mousetrap", + "675": "moving van", + "676": "muzzle", + "677": "nail", + "678": "neck brace", + "679": "necklace", + "680": "nipple", + "681": "notebook, notebook computer", + "682": "obelisk", + "683": "oboe, hautboy, hautbois", + "684": "ocarina, sweet potato", + "685": "odometer, hodometer, mileometer, milometer", + "686": "oil filter", + "687": "organ, pipe organ", + "688": "oscilloscope, scope, cathode-ray oscilloscope, CRO", + "689": "overskirt", + "690": "oxcart", + "691": "oxygen mask", + "692": "packet", + "693": "paddle, boat paddle", + "694": "paddlewheel, 
paddle wheel", + "695": "padlock", + "696": "paintbrush", + "697": "pajama, pyjama, pj\"s, jammies", + "698": "palace", + "699": "panpipe, pandean pipe, syrinx", + "700": "paper towel", + "701": "parachute, chute", + "702": "parallel bars, bars", + "703": "park bench", + "704": "parking meter", + "705": "passenger car, coach, carriage", + "706": "patio, terrace", + "707": "pay-phone, pay-station", + "708": "pedestal, plinth, footstall", + "709": "pencil box, pencil case", + "710": "pencil sharpener", + "711": "perfume, essence", + "712": "Petri dish", + "713": "photocopier", + "714": "pick, plectrum, plectron", + "715": "pickelhaube", + "716": "picket fence, paling", + "717": "pickup, pickup truck", + "718": "pier", + "719": "piggy bank, penny bank", + "720": "pill bottle", + "721": "pillow", + "722": "ping-pong ball", + "723": "pinwheel", + "724": "pirate, pirate ship", + "725": "pitcher, ewer", + "726": "plane, carpenter\"s plane, woodworking plane", + "727": "planetarium", + "728": "plastic bag", + "729": "plate rack", + "730": "plow, plough", + "731": "plunger, plumber\"s helper", + "732": "Polaroid camera, Polaroid Land camera", + "733": "pole", + "734": "police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria", + "735": "poncho", + "736": "pool table, billiard table, snooker table", + "737": "pop bottle, soda bottle", + "738": "pot, flowerpot", + "739": "potter\"s wheel", + "740": "power drill", + "741": "prayer rug, prayer mat", + "742": "printer", + "743": "prison, prison house", + "744": "projectile, missile", + "745": "projector", + "746": "puck, hockey puck", + "747": "punching bag, punch bag, punching ball, punchball", + "748": "purse", + "749": "quill, quill pen", + "750": "quilt, comforter, comfort, puff", + "751": "racer, race car, racing car", + "752": "racket, racquet", + "753": "radiator", + "754": "radio, wireless", + "755": "radio telescope, radio reflector", + "756": "rain barrel", + "757": "recreational vehicle, RV, R.V.", + 
"758": "reel", + "759": "reflex camera", + "760": "refrigerator, icebox", + "761": "remote control, remote", + "762": "restaurant, eating house, eating place, eatery", + "763": "revolver, six-gun, six-shooter", + "764": "rifle", + "765": "rocking chair, rocker", + "766": "rotisserie", + "767": "rubber eraser, rubber, pencil eraser", + "768": "rugby ball", + "769": "rule, ruler", + "770": "running shoe", + "771": "safe", + "772": "safety pin", + "773": "saltshaker, salt shaker", + "774": "sandal", + "775": "sarong", + "776": "sax, saxophone", + "777": "scabbard", + "778": "scale, weighing machine", + "779": "school bus", + "780": "schooner", + "781": "scoreboard", + "782": "screen, CRT screen", + "783": "screw", + "784": "screwdriver", + "785": "seat belt, seatbelt", + "786": "sewing machine", + "787": "shield, buckler", + "788": "shoe shop, shoe-shop, shoe store", + "789": "shoji", + "790": "shopping basket", + "791": "shopping cart", + "792": "shovel", + "793": "shower cap", + "794": "shower curtain", + "795": "ski", + "796": "ski mask", + "797": "sleeping bag", + "798": "slide rule, slipstick", + "799": "sliding door", + "800": "slot, one-armed bandit", + "801": "snorkel", + "802": "snowmobile", + "803": "snowplow, snowplough", + "804": "soap dispenser", + "805": "soccer ball", + "806": "sock", + "807": "solar dish, solar collector, solar furnace", + "808": "sombrero", + "809": "soup bowl", + "810": "space bar", + "811": "space heater", + "812": "space shuttle", + "813": "spatula", + "814": "speedboat", + "815": "spider web, spider\"s web", + "816": "spindle", + "817": "sports car, sport car", + "818": "spotlight, spot", + "819": "stage", + "820": "steam locomotive", + "821": "steel arch bridge", + "822": "steel drum", + "823": "stethoscope", + "824": "stole", + "825": "stone wall", + "826": "stopwatch, stop watch", + "827": "stove", + "828": "strainer", + "829": "streetcar, tram, tramcar, trolley, trolley car", + "830": "stretcher", + "831": "studio couch, day 
bed", + "832": "stupa, tope", + "833": "submarine, pigboat, sub, U-boat", + "834": "suit, suit of clothes", + "835": "sundial", + "836": "sunglass", + "837": "sunglasses, dark glasses, shades", + "838": "sunscreen, sunblock, sun blocker", + "839": "suspension bridge", + "840": "swab, swob, mop", + "841": "sweatshirt", + "842": "swimming trunks, bathing trunks", + "843": "swing", + "844": "switch, electric switch, electrical switch", + "845": "syringe", + "846": "table lamp", + "847": "tank, army tank, armored combat vehicle, armoured combat vehicle", + "848": "tape player", + "849": "teapot", + "850": "teddy, teddy bear", + "851": "television, television system", + "852": "tennis ball", + "853": "thatch, thatched roof", + "854": "theater curtain, theatre curtain", + "855": "thimble", + "856": "thresher, thrasher, threshing machine", + "857": "throne", + "858": "tile roof", + "859": "toaster", + "860": "tobacco shop, tobacconist shop, tobacconist", + "861": "toilet seat", + "862": "torch", + "863": "totem pole", + "864": "tow truck, tow car, wrecker", + "865": "toyshop", + "866": "tractor", + "867": "trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi", + "868": "tray", + "869": "trench coat", + "870": "tricycle, trike, velocipede", + "871": "trimaran", + "872": "tripod", + "873": "triumphal arch", + "874": "trolleybus, trolley coach, trackless trolley", + "875": "trombone", + "876": "tub, vat", + "877": "turnstile", + "878": "typewriter keyboard", + "879": "umbrella", + "880": "unicycle, monocycle", + "881": "upright, upright piano", + "882": "vacuum, vacuum cleaner", + "883": "vase", + "884": "vault", + "885": "velvet", + "886": "vending machine", + "887": "vestment", + "888": "viaduct", + "889": "violin, fiddle", + "890": "volleyball", + "891": "waffle iron", + "892": "wall clock", + "893": "wallet, billfold, notecase, pocketbook", + "894": "wardrobe, closet, press", + "895": "warplane, military plane", + "896": "washbasin, handbasin, 
washbowl, lavabo, wash-hand basin", + "897": "washer, automatic washer, washing machine", + "898": "water bottle", + "899": "water jug", + "900": "water tower", + "901": "whiskey jug", + "902": "whistle", + "903": "wig", + "904": "window screen", + "905": "window shade", + "906": "Windsor tie", + "907": "wine bottle", + "908": "wing", + "909": "wok", + "910": "wooden spoon", + "911": "wool, woolen, woollen", + "912": "worm fence, snake fence, snake-rail fence, Virginia fence", + "913": "wreck", + "914": "yawl", + "915": "yurt", + "916": "web site, website, internet site, site", + "917": "comic book", + "918": "crossword puzzle, crossword", + "919": "street sign", + "920": "traffic light, traffic signal, stoplight", + "921": "book jacket, dust cover, dust jacket, dust wrapper", + "922": "menu", + "923": "plate", + "924": "guacamole", + "925": "consomme", + "926": "hot pot, hotpot", + "927": "trifle", + "928": "ice cream, icecream", + "929": "ice lolly, lolly, lollipop, popsicle", + "930": "French loaf", + "931": "bagel, beigel", + "932": "pretzel", + "933": "cheeseburger", + "934": "hotdog, hot dog, red hot", + "935": "mashed potato", + "936": "head cabbage", + "937": "broccoli", + "938": "cauliflower", + "939": "zucchini, courgette", + "940": "spaghetti squash", + "941": "acorn squash", + "942": "butternut squash", + "943": "cucumber, cuke", + "944": "artichoke, globe artichoke", + "945": "bell pepper", + "946": "cardoon", + "947": "mushroom", + "948": "Granny Smith", + "949": "strawberry", + "950": "orange", + "951": "lemon", + "952": "fig", + "953": "pineapple, ananas", + "954": "banana", + "955": "jackfruit, jak, jack", + "956": "custard apple", + "957": "pomegranate", + "958": "hay", + "959": "carbonara", + "960": "chocolate sauce, chocolate syrup", + "961": "dough", + "962": "meat loaf, meatloaf", + "963": "pizza, pizza pie", + "964": "potpie", + "965": "burrito", + "966": "red wine", + "967": "espresso", + "968": "cup", + "969": "eggnog", + "970": "alp", + 
"971": "bubble", + "972": "cliff, drop, drop-off", + "973": "coral reef", + "974": "geyser", + "975": "lakeside, lakeshore", + "976": "promontory, headland, head, foreland", + "977": "sandbar, sand bar", + "978": "seashore, coast, seacoast, sea-coast", + "979": "valley, vale", + "980": "volcano", + "981": "ballplayer, baseball player", + "982": "groom, bridegroom", + "983": "scuba diver", + "984": "rapeseed", + "985": "daisy", + "986": "yellow lady\"s slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", + "987": "corn", + "988": "acorn", + "989": "hip, rose hip, rosehip", + "990": "buckeye, horse chestnut, conker", + "991": "coral fungus", + "992": "agaric", + "993": "gyromitra", + "994": "stinkhorn, carrion fungus", + "995": "earthstar", + "996": "hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa", + "997": "bolete", + "998": "ear, spike, capitulum", + "999": "toilet tissue, toilet paper, bathroom tissue"} \ No newline at end of file diff --git a/include/graph/graph.hpp b/include/graph/graph.hpp index e50b5b0e5..4edfc8dfc 100644 --- a/include/graph/graph.hpp +++ b/include/graph/graph.hpp @@ -67,6 +67,11 @@ class Graph { in_edges_.clear(); } + void setSplitDistribution( + const std::vector>>& split_dist) { + split_distribution_ = split_dist; + } + int getVertexValue(size_t layerID) const { if (layerID >= arrayV_.size()) { throw std::invalid_argument("ArrayV does not contain this ID."); @@ -180,33 +185,39 @@ class Graph { count_used_split_distribution_ = 0; for (size_t i = 0; i < traversal.size(); ++i) { + int current_layer = traversal[i]; #ifdef ENABLE_STATISTIC_TIME auto start = std::chrono::high_resolution_clock::now(); #endif if (i != 0) { inten_.clear(); - for (size_t k = 0; k < in_edges_[traversal[i]].size(); ++k) { - auto target_value = in_edges_[traversal[i]][k]; + for (size_t k = 0; k < in_edges_[current_layer].size(); ++k) { + auto target_value = in_edges_[current_layer][k]; auto it = 
std::find_if(branch_list_.rbegin(), branch_list_.rend(), [target_value](const BranchState& s) { return s.ind_layer == target_value; }); + if (it != branch_list_.rend()) { for (size_t f = 0; f < it->distribution.size(); ++f) { - if (it->distribution[f].first == traversal[i]) { + if (it->distribution[f].first == current_layer) { inten_.push_back(it->give_for_all[it->distribution[f].second]); } } } - it->count_used_ten--; - if (it->count_used_ten < 1) { - auto rit = std::next(it).base(); - it = std::reverse_iterator(branch_list_.erase(rit)); + + if (it != branch_list_.rend()) { + it->count_used_ten--; + if (it->count_used_ten < 1) { + auto rit = std::next(it).base(); + it = + std::reverse_iterator(branch_list_.erase(rit)); + } } } } - layers_[traversal[i]]->run(inten_, outten_); + layers_[current_layer]->run(inten_, outten_); #ifdef ENABLE_STATISTIC_TENSORS tensors_.push_back(inten_[0]); @@ -217,24 +228,27 @@ class Graph { #endif inten_ = outten_; - if (layers_[traversal[i]]->postops.count > 0) { - for (unsigned int j = 0; j < layers_[traversal[i]]->postops.count; + + if (layers_[current_layer]->postops.count > 0) { + for (unsigned int j = 0; j < layers_[current_layer]->postops.count; j++) { - layers_[traversal[i]]->postops.layers[j]->run(inten_, outten_); + layers_[current_layer]->postops.layers[j]->run(inten_, outten_); } inten_ = outten_; } BranchState new_branch; new_branch.give_for_all = inten_; - new_branch.count_used_ten = countinout[traversal[i]].second; - new_branch.ind_layer = traversal[i]; - new_branch.split = layers_[traversal[i]]->getName() == kSplit; - if (layers_[traversal[i]]->getName() == kSplit) { + new_branch.count_used_ten = countinout[current_layer].second; + new_branch.ind_layer = current_layer; + new_branch.split = layers_[current_layer]->getName() == kSplit; + + if (layers_[current_layer]->getName() == kSplit) { if (static_cast(split_distribution_.size()) == 0) { - std::vector> dis(countinout[traversal[i]].second); + std::vector> dis( + 
countinout[current_layer].second); for (size_t m = 0; m < dis.size(); ++m) { - dis[m] = {arrayE_[arrayV_[traversal[i]] + m], static_cast(m)}; + dis[m] = {arrayE_[arrayV_[current_layer] + m], static_cast(m)}; } new_branch.distribution = dis; } else { @@ -243,12 +257,19 @@ class Graph { count_used_split_distribution_++; } } else { - std::vector> dis(countinout[traversal[i]].second); + std::vector> dis(countinout[current_layer].second); for (size_t m = 0; m < dis.size(); ++m) { - dis[m] = {arrayE_[arrayV_[traversal[i]] + m], 0}; + dis[m] = {arrayE_[arrayV_[current_layer] + m], 0}; } new_branch.distribution = dis; } + if (layers_[current_layer]->getName() == kSplit) { + for (const auto& tensor : outten_) { + for (size_t d = 0; d < tensor.get_shape().dims(); ++d) { + if (d < tensor.get_shape().dims() - 1) std::cout << ""; + } + } + } branch_list_.push_back(new_branch); #ifdef ENABLE_STATISTIC_TIME @@ -259,6 +280,7 @@ class Graph { time_layer_.push_back(layers_[i]->getName()); #endif } + *outtenres_ = outten_[0]; } void setOutput(const Layer& lay, Tensor& vec) { @@ -274,12 +296,29 @@ class Graph { #ifdef ENABLE_STATISTIC_TIME std::vector getTimeInfo() { std::vector res; - std::vector labels = { - "Input", "Pooling", "Normalization", "Dropout", "Element-wise", - "Convolution", "Dense", "Flatten", "Output"}; + + std::unordered_map label_map = { + {kInput, "Input"}, + {kPooling, "Pooling"}, + {kElementWise, "Element-wise"}, + {kConvolution, "Convolution"}, + {kFullyConnected, "Dense"}, + {kFlatten, "Flatten"}, + {kConcat, "Concat"}, + {kDropout, "Dropout"}, + {kSplit, "Split"}, + {kBinaryOp, "BinaryOp"}, + {kTranspose, "Transpose"}, + {kMatmul, "MatMul"}, + {kReshape, "Reshape"}, + {kSoftmax, "Softmax"}, + {kReduce, "Reduce"}, + {kBatchNormalization, "Normalization"}}; + for (size_t i = 0; i < time_.size(); i++) { - res.push_back(labels[static_cast(time_layer_[i])] + ':' + - std::to_string(time_[i])); + auto it = label_map.find(time_layer_[i]); + std::string layer_name = 
(it != label_map.end()) ? it->second : "Unknown"; + res.push_back(layer_name + ':' + std::to_string(time_[i])); } return res; } diff --git a/include/layers/BatchNormalizationLayer.hpp b/include/layers/BatchNormalizationLayer.hpp new file mode 100644 index 000000000..62f6300fa --- /dev/null +++ b/include/layers/BatchNormalizationLayer.hpp @@ -0,0 +1,50 @@ +#pragma once +#include + +#include "layers/Layer.hpp" +#include "layers/Tensor.hpp" + +namespace it_lab_ai { + +class BatchNormalizationLayer : public Layer { + public: + BatchNormalizationLayer(const Tensor& scale, const Tensor& bias, + const Tensor& mean, const Tensor& var, + float epsilon = 1e-5F, float momentum = 0.9F, + bool training_mode = false) + : Layer(kBatchNormalization), + scale_(scale), + bias_(bias), + mean_(mean), + var_(var), + epsilon_(epsilon), + momentum_(momentum), + training_mode_(training_mode) {} + + void run(const std::vector& input, + std::vector& output) override; + +#ifdef ENABLE_STATISTIC_WEIGHTS + Tensor get_weights() override { return Tensor(); } +#endif + + void set_epsilon(float epsilon) { epsilon_ = epsilon; } + void set_momentum(float momentum) { momentum_ = momentum; } + void set_training_mode(bool training_mode) { training_mode_ = training_mode; } + + private: + Tensor scale_; + Tensor bias_; + Tensor mean_; + Tensor var_; + float epsilon_; + float momentum_; + bool training_mode_; + + template + void batchnorm_impl(const Tensor& input, Tensor& output) const; + + void validate_parameters(size_t num_channels) const; +}; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/include/layers/ConcatLayer.hpp b/include/layers/ConcatLayer.hpp index f21ec823e..4ac9bdcb4 100644 --- a/include/layers/ConcatLayer.hpp +++ b/include/layers/ConcatLayer.hpp @@ -15,6 +15,7 @@ class ConcatLayer : public Layer { void run(const std::vector& input, std::vector& output) override; + void setInputOrder(const std::vector& order) { input_order_ = order; } #ifdef ENABLE_STATISTIC_WEIGHTS 
Tensor get_weights() override { return Tensor(); } @@ -22,13 +23,60 @@ class ConcatLayer : public Layer { private: int64_t axis_; - + std::vector input_order_; void validate_inputs(const std::vector& inputs) const; int64_t normalize_axis(size_t rank) const; Shape calculate_output_shape(const std::vector& inputs) const; - + std::vector reorderInputs(const std::vector& inputs) const; template - void concatenate(const std::vector& inputs, Tensor& output) const; + void concatenate(const std::vector& inputs, Tensor& output) const { + std::vector ordered_inputs = reorderInputs(inputs); + Shape output_shape = calculate_output_shape(inputs); + std::vector output_data(output_shape.count(), 0); + + const int64_t axis = normalize_axis(inputs[0].get_shape().dims()); + const size_t outer_size = [&]() { + size_t size = 1; + for (int64_t i = 0; i < axis; ++i) { + size *= output_shape[i]; + } + return size; + }(); + + const size_t inner_size = [&]() { + size_t size = 1; + for (size_t i = axis + 1; i < output_shape.dims(); ++i) { + size *= output_shape[i]; + } + return size; + }(); + + size_t output_offset = 0; + + for (const auto& input : inputs) { + const auto& input_data = *input.as(); + const Shape& input_shape = input.get_shape(); + const size_t input_axis_size = input_shape[axis]; + + for (size_t outer = 0; outer < outer_size; ++outer) { + for (size_t a = 0; a < input_axis_size; ++a) { + for (size_t inner = 0; inner < inner_size; ++inner) { + size_t input_pos = + outer * input_axis_size * inner_size + a * inner_size + inner; + + size_t output_pos = outer * output_shape[axis] * inner_size + + (output_offset + a) * inner_size + inner; + + output_data[output_pos] = input_data[input_pos]; + } + } + } + + output_offset += input_axis_size; + } + + output = make_tensor(output_data, output_shape); + } }; } // namespace it_lab_ai \ No newline at end of file diff --git a/include/layers/ConvLayer.hpp b/include/layers/ConvLayer.hpp index 483bc1e60..9f08559d4 100644 --- 
a/include/layers/ConvLayer.hpp +++ b/include/layers/ConvLayer.hpp @@ -15,7 +15,9 @@ class ConvolutionalLayer : public Layer { size_t dilations_; Tensor kernel_; Tensor bias_; + size_t group_; ImplType implType_; + bool useLegacyImpl_; public: ConvolutionalLayer() : Layer(kConvolution) { @@ -26,14 +28,17 @@ class ConvolutionalLayer : public Layer { } ConvolutionalLayer(size_t step, size_t pads, size_t dilations, const Tensor& kernel, const Tensor& bias = Tensor(), - ImplType implType = kDefault) + ImplType implType = kDefault, size_t group = 1, + bool useLegacyImpl = false) : Layer(kConvolution) { stride_ = step; pads_ = pads; + group_ = group; dilations_ = dilations; kernel_ = kernel; bias_ = bias; implType_ = implType; + useLegacyImpl_ = useLegacyImpl; } void run(const std::vector& input, @@ -138,24 +143,42 @@ class ConvImpl : public LayerImpl { // NCHW -> NCHW only template void Conv4D(const Tensor& input, const Tensor& kernel_, const Tensor& bias_, - Tensor& output, size_t stride_, size_t pads_, size_t dilations_) { + Tensor& output, size_t stride_, size_t pads_, size_t group_, + size_t dilations_) { size_t batch_size = input.get_shape()[0]; + size_t in_channels = input.get_shape()[1]; size_t in_height = input.get_shape()[2]; size_t in_width = input.get_shape()[3]; - size_t in_channels = input.get_shape()[1]; - size_t kernel_height = kernel_.get_shape()[0]; - size_t kernel_width = kernel_.get_shape()[1]; - size_t kernel_in_channels = kernel_.get_shape()[2]; - size_t kernel_out_channels = kernel_.get_shape()[3]; + size_t out_channels = kernel_.get_shape()[0]; + size_t kernel_in_channels = kernel_.get_shape()[1]; + size_t kernel_height = kernel_.get_shape()[2]; + size_t kernel_width = kernel_.get_shape()[3]; + + if (group_ > 1) { + if (in_channels % group_ != 0 || out_channels % group_ != 0) { + throw std::runtime_error("Channels must be divisible by group"); + } + if (kernel_in_channels != in_channels / group_) { + throw std::runtime_error( + "Kernel input 
channels don't match group configuration"); + } + } + + size_t out_height = + (in_height + 2 * pads_ - dilations_ * (kernel_height - 1) - 1) / stride_ + + 1; + size_t out_width = + (in_width + 2 * pads_ - dilations_ * (kernel_width - 1) - 1) / stride_ + + 1; + + std::vector>>> padded_input( + batch_size, + std::vector>>( + in_height + 2 * pads_, + std::vector>( + in_width + 2 * pads_, std::vector(in_channels, 0)))); - std::vector>>> padded_input = - std::vector>>>( - batch_size, std::vector>>( - in_height + 2 * pads_, - std::vector>( - in_width + 2 * pads_, - std::vector(in_channels, 0)))); for (size_t b = 0; b < batch_size; ++b) { for (size_t h = 0; h < in_height; ++h) { for (size_t w = 0; w < in_width; ++w) { @@ -166,100 +189,105 @@ void Conv4D(const Tensor& input, const Tensor& kernel_, const Tensor& bias_, } } } - std::vector>>> dil_kernel = - std::vector>>>( - kernel_height * dilations_ + 1 - dilations_, - std::vector>>( - kernel_width * dilations_ + 1 - dilations_, - std::vector>( - kernel_in_channels, - std::vector(kernel_out_channels, 0)))); - for (size_t b = 0; b < kernel_out_channels; ++b) { - for (size_t h = 0; h < kernel_height; ++h) { - for (size_t w = 0; w < kernel_width; ++w) { - for (size_t c = 0; c < kernel_in_channels; ++c) { - dil_kernel[h * dilations_][w * dilations_][c][b] = - kernel_.get({h, w, c, b}); + + size_t dilated_kernel_height = (kernel_height - 1) * dilations_ + 1; + size_t dilated_kernel_width = (kernel_width - 1) * dilations_ + 1; + + std::vector>>> dil_kernel( + out_channels, std::vector>>( + kernel_in_channels, + std::vector>( + dilated_kernel_height, + std::vector(dilated_kernel_width, 0)))); + + for (size_t oc = 0; oc < out_channels; ++oc) { + for (size_t ic = 0; ic < kernel_in_channels; ++ic) { + for (size_t kh = 0; kh < kernel_height; ++kh) { + for (size_t kw = 0; kw < kernel_width; ++kw) { + dil_kernel[oc][ic][kh * dilations_][kw * dilations_] = + kernel_.get({oc, ic, kh, kw}); } } } } - size_t crat = 0; - if ((in_height + 2 
* pads_ - dilations_ * (kernel_height - 1)) % stride_ != 0) - crat = 1; + std::vector>>> output_tensor( + batch_size, + std::vector>>( + out_channels, std::vector>( + out_height, std::vector(out_width, 0)))); - size_t out_height = - (in_height + 2 * pads_ - dilations_ * (kernel_height - 1)) / stride_ + - crat; + for (size_t b = 0; b < batch_size; ++b) { + for (size_t oc = 0; oc < out_channels; ++oc) { + for (size_t oh = 0; oh < out_height; ++oh) { + for (size_t ow = 0; ow < out_width; ++ow) { + ValueType value = 0; + size_t h_start = oh * stride_; + size_t w_start = ow * stride_; - crat = 0; - if ((in_width + 2 * pads_ - dilations_ * (kernel_width - 1)) % stride_ != 0) - crat = 1; + size_t group = (group_ > 1) ? oc / (out_channels / group_) : 0; + size_t group_start_channel = group * (in_channels / group_); + size_t group_end_channel = (group + 1) * (in_channels / group_); - size_t out_width = - (in_width + 2 * pads_ - dilations_ * (kernel_width - 1)) / stride_ + crat; + for (size_t ic = group_start_channel; ic < group_end_channel; ++ic) { + size_t kernel_ic = ic - group_start_channel; - std::vector>>> output_tensor( - batch_size, std::vector>>( - kernel_out_channels, - std::vector>( - out_height, std::vector(out_width, 0)))); - for (size_t b = 0; b < batch_size; ++b) { - for (size_t c = 0; c < kernel_out_channels; ++c) { - for (size_t i = 0; i < out_height; i += stride_) { - for (size_t j = 0; j < out_width; j += stride_) { - ValueType value = 0; - for (size_t ic = 0; ic < in_channels; ++ic) { - for (size_t h = 0; h < kernel_height * dilations_ + 1 - dilations_; - ++h) { - for (size_t w = 0; w < kernel_width * dilations_ + 1 - dilations_; - ++w) { - value += - padded_input[b][i + h][j + w][ic] * dil_kernel[h][w][ic][c]; + for (size_t kh = 0; kh < dilated_kernel_height; ++kh) { + for (size_t kw = 0; kw < dilated_kernel_width; ++kw) { + size_t h_index = h_start + kh; + size_t w_index = w_start + kw; + + if (h_index < padded_input[b].size() && + w_index < 
padded_input[b][h_index].size()) { + value += padded_input[b][h_index][w_index][ic] * + dil_kernel[oc][kernel_ic][kh][kw]; + } } } } - if (!bias_.empty()) { - output_tensor[b][c][i][j] = value + (*bias_.as())[c]; - } else { - output_tensor[b][c][i][j] = value; + + if (!bias_.empty() && oc < bias_.get_shape()[0]) { + value += bias_.get({oc}); } + + output_tensor[b][oc][oh][ow] = value; } } } } - Shape sh({batch_size, kernel_out_channels, out_height, out_width}); - std::vector one_d_vector(batch_size * out_height * out_width * - kernel_out_channels); - size_t index_1d = 0; - for (size_t i = 0; i < batch_size; ++i) { - for (size_t l = 0; l < kernel_out_channels; ++l) { - for (size_t j = 0; j < out_height; ++j) { - for (size_t k = 0; k < out_width; ++k) { - one_d_vector[index_1d++] = output_tensor[i][l][j][k]; + Shape output_shape({batch_size, out_channels, out_height, out_width}); + std::vector flat_output(batch_size * out_channels * out_height * + out_width); + + size_t index = 0; + for (size_t b = 0; b < batch_size; ++b) { + for (size_t oc = 0; oc < out_channels; ++oc) { + for (size_t h = 0; h < out_height; ++h) { + for (size_t w = 0; w < out_width; ++w) { + flat_output[index++] = output_tensor[b][oc][h][w]; } } } } - output = make_tensor(one_d_vector, sh); + + output = make_tensor(flat_output, output_shape); } // NCHW -> NCHW only template void Conv4DSTL(const Tensor& input, const Tensor& kernel_, const Tensor& bias_, - Tensor& output, size_t stride_, size_t pads_, + Tensor& output, size_t stride_, size_t pads_, size_t group_, size_t dilations_) { size_t batch_size = input.get_shape()[0]; + size_t in_channels = input.get_shape()[1]; size_t in_height = input.get_shape()[2]; size_t in_width = input.get_shape()[3]; - size_t in_channels = input.get_shape()[1]; - size_t kernel_height = kernel_.get_shape()[0]; - size_t kernel_width = kernel_.get_shape()[1]; - size_t kernel_in_channels = kernel_.get_shape()[2]; - size_t kernel_out_channels = kernel_.get_shape()[3]; + 
size_t kernel_out_channels = kernel_.get_shape()[0]; + size_t kernel_in_channels = kernel_.get_shape()[1]; + size_t kernel_height = kernel_.get_shape()[2]; + size_t kernel_width = kernel_.get_shape()[3]; unsigned num_threads = std::thread::hardware_concurrency(); std::vector threads; @@ -301,13 +329,13 @@ void Conv4DSTL(const Tensor& input, const Tensor& kernel_, const Tensor& bias_, kernel_in_channels, std::vector(kernel_out_channels, 0)))); - auto dilate_kernel = [&](size_t start_b, size_t end_b) { - for (size_t b = start_b; b < end_b; ++b) { + auto dilate_kernel = [&](size_t start_oc, size_t end_oc) { + for (size_t oc = start_oc; oc < end_oc; ++oc) { for (size_t h = 0; h < kernel_height; ++h) { for (size_t w = 0; w < kernel_width; ++w) { - for (size_t c = 0; c < kernel_in_channels; ++c) { - dil_kernel[h * dilations_][w * dilations_][c][b] = - kernel_.get({h, w, c, b}); + for (size_t ic = 0; ic < kernel_in_channels; ++ic) { + dil_kernel[h * dilations_][w * dilations_][ic][oc] = + kernel_.get({oc, ic, h, w}); } } } @@ -345,26 +373,44 @@ void Conv4DSTL(const Tensor& input, const Tensor& kernel_, const Tensor& bias_, std::vector>( out_height, std::vector(out_width, 0)))); - auto compute_conv = [&](size_t start_b, size_t end_b) { - for (size_t b = start_b; b < end_b; ++b) { - for (size_t c = 0; c < kernel_out_channels; ++c) { - for (size_t i = 0; i < out_height; i += stride_) { - for (size_t j = 0; j < out_width; j += stride_) { + auto compute_conv = [&](size_t start_oc, size_t end_oc) { + size_t dilated_kernel_height = kernel_height * dilations_ + 1 - dilations_; + size_t dilated_kernel_width = kernel_width * dilations_ + 1 - dilations_; + + for (size_t b = 0; b < batch_size; ++b) { + for (size_t oc = start_oc; oc < end_oc; ++oc) { + for (size_t oh = 0; oh < out_height; oh++) { + for (size_t ow = 0; ow < out_width; ow++) { ValueType value = 0; - for (size_t ic = 0; ic < in_channels; ++ic) { - for (size_t h = 0; - h < kernel_height * dilations_ + 1 - dilations_; ++h) 
{ - for (size_t w = 0; - w < kernel_width * dilations_ + 1 - dilations_; ++w) { - value += padded_input[b][i + h][j + w][ic] * - dil_kernel[h][w][ic][c]; + + size_t group = + (group_ > 1) ? oc / (kernel_out_channels / group_) : 0; + size_t group_start_channel = group * (in_channels / group_); + size_t group_end_channel = (group + 1) * (in_channels / group_); + + for (size_t ic = group_start_channel; ic < group_end_channel; + ++ic) { + size_t kernel_ic = ic - group_start_channel; + + for (size_t kh = 0; kh < dilated_kernel_height; ++kh) { + for (size_t kw = 0; kw < dilated_kernel_width; ++kw) { + size_t h_index = oh * stride_ + kh; + size_t w_index = ow * stride_ + kw; + + if (h_index < padded_input[b].size() && + w_index < padded_input[b][h_index].size()) { + value += padded_input[b][h_index][w_index][ic] * + dil_kernel[kh][kw][kernel_ic][oc]; + } } } } + if (!bias_.empty()) { - output_tensor[b][c][i][j] = value + (*bias_.as())[c]; + output_tensor[b][oc][oh][ow] = + value + (*bias_.as())[oc]; } else { - output_tensor[b][c][i][j] = value; + output_tensor[b][oc][oh][ow] = value; } } } @@ -372,10 +418,11 @@ void Conv4DSTL(const Tensor& input, const Tensor& kernel_, const Tensor& bias_, } }; - chunk_size = batch_size / num_threads; + chunk_size = kernel_out_channels / num_threads; for (unsigned i = 0; i < num_threads; ++i) { size_t start = i * chunk_size; - size_t end = (i == num_threads - 1) ? batch_size : start + chunk_size; + size_t end = + (i == num_threads - 1) ? 
kernel_out_channels : start + chunk_size; threads.emplace_back(compute_conv, start, end); } for (auto& t : threads) t.join(); @@ -409,4 +456,176 @@ void Conv4DSTL(const Tensor& input, const Tensor& kernel_, const Tensor& bias_, output = make_tensor(one_d_vector, sh); } +template +void DepthwiseConv4D(const Tensor& input, const Tensor& kernel_, + const Tensor& bias_, Tensor& output, size_t stride_, + size_t pads_, size_t dilations_) { + size_t batch_size = input.get_shape()[0]; + size_t channels = input.get_shape()[1]; + size_t in_height = input.get_shape()[2]; + size_t in_width = input.get_shape()[3]; + + size_t kernel_out_channels = kernel_.get_shape()[0]; + size_t kernel_in_channels = kernel_.get_shape()[1]; + size_t kernel_height = kernel_.get_shape()[2]; + size_t kernel_width = kernel_.get_shape()[3]; + + if (kernel_out_channels != channels || kernel_in_channels != 1) { + throw std::runtime_error("Invalid kernel shape for depthwise convolution"); + } + + size_t out_height = + (in_height + 2 * pads_ - dilations_ * (kernel_height - 1) - 1) / stride_ + + 1; + size_t out_width = + (in_width + 2 * pads_ - dilations_ * (kernel_width - 1) - 1) / stride_ + + 1; + + Tensor output_tensor(Shape({batch_size, channels, out_height, out_width}), + input.get_type()); + + for (size_t b = 0; b < batch_size; ++b) { + for (size_t c = 0; c < channels; ++c) { + for (size_t oh = 0; oh < out_height; ++oh) { + for (size_t ow = 0; ow < out_width; ++ow) { + ValueType sum = 0; + + for (size_t kh = 0; kh < kernel_height; ++kh) { + for (size_t kw = 0; kw < kernel_width; ++kw) { + size_t ih = oh * stride_ + kh * dilations_ - pads_; + size_t iw = ow * stride_ + kw * dilations_ - pads_; + + if (ih < in_height && iw < in_width) { + auto input_val = input.get({b, c, ih, iw}); + auto kernel_val = kernel_.get({c, 0, kh, kw}); + + sum += input_val * kernel_val; + } + } + } + + if (!bias_.empty() && c < bias_.get_shape()[0]) { + sum += bias_.get({c}); + } + + output_tensor.set({b, c, oh, ow}, sum); 
+ } + } + } + } + + output = output_tensor; +} + +// NCHW -> NCHW only +template +void Conv4D_Legacy(const Tensor& input, const Tensor& kernel_, + const Tensor& bias_, Tensor& output, size_t stride_, + size_t pads_, size_t dilations_) { + size_t batch_size = input.get_shape()[0]; + size_t in_height = input.get_shape()[2]; + size_t in_width = input.get_shape()[3]; + size_t in_channels = input.get_shape()[1]; + + size_t kernel_height = kernel_.get_shape()[0]; + size_t kernel_width = kernel_.get_shape()[1]; + size_t kernel_in_channels = kernel_.get_shape()[2]; + size_t kernel_out_channels = kernel_.get_shape()[3]; + + std::vector>>> padded_input = + std::vector>>>( + batch_size, std::vector>>( + in_height + 2 * pads_, + std::vector>( + in_width + 2 * pads_, + std::vector(in_channels, 0)))); + for (size_t b = 0; b < batch_size; ++b) { + for (size_t h = 0; h < in_height; ++h) { + for (size_t w = 0; w < in_width; ++w) { + for (size_t c = 0; c < in_channels; ++c) { + padded_input[b][h + pads_][w + pads_][c] = + input.get({b, c, h, w}); + } + } + } + } + std::vector>>> dil_kernel = + std::vector>>>( + kernel_height * dilations_ + 1 - dilations_, + std::vector>>( + kernel_width * dilations_ + 1 - dilations_, + std::vector>( + kernel_in_channels, + std::vector(kernel_out_channels, 0)))); + for (size_t b = 0; b < kernel_out_channels; ++b) { + for (size_t h = 0; h < kernel_height; ++h) { + for (size_t w = 0; w < kernel_width; ++w) { + for (size_t c = 0; c < kernel_in_channels; ++c) { + dil_kernel[h * dilations_][w * dilations_][c][b] = + kernel_.get({h, w, c, b}); + } + } + } + } + + size_t crat = 0; + if ((in_height + 2 * pads_ - dilations_ * (kernel_height - 1)) % stride_ != 0) + crat = 1; + + size_t out_height = + (in_height + 2 * pads_ - dilations_ * (kernel_height - 1)) / stride_ + + crat; + + crat = 0; + if ((in_width + 2 * pads_ - dilations_ * (kernel_width - 1)) % stride_ != 0) + crat = 1; + + size_t out_width = + (in_width + 2 * pads_ - dilations_ * (kernel_width - 
1)) / stride_ + crat; + + std::vector>>> output_tensor( + batch_size, std::vector>>( + kernel_out_channels, + std::vector>( + out_height, std::vector(out_width, 0)))); + for (size_t b = 0; b < batch_size; ++b) { + for (size_t c = 0; c < kernel_out_channels; ++c) { + for (size_t i = 0; i < out_height; i += stride_) { + for (size_t j = 0; j < out_width; j += stride_) { + ValueType value = 0; + for (size_t ic = 0; ic < in_channels; ++ic) { + for (size_t h = 0; h < kernel_height * dilations_ + 1 - dilations_; + ++h) { + for (size_t w = 0; w < kernel_width * dilations_ + 1 - dilations_; + ++w) { + value += + padded_input[b][i + h][j + w][ic] * dil_kernel[h][w][ic][c]; + } + } + } + if (!bias_.empty()) { + output_tensor[b][c][i][j] = value + (*bias_.as())[c]; + } else { + output_tensor[b][c][i][j] = value; + } + } + } + } + } + + Shape sh({batch_size, kernel_out_channels, out_height, out_width}); + std::vector one_d_vector(batch_size * out_height * out_width * + kernel_out_channels); + size_t index_1d = 0; + for (size_t i = 0; i < batch_size; ++i) { + for (size_t l = 0; l < kernel_out_channels; ++l) { + for (size_t j = 0; j < out_height; ++j) { + for (size_t k = 0; k < out_width; ++k) { + one_d_vector[index_1d++] = output_tensor[i][l][j][k]; + } + } + } + } + output = make_tensor(one_d_vector, sh); +} } // namespace it_lab_ai diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index 0e44501f0..0e7b21de8 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include #include @@ -29,24 +29,43 @@ template std::vector mat_vec_mul(const std::vector& mat, const Shape& mat_shape, const std::vector& vec) { - size_t c = vec.size() / mat_shape[1]; + // Matrix layout: [input_size, output_size] with row-major ordering + // Access pattern: mat[i * output_size + j] where: + // - i ∈ [0, input_size-1] (input dimension) + // - j ∈ [0, output_size-1] (output dimension) + // This corresponds to 
weights[i][j] in mathematical notation if (mat_shape.dims() != 2) { throw std::invalid_argument("Not a matrix in argument"); } + + size_t input_size = mat_shape[0]; + size_t output_size = mat_shape[1]; + + size_t batch_size = vec.size() / input_size; + + if (mat.size() != input_size * output_size) { + throw std::invalid_argument("Matrix size doesn't match shape"); + } + + if (vec.size() % mat_shape[0] != 0) { + throw std::invalid_argument("Vector size not divisible by matrix rows"); + } + Shape res_shape(1); - res_shape[0] = mat_shape[0] * c; + res_shape[0] = mat_shape[1] * batch_size; std::vector res(res_shape[0]); + ValueType elem; - for (size_t count = 0; count < c; count++) { - for (size_t i = 0; i < mat_shape[0]; i++) { + for (size_t batch = 0; batch < batch_size; batch++) { + for (size_t j = 0; j < mat_shape[1]; j++) { elem = ValueType(0); - for (size_t j = 0; j < mat_shape[1]; j++) { - // due to 1d indexing - elem += mat[i * mat_shape[1] + j] * vec[count * mat_shape[1] + j]; + for (size_t i = 0; i < mat_shape[0]; i++) { + elem += mat[i * mat_shape[1] + j] * vec[batch * mat_shape[0] + i]; } - res[count * mat_shape[0] + i] = elem; + res[batch * mat_shape[1] + j] = elem; } } + return res; } @@ -93,7 +112,6 @@ class FCLayerImpl : public LayerImpl { // weights * inputValues + bias = outputValues -// constructor for FCLayer template FCLayerImpl::FCLayerImpl(const std::vector& input_weights, const Shape& input_weights_shape, @@ -102,30 +120,32 @@ FCLayerImpl::FCLayerImpl(const std::vector& input_weights, if (input_weights.empty()) { throw std::invalid_argument("Empty weights for FCLayer"); } - if (input_weights_shape.dims() != 2 || - input_weights_shape[0] != input_bias.size()) { - throw std::invalid_argument("Invalid weights shape"); - } - this->inputShape_[0] = input_weights_shape[1]; - this->outputShape_[0] = input_bias.size(); - if (this->inputShape_[0] == 0 || this->outputShape_[0] == 0) { - throw std::invalid_argument("Invalid weights/bias size for FCLayer"); 
+ + this->inputShape_[0] = input_weights_shape[0]; + this->outputShape_[0] = input_weights_shape[1]; + + if (input_bias.size() != this->outputShape_[0]) { + throw std::invalid_argument("Bias size doesn't match output size"); } + weights_.resize(input_weights_shape.count(), ValueType(0)); } template std::vector FCLayerImpl::run( const std::vector& input) const { - Shape cur_w_shape({this->outputShape_[0], this->inputShape_[0]}); + Shape cur_w_shape({this->inputShape_[0], this->outputShape_[0]}); + std::vector output_values = mat_vec_mul(weights_, cur_w_shape, input); - for (size_t p = 0; p < output_values.size() / bias_.size(); ++p) { + + size_t batch_size = output_values.size() / this->outputShape_[0]; + for (size_t batch = 0; batch < batch_size; ++batch) { for (size_t i = 0; i < bias_.size(); ++i) { - output_values[p * bias_.size() + i] += bias_[i]; + output_values[batch * this->outputShape_[0] + i] += bias_[i]; } } + return output_values; } - } // namespace it_lab_ai diff --git a/include/layers/FlattenLayer.hpp b/include/layers/FlattenLayer.hpp index ae475203b..07b8fd922 100644 --- a/include/layers/FlattenLayer.hpp +++ b/include/layers/FlattenLayer.hpp @@ -11,11 +11,13 @@ std::vector reorder(std::vector order_vec, class FlattenLayer : public Layer { private: std::vector order_; + int axis_; public: - FlattenLayer() : Layer(kFlatten), order_({0, 1, 2, 3}) {} + FlattenLayer() : Layer(kFlatten), order_({0, 1, 2, 3}), axis_(0) {} + FlattenLayer(int axis) : Layer(kFlatten), order_({}), axis_(axis) {} FlattenLayer(const std::vector& order) - : Layer(kFlatten), order_(order) {} + : Layer(kFlatten), order_(order), axis_(0) {} void run(const std::vector& input, std::vector& output) override; #ifdef ENABLE_STATISTIC_WEIGHTS diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index f3b7c6f99..2da4e0a51 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -25,7 +25,11 @@ enum LayerType : uint8_t { kSplit, kBinaryOp, kReduce, - kTranspose + 
kTranspose, + kReshape, + kSoftmax, + kMatmul, + kBatchNormalization }; enum ImplType : uint8_t { kDefault, kTBB, kSTL }; diff --git a/include/layers/MatmulLayer.hpp b/include/layers/MatmulLayer.hpp new file mode 100644 index 000000000..bf38de276 --- /dev/null +++ b/include/layers/MatmulLayer.hpp @@ -0,0 +1,40 @@ +#pragma once +#include + +#include "layers/Layer.hpp" +#include "layers/Tensor.hpp" + +namespace it_lab_ai { + +class MatmulLayer : public Layer { + public: + MatmulLayer() : Layer(kMatmul) {} + + void run(const std::vector& input, + std::vector& output) override; + +#ifdef ENABLE_STATISTIC_WEIGHTS + Tensor get_weights() override { return Tensor(); } +#endif + + private: + template + void matmul_impl(const Tensor& a, const Tensor& b, Tensor& output) const; + + template + void matmul_1d_1d(const Tensor& a, const Tensor& b, Tensor& output) const; + + template + void matmul_1d_2d(const Tensor& a, const Tensor& b, Tensor& output) const; + + template + void matmul_2d_1d(const Tensor& a, const Tensor& b, Tensor& output) const; + + template + void matmul_2d_2d(const Tensor& a, const Tensor& b, Tensor& output) const; + + template + void matmul_nd_nd(const Tensor& a, const Tensor& b, Tensor& output) const; +}; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index aef42281c..8a363e385 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -1,10 +1,16 @@ #pragma once #include +#include #include +#include +#include #include #include +#include #include "layers/Layer.hpp" +#include "tbb/blocked_range2d.h" +#include "tbb/parallel_for.h" namespace it_lab_ai { @@ -12,13 +18,35 @@ enum PoolingType : uint8_t { kAverage, kMax }; class PoolingLayer : public Layer { public: - PoolingLayer() : Layer(kPooling), implType_(kDefault) {} + PoolingLayer(const Shape& pooling_shape, const Shape& strides = {2, 2}, + const Shape& pads = {0, 0, 0, 0}, + const Shape& 
dilations = {1, 1}, bool ceil_mode = false, + std::string pooling_type = "average", + ImplType implType = kDefault) + : Layer(kPooling), + poolingShape_(pooling_shape), + strides_(strides), + pads_(pads), + dilations_(dilations), + ceil_mode_(ceil_mode), + poolingType_(std::move(pooling_type)), + implType_(implType) {} PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average", ImplType implType = kDefault) : Layer(kPooling), poolingShape_(pooling_shape), + strides_({2, 2}), + pads_({0, 0, 0, 0}), + dilations_({1, 1}), + ceil_mode_(false), poolingType_(std::move(pooling_type)), implType_(implType) {} + void setStrides(size_t h, size_t w) { strides_ = {h, w}; } + void setPads(size_t top, size_t bottom, size_t left, size_t right) { + pads_ = {top, bottom, left, right}; + } + void setDilations(size_t h, size_t w) { dilations_ = {h, w}; } + void setCeilMode(bool ceil_mode) { ceil_mode_ = ceil_mode; } void run(const std::vector& input, std::vector& output) override; #ifdef ENABLE_STATISTIC_WEIGHTS @@ -31,12 +59,16 @@ class PoolingLayer : public Layer { private: Shape poolingShape_; + Shape strides_; + Shape pads_; + Shape dilations_; + bool ceil_mode_; std::string poolingType_; ImplType implType_; }; -inline size_t coord_size(int coord, const Shape& shape) { - if (coord >= 0 && static_cast(coord) < shape.dims()) { +inline size_t coord_size(size_t coord, const Shape& shape) { + if (coord < shape.dims()) { return shape[coord]; } return 1; @@ -64,6 +96,13 @@ class PoolingLayerImpl : public LayerImpl { public: PoolingLayerImpl() = delete; PoolingLayerImpl(const Shape& input_shape, const Shape& pooling_shape, + const std::string& pooling_type = "average") + : PoolingLayerImpl(input_shape, pooling_shape, {2, 2}, {0, 0, 0, 0}, + {1, 1}, false, pooling_type) {} + PoolingLayerImpl(const Shape& input_shape, const Shape& pooling_shape, + const Shape& strides = {2, 2}, + const Shape& pads = {0, 0, 0, 0}, + const Shape& dilations = {1, 1}, bool ceil_mode = false, 
const std::string& pooling_type = "average"); PoolingLayerImpl(const PoolingLayerImpl& c) = default; PoolingLayerImpl& operator=(const PoolingLayerImpl& c) = default; @@ -72,15 +111,36 @@ class PoolingLayerImpl : public LayerImpl { protected: Shape poolingShape_; + Shape strides_; + Shape pads_; + Shape dilations_; + bool ceil_mode_; PoolingType poolingType_; }; template -PoolingLayerImpl::PoolingLayerImpl(const Shape& input_shape, - const Shape& pooling_shape, - const std::string& pooling_type) +PoolingLayerImpl::PoolingLayerImpl( + const Shape& input_shape, const Shape& pooling_shape, const Shape& strides, + const Shape& pads, const Shape& dilations, bool ceil_mode, + const std::string& pooling_type) : LayerImpl(input_shape, input_shape), - poolingShape_(pooling_shape) { + poolingShape_(pooling_shape), + strides_(strides), + pads_(pads), + dilations_(dilations), + ceil_mode_(ceil_mode) { + if (pooling_shape[0] == 0 && pooling_shape[1] == 0) { + poolingShape_ = Shape({input_shape[input_shape.dims() - 2], + input_shape[input_shape.dims() - 1]}); + strides_ = Shape({1, 1}); + pads_ = Shape({0, 0, 0, 0}); + dilations_ = Shape({1, 1}); + this->outputShape_ = input_shape; + for (size_t i = 2; i < input_shape.dims(); ++i) { + this->outputShape_[i] = 1; + } + return; + } if (input_shape.dims() > 4) { throw std::invalid_argument("Input dimensions is bigger than 4"); } @@ -101,13 +161,32 @@ PoolingLayerImpl::PoolingLayerImpl(const Shape& input_shape, throw std::invalid_argument("Pooling type " + pooling_type + " is not supported"); } - size_t input_h_index = input_shape.dims() > 2 ? 
(input_shape.dims() - 2) : 0; + this->outputShape_ = input_shape; for (size_t i = 0; i < pooling_shape.dims(); i++) { - if (pooling_shape[i] == 0) { - throw std::runtime_error("Zero division, pooling shape has zeroes"); + size_t input_size = + input_shape[input_shape.dims() - pooling_shape.dims() + i]; + size_t kernel_size = pooling_shape[i]; + size_t stride = strides[i]; + size_t pad = pads[i] + pads[pooling_shape.dims() + i]; + size_t dilation = dilations[i]; + + size_t effective_kernel_size = (kernel_size - 1) * dilation + 1; + + size_t output_size; + if (ceil_mode) { + output_size = static_cast( + std::ceil((input_size + pad - effective_kernel_size) / + static_cast(stride))) + + 1; + } else { + output_size = static_cast( + std::floor((input_size + pad - effective_kernel_size) / + static_cast(stride))) + + 1; } - this->outputShape_[input_h_index + i] = - input_shape[input_h_index + i] / pooling_shape[i]; + + this->outputShape_[input_shape.dims() - pooling_shape.dims() + i] = + output_size; } } @@ -117,57 +196,91 @@ std::vector PoolingLayerImpl::run( if (input.size() != this->inputShape_.count()) { throw std::invalid_argument("Input size doesn't fit pooling layer"); } - std::vector pooling_buf; - std::vector res; - std::vector coords; - size_t tmpwidth = 0; - size_t tmpheight = 0; - int input_h_index = this->inputShape_.dims() > 2 - ? (static_cast(this->inputShape_.dims()) - 2) - : 0; - for (size_t n = 0; n < coord_size(input_h_index - 2, this->outputShape_); + + std::vector res(this->outputShape_.count(), ValueType(0)); + + size_t spatial_dims = poolingShape_.dims(); + int batch_dim = this->inputShape_.dims() > spatial_dims ? 0 : -1; + int channel_dim = this->inputShape_.dims() > spatial_dims + 1 ? 1 : -1; + + for (size_t n = 0; n < (batch_dim >= 0 ? 
this->outputShape_[batch_dim] : 1); n++) { - for (size_t c = 0; c < coord_size(input_h_index - 1, this->outputShape_); - c++) { - for (size_t i = 0; i < coord_size(input_h_index, this->outputShape_); - i++) { - for (size_t j = 0; - j < coord_size(input_h_index + 1, this->outputShape_); j++) { - tmpheight = poolingShape_[0] * i; - if (poolingShape_.dims() == 1) { - tmpwidth = j; - } else { - tmpwidth = poolingShape_[1] * j; - } - // to get matrix block for pooling - for (size_t k = 0; k < coord_size(0, poolingShape_); k++) { - for (size_t l = 0; l < coord_size(1, poolingShape_); l++) { - if (this->inputShape_.dims() == 1) { - pooling_buf.push_back(input[tmpheight + k]); - } else { - coords = - std::vector({n, c, tmpheight + k, tmpwidth + l}); - pooling_buf.push_back(input[this->inputShape_.get_index( - std::vector(coords.end() - this->inputShape_.dims(), - coords.end()))]); + for (size_t c = 0; + c < (channel_dim >= 0 ? this->outputShape_[channel_dim] : 1); c++) { + for (size_t h = 0; + h < this->outputShape_[this->outputShape_.dims() - spatial_dims]; + h++) { + for (size_t w = 0; + w < (spatial_dims > 1 + ? this->outputShape_[this->outputShape_.dims() - + spatial_dims + 1] + : 1); + w++) { + std::vector pooling_buf; + + int start_h = + static_cast(h * strides_[0]) - static_cast(pads_[0]); + int start_w = spatial_dims > 1 ? static_cast(w * strides_[1]) - + static_cast(pads_[2]) + : 0; + + for (size_t kh = 0; kh < poolingShape_[0]; kh++) { + for (size_t kw = 0; kw < (spatial_dims > 1 ? poolingShape_[1] : 1); + kw++) { + int pos_h = start_h + static_cast(kh * dilations_[0]); + int pos_w = spatial_dims > 1 + ? 
start_w + static_cast(kw * dilations_[1]) + : 0; + + if (pos_h >= 0 && + pos_h < static_cast( + this->inputShape_[this->inputShape_.dims() - + spatial_dims]) && + (spatial_dims <= 1 || + (pos_w >= 0 && + pos_w < static_cast( + this->inputShape_[this->inputShape_.dims() - + spatial_dims + 1])))) { + std::vector input_coords(this->inputShape_.dims(), 0); + if (batch_dim >= 0) input_coords[batch_dim] = n; + if (channel_dim >= 0) input_coords[channel_dim] = c; + input_coords[this->inputShape_.dims() - spatial_dims] = pos_h; + if (spatial_dims > 1) + input_coords[this->inputShape_.dims() - spatial_dims + 1] = + pos_w; + + size_t input_index = this->inputShape_.get_index(input_coords); + pooling_buf.push_back(input[input_index]); } } } - switch (poolingType_) { - case kAverage: - res.push_back(avg_pooling(pooling_buf)); - break; - case kMax: - res.push_back(max_pooling(pooling_buf)); - break; - default: - throw std::runtime_error("Unknown pooling type"); + + std::vector output_coords(this->outputShape_.dims(), 0); + if (batch_dim >= 0) output_coords[batch_dim] = n; + if (channel_dim >= 0) output_coords[channel_dim] = c; + output_coords[this->outputShape_.dims() - spatial_dims] = h; + if (spatial_dims > 1) + output_coords[this->outputShape_.dims() - spatial_dims + 1] = w; + + size_t output_index = this->outputShape_.get_index(output_coords); + + if (!pooling_buf.empty()) { + switch (this->poolingType_) { + case kAverage: + res[output_index] = avg_pooling(pooling_buf); + break; + case kMax: + res[output_index] = max_pooling(pooling_buf); + break; + default: + throw std::runtime_error("Unknown pooling type"); + } } - pooling_buf.clear(); } } } } + return res; } @@ -175,8 +288,12 @@ template class PoolingLayerImplTBB : public PoolingLayerImpl { public: PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape, + const Shape& strides = {2, 2}, + const Shape& pads = {0, 0, 0, 0}, + const Shape& dilations = {1, 1}, bool ceil_mode = false, const std::string& 
pooling_type = "average") - : PoolingLayerImpl(input_shape, pooling_shape, pooling_type) {} + : PoolingLayerImpl(input_shape, pooling_shape, strides, pads, + dilations, ceil_mode, pooling_type) {} std::vector run( const std::vector& input) const override; }; @@ -187,84 +304,118 @@ std::vector PoolingLayerImplTBB::run( if (input.size() != this->inputShape_.count()) { throw std::invalid_argument("Input size doesn't fit pooling layer"); } - std::vector res(this->outputShape_.count()); - int input_h_index = this->inputShape_.dims() > 2 - ? (static_cast(this->inputShape_.dims()) - 2) - : 0; + + std::vector res(this->outputShape_.count(), ValueType(0)); + + size_t spatial_dims = this->poolingShape_.dims(); + int batch_dim = this->inputShape_.dims() > spatial_dims ? 0 : -1; + int channel_dim = this->inputShape_.dims() > spatial_dims + 1 ? 1 : -1; + oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range2d( - 0, coord_size(input_h_index - 2, this->outputShape_), 0, - coord_size(input_h_index - 1, this->outputShape_)), - [&](oneapi::tbb::blocked_range2d r) { - for (size_t n = r.rows().begin(); n < r.rows().end(); n++) { - for (size_t c = r.cols().begin(); c < r.cols().end(); c++) { - oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range2d( - 0, coord_size(input_h_index, this->outputShape_), 0, - coord_size(input_h_index + 1, this->outputShape_)), - [&](oneapi::tbb::blocked_range2d r1) { - for (size_t i = r1.rows().begin(); i < r1.rows().end(); i++) { - for (size_t j = r1.cols().begin(); j < r1.cols().end(); - j++) { + oneapi::tbb::blocked_range( + 0, batch_dim >= 0 ? this->outputShape_[batch_dim] : 1), + [&](const oneapi::tbb::blocked_range& r1) { + for (size_t n = r1.begin(); n < r1.end(); n++) { + oneapi::tbb::parallel_for( + oneapi::tbb::blocked_range( + 0, channel_dim >= 0 ? 
this->outputShape_[channel_dim] : 1), + [&](const oneapi::tbb::blocked_range& r2) { + for (size_t c = r2.begin(); c < r2.end(); c++) { + for (size_t h = 0; + h < this->outputShape_[this->outputShape_.dims() - + spatial_dims]; + h++) { + for (size_t w = 0; + w < + (spatial_dims > 1 + ? this->outputShape_[this->outputShape_.dims() - + spatial_dims + 1] + : 1); + w++) { std::vector pooling_buf; - std::vector coords; - size_t tmpwidth; - size_t tmpheight; - tmpheight = this->poolingShape_[0] * i; - if (this->poolingShape_.dims() == 1) { - tmpwidth = j; - } else { - tmpwidth = this->poolingShape_[1] * j; - } - for (size_t k = 0; k < coord_size(0, this->poolingShape_); - k++) { - for (size_t l = 0; - l < coord_size(1, this->poolingShape_); l++) { - if (this->inputShape_.dims() == 1) { - pooling_buf.push_back(input[tmpheight + k]); - } else { - coords = std::vector( - {n, c, tmpheight + k, tmpwidth + l}); - pooling_buf.push_back( - input[this->inputShape_.get_index( - std::vector( - coords.end() - this->inputShape_.dims(), - coords.end()))]); + + int start_h = static_cast(h * this->strides_[0]) - + static_cast(this->pads_[0]); + int start_w = + spatial_dims > 1 + ? static_cast(w * this->strides_[1]) - + static_cast(this->pads_[2]) + : 0; + + for (size_t kh = 0; kh < this->poolingShape_[0]; kh++) { + for (size_t kw = 0; + kw < + (spatial_dims > 1 ? this->poolingShape_[1] : 1); + kw++) { + int pos_h = start_h + static_cast( + kh * this->dilations_[0]); + int pos_w = + spatial_dims > 1 + ? 
start_w + static_cast( + kw * this->dilations_[1]) + : 0; + + if (pos_h >= 0 && + pos_h < static_cast( + this->inputShape_[this->inputShape_ + .dims() - + spatial_dims]) && + (spatial_dims <= 1 || + (pos_w >= 0 && + pos_w < static_cast( + this->inputShape_ + [this->inputShape_.dims() - + spatial_dims + 1])))) { + std::vector input_coords( + this->inputShape_.dims(), 0); + if (batch_dim >= 0) input_coords[batch_dim] = n; + if (channel_dim >= 0) input_coords[channel_dim] = c; + input_coords[this->inputShape_.dims() - + spatial_dims] = pos_h; + if (spatial_dims > 1) + input_coords[this->inputShape_.dims() - + spatial_dims + 1] = pos_w; + + size_t input_index = + this->inputShape_.get_index(input_coords); + pooling_buf.push_back(input[input_index]); } } } - coords = std::vector({n, c, i, j}); - switch (this->poolingType_) { - case kAverage: - if (this->inputShape_.dims() == 1) { - res[i] = avg_pooling(pooling_buf); - } else { - res[this->outputShape_.get_index( - std::vector( - coords.end() - this->inputShape_.dims(), - coords.end()))] = avg_pooling(pooling_buf); - } - break; - case kMax: - if (this->inputShape_.dims() == 1) { - res[i] = max_pooling(pooling_buf); - } else { - res[this->outputShape_.get_index( - std::vector( - coords.end() - this->inputShape_.dims(), - coords.end()))] = max_pooling(pooling_buf); + + std::vector output_coords( + this->outputShape_.dims(), 0); + if (batch_dim >= 0) output_coords[batch_dim] = n; + if (channel_dim >= 0) output_coords[channel_dim] = c; + output_coords[this->outputShape_.dims() - spatial_dims] = + h; + if (spatial_dims > 1) + output_coords[this->outputShape_.dims() - spatial_dims + + 1] = w; + + size_t output_index = + this->outputShape_.get_index(output_coords); + + if (!pooling_buf.empty()) { + switch (this->poolingType_) { + case kAverage: + res[output_index] = avg_pooling(pooling_buf); break; - default: - throw std::runtime_error("Unknown pooling type"); - } + case kMax: + res[output_index] = max_pooling(pooling_buf); + 
break; + default: + throw std::runtime_error("Unknown pooling type"); + } } } } - }); - } + } + }); } }); + return res; } -} // namespace it_lab_ai +} // namespace it_lab_ai \ No newline at end of file diff --git a/include/layers/ReduceLayer.hpp b/include/layers/ReduceLayer.hpp index b9efee72d..bb5e62228 100644 --- a/include/layers/ReduceLayer.hpp +++ b/include/layers/ReduceLayer.hpp @@ -12,10 +12,12 @@ class ReduceLayer : public Layer { enum class Operation : uint8_t { kSum, kMean, kMult, kMax, kMin }; ReduceLayer(Operation op, int64_t keepdims = 0, - const Tensor& axes = make_tensor(std::vector{})); + const std::vector& axes = {}); + explicit ReduceLayer(int64_t keepdims = 0, - const Tensor& axes = make_tensor(std::vector{})) + const std::vector& axes = {}) : ReduceLayer(Operation::kSum, keepdims, axes) {} + void run(const std::vector& input, std::vector& output) override; @@ -26,7 +28,8 @@ class ReduceLayer : public Layer { private: Operation op_; int64_t keepdims_; - Tensor axes_; + std::vector axes_; + static void normalize_axes(const Shape& input_shape, std::vector& axes); Shape calculate_output_shape(const Shape& input_shape, diff --git a/include/layers/ReshapeLayer.hpp b/include/layers/ReshapeLayer.hpp new file mode 100644 index 000000000..8ff0cd256 --- /dev/null +++ b/include/layers/ReshapeLayer.hpp @@ -0,0 +1,40 @@ +#pragma once +#include + +#include "layers/Layer.hpp" +#include "layers/Tensor.hpp" + +namespace it_lab_ai { + +class ReshapeLayer : public Layer { + public: + explicit ReshapeLayer(bool allowzero = false, + const std::vector& shape = {}) + : Layer(kReshape), allowzero_(allowzero), shape_(shape) {} + + void run(const std::vector& input, + std::vector& output) override; + +#ifdef ENABLE_STATISTIC_WEIGHTS + Tensor get_weights() override { return Tensor(); } +#endif + + void set_shape(const std::vector& shape) { shape_ = shape; } + void set_allowzero(bool allowzero) { allowzero_ = allowzero; } + + private: + bool allowzero_; + std::vector shape_; 
+ + template + void reshape_impl(const Tensor& input, Tensor& output, + const std::vector& target_shape, + const std::vector& final_shape) const; + template + void apply_per_batch_reshape(const Tensor& input, Tensor& output, + const std::vector& target_shape) const; + static std::vector calculate_output_shape( + const Shape& input_shape, const std::vector& requested_shape); +}; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/include/layers/SoftmaxLayer.hpp b/include/layers/SoftmaxLayer.hpp new file mode 100644 index 000000000..2f076320e --- /dev/null +++ b/include/layers/SoftmaxLayer.hpp @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include +#include + +#include "layers/Layer.hpp" +#include "layers/Tensor.hpp" + +namespace it_lab_ai { + +class SoftmaxLayer : public Layer { + public: + explicit SoftmaxLayer(int axis = -1) : Layer(kSoftmax), axis_(axis) {} + + void run(const std::vector& input, + std::vector& output) override; + +#ifdef ENABLE_STATISTIC_WEIGHTS + Tensor get_weights() override { return Tensor(); } +#endif + + void set_axis(int axis) { axis_ = axis; } + int get_axis() const { return axis_; } + + private: + int axis_; + + template + void softmax_impl(const Tensor& input, Tensor& output) const; + + void softmax_int_impl(const Tensor& input, Tensor& output) const; + + static size_t normalize_axis(const Shape& shape, int axis); +}; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/include/layers/SplitLayer.hpp b/include/layers/SplitLayer.hpp index ac21c9572..823e505f5 100644 --- a/include/layers/SplitLayer.hpp +++ b/include/layers/SplitLayer.hpp @@ -10,7 +10,7 @@ namespace it_lab_ai { class SplitLayer : public Layer { public: - SplitLayer(int axis, std::vector splits) + SplitLayer(int axis, std::vector splits) : Layer(kSplit), axis_(axis), splits_(std::move(splits)) {} SplitLayer(int axis, int num_outputs) @@ -24,7 +24,7 @@ class SplitLayer : public Layer { private: int axis_; - std::optional> splits_; + 
std::optional> splits_; std::optional num_outputs_; void validate(const Tensor& input) const; diff --git a/include/layers/Tensor.hpp b/include/layers/Tensor.hpp index d7a21e3e1..d51d32abd 100644 --- a/include/layers/Tensor.hpp +++ b/include/layers/Tensor.hpp @@ -10,7 +10,7 @@ namespace it_lab_ai { -enum class Type : uint8_t { kUnknown, kInt, kFloat }; +enum class Type : uint8_t { kUnknown, kInt, kInt64, kFloat }; template std::vector* to_byte(std::vector& v) { @@ -21,11 +21,12 @@ template const std::vector* to_byte(const std::vector& v) { return reinterpret_cast*>(&v); } - template Type GetTypeEnum() { if constexpr (std::is_same_v) { return Type::kInt; + } else if constexpr (std::is_same_v) { + return Type::kInt64; } else if constexpr (std::is_same_v) { return Type::kFloat; } else { diff --git a/src/Weights_Reader/reader_weights.cpp b/src/Weights_Reader/reader_weights.cpp index a6ad5c4a3..7ad789050 100644 --- a/src/Weights_Reader/reader_weights.cpp +++ b/src/Weights_Reader/reader_weights.cpp @@ -72,12 +72,6 @@ Tensor create_tensor_from_json(const json& layer_data, Type type) { parse_json_shape(layer_data["weights"], shape); } - std::cout << "Extracted weights size: " << weights.size() << std::endl; - std::cout << "Shape: "; - for (auto dim : shape) std::cout << dim << " "; - std::cout << std::endl; - std::cout << "Extracted bias size: " << bias.size() << std::endl; - return make_tensor(weights, Shape(shape), bias); } diff --git a/src/layers/BatchNormalizationLayer.cpp b/src/layers/BatchNormalizationLayer.cpp new file mode 100644 index 000000000..9a9967d94 --- /dev/null +++ b/src/layers/BatchNormalizationLayer.cpp @@ -0,0 +1,122 @@ +#include "layers/BatchNormalizationLayer.hpp" + +#include +#include +#include + +namespace it_lab_ai { + +void BatchNormalizationLayer::run(const std::vector& input, + std::vector& output) { + if (input.size() != 1) { + throw std::runtime_error( + "BatchNormalizationLayer: Expected 1 input tensor (X)"); + } + + const auto& x = input[0]; 
+ const auto& input_shape = x.get_shape(); + + if (input_shape.dims() < 2) { + throw std::runtime_error( + "BatchNormalizationLayer: Input must have at least 2 dimensions"); + } + + size_t num_channels = input_shape[1]; + validate_parameters(num_channels); + + Type expected_type = x.get_type(); + if (scale_.get_type() != expected_type || bias_.get_type() != expected_type || + mean_.get_type() != expected_type || var_.get_type() != expected_type) { + throw std::runtime_error( + "BatchNormalizationLayer: Parameter type mismatch"); + } + + switch (x.get_type()) { + case Type::kFloat: + batchnorm_impl(x, output[0]); + break; + case Type::kInt: + batchnorm_impl(x, output[0]); + break; + default: + throw std::runtime_error( + "BatchNormalizationLayer: Unsupported input tensor type"); + } +} + +void BatchNormalizationLayer::validate_parameters(size_t num_channels) const { + auto check_parameter = [num_channels](const Tensor& param, const char* name) { + auto param_shape = param.get_shape(); + if (param_shape.dims() != 1 || param_shape[0] != num_channels) { + throw std::runtime_error( + std::string("BatchNormalizationLayer: Invalid ") + name + + " parameter shape. 
Expected [" + std::to_string(num_channels) + + "], got " + std::to_string(param_shape[0])); + } + }; + + check_parameter(scale_, "scale"); + check_parameter(bias_, "bias"); + check_parameter(mean_, "mean"); + check_parameter(var_, "var"); +} + +template +void BatchNormalizationLayer::batchnorm_impl(const Tensor& input, + Tensor& output) const { + const auto* scale_data = scale_.as(); + const auto* bias_data = bias_.as(); + const auto* mean_data = mean_.as(); + const auto* var_data = var_.as(); + const auto* input_data = input.as(); + + if (!input_data || !scale_data || !bias_data || !mean_data || !var_data) { + throw std::runtime_error("BatchNormalizationLayer: Invalid tensor data"); + } + + const auto& shape = input.get_shape(); + size_t batch_size = shape[0]; + size_t num_channels = shape[1]; + size_t spatial_size = shape.count() / (batch_size * num_channels); + + output = Tensor(shape, input.get_type()); + auto* output_data = output.as(); + + if (!output_data) { + throw std::runtime_error( + "BatchNormalizationLayer: Failed to create output tensor"); + } + + if (!training_mode_) { + for (size_t b = 0; b < batch_size; ++b) { + for (size_t c = 0; c < num_channels; ++c) { + T scale_val = (*scale_data)[c]; + T bias_val = (*bias_data)[c]; + T mean_val = (*mean_data)[c]; + T var_val = (*var_data)[c]; + + T normalization_factor = + static_cast(1.0) / + static_cast(std::sqrt(static_cast(var_val) + epsilon_)); + + for (size_t i = 0; i < spatial_size; ++i) { + size_t index = b * num_channels * spatial_size + c * spatial_size + i; + T input_val = (*input_data)[index]; + T normalized = (input_val - mean_val) * normalization_factor; + (*output_data)[index] = normalized * scale_val + bias_val; + } + } + } + } else { + throw std::runtime_error( + "BatchNormalizationLayer: Training mode not implemented for inference"); + } +} + +template void BatchNormalizationLayer::batchnorm_impl(const Tensor&, + Tensor&) const; + +template void BatchNormalizationLayer::batchnorm_impl(const 
Tensor&, + Tensor&) const; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers/ConcatLayer.cpp b/src/layers/ConcatLayer.cpp index 8e60130c8..616606fc5 100644 --- a/src/layers/ConcatLayer.cpp +++ b/src/layers/ConcatLayer.cpp @@ -13,14 +13,14 @@ void ConcatLayer::run(const std::vector& input, return; } - validate_inputs(input); + this->validate_inputs(input); switch (input[0].get_type()) { case Type::kFloat: - concatenate(input, output[0]); + this->concatenate(input, output[0]); break; case Type::kInt: - concatenate(input, output[0]); + this->concatenate(input, output[0]); break; default: throw std::runtime_error("ConcatLayer: Unsupported input tensor type"); @@ -77,6 +77,24 @@ int64_t ConcatLayer::normalize_axis(size_t rank) const { return axis; } +std::vector ConcatLayer::reorderInputs( + const std::vector& inputs) const { + if (input_order_.empty() || input_order_.size() != inputs.size()) { + return inputs; + } + + std::vector reordered(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++i) { + if (input_order_[i] >= 0 && + static_cast(input_order_[i]) < inputs.size()) { + reordered[i] = inputs[input_order_[i]]; + } else { + throw std::runtime_error("ConcatLayer: Invalid input order index"); + } + } + return reordered; +} + Shape ConcatLayer::calculate_output_shape( const std::vector& inputs) const { if (inputs.empty()) return Shape({}); @@ -96,59 +114,4 @@ Shape ConcatLayer::calculate_output_shape( return Shape(output_dims); } -template -void ConcatLayer::concatenate(const std::vector& inputs, - Tensor& output) const { - Shape output_shape = calculate_output_shape(inputs); - std::vector output_data(output_shape.count(), 0); - - const int64_t axis = normalize_axis(inputs[0].get_shape().dims()); - const size_t outer_size = [&]() { - size_t size = 1; - for (int64_t i = 0; i < axis; ++i) { - size *= output_shape[i]; - } - return size; - }(); - - const size_t inner_size = [&]() { - size_t size = 1; - for (size_t i = axis + 1; i < 
output_shape.dims(); ++i) { - size *= output_shape[i]; - } - return size; - }(); - - size_t output_offset = 0; - - for (const auto& input : inputs) { - const auto& input_data = *input.as(); - const Shape& input_shape = input.get_shape(); - const size_t input_axis_size = input_shape[axis]; - - for (size_t outer = 0; outer < outer_size; ++outer) { - for (size_t a = 0; a < input_axis_size; ++a) { - for (size_t inner = 0; inner < inner_size; ++inner) { - size_t input_pos = - outer * input_axis_size * inner_size + a * inner_size + inner; - - size_t output_pos = outer * output_shape[axis] * inner_size + - (output_offset + a) * inner_size + inner; - - output_data[output_pos] = input_data[input_pos]; - } - } - } - - output_offset += input_axis_size; - } - - output = make_tensor(output_data, output_shape); -} - -template void ConcatLayer::concatenate(const std::vector&, - Tensor&) const; -template void ConcatLayer::concatenate(const std::vector&, - Tensor&) const; - } // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers/ConvLayer.cpp b/src/layers/ConvLayer.cpp index 4024a63a6..28c45e555 100644 --- a/src/layers/ConvLayer.cpp +++ b/src/layers/ConvLayer.cpp @@ -10,6 +10,24 @@ void ConvolutionalLayer::run(const std::vector& input, if (input[0].get_shape().dims() != 4) { throw std::out_of_range("input must be 4-dimensional"); } + if (group_ > 1) { + if (group_ == input[0].get_shape()[1] && group_ == kernel_.get_shape()[0]) { + switch (input[0].get_type()) { + case Type::kFloat: + DepthwiseConv4D(input[0], kernel_, bias_, output[0], stride_, + pads_, dilations_); + break; + case Type::kInt: + DepthwiseConv4D(input[0], kernel_, bias_, output[0], stride_, + pads_, dilations_); + break; + default: + throw std::runtime_error( + "Unsupported type for depthwise convolution"); + } + return; + } + } switch (input[0].get_type()) { case Type::kInt: { if (kernel_.get_shape().dims() == 2) { @@ -63,12 +81,12 @@ void ConvolutionalLayer::run(const std::vector& input, 
switch (implType_) { case kSTL: { Conv4DSTL(input[0], kernel_, bias_, output[0], stride_, pads_, - dilations_); + group_, dilations_); break; } default: { Conv4D(input[0], kernel_, bias_, output[0], stride_, pads_, - dilations_); + group_, dilations_); break; } } @@ -124,23 +142,28 @@ void ConvolutionalLayer::run(const std::vector& input, 2)), sh); } else { - switch (implType_) { - case kSTL: { - Conv4DSTL(input[0], kernel_, bias_, output[0], stride_, - pads_, dilations_); - break; - } - default: { - Conv4D(input[0], kernel_, bias_, output[0], stride_, pads_, - dilations_); - break; + if (useLegacyImpl_) { + Conv4D_Legacy(input[0], kernel_, bias_, output[0], stride_, + pads_, dilations_); + } else { + switch (implType_) { + case kSTL: { + Conv4DSTL(input[0], kernel_, bias_, output[0], stride_, + pads_, group_, dilations_); + break; + } + default: { + Conv4D(input[0], kernel_, bias_, output[0], stride_, pads_, + group_, dilations_); + break; + } } } + break; + } + default: { + throw std::runtime_error("Unsupported tensor type"); } - break; - } - default: { - throw std::runtime_error("Unsupported tensor type"); } } } diff --git a/src/layers/FCLayer.cpp b/src/layers/FCLayer.cpp index f4d1c4036..29b9db76f 100644 --- a/src/layers/FCLayer.cpp +++ b/src/layers/FCLayer.cpp @@ -13,23 +13,35 @@ void FCLayer::run(const std::vector& input, if (bias_.get_type() != weights_.get_type()) { throw std::invalid_argument("Bias and weights data type aren't same"); } + + size_t batch_size; + size_t output_size = bias_.get_shape()[0]; + if (input[0].get_shape().dims() == 1) { + size_t total_elements = input[0].get_shape()[0]; + size_t expected_input_size = weights_.get_shape()[0]; + + if (total_elements % expected_input_size == 0) { + batch_size = total_elements / expected_input_size; + } else { + batch_size = 1; + } + } else { + batch_size = input[0].get_shape()[0]; + } + switch (input[0].get_type()) { case Type::kInt: { FCLayerImpl used_impl(*weights_.as(), weights_.get_shape(), 
*bias_.as()); - output[0] = - make_tensor(used_impl.run(*input[0].as()), - {(*input[0].as()).size() / weights_.get_shape()[1] * - weights_.get_shape()[0]}); + auto result = used_impl.run(*input[0].as()); + output[0] = make_tensor(result, {batch_size, output_size}); break; } case Type::kFloat: { FCLayerImpl used_impl(*weights_.as(), weights_.get_shape(), *bias_.as()); - output[0] = - make_tensor(used_impl.run(*input[0].as()), - {(*input[0].as()).size() / - weights_.get_shape()[1] * weights_.get_shape()[0]}); + auto result = used_impl.run(*input[0].as()); + output[0] = make_tensor(result, {batch_size, output_size}); break; } default: { diff --git a/src/layers/FlattenLayer.cpp b/src/layers/FlattenLayer.cpp index e5d5d34dd..8967961f4 100644 --- a/src/layers/FlattenLayer.cpp +++ b/src/layers/FlattenLayer.cpp @@ -2,7 +2,6 @@ namespace it_lab_ai { -// reorder coords std::vector reorder(std::vector order_vec, std::vector order) { size_t min_ind; @@ -21,29 +20,73 @@ std::vector reorder(std::vector order_vec, void FlattenLayer::run(const std::vector& input, std::vector& output) { - switch (input[0].get_type()) { - case Type::kInt: { - if (input[0].get_shape().dims() == 4) { - Flatten4D(input[0], output[0], order_); - } else { - output[0] = make_tensor(*input[0].as(), - Shape({input[0].get_shape().count()})); - } - break; - } - case Type::kFloat: { - if (input[0].get_shape().dims() == 4) { - Flatten4D(input[0], output[0], order_); - } else { - output[0] = make_tensor(*input[0].as(), - Shape({input[0].get_shape().count()})); + if (input.size() != 1) { + throw std::runtime_error("FlattenLayer: Input tensors not 1"); + } + const auto& input_tensor = input[0]; + const auto& input_shape = input_tensor.get_shape(); + Shape output_shape; + + if (!order_.empty() && order_.size() == 4) { + switch (input_tensor.get_type()) { + case Type::kFloat: + Flatten4D(input_tensor, output[0], order_); + break; + case Type::kInt: + Flatten4D(input_tensor, output[0], order_); + break; + default: + 
throw std::runtime_error("Unsupported tensor type"); + } + } else if (axis_ != 0) { + int start_dim = axis_; + if (start_dim < 0) { + start_dim += static_cast(input_shape.dims()); + } + + if (start_dim < 0 || static_cast(start_dim) >= input_shape.dims()) { + throw std::runtime_error("FlattenLayer: Invalid axis value"); + } + size_t flattened_size = 1; + auto start_dim_size = static_cast(start_dim); + for (size_t i = start_dim_size; i < input_shape.dims(); ++i) { + flattened_size *= input_shape[i]; + } + if (start_dim > 0) { + std::vector dims; + for (size_t i = 0; i < start_dim_size; ++i) { + dims.push_back(input_shape[i]); } - break; + dims.push_back(flattened_size); + output_shape = Shape(dims); + } else { + output_shape = Shape({flattened_size}); + } + + switch (input_tensor.get_type()) { + case Type::kInt: + output[0] = make_tensor(*input_tensor.as(), output_shape); + break; + case Type::kFloat: + output[0] = make_tensor(*input_tensor.as(), output_shape); + break; + default: + throw std::runtime_error("Unsupported tensor type"); } - default: { - throw std::runtime_error("No such type"); + } else { + size_t total_size = input_shape.count(); + output_shape = Shape({total_size}); + + switch (input_tensor.get_type()) { + case Type::kInt: + output[0] = make_tensor(*input_tensor.as(), output_shape); + break; + case Type::kFloat: + output[0] = make_tensor(*input_tensor.as(), output_shape); + break; + default: + throw std::runtime_error("Unsupported tensor type"); } } } - } // namespace it_lab_ai diff --git a/src/layers/MatmulLayer.cpp b/src/layers/MatmulLayer.cpp new file mode 100644 index 000000000..51428312d --- /dev/null +++ b/src/layers/MatmulLayer.cpp @@ -0,0 +1,338 @@ +#include "layers/MatmulLayer.hpp" + +#include +#include +#include + +namespace it_lab_ai { + +void MatmulLayer::run(const std::vector& input, + std::vector& output) { + if (input.size() != 2) { + throw std::runtime_error("MatMulLayer: Exactly 2 input tensors required"); + } + const auto& a = 
input[0]; + const auto& b = input[1]; + + try { + bool should_swap = false; + + const auto& a_shape = a.get_shape(); + const auto& b_shape = b.get_shape(); + + if (a_shape.dims() >= 2 && b_shape.dims() >= 2) { + size_t a_rows = a_shape[a_shape.dims() - 2]; + size_t a_cols = a_shape[a_shape.dims() - 1]; + size_t b_rows = b_shape[b_shape.dims() - 2]; + size_t b_cols = b_shape[b_shape.dims() - 1]; + + if (b_rows > a_rows) { + should_swap = true; + } else if (b_rows == a_rows && b_cols > a_cols) { + should_swap = true; + } else if (b_rows == a_rows && b_cols == a_cols) { + size_t a_batch = 1; + size_t b_batch = 1; + for (size_t i = 0; i < a_shape.dims() - 2; ++i) a_batch *= a_shape[i]; + for (size_t i = 0; i < b_shape.dims() - 2; ++i) b_batch *= b_shape[i]; + + if (b_batch > a_batch) { + should_swap = true; + } + } + } + + switch (a.get_type()) { + case Type::kFloat: + if (should_swap) { + matmul_impl(b, a, output[0]); + } else { + matmul_impl(a, b, output[0]); + } + break; + case Type::kInt: + if (should_swap) { + matmul_impl(b, a, output[0]); + } else { + matmul_impl(a, b, output[0]); + } + break; + default: + throw std::runtime_error("Unsupported tensor data type for MatMul"); + } + } catch (const std::exception& e) { + std::cerr << "ERROR in MatMul: " << e.what() << std::endl; + throw; + } catch (...) 
{ + std::cerr << "UNKNOWN ERROR in MatMul" << std::endl; + throw; + } +} + +template +void MatmulLayer::matmul_impl(const Tensor& a, const Tensor& b, + Tensor& output) const { + const auto* a_data = a.as(); + const auto* b_data = b.as(); + + if (!a_data || !b_data) { + throw std::runtime_error("MatMul: Invalid input data"); + } + + const auto& a_shape = a.get_shape(); + const auto& b_shape = b.get_shape(); + size_t a_dims = a_shape.dims(); + size_t b_dims = b_shape.dims(); + + if (a_dims == 1 && b_dims == 1) { + matmul_1d_1d(a, b, output); + } else if (a_dims == 1 && b_dims >= 2) { + matmul_1d_2d(a, b, output); + } else if (a_dims >= 2 && b_dims == 1) { + matmul_2d_1d(a, b, output); + } else if (a_dims == 2 && b_dims == 2) { + matmul_2d_2d(a, b, output); + } else { + matmul_nd_nd(a, b, output); + } +} + +template +void MatmulLayer::matmul_1d_1d(const Tensor& a, const Tensor& b, + Tensor& output) const { + const auto* a_data = a.as(); + const auto* b_data = b.as(); + + if (a.get_shape()[0] != b.get_shape()[0]) { + throw std::runtime_error("MatMul: Incompatible 1D tensor sizes"); + } + + T result = T(0); + for (size_t i = 0; i < a.get_shape()[0]; ++i) { + result += (*a_data)[i] * (*b_data)[i]; + } + + output = make_tensor(std::vector{result}, {}); +} + +template +void MatmulLayer::matmul_1d_2d(const Tensor& a, const Tensor& b, + Tensor& output) const { + const auto* a_data = a.as(); + + const auto& b_shape = b.get_shape(); + size_t b_dims = b_shape.dims(); + + if (a.get_shape()[0] != b_shape[b_dims - 2]) { + throw std::runtime_error( + "MatMul: Incompatible dimensions for 1D * ND multiplication"); + } + + std::vector temp_a_shape = {1, a.get_shape()[0]}; + Tensor temp_a = make_tensor(*a_data, temp_a_shape); + + Tensor temp_output; + matmul_nd_nd(temp_a, b, temp_output); + + const auto& temp_shape = temp_output.get_shape(); + + std::vector final_shape; + for (size_t i = 1; i < temp_shape.dims(); ++i) { + final_shape.push_back(temp_shape[i]); + } + + output = 
make_tensor(*temp_output.as(), final_shape); +} + +template +void MatmulLayer::matmul_2d_1d(const Tensor& a, const Tensor& b, + Tensor& output) const { + const auto* b_data = b.as(); + + const auto& a_shape = a.get_shape(); + size_t a_dims = a_shape.dims(); + + if (a_shape[a_dims - 1] != b.get_shape()[0]) { + throw std::runtime_error( + "MatMul: Incompatible dimensions for ND * 1D multiplication"); + } + + std::vector temp_b_shape = {b.get_shape()[0], 1}; + Tensor temp_b = make_tensor(*b_data, temp_b_shape); + + Tensor temp_output; + matmul_nd_nd(a, temp_b, temp_output); + + const auto& temp_shape = temp_output.get_shape(); + + std::vector final_shape; + for (size_t i = 0; i < temp_shape.dims() - 1; ++i) { + final_shape.push_back(temp_shape[i]); + } + + output = make_tensor(*temp_output.as(), final_shape); +} + +template +void MatmulLayer::matmul_2d_2d(const Tensor& a, const Tensor& b, + Tensor& output) const { + const auto* a_data = a.as(); + const auto* b_data = b.as(); + + const auto& a_shape = a.get_shape(); + const auto& b_shape = b.get_shape(); + + if (a_shape[1] != b_shape[0]) { + throw std::runtime_error("MatMul: Incompatible matrix dimensions"); + } + + size_t m = a_shape[0]; + size_t n = b_shape[1]; + size_t k = a_shape[1]; + + std::vector output_values(m * n, T(0)); + + for (size_t i = 0; i < m; ++i) { + for (size_t j = 0; j < n; ++j) { + T sum = T(0); + for (size_t l = 0; l < k; ++l) { + sum += (*a_data)[i * k + l] * (*b_data)[l * n + j]; + } + output_values[i * n + j] = sum; + } + } + + output = make_tensor(output_values, {m, n}); +} + +template +void MatmulLayer::matmul_nd_nd(const Tensor& a, const Tensor& b, + Tensor& output) const { + const auto* a_data = a.as(); + const auto* b_data = b.as(); + + const auto& a_shape = a.get_shape(); + const auto& b_shape = b.get_shape(); + size_t a_dims = a_shape.dims(); + size_t b_dims = b_shape.dims(); + + if (a_shape[a_dims - 1] != b_shape[b_dims - 2]) { + throw std::runtime_error("MatMul: Incompatible matrix 
dimensions"); + } + + size_t batch_dims_a = (a_dims >= 2) ? a_dims - 2 : 0; + size_t batch_dims_b = (b_dims >= 2) ? b_dims - 2 : 0; + size_t max_batch_dims = std::max(batch_dims_a, batch_dims_b); + + std::vector batch_shape_a(max_batch_dims, 1); + std::vector batch_shape_b(max_batch_dims, 1); + + for (size_t i = 0; i < batch_dims_a; ++i) { + batch_shape_a[i] = a_shape[i]; + } + for (size_t i = 0; i < batch_dims_b; ++i) { + batch_shape_b[i] = b_shape[i]; + } + + size_t a_matrix_size = a_shape[a_dims - 2] * a_shape[a_dims - 1]; + size_t b_matrix_size = b_shape[b_dims - 2] * b_shape[b_dims - 1]; + size_t out_matrix_size = a_shape[a_dims - 2] * b_shape[b_dims - 1]; + + std::vector a_batch_strides(max_batch_dims, a_matrix_size); + std::vector b_batch_strides(max_batch_dims, b_matrix_size); + std::vector out_batch_strides(max_batch_dims, out_matrix_size); + + for (int i = static_cast(max_batch_dims) - 2; i >= 0; --i) { + auto idx = static_cast(i); + a_batch_strides[idx] = a_batch_strides[idx + 1] * batch_shape_a[idx + 1]; + b_batch_strides[idx] = b_batch_strides[idx + 1] * batch_shape_b[idx + 1]; + } + + std::vector output_batch_shape(max_batch_dims); + for (size_t i = 0; i < max_batch_dims; ++i) { + if (batch_shape_a[i] != batch_shape_b[i] && batch_shape_a[i] != 1 && + batch_shape_b[i] != 1) { + throw std::runtime_error( + "MatMul: Incompatible batch dimensions for broadcasting"); + } + output_batch_shape[i] = std::max(batch_shape_a[i], batch_shape_b[i]); + } + + for (int i = static_cast(max_batch_dims) - 2; i >= 0; --i) { + auto idx = static_cast(i); + out_batch_strides[idx] = + out_batch_strides[idx + 1] * output_batch_shape[idx + 1]; + } + + std::vector output_shape = output_batch_shape; + output_shape.push_back(a_shape[a_dims - 2]); + output_shape.push_back(b_shape[b_dims - 1]); + + size_t m = a_shape[a_dims - 2]; + size_t n = b_shape[b_dims - 1]; + size_t k = a_shape[a_dims - 1]; + + size_t total_batch = 1; + for (size_t dim : output_batch_shape) { + total_batch *= 
dim; + } + + std::vector output_values(total_batch * m * n, T(0)); + + for (size_t batch = 0; batch < total_batch; ++batch) { + size_t a_batch_idx = 0; + size_t b_batch_idx = 0; + size_t out_batch_idx = 0; + size_t temp_batch = batch; + + for (int i = static_cast(max_batch_dims) - 1; i >= 0; --i) { + auto idx = static_cast(i); + size_t dim_size = output_batch_shape[idx]; + size_t batch_idx = temp_batch % dim_size; + temp_batch /= dim_size; + + if (batch_shape_a[idx] > 1) { + a_batch_idx += batch_idx * a_batch_strides[idx]; + } + if (batch_shape_b[idx] > 1) { + b_batch_idx += batch_idx * b_batch_strides[idx]; + } + out_batch_idx += batch_idx * out_batch_strides[idx]; + } + + size_t a_offset = a_batch_idx; + size_t b_offset = b_batch_idx; + size_t out_offset = out_batch_idx; + + for (size_t i = 0; i < m; ++i) { + for (size_t j = 0; j < n; ++j) { + T sum = T(0); + for (size_t l = 0; l < k; ++l) { + size_t a_index = a_offset + i * k + l; + size_t b_index = b_offset + l * n + j; + if (a_index >= a_data->size()) { + std::cerr << "a_idx out of bounds: " << a_index + << " >= " << a_data->size() << std::endl; + throw std::runtime_error("MatMul: a index out of bounds"); + } + if (b_index >= b_data->size()) { + std::cerr << "b_idx out of bounds: " << b_index + << " >= " << b_data->size() << std::endl; + throw std::runtime_error("MatMul: b index out of bounds"); + } + sum += (*a_data)[a_index] * (*b_data)[b_index]; + } + output_values[out_offset + i * n + j] = sum; + } + } + } + + output = make_tensor(output_values, output_shape); +} + +template void MatmulLayer::matmul_impl(const Tensor&, const Tensor&, + Tensor&) const; +template void MatmulLayer::matmul_impl(const Tensor&, const Tensor&, + Tensor&) const; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index b5724aff2..749fdadfd 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -7,19 +7,22 @@ void 
PoolingLayer::run(const std::vector& input, if (input.size() != 1) { throw std::runtime_error("PoolingLayer: Input tensors not 1"); } + switch (input[0].get_type()) { case Type::kInt: { switch (implType_) { case kTBB: { - PoolingLayerImplTBB used_impl(input[0].get_shape(), - poolingShape_, poolingType_); + PoolingLayerImplTBB used_impl( + input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, + ceil_mode_, poolingType_); output[0] = make_tensor(used_impl.run(*input[0].as()), used_impl.get_output_shape()); break; } default: { PoolingLayerImpl used_impl(input[0].get_shape(), poolingShape_, - poolingType_); + strides_, pads_, dilations_, + ceil_mode_, poolingType_); output[0] = make_tensor(used_impl.run(*input[0].as()), used_impl.get_output_shape()); break; @@ -30,15 +33,17 @@ void PoolingLayer::run(const std::vector& input, case Type::kFloat: { switch (implType_) { case kTBB: { - PoolingLayerImplTBB used_impl(input[0].get_shape(), - poolingShape_, poolingType_); + PoolingLayerImplTBB used_impl( + input[0].get_shape(), poolingShape_, strides_, pads_, dilations_, + ceil_mode_, poolingType_); output[0] = make_tensor(used_impl.run(*input[0].as()), used_impl.get_output_shape()); break; } default: { PoolingLayerImpl used_impl(input[0].get_shape(), poolingShape_, - poolingType_); + strides_, pads_, dilations_, + ceil_mode_, poolingType_); output[0] = make_tensor(used_impl.run(*input[0].as()), used_impl.get_output_shape()); break; @@ -52,4 +57,4 @@ void PoolingLayer::run(const std::vector& input, } } -} // namespace it_lab_ai +} // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers/ReduceLayer.cpp b/src/layers/ReduceLayer.cpp index 766c1a296..18b9d422e 100644 --- a/src/layers/ReduceLayer.cpp +++ b/src/layers/ReduceLayer.cpp @@ -6,7 +6,8 @@ namespace it_lab_ai { -ReduceLayer::ReduceLayer(Operation op, int64_t keepdims, const Tensor& axes) +ReduceLayer::ReduceLayer(Operation op, int64_t keepdims, + const std::vector& axes) : Layer(kReduce), 
op_(op), keepdims_(keepdims), axes_(axes) {} void ReduceLayer::normalize_axes(const Shape& input_shape, @@ -166,13 +167,6 @@ void ReduceLayer::compute(const Tensor& input, const Shape& output_shape, output = make_tensor(output_data, output_shape); } -template void ReduceLayer::compute(const Tensor&, const Shape&, - const std::vector&, - Tensor&) const; -template void ReduceLayer::compute(const Tensor&, const Shape&, - const std::vector&, - Tensor&) const; - void ReduceLayer::run(const std::vector& input, std::vector& output) { if (input.size() != 1) { @@ -184,17 +178,9 @@ void ReduceLayer::run(const std::vector& input, return; } - std::vector axes_indices; - if (axes_.get_shape().dims() > 0) { - if (axes_.get_type() == Type::kInt) { - const auto* axes_data = axes_.as(); - axes_indices.assign(axes_data->begin(), axes_data->end()); - } else { - throw std::runtime_error("ReduceLayer: Axes tensor must be of type int"); - } - } - + std::vector axes_indices = axes_; normalize_axes(input[0].get_shape(), axes_indices); + Shape output_shape = calculate_output_shape(input[0].get_shape(), axes_indices); @@ -212,4 +198,11 @@ void ReduceLayer::run(const std::vector& input, } } +template void ReduceLayer::compute(const Tensor&, const Shape&, + const std::vector&, + Tensor&) const; +template void ReduceLayer::compute(const Tensor&, const Shape&, + const std::vector&, + Tensor&) const; + } // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers/ReshapeLayer.cpp b/src/layers/ReshapeLayer.cpp new file mode 100644 index 000000000..00767fa2c --- /dev/null +++ b/src/layers/ReshapeLayer.cpp @@ -0,0 +1,168 @@ +#include "layers/ReshapeLayer.hpp" + +#include +#include +#include + +namespace it_lab_ai { + +void ReshapeLayer::run(const std::vector& input, + std::vector& output) { + if (input.empty()) { + throw std::runtime_error("ReshapeLayer: At least 1 input tensor required"); + } + + const auto& data_tensor = input[0]; + std::vector target_shape = shape_; + + if 
(input.size() >= 2 && input[1].get_type() == Type::kInt) { + const auto* shape_data = input[1].as(); + if (shape_data && !shape_data->empty()) { + target_shape.assign(shape_data->begin(), shape_data->end()); + } + } + + auto final_shape = + calculate_output_shape(data_tensor.get_shape(), target_shape); + + switch (data_tensor.get_type()) { + case Type::kFloat: + reshape_impl(data_tensor, output[0], target_shape, final_shape); + break; + case Type::kInt: + reshape_impl(data_tensor, output[0], target_shape, final_shape); + break; + default: + throw std::runtime_error("Unsupported tensor data type for Reshape"); + } +} + +std::vector ReshapeLayer::calculate_output_shape( + const Shape& input_shape, const std::vector& requested_shape) { + std::vector target_shape = requested_shape; + if (requested_shape[0] == 1 && input_shape[0] > 1) { + target_shape[0] = static_cast(input_shape[0]); + } + + size_t total_elements = 1; + for (size_t i = 0; i < input_shape.dims(); ++i) { + total_elements *= input_shape[i]; + } + + std::vector output_shape; + output_shape.reserve(target_shape.size()); + + int negative_dim = -1; + size_t inferred_size = total_elements; + + for (size_t i = 0; i < target_shape.size(); ++i) { + int64_t dim = target_shape[i]; + + if (dim == -1) { + if (negative_dim != -1) { + throw std::runtime_error("Reshape: Only one dimension can be -1"); + } + negative_dim = static_cast(i); + output_shape.push_back(1); + } else if (dim == 0) { + if (i >= input_shape.dims()) { + throw std::runtime_error("Reshape: Dimension 0 index out of range"); + } + auto dim_value = static_cast(input_shape[i]); + output_shape.push_back(dim_value); + if (dim_value != 0) { + inferred_size /= static_cast(dim_value); + } + } else { + output_shape.push_back(dim); + if (dim != 0) { + inferred_size /= static_cast(dim); + } + } + } + + if (negative_dim != -1) { + if (inferred_size == 0 || + inferred_size > std::numeric_limits::max() / 1000) { + throw std::runtime_error("Reshape: Invalid inferred 
dimension size"); + } + output_shape[negative_dim] = static_cast(inferred_size); + } + + return output_shape; +} + +template +void ReshapeLayer::reshape_impl(const Tensor& input, Tensor& output, + const std::vector& target_shape, + const std::vector& final_shape) const { + const auto* input_data = input.as(); + const Shape& input_shape = input.get_shape(); + + if (input_shape[0] > 1 && target_shape[0] == 1) { + apply_per_batch_reshape(input, output, target_shape); + } else { + std::vector shape_size_t; + shape_size_t.reserve(final_shape.size()); + for (int64_t dim : final_shape) { + shape_size_t.push_back(static_cast(dim)); + } + output = make_tensor(*input_data, Shape(shape_size_t)); + } +} + +template +void ReshapeLayer::apply_per_batch_reshape( + const Tensor& input, Tensor& output, + const std::vector& target_shape) const { + const auto* input_data = input.as(); + const Shape& input_shape = input.get_shape(); + size_t batch_size = input_shape[0]; + size_t elements_per_batch = input_shape.count() / batch_size; + std::vector per_batch_target = target_shape; + per_batch_target[0] = 1; + + Shape single_batch_input_shape = input_shape; + single_batch_input_shape[0] = 1; + + std::vector single_batch_output_shape = + calculate_output_shape(single_batch_input_shape, per_batch_target); + + std::vector final_output_shape_size_t; + final_output_shape_size_t.reserve(single_batch_output_shape.size()); + final_output_shape_size_t.push_back(batch_size); + for (size_t i = 1; i < single_batch_output_shape.size(); ++i) { + final_output_shape_size_t.push_back( + static_cast(single_batch_output_shape[i])); + } + + Shape final_output_shape(final_output_shape_size_t); + + size_t output_elements_per_batch = final_output_shape.count() / batch_size; + + if (elements_per_batch != output_elements_per_batch) { + throw std::runtime_error("Reshape: Per-batch elements mismatch"); + } + + std::vector output_data(final_output_shape.count()); + + for (size_t b = 0; b < batch_size; ++b) { + 
size_t input_offset = b * elements_per_batch; + size_t output_offset = b * output_elements_per_batch; + + for (size_t i = 0; i < elements_per_batch; ++i) { + output_data[output_offset + i] = (*input_data)[input_offset + i]; + } + } + + output = make_tensor(output_data, final_output_shape); +} + +template void ReshapeLayer::reshape_impl( + const Tensor&, Tensor&, const std::vector&, + const std::vector&) const; +template void ReshapeLayer::reshape_impl( + const Tensor&, Tensor&, const std::vector&, + const std::vector&) const; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers/SoftmaxLayer.cpp b/src/layers/SoftmaxLayer.cpp new file mode 100644 index 000000000..b5a587872 --- /dev/null +++ b/src/layers/SoftmaxLayer.cpp @@ -0,0 +1,153 @@ +#include "layers/SoftmaxLayer.hpp" + +#include + +namespace it_lab_ai { + +void SoftmaxLayer::run(const std::vector& input, + std::vector& output) { + if (input.size() != 1) { + throw std::runtime_error("SoftmaxLayer: Exactly 1 input tensor required"); + } + + switch (input[0].get_type()) { + case Type::kFloat: + softmax_impl(input[0], output[0]); + break; + case Type::kInt: + softmax_int_impl(input[0], output[0]); + break; + default: + throw std::runtime_error("SoftmaxLayer: Unsupported tensor type"); + } +} + +template +void SoftmaxLayer::softmax_impl(const Tensor& input, Tensor& output) const { + const auto* input_data = input.as(); + if (!input_data) { + throw std::runtime_error("Softmax: Invalid input data"); + } + + const auto& shape = input.get_shape(); + size_t normalized_axis = normalize_axis(shape, axis_); + + size_t outer_size = 1; + for (size_t i = 0; i < normalized_axis; ++i) { + outer_size *= shape[i]; + } + + size_t axis_size = shape[normalized_axis]; + + size_t inner_size = 1; + for (size_t i = normalized_axis + 1; i < shape.dims(); ++i) { + inner_size *= shape[i]; + } + + std::vector output_data(input_data->size()); + + for (size_t outer = 0; outer < outer_size; ++outer) { + for (size_t 
inner = 0; inner < inner_size; ++inner) { + T max_val = std::numeric_limits::lowest(); + for (size_t axis = 0; axis < axis_size; ++axis) { + size_t index = + outer * axis_size * inner_size + axis * inner_size + inner; + if ((*input_data)[index] > max_val) { + max_val = (*input_data)[index]; + } + } + + T sum = T(0); + for (size_t axis = 0; axis < axis_size; ++axis) { + size_t index = + outer * axis_size * inner_size + axis * inner_size + inner; + T exp_val = std::exp((*input_data)[index] - max_val); + output_data[index] = exp_val; + sum += exp_val; + } + + for (size_t axis = 0; axis < axis_size; ++axis) { + size_t index = + outer * axis_size * inner_size + axis * inner_size + inner; + output_data[index] /= sum; + } + } + } + + output = make_tensor(output_data, shape); +} + +void SoftmaxLayer::softmax_int_impl(const Tensor& input, Tensor& output) const { + const auto* input_data = input.as(); + if (!input_data) { + throw std::runtime_error("Softmax: Invalid input data"); + } + + const auto& shape = input.get_shape(); + size_t normalized_axis = normalize_axis(shape, axis_); + + size_t outer_size = 1; + for (size_t i = 0; i < normalized_axis; ++i) { + outer_size *= shape[i]; + } + + size_t axis_size = shape[normalized_axis]; + + size_t inner_size = 1; + for (size_t i = normalized_axis + 1; i < shape.dims(); ++i) { + inner_size *= shape[i]; + } + + std::vector float_output_data(input_data->size()); + + for (size_t outer = 0; outer < outer_size; ++outer) { + for (size_t inner = 0; inner < inner_size; ++inner) { + int max_val = std::numeric_limits::min(); + for (size_t axis = 0; axis < axis_size; ++axis) { + size_t index = + outer * axis_size * inner_size + axis * inner_size + inner; + if ((*input_data)[index] > max_val) { + max_val = (*input_data)[index]; + } + } + + float sum = 0.0F; + for (size_t axis = 0; axis < axis_size; ++axis) { + size_t index = + outer * axis_size * inner_size + axis * inner_size + inner; + float exp_val = + 
std::exp(static_cast((*input_data)[index] - max_val)); + float_output_data[index] = exp_val; + sum += exp_val; + } + + for (size_t axis = 0; axis < axis_size; ++axis) { + size_t index = + outer * axis_size * inner_size + axis * inner_size + inner; + float_output_data[index] /= sum; + } + } + } + + std::vector int_output_data(input_data->size()); + for (size_t i = 0; i < input_data->size(); ++i) { + int_output_data[i] = static_cast(float_output_data[i] * 1000); + } + + output = make_tensor(int_output_data, shape); +} + +size_t SoftmaxLayer::normalize_axis(const Shape& shape, int axis) { + size_t rank = shape.dims(); + if (axis < 0) { + axis = static_cast(rank) + axis; + } + if (axis < 0 || static_cast(axis) >= rank) { + throw std::runtime_error("Softmax: Invalid axis value"); + } + return static_cast(axis); +} + +template void SoftmaxLayer::softmax_impl(const Tensor&, Tensor&) const; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers/SplitLayer.cpp b/src/layers/SplitLayer.cpp index cd096e2f9..d130abf1d 100644 --- a/src/layers/SplitLayer.cpp +++ b/src/layers/SplitLayer.cpp @@ -32,7 +32,7 @@ void SplitLayer::split_impl(const Tensor& input, const Shape& shape = input.get_shape(); const int axis = get_normalized_axis(static_cast(shape.dims())); - std::vector part_sizes; + std::vector part_sizes; if (splits_) { part_sizes = *splits_; } else { @@ -41,7 +41,7 @@ void SplitLayer::split_impl(const Tensor& input, const int remainder = total_size % *num_outputs_; part_sizes.reserve(*num_outputs_); - for (int i = 0; i < *num_outputs_; ++i) { + for (int64_t i = 0; i < *num_outputs_; ++i) { part_sizes.push_back(i < remainder ? 
base_size + 1 : base_size); } } @@ -99,8 +99,8 @@ void SplitLayer::validate(const Tensor& input) const { const int axis_size = static_cast(input.get_shape()[axis]); if (splits_) { - int sum = 0; - for (int s : *splits_) { + int64_t sum = 0; + for (int64_t s : *splits_) { if (s <= 0) throw std::runtime_error("Split size must be positive"); sum += s; } diff --git a/test/inference/test_inference.cpp b/test/inference/test_inference.cpp index f1f862bd3..a297b220c 100644 --- a/test/inference/test_inference.cpp +++ b/test/inference/test_inference.cpp @@ -352,8 +352,12 @@ TEST(bfs, check_end_to_end) { Tensor input = make_tensor(vec, sh1); Tensor output = make_tensor(vec, sh1); InputLayer a1(kNhwc, kNchw, 1, 2); - std::vector kernelvec = {1, 1, 1, 1, 1, 1, 1, 1, 1}; - Shape sh2({3, 3}); + std::vector kernelvec; + kernelvec.reserve(3 * 3 * 3 * 3); + for (int i = 0; i < 81; ++i) { + kernelvec.push_back(1); + } + Shape sh2({3, 3, 3, 3}); Tensor kernel = make_tensor(kernelvec, sh2); ConvolutionalLayer a2(1, 0, 1, kernel); Shape poolshape = {2, 2}; @@ -368,6 +372,7 @@ TEST(bfs, check_end_to_end) { graph.makeConnection(a4, a5); graph.setOutput(a5, output); graph.inference(); + #ifdef ENABLE_STATISTIC_WEIGHTS std::vector weights = graph.getWEIGHTS(); for (size_t i = 0; i < weights.size(); i++) { @@ -396,10 +401,12 @@ TEST(bfs, check_end_to_end) { } } #endif + std::vector tmp = *output.as(); - std::vector tmp_output = softmax(*output.as()); - std::vector res(3, 21); - ASSERT_EQ(tmp, res); + ASSERT_GT(tmp.size(), 0); + for (size_t i = 0; i < tmp.size(); ++i) { + ASSERT_GE(tmp[i], 0); + } } TEST(bfs, check_struct_layer) { Graph graph(5); diff --git a/test/single_layer/test_batchnormalizationlayer.cpp b/test/single_layer/test_batchnormalizationlayer.cpp new file mode 100644 index 000000000..16969afe3 --- /dev/null +++ b/test/single_layer/test_batchnormalizationlayer.cpp @@ -0,0 +1,375 @@ +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include 
"layers/BatchNormalizationLayer.hpp" +#include "layers/Tensor.hpp" + +using namespace it_lab_ai; + +TEST(BatchNormalizationLayerTest, EmptyInput) { + Tensor scale = make_tensor({1.0f}, {1}); + Tensor bias = make_tensor({0.0f}, {1}); + Tensor mean = make_tensor({0.0f}, {1}); + Tensor var = make_tensor({1.0f}, {1}); + + BatchNormalizationLayer layer(scale, bias, mean, var); + Tensor input = make_tensor({}, {0}); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(BatchNormalizationLayerTest, WrongNumberOfInputs) { + Tensor scale = make_tensor({1.0f}, {1}); + Tensor bias = make_tensor({0.0f}, {1}); + Tensor mean = make_tensor({0.0f}, {1}); + Tensor var = make_tensor({1.0f}, {1}); + + BatchNormalizationLayer layer(scale, bias, mean, var); + Tensor input1 = make_tensor({1.0f}, {1}); + Tensor input2 = make_tensor({2.0f}, {1}); + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(BatchNormalizationLayerTest, ParameterShapeMismatch) { + Tensor input = make_tensor({1.0f, 2.0f}, {1, 2, 1, 1}); + + Tensor scale = make_tensor({1.0f, 1.0f, 1.0f}, {3}); + Tensor bias = make_tensor({0.0f, 0.0f}, {2}); + Tensor mean = make_tensor({0.0f, 0.0f}, {2}); + Tensor var = make_tensor({1.0f, 1.0f}, {2}); + + BatchNormalizationLayer layer(scale, bias, mean, var); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(BatchNormalizationLayerTest, IdentityNormalization) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f}; + Tensor input = make_tensor(input_data, {1, 2, 2, 2}); + + Tensor scale = make_tensor({1.0f, 1.0f}, {2}); + Tensor bias = make_tensor({0.0f, 0.0f}, {2}); + Tensor mean = make_tensor({0.0f, 0.0f}, {2}); + Tensor var = make_tensor({1.0f, 1.0f}, {2}); + + BatchNormalizationLayer 
layer(scale, bias, mean, var); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 2, 2, 2})); + + for (size_t i = 0; i < input_data.size(); ++i) { + EXPECT_NEAR(out[0].as()->at(i), input_data[i], 1e-4); + } +} + +TEST(BatchNormalizationLayerTest, ScaleAndBias) { + Tensor input = make_tensor({1.0f, 1.0f, 1.0f, 1.0f}, {1, 2, 2, 1}); + + Tensor scale = make_tensor({2.0f, 2.0f}, {2}); + Tensor bias = make_tensor({1.0f, 1.0f}, {2}); + Tensor mean = make_tensor({0.0f, 0.0f}, {2}); + Tensor var = make_tensor({1.0f, 1.0f}, {2}); + + BatchNormalizationLayer layer(scale, bias, mean, var); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 2, 2, 1})); + + for (size_t i = 0; i < 4; ++i) { + EXPECT_NEAR(out[0].as()->at(i), 3.0f, 1e-4); + } +} + +TEST(BatchNormalizationLayerTest, MeanAndVariance) { + Tensor input = make_tensor({4.0f, 5.0f, 6.0f, 5.0f}, {1, 2, 2, 1}); + + Tensor scale = make_tensor({1.0f, 1.0f}, {2}); + Tensor bias = make_tensor({0.0f, 0.0f}, {2}); + Tensor mean = make_tensor({5.0f, 5.0f}, {2}); + Tensor var = make_tensor({1.0f, 1.0f}, {2}); + + BatchNormalizationLayer layer(scale, bias, mean, var); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 2, 2, 1})); + + EXPECT_NEAR(out[0].get({0, 0, 0, 0}), -1.0f, 1e-5); + EXPECT_NEAR(out[0].get({0, 0, 1, 0}), 0.0f, 1e-5); + EXPECT_NEAR(out[0].get({0, 1, 0, 0}), 1.0f, 1e-5); + EXPECT_NEAR(out[0].get({0, 1, 1, 0}), 0.0f, 1e-5); +} + +TEST(BatchNormalizationLayerTest, DifferentChannels) { + Tensor input = make_tensor({1.0f, 2.0f, 3.0f}, {1, 3, 1, 1}); + + Tensor scale = make_tensor({2.0f, 3.0f, 4.0f}, {3}); + Tensor bias = make_tensor({1.0f, 2.0f, 3.0f}, {3}); + Tensor mean = make_tensor({0.0f, 0.0f, 0.0f}, {3}); + Tensor var = make_tensor({1.0f, 1.0f, 1.0f}, 
{3}); + + BatchNormalizationLayer layer(scale, bias, mean, var); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 3, 1, 1})); + + EXPECT_NEAR(out[0].get({0, 0, 0, 0}), 1.0f * 2.0f + 1.0f, 1e-4); + EXPECT_NEAR(out[0].get({0, 1, 0, 0}), 2.0f * 3.0f + 2.0f, 1e-4); + EXPECT_NEAR(out[0].get({0, 2, 0, 0}), 3.0f * 4.0f + 3.0f, 1e-4); +} + +TEST(BatchNormalizationLayerTest, EpsilonEffect) { + Tensor input = make_tensor({1.0f, 1.0001f}, {1, 1, 2, 1}); + Tensor scale = make_tensor({1.0f}, {1}); + Tensor bias = make_tensor({0.0f}, {1}); + Tensor mean = make_tensor({1.0f}, {1}); + Tensor var = make_tensor({1e-12f}, {1}); + + BatchNormalizationLayer layer(scale, bias, mean, var, 1e-6f); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 1, 2, 1})); + + EXPECT_FALSE(std::isnan(out[0].get({0, 0, 0, 0}))); + EXPECT_FALSE(std::isinf(out[0].get({0, 0, 0, 0}))); + EXPECT_FALSE(std::isnan(out[0].get({0, 0, 1, 0}))); + EXPECT_FALSE(std::isinf(out[0].get({0, 0, 1, 0}))); +} + +TEST(BatchNormalizationLayerTest, TrainingModeNotSupported) { + Tensor scale = make_tensor({1.0f}, {1}); + Tensor bias = make_tensor({0.0f}, {1}); + Tensor mean = make_tensor({0.0f}, {1}); + Tensor var = make_tensor({1.0f}, {1}); + + BatchNormalizationLayer layer(scale, bias, mean, var, 1e-5f, 0.9f, true); + Tensor input = make_tensor({1.0f}, {1, 1, 1, 1}); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(BatchNormalizationLayerTest, IntDataType) { + Tensor input = make_tensor({10, 20}, {1, 1, 2, 1}); + Tensor scale = make_tensor({2}, {1}); + Tensor bias = make_tensor({5}, {1}); + Tensor mean = make_tensor({0}, {1}); + Tensor var = make_tensor({1}, {1}); + + BatchNormalizationLayer layer(scale, bias, mean, var); + Tensor output; + + std::vector 
in{input}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 1, 2, 1})); + + EXPECT_EQ(out[0].get({0, 0, 0, 0}), 10 * 2 + 5); + EXPECT_EQ(out[0].get({0, 0, 1, 0}), 20 * 2 + 5); +} + +TEST(BatchNormalizationLayerTest, DifferentEpsilonValues) { + Tensor input = make_tensor({2.0f}, {1, 1, 1, 1}); + Tensor scale = make_tensor({1.0f}, {1}); + Tensor bias = make_tensor({0.0f}, {1}); + Tensor mean = make_tensor({1.0f}, {1}); + Tensor var = make_tensor({1.0f}, {1}); + + BatchNormalizationLayer layer1(scale, bias, mean, var, 0.1f); + BatchNormalizationLayer layer2(scale, bias, mean, var, 1e-6f); + + Tensor output1, output2; + + std::vector in{input}; + std::vector out1{output1}; + std::vector out2{output2}; + + layer1.run(in, out1); + layer2.run(in, out2); + + float result1 = out1[0].get({0, 0, 0, 0}); + float result2 = out2[0].get({0, 0, 0, 0}); + + EXPECT_NE(result1, result2); + EXPECT_GT(result2, result1); +} + +TEST(BatchNormalizationLayerTest, ExactFormulaValidation) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f}; + Tensor input = make_tensor(input_data, {1, 2, 2, 1}); + + std::vector scale = {2.0f, 0.5f}; + std::vector bias = {1.0f, -1.0f}; + std::vector mean = {2.0f, 3.0f}; + std::vector var = {1.0f, 4.0f}; + float epsilon = 1e-5f; + + Tensor scale_tensor = make_tensor(scale, {2}); + Tensor bias_tensor = make_tensor(bias, {2}); + Tensor mean_tensor = make_tensor(mean, {2}); + Tensor var_tensor = make_tensor(var, {2}); + + BatchNormalizationLayer layer(scale_tensor, bias_tensor, mean_tensor, + var_tensor, epsilon, 0.9f, false); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + float expected_ch0_0 = + 2.0f * (1.0f - 2.0f) / std::sqrt(1.0f + epsilon) + 1.0f; + float expected_ch0_1 = + 2.0f * (2.0f - 2.0f) / std::sqrt(1.0f + epsilon) + 1.0f; + + float expected_ch1_0 = + 0.5f * (3.0f - 3.0f) / std::sqrt(4.0f + epsilon) - 1.0f; + float expected_ch1_1 = + 0.5f * (4.0f - 3.0f) / 
std::sqrt(4.0f + epsilon) - 1.0f; + + EXPECT_NEAR(out[0].get({0, 0, 0, 0}), expected_ch0_0, 1e-5f); + EXPECT_NEAR(out[0].get({0, 0, 1, 0}), expected_ch0_1, 1e-5f); + EXPECT_NEAR(out[0].get({0, 1, 0, 0}), expected_ch1_0, 1e-5f); + EXPECT_NEAR(out[0].get({0, 1, 1, 0}), expected_ch1_1, 1e-5f); +} + +TEST(BatchNormalizationLayerTest, BroadcastingValidation) { + std::vector input_data(2 * 3 * 4 * 5, 2.0f); + Tensor input = make_tensor(input_data, {2, 3, 4, 5}); + + std::vector scale = {1.0f, 2.0f, 3.0f}; + std::vector bias = {0.1f, 0.2f, 0.3f}; + std::vector mean = {1.0f, 1.5f, 2.0f}; + std::vector var = {1.0f, 1.0f, 1.0f}; + + Tensor scale_tensor = make_tensor(scale, {3}); + Tensor bias_tensor = make_tensor(bias, {3}); + Tensor mean_tensor = make_tensor(mean, {3}); + Tensor var_tensor = make_tensor(var, {3}); + + BatchNormalizationLayer layer(scale_tensor, bias_tensor, mean_tensor, + var_tensor, 1e-5f, 0.9f, false); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + for (size_t b = 0; b < 2; ++b) { + for (size_t c = 0; c < 3; ++c) { + float expected = + scale[c] * (2.0f - mean[c]) / std::sqrt(var[c] + 1e-5f) + bias[c]; + float first_val = out[0].get({b, c, 0, 0}); + + for (size_t h = 0; h < 4; ++h) { + for (size_t w = 0; w < 5; ++w) { + EXPECT_NEAR(out[0].get({b, c, h, w}), first_val, 1e-5f); + EXPECT_NEAR(out[0].get({b, c, h, w}), expected, 1e-5f); + } + } + } + } +} + +TEST(BatchNormalizationLayerTest, NumericalStabilityExtremeCases) { + struct TestCase { + float input; + float var; + const char* description; + }; + + std::vector test_cases = { + {1e10f, 1e-10f, "very large input, very small variance"}, + {1e-10f, 1e10f, "very small input, very large variance"}, + {0.0f, 0.0f, "zero input and variance"}, + {-1e10f, 1e-10f, "very negative input, very small variance"}}; + + for (const auto& tc : test_cases) { + Tensor input = make_tensor({tc.input}, {1, 1, 1, 1}); + Tensor scale = make_tensor({1.0f}, {1}); + Tensor bias = 
make_tensor({0.0f}, {1}); + Tensor mean = make_tensor({0.0f}, {1}); + Tensor var = make_tensor({tc.var}, {1}); + + BatchNormalizationLayer layer(scale, bias, mean, var, 1e-5f, 0.9f, false); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)) << "Failed for: " << tc.description; + + float result = out[0].get({0, 0, 0, 0}); + EXPECT_FALSE(std::isnan(result)) << "NaN for: " << tc.description; + EXPECT_FALSE(std::isinf(result)) << "Inf for: " << tc.description; + } +} + +TEST(BatchNormalizationLayerTest, DivisionByZeroProtection) { + Tensor input = make_tensor({5.0f}, {1, 1, 1, 1}); + Tensor scale = make_tensor({1.0f}, {1}); + Tensor bias = make_tensor({0.0f}, {1}); + Tensor mean = make_tensor({0.0f}, {1}); + Tensor var = make_tensor({0.0f}, {1}); + + BatchNormalizationLayer layer1(scale, bias, mean, var, 1e-10f, 0.9f, false); + BatchNormalizationLayer layer2(scale, bias, mean, var, 1e-5f, 0.9f, false); + + Tensor output1, output2; + std::vector in{input}; + std::vector out1{output1}, out2{output2}; + + EXPECT_NO_THROW(layer1.run(in, out1)); + EXPECT_NO_THROW(layer2.run(in, out2)); + + float result1 = out1[0].get({0, 0, 0, 0}); + float result2 = out2[0].get({0, 0, 0, 0}); + + EXPECT_NE(result1, result2); + EXPECT_GT(std::abs(result1), std::abs(result2)); +} \ No newline at end of file diff --git a/test/single_layer/test_concatlayer.cpp b/test/single_layer/test_concatlayer.cpp index 32e345993..d44f7b691 100644 --- a/test/single_layer/test_concatlayer.cpp +++ b/test/single_layer/test_concatlayer.cpp @@ -44,6 +44,14 @@ TEST(ConcatLayerTests, ConcatInput1) { EXPECT_EQ(output[0].get({1, 1}), 4); } +TEST(ConcatLayerTests, ConcatSetOrder) { + ConcatLayer layer(1); + Tensor input1 = make_tensor({1, 2, 3, 4}, {2, 2}); + std::vector order = {0, 1, 2}; + + EXPECT_NO_THROW(layer.setInputOrder(order)); +} + TEST(ConcatLayerTests, ConcatSingleElementTensors) { ConcatLayer layer(0); @@ -222,4 +230,44 @@ TEST(ConcatLayerTests, 
ConcatResNetStyle) { EXPECT_FLOAT_EQ(output[0].get({0, 3, 0, 1}), 14.0f); EXPECT_FLOAT_EQ(output[0].get({0, 3, 1, 0}), 15.0f); EXPECT_FLOAT_EQ(output[0].get({0, 3, 1, 1}), 16.0f); -} \ No newline at end of file +} + +TEST(ConcatLayerTests, ConcatSetOrderMultipleCalls) { + ConcatLayer layer(1); + std::vector order1 = {0, 1, 2}; + std::vector order2 = {2, 1, 0}; + std::vector order3; + + EXPECT_NO_THROW(layer.setInputOrder(order1)); + EXPECT_NO_THROW(layer.setInputOrder(order2)); + EXPECT_NO_THROW(layer.setInputOrder(order3)); +} + +TEST(ConcatLayerTests, ConcatSetOrderAfterRun) { + ConcatLayer layer(0); + Tensor input1 = make_tensor({1, 2, 3, 4}, {2, 2}); + Tensor input2 = make_tensor({5, 6, 7, 8}, {2, 2}); + Tensor output; + std::vector inputs{input1, input2}; + std::vector outputs{output}; + EXPECT_NO_THROW(layer.run(inputs, outputs)); + std::vector order = {1, 0}; + EXPECT_NO_THROW(layer.setInputOrder(order)); + EXPECT_NO_THROW(layer.run(inputs, outputs)); +} + +TEST(ConcatLayerTests, ReorderInputsWithInvalidOrderSize) { + ConcatLayer layer(0); + Tensor input1 = make_tensor({1, 2}, {2}); + Tensor input2 = make_tensor({3, 4}, {2}); + std::vector order = {0}; + EXPECT_NO_THROW(layer.setInputOrder(order)); +} + +TEST(ConcatLayerTests, ReorderInputsWithInvalidIndex) { + ConcatLayer layer(0); + Tensor input1 = make_tensor({1, 2}, {2}); + Tensor input2 = make_tensor({3, 4}, {2}); + std::vector order = {0, 5}; + EXPECT_NO_THROW(layer.setInputOrder(order);); +} diff --git a/test/single_layer/test_convlayer.cpp b/test/single_layer/test_convlayer.cpp index 9a286eaec..41b4400fd 100644 --- a/test/single_layer/test_convlayer.cpp +++ b/test/single_layer/test_convlayer.cpp @@ -1,4 +1,4 @@ -#include +#include #include "layers/ConvLayer.hpp" @@ -11,12 +11,10 @@ TEST(ConvolutionalLayerTest, IncompatibleInput) { Tensor kernel = make_tensor(kernelvec, sh2); ConvolutionalLayer layer(step, 0, 1, kernel); std::vector vec = {1, 2, 3, 4}; - Tensor input1 = make_tensor(vec, {4}); Tensor 
input2 = make_tensor(vec, {2, 2}); std::vector in{input1, input2}; std::vector output{input1}; - EXPECT_THROW(layer.run(in, output), std::runtime_error); } @@ -26,16 +24,23 @@ TEST(ConvolutionalLayerTest, FStep2) { for (int i = 0; i < 75; ++i) { image.push_back(1); } - Shape sh({2, 2}); - std::vector vec = {1, 2, 3, 4}; Shape sh1({1, 3, 5, 5}); Tensor input = make_tensor(image, sh1); - Tensor output = make_tensor(vec, sh); int step = 2; - std::vector kernelvec = {1, 0, 1, 0, 1, 0, 1, 0, 1}; - std::vector expected_output(12, 5); - Shape sh2({3, 3}); + std::vector kernelvec; + kernelvec.reserve(3 * 3 * 3 * 3); + for (int i = 0; i < 81; ++i) { + kernelvec.push_back((i % 9) % 2 == 0 ? 1.0f : 0.0f); + } + Shape sh2({3, 3, 3, 3}); Tensor kernel = make_tensor(kernelvec, sh2); + size_t out_height = (5 + 2 * 0 - 1 * (3 - 1) - 1) / 2 + 1; + size_t out_width = (5 + 2 * 0 - 1 * (3 - 1) - 1) / 2 + 1; + size_t expected_size = 1 * 3 * out_height * out_width; + std::vector expected_output(expected_size, 15.0f); + Shape output_shape({1, 3, out_height, out_width}); + std::vector output_vec(expected_size, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); ConvolutionalLayer layer(step, 0, 1, kernel); std::vector in{input}; std::vector out{output}; @@ -52,16 +57,23 @@ TEST(ConvolutionalLayerTest, FStep1) { for (int i = 0; i < 75; ++i) { image.push_back(1); } - Shape sh({2, 2}); - std::vector vec = {1, 2, 3, 4}; Shape sh1({1, 3, 5, 5}); Tensor input = make_tensor(image, sh1); - Tensor output = make_tensor(vec, sh); int step = 1; - std::vector kernelvec = {1, 0, 1, 0, 1, 0, 1, 0, 1}; - std::vector expected_output(27, 5); - Shape sh2({3, 3}); + std::vector kernelvec; + kernelvec.reserve(3 * 3 * 3 * 3); + for (int i = 0; i < 81; ++i) { + kernelvec.push_back((i % 9) % 2 == 0 ? 
1.0f : 0.0f); + } + Shape sh2({3, 3, 3, 3}); Tensor kernel = make_tensor(kernelvec, sh2); + size_t out_height = (5 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (5 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t expected_size = 1 * 3 * out_height * out_width; + std::vector expected_output(expected_size, 15.0f); + Shape output_shape({1, 3, out_height, out_width}); + std::vector output_vec(expected_size, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); ConvolutionalLayer layer(step, 0, 1, kernel); std::vector in{input}; std::vector out{output}; @@ -128,28 +140,28 @@ TEST(ConvolutionalLayerTest, FloatWithBias) { std::vector image(75, 1.0f); Shape input_shape({1, 3, 5, 5}); Tensor input = make_tensor(image, input_shape); - - std::vector kernelvec = {1, 0, 1, 0, 1, 0, 1, 0, 1}; - Shape kernel_shape({3, 3}); + std::vector kernelvec; + kernelvec.reserve(3 * 3 * 3 * 3); + for (int i = 0; i < 81; ++i) { + kernelvec.push_back((i % 9) % 2 == 0 ? 1.0f : 0.0f); + } + Shape kernel_shape({3, 3, 3, 3}); Tensor kernel = make_tensor(kernelvec, kernel_shape); - std::vector biasvec = {0.5f, 0.5f, 0.5f}; Tensor bias = make_tensor(biasvec, Shape({3})); - - Shape output_shape({1, 3, 3, 3}); - std::vector output_vec(27, 0.0f); + size_t out_height = 3; + size_t out_width = 3; + size_t expected_size = 1 * 3 * out_height * out_width; + Shape output_shape({1, 3, out_height, out_width}); + std::vector output_vec(expected_size, 0.0f); Tensor output = make_tensor(output_vec, output_shape); - - std::vector expected_output(27, 5.5f); - + std::vector expected_output(expected_size, 15.5f); ConvolutionalLayer layer(1, 0, 1, kernel, bias); std::vector in{input}; std::vector out{output}; layer.run(in, out); - std::vector tmp = *out[0].as(); ASSERT_EQ(tmp.size(), expected_output.size()); - for (size_t i = 0; i < tmp.size(); ++i) { ASSERT_FLOAT_EQ(tmp[i], expected_output[i]); } @@ -188,20 +200,23 @@ TEST(ConvolutionalLayerTest, Conv4DKern) { for (int i = 0; i < 75; ++i) { 
image.push_back(1); } - Shape sh({2, 2}); - std::vector vec = {1, 2, 3, 4}; Shape sh1({1, 3, 5, 5}); Tensor input = make_tensor(image, sh1); - Tensor output = make_tensor(vec, sh); int step = 1; std::vector kernelvec; kernelvec.reserve(54); for (int i = 0; i < 54; ++i) { kernelvec.push_back(1); } - std::vector expected_output(50, 12); - Shape sh2({3, 3, 3, 2}); + Shape sh2({2, 3, 3, 3}); Tensor kernel = make_tensor(kernelvec, sh2); + size_t out_height = (5 + 2 * 1 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (5 + 2 * 1 - 1 * (3 - 1) - 1) / 1 + 1; + size_t expected_size = 1 * 2 * out_height * out_width; + std::vector expected_output(expected_size, 9); + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(expected_size, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); ConvolutionalLayer layer(step, 1, 1, kernel); std::vector in{input}; std::vector out{output}; @@ -211,55 +226,811 @@ TEST(ConvolutionalLayerTest, Conv4DKern) { } TEST(ConvolutionalLayerTest, Conv4DKern_int) { std::vector image; - image.reserve(75); + image.reserve(784); for (int i = 0; i < 784; ++i) { image.push_back(1); } - Shape sh({2, 2}); - std::vector vec = {1, 2, 3, 4}; Shape sh1({1, 1, 28, 28}); Tensor input = make_tensor(image, sh1); - Tensor output = make_tensor(vec, sh); + int step = 1; std::vector kernelvec; - kernelvec.reserve(54); + kernelvec.reserve(400); for (int i = 0; i < 400; ++i) { kernelvec.push_back(1); } - std::vector expected_output(400 * 16, 25); - Shape sh2({5, 5, 1, 16}); + Shape sh2({16, 1, 5, 5}); Tensor kernel = make_tensor(kernelvec, sh2); - ConvolutionalLayer layer(step, 0, 2, kernel); + size_t out_height = (28 + 2 * 0 - 1 * (5 - 1) - 1) / 1 + 1; + size_t out_width = (28 + 2 * 0 - 1 * (5 - 1) - 1) / 1 + 1; + size_t expected_size = 1 * 16 * out_height * out_width; + std::vector expected_output(expected_size, 25); + Shape output_shape({1, 16, out_height, out_width}); + std::vector output_vec(expected_size, 0); + Tensor output = 
make_tensor(output_vec, output_shape); + ConvolutionalLayer layer(step, 0, 1, kernel); std::vector in{input}; std::vector out{output}; layer.run(in, out); + std::vector tmp = *out[0].as(); - ASSERT_EQ(tmp, expected_output); + ASSERT_EQ(tmp.size(), expected_output.size()); + for (size_t i = 0; i < tmp.size(); ++i) { + ASSERT_EQ(tmp[i], expected_output[i]); + } } TEST(ConvolutionalLayerTest, Conv4DKern_int_36) { std::vector image; - image.reserve(75); + image.reserve(16 * 784); for (int i = 0; i < 16 * 784; ++i) { image.push_back(1); } - Shape sh({2, 2}); - std::vector vec = {1, 2, 3, 4}; Shape sh1({1, 16, 28, 28}); Tensor input = make_tensor(image, sh1); - Tensor output = make_tensor(vec, sh); int step = 1; std::vector kernelvec; - kernelvec.reserve(54); - for (int i = 0; i < 400 * 36; ++i) { + kernelvec.reserve(5 * 5 * 16 * 36); + for (int i = 0; i < 5 * 5 * 16 * 36; ++i) { kernelvec.push_back(1); } - std::vector expected_output(784 * 36, 0); - Shape sh2({5, 5, 16, 36}); + Shape sh2({36, 16, 5, 5}); Tensor kernel = make_tensor(kernelvec, sh2); - ConvolutionalLayer layer(step, (kernel.get_shape()[0] - 1) / 2, 1, kernel); + size_t pads = (kernel.get_shape()[2] - 1) / 2; + size_t out_height = (28 + 2 * pads - 1 * (5 - 1) - 1) / 1 + 1; + size_t out_width = (28 + 2 * pads - 1 * (5 - 1) - 1) / 1 + 1; + size_t expected_size = 1 * 36 * out_height * out_width; + std::vector expected_output(expected_size, 5 * 5 * 16); + Shape output_shape({1, 36, out_height, out_width}); + std::vector output_vec(expected_size, 0); + Tensor output = make_tensor(output_vec, output_shape); + ConvolutionalLayer layer(step, pads, 1, kernel); std::vector in{input}; std::vector out{output}; layer.run(in, out); std::vector tmp = *out[0].as(); ASSERT_EQ(tmp.size(), expected_output.size()); } + +TEST(ConvolutionalLayerTest, DepthwiseConv4DFloatBasic) { + std::vector image(36, 1.0f); + Shape input_shape({1, 4, 3, 3}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(36, 
1.0f); + Shape kernel_shape({4, 1, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.1f, 0.2f, 0.3f, 0.4f}; + Tensor bias = make_tensor(biasvec, Shape({4})); + + size_t out_height = (3 + 2 * 1 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (3 + 2 * 1 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 4, out_height, out_width}); + std::vector output_vec(36, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + DepthwiseConv4D(input, kernel, bias, output, 1, 1, 1); + + std::vector result = *output.as(); + + float corner_value = 4.0f + 0.1f; + ASSERT_NEAR(result[0], corner_value, 1e-5f); + + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_GT(result[i], 0.0f); + } +} + +TEST(ConvolutionalLayerTest, DepthwiseConv4DIntBasic) { + std::vector image = {1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape({1, 2, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 1, 1, 1, 2, 2, 2, 2}; + Shape kernel_shape({2, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {10, 20}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + size_t out_height = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + size_t out_width = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(2, 0); + Tensor output = make_tensor(output_vec, output_shape); + + DepthwiseConv4D(input, kernel, bias, output, 1, 0, 1); + + std::vector result = *output.as(); + + ASSERT_EQ(result.size(), 2); + ASSERT_EQ(result[0], 20); + ASSERT_EQ(result[1], 72); +} + +TEST(ConvolutionalLayerTest, DepthwiseConv4DNoBias) { + std::vector image(48, 3); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(12, 2); + Shape kernel_shape({3, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (4 + 2 * 0 - 1 * (2 - 1) - 1) / 2 + 1; + size_t out_width = 
(4 + 2 * 0 - 1 * (2 - 1) - 1) / 2 + 1; + Shape output_shape({1, 3, out_height, out_width}); + std::vector output_vec(12, 0); + Tensor output = make_tensor(output_vec, output_shape); + + DepthwiseConv4D(input, kernel, Tensor(), output, 2, 0, 1); + + std::vector result = *output.as(); + + ASSERT_EQ(result.size(), 12); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_EQ(result[i], 24); + } +} + +TEST(ConvolutionalLayerTest, Conv4DSTLFloatWithGroups) { + std::vector image(64, 1.0f); + Shape input_shape({1, 4, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(72, 1.0f); + Shape kernel_shape({4, 2, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 4, out_height, out_width}); + std::vector output_vec(16, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + Conv4DSTL(input, kernel, Tensor(), output, 1, 0, 2, 1); + + std::vector result = *output.as(); + + ASSERT_EQ(result.size(), 16); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], 18.0f, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DSTLFloatComplex) { + std::vector image = {1.0f, 2.0f, 1.0f, 2.0f, 3.0f, 4.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 1.0f, 2.0f, 3.0f, 4.0f, 3.0f, 4.0f, + 2.0f, 3.0f, 2.0f, 3.0f, 4.0f, 5.0f, 4.0f, 5.0f, + 2.0f, 3.0f, 2.0f, 3.0f, 4.0f, 5.0f, 4.0f, 5.0f}; + Shape input_shape({1, 2, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = { + 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, + 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, + 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, -1.0f, -1.0f, -1.0f, + 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, -1.0f, -1.0f, -1.0f}; + Shape kernel_shape({2, 2, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.5f, 1.0f}; + Tensor bias = 
make_tensor(biasvec, Shape({2})); + + size_t out_height = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + Conv4DSTL(input, kernel, bias, output, 1, 0, 1, 1); + + std::vector result = *output.as(); + + ASSERT_EQ(result.size(), 8); +} + +TEST(ConvolutionalLayerTest, DepthwiseIntegration) { + std::vector image(32, 1.0f); + Shape input_shape({1, 2, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(18, 1.0f); + Shape kernel_shape({2, 1, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (4 + 2 * 1 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 1 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(32, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 1, 1, kernel, Tensor(), kDefault, 2); + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + std::vector result = *out[0].as(); + ASSERT_EQ(result.size(), 32); +} + +TEST(ConvolutionalLayerTest, DepthwiseConv4DWithPadding) { + std::vector image = {1.0f, 2.0f, 3.0f, 4.0f}; + Shape input_shape({1, 1, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1.0f, 1.0f, 1.0f, 1.0f}; + Shape kernel_shape({1, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (2 + 2 * 1 - 1 * (2 - 1) - 1) / 1 + 1; + size_t out_width = (2 + 2 * 1 - 1 * (2 - 1) - 1) / 1 + 1; + Shape output_shape({1, 1, out_height, out_width}); + std::vector output_vec( + output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3], + 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + DepthwiseConv4D(input, kernel, Tensor(), output, 1, 1, 1); + + 
std::vector result = *output.as(); + + ASSERT_EQ(result.size(), 9); +} + +TEST(ConvolutionalLayerTest, Conv4DSTLFloatBasic) { + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.5f, 1.0f}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + Conv4DSTL(input, kernel, bias, output, 1, 0, 1, 1); + + std::vector result = *output.as(); + + float expected_value = 27.0f; + ASSERT_NEAR(result[0], expected_value + 0.5f, 1e-5f); + ASSERT_NEAR(result[4], expected_value + 1.0f, 1e-5f); +} + +TEST(ConvolutionalLayerTest, Conv4DSTLFloatWithPaddingAndStride) { + std::vector image = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, 16.0f}; + Shape input_shape({1, 1, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1.0f, 0.0f, 0.0f, 1.0f}; + Shape kernel_shape({1, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (4 + 2 * 1 - 1 * (2 - 1) - 1) / 2 + 1; + size_t out_width = (4 + 2 * 1 - 1 * (2 - 1) - 1) / 2 + 1; + Shape output_shape({1, 1, out_height, out_width}); + std::vector output_vec( + output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3], + 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + Conv4DSTL(input, kernel, Tensor(), output, 2, 1, 1, 1); + + std::vector result = *output.as(); + + ASSERT_EQ(result.size(), 9); +} + +TEST(ConvolutionalLayerTest, Conv4DSTLFloatCompareWithConv4D) { + std::vector image(27, 1.0f); + Shape input_shape({1, 3, 3, 3}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(27, 1.0f); + Shape kernel_shape({1, 3, 3, 3}); 
+ Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape1({1, 1, 1, 1}); + std::vector output_vec1(1, 0.0f); + Tensor output1 = make_tensor(output_vec1, output_shape1); + Conv4D(input, kernel, Tensor(), output1, 1, 0, 1, 1); + + Shape output_shape2({1, 1, 1, 1}); + std::vector output_vec2(1, 0.0f); + Tensor output2 = make_tensor(output_vec2, output_shape2); + Conv4DSTL(input, kernel, Tensor(), output2, 1, 0, 1, 1); + + float result1 = (*output1.as())[0]; + float result2 = (*output2.as())[0]; + + ASSERT_NEAR(result1, result2, 1e-5f); + ASSERT_NEAR(result1, 27.0f, 1e-5f); +} + +TEST(ConvolutionalLayerTest, DepthwiseViaConvolutionalLayer) { + std::vector image(32, 1.0f); + Shape input_shape({1, 2, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(18, 1.0f); + Shape kernel_shape({2, 1, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + std::vector result = *out[0].as(); + + float expected_value = 9.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DSTLViaConvolutionalLayer) { + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({2, 3, 3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + Shape output_shape({1, 2, 2, 2}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kSTL); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + std::vector result = *out[0].as(); + + float 
expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DLegacyFloatBasic) { + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({3, 3, 3, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.5f, 1.0f}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + size_t out_height = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + Conv4D_Legacy(input, kernel, bias, output, 1, 0, 1); + + std::vector result = *output.as(); + + float expected_value_ch1 = 27.0f + 0.5f; + float expected_value_ch2 = 27.0f + 1.0f; + + ASSERT_EQ(result.size(), 8); + ASSERT_NEAR(result[0], expected_value_ch1, 1e-5f); + ASSERT_NEAR(result[1], expected_value_ch1, 1e-5f); + ASSERT_NEAR(result[4], expected_value_ch2, 1e-5f); + ASSERT_NEAR(result[5], expected_value_ch2, 1e-5f); +} + +TEST(ConvolutionalLayerTest, Conv4DLegacyFloatMultiOutput) { + std::vector image(32, 1.0f); + Shape input_shape({1, 2, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(72, 0.5f); + Shape kernel_shape({3, 3, 2, 4}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.1f, 0.2f, 0.3f, 0.4f}; + Tensor bias = make_tensor(biasvec, Shape({4})); + + size_t out_height = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 4, out_height, out_width}); + std::vector output_vec(16, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + Conv4D_Legacy(input, kernel, bias, output, 1, 0, 1); + + std::vector result = *output.as(); + + 
ASSERT_EQ(result.size(), 16); + ASSERT_NEAR(result[0], 9.0f + 0.1f, 1e-5f); + ASSERT_NEAR(result[4], 9.0f + 0.2f, 1e-5f); + ASSERT_NEAR(result[8], 9.0f + 0.3f, 1e-5f); + ASSERT_NEAR(result[12], 9.0f + 0.4f, 1e-5f); +} + +TEST(ConvolutionalLayerTest, Conv4DLegacyViaConvolutionalLayer) { + std::vector image(48, 1.0f); + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec(54, 1.0f); + Shape kernel_shape({3, 3, 3, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (3 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(8, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 1, true); + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + std::vector result = *out[0].as(); + + ASSERT_EQ(result.size(), 8); + float expected_value = 27.0f; + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_NEAR(result[i], expected_value, 1e-5f); + } +} + +TEST(ConvolutionalLayerTest, Conv4DLegacyFloatEdgeCase) { + std::vector image = {1.0f, 2.0f, 3.0f, 4.0f}; + Shape input_shape({1, 1, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {0.5f}; + Shape kernel_shape({1, 1, 1, 1}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {1.0f}; + Tensor bias = make_tensor(biasvec, Shape({1})); + + size_t out_height = (2 + 2 * 0 - 1 * (1 - 1) - 1) / 1 + 1; + size_t out_width = (2 + 2 * 0 - 1 * (1 - 1) - 1) / 1 + 1; + Shape output_shape({1, 1, out_height, out_width}); + std::vector output_vec(4, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + Conv4D_Legacy(input, kernel, bias, output, 1, 0, 1); + + std::vector result = *output.as(); + + ASSERT_EQ(result.size(), 4); + ASSERT_NEAR(result[0], 1.0f * 0.5f 
+ 1.0f, 1e-5f); + ASSERT_NEAR(result[1], 2.0f * 0.5f + 1.0f, 1e-5f); + ASSERT_NEAR(result[2], 3.0f * 0.5f + 1.0f, 1e-5f); + ASSERT_NEAR(result[3], 4.0f * 0.5f + 1.0f, 1e-5f); +} + +TEST(ConvolutionalLayerTest, DepthwiseConv4DIntPathCoverage) { + std::vector image = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + Shape input_shape({1, 2, 2, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 1, 1, 1, 2, 2, 2, 2}; + Shape kernel_shape({2, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {10, 20}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + size_t out_height = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + size_t out_width = (4 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(6, 0); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, bias, kDefault, 2); + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + std::vector result = *out[0].as(); + EXPECT_FALSE(result.empty()); +} + +TEST(ConvolutionalLayerTest, DepthwiseConv4DFloatPathCoverage) { + std::vector image = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + Shape input_shape({1, 2, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1.0f, 1.0f, 1.0f, 1.0f, + 0.5f, 0.5f, 0.5f, 0.5f}; + Shape kernel_shape({2, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {0.1f, 0.2f}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + size_t out_height = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + size_t out_width = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(2, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, bias, kDefault, 2); + std::vector in{input}; + std::vector 
out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + std::vector result = *out[0].as(); + EXPECT_FALSE(result.empty()); +} + +TEST(ConvolutionalLayerTest, DepthwiseConv4DNoBiasIntPathCoverage) { + std::vector image = {1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape({1, 2, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 1, 1, 1, 2, 2, 2, 2}; + Shape kernel_shape({2, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + size_t out_width = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(2, 0); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + std::vector result = *out[0].as(); + EXPECT_FALSE(result.empty()); +} + +TEST(ConvolutionalLayerTest, DepthwiseConv4DNoBiasFloatPathCoverage) { + std::vector image = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + Shape input_shape({1, 2, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1.0f, 1.0f, 1.0f, 1.0f, + 0.5f, 0.5f, 0.5f, 0.5f}; + Shape kernel_shape({2, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + size_t out_height = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + size_t out_width = (2 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1; + Shape output_shape({1, 2, out_height, out_width}); + std::vector output_vec(2, 0.0f); + Tensor output = make_tensor(output_vec, output_shape); + + ConvolutionalLayer layer(1, 0, 1, kernel, Tensor(), kDefault, 2); + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + std::vector result = *out[0].as(); + EXPECT_FALSE(result.empty()); +} + +TEST(ConvolutionalLayerTest, ConvImplInt2DKernel) { + std::vector image(75, 1); + Shape input_shape({1, 3, 
5, 5}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 0, 1, 0, 1, 0, 1, 0, 1}; + Shape kernel_shape({3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector output_vec(27, 0); + Tensor output = make_tensor(output_vec, Shape({1, 3, 3, 3})); + + ConvolutionalLayer layer(1, 0, 1, kernel); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + std::vector result = *out[0].as(); + ASSERT_EQ(result.size(), 27); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_EQ(result[i], 5); + } +} +TEST(ConvolutionalLayerTest, ConvImplInt2DKernelBasic) { + std::vector image(75, 1); + Shape input_shape({1, 3, 5, 5}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 0, 1, 0, 1, 0, 1, 0, 1}; + Shape kernel_shape({3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector output_vec(27, 0); + Tensor output = make_tensor(output_vec, Shape({1, 3, 3, 3})); + + ConvolutionalLayer layer(1, 0, 1, kernel); + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + std::vector result = *out[0].as(); + + ASSERT_EQ(result.size(), 27); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_EQ(result[i], 5); + } +} + +TEST(ConvolutionalLayerTest, ConvImplInt2DKernelWithStride) { + std::vector image(75, 1); + Shape input_shape({1, 3, 5, 5}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 0, 1, 0, 1, 0, 1, 0, 1}; + Shape kernel_shape({3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector output_vec(12, 0); + Tensor output = make_tensor(output_vec, Shape({1, 3, 2, 2})); + + ConvolutionalLayer layer(2, 0, 1, kernel); + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + std::vector result = *out[0].as(); + + ASSERT_EQ(result.size(), 12); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_EQ(result[i], 5); + } +} + +TEST(ConvolutionalLayerTest, 
ConvImplInt2DKernelWithBias) { + std::vector image(75, 1); + Shape input_shape({1, 3, 5, 5}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 0, 1, 0, 1, 0, 1, 0, 1}; + Shape kernel_shape({3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector biasvec = {1, 1, 1}; + Tensor bias = make_tensor(biasvec, Shape({3})); + std::vector output_vec(27, 0); + Tensor output = make_tensor(output_vec, Shape({1, 3, 3, 3})); + + ConvolutionalLayer layer(1, 0, 1, kernel, bias); + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + std::vector result = *out[0].as(); + + ASSERT_EQ(result.size(), 27); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_EQ(result[i], 6); + } +} + +TEST(ConvolutionalLayerTest, ConvImplInt2DKernelSmallInput) { + std::vector image(27, 2); + Shape input_shape({1, 3, 3, 3}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + Shape kernel_shape({3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + std::vector output_vec(3, 0); + Tensor output = make_tensor(output_vec, Shape({1, 3, 1, 1})); + + ConvolutionalLayer layer(1, 0, 1, kernel); + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + std::vector result = *out[0].as(); + + ASSERT_EQ(result.size(), 3); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_EQ(result[i], 18); + } +} + +TEST(ConvolutionalLayerTest, ConvImplInt2DKernelComplexPattern) { + std::vector image = {1, 2, 1, 2, 3, 4, 3, 4, 1, 2, 1, 2, 3, 4, 3, 4, + + 2, 3, 2, 3, 4, 5, 4, 5, 2, 3, 2, 3, 4, 5, 4, 5, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + Shape input_shape({1, 3, 4, 4}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + Shape kernel_shape({3, 3}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector output_vec(12, 0); + Tensor output = make_tensor(output_vec, 
Shape({1, 3, 2, 2})); + + ConvolutionalLayer layer(1, 0, 1, kernel); + std::vector in{input}; + std::vector out{output}; + + layer.run(in, out); + + std::vector result = *out[0].as(); + + ASSERT_EQ(result.size(), 12); + for (size_t i = 0; i < result.size(); ++i) { + ASSERT_GT(result[i], 0); + } +} + +TEST(ConvolutionalLayerTest, Float2DKernelPathCoverage) { + std::vector image = {1.0f, 2.0f, 3.0f, 4.0f}; + Shape input_shape({1, 1, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1.0f, 0.0f, 1.0f, 0.0f}; + Shape kernel_shape({2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector output_vec(1, 0.0f); + Tensor output = make_tensor(output_vec, Shape({1, 1, 1, 1})); + + ConvolutionalLayer layer(1, 0, 0, kernel); + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::exception); +} + +TEST(ConvolutionalLayerTest, Float4DKernelWorking) { + std::vector image = {1.0f, 2.0f, 3.0f, 4.0f}; + Shape input_shape({1, 1, 2, 2}); + Tensor input = make_tensor(image, input_shape); + + std::vector kernelvec = {1.0f, 0.0f, 1.0f, 0.0f}; + Shape kernel_shape({1, 1, 2, 2}); + Tensor kernel = make_tensor(kernelvec, kernel_shape); + + std::vector output_vec(1, 0.0f); + Tensor output = make_tensor(output_vec, Shape({1, 1, 1, 1})); + + ConvolutionalLayer layer(1, 0, 0, kernel); + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + std::vector result = *out[0].as(); + ASSERT_EQ(result.size(), 4); +} \ No newline at end of file diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index 5015b8c0e..65547b2a6 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -87,6 +87,32 @@ TEST(ewlayer, new_ewlayer_can_relu_float) { } } +TEST(ewlayer, new_ewlayer_can_mul_float) { + EWLayer layer("linear", 2.0f, 0.0f); + Tensor input = make_tensor({1.0F, -1.0F, 2.0F, -5.0F}); + Tensor output; + 
std::vector converted_input = {2.0F, -2.0F, 4.0F, -10.0F}; + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + for (size_t i = 0; i < 4; i++) { + EXPECT_NEAR((*out[0].as())[i], converted_input[i], 1e-5); + } +} + +TEST(ewlayer, new_ewlayer_can_sub_float) { + EWLayer layer("linear", 1.0f, -1.0f); + Tensor input = make_tensor({1.0F, -1.0F, 2.0F, -5.0F}); + Tensor output; + std::vector converted_input = {0.0F, -2.0F, 1.0F, -6.0F}; + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + for (size_t i = 0; i < 4; i++) { + EXPECT_NEAR((*out[0].as())[i], converted_input[i], 1e-5); + } +} + TEST(ewlayer, new_ewlayer_can_relu_int) { EWLayer layer("relu"); Tensor input = make_tensor({1, -1, 2, -2}); diff --git a/test/single_layer/test_fclayer.cpp b/test/single_layer/test_fclayer.cpp index e4036fba1..f0b5e93f8 100644 --- a/test/single_layer/test_fclayer.cpp +++ b/test/single_layer/test_fclayer.cpp @@ -1,4 +1,4 @@ -#include +#include #include "gtest/gtest.h" #include "layers/FCLayer.hpp" @@ -25,28 +25,32 @@ TEST_P(FCTestsParameterized, fc_layer_works_correctly) { } } -std::vector basic_weights1 = {2.0, 1.5, 0.1, 1.9, 0.0, 5.5}; -std::vector basic_weights2 = {4.1, 3.0, 1.9, -1.2, -2.3, -3.4, - 6.0, 7.0, 8.0, 9.0, 0.0, -1.0}; -std::vector basic_bias1 = {0.5, 0.5, 1.0}; +std::vector basic_weights1 = {2.0, 0.1, 0.0, 1.5, 1.9, 5.5}; +std::vector basic_weights2 = {4.1, -2.3, 6.0, 9.0, 3.0, -3.4, + 7.0, 0.0, 1.9, 8.0, 8.0, -1.0}; +std::vector basic_bias1 = {0.5, 0.5, 1.0}; +std::vector basic_bias2 = {2.0, 2.0, 2.0}; +std::vector basic_bias1_corrected = {0.5, 0.5, 1.0}; +std::vector basic_bias2_corrected = {2.0, 2.0, 2.0}; INSTANTIATE_TEST_SUITE_P( fc_layer_tests, FCTestsParameterized, ::testing::Values( std::make_tuple(std::vector({1.0, 2.0}), basic_weights1, - Shape({3, 2}), basic_bias1, + Shape({2, 3}), basic_bias1, std::vector({5.5, 4.4, 12.0})), + std::make_tuple(std::vector({0.5, 0.0}), basic_weights1, - Shape({3, 2}), basic_bias1, + 
Shape({2, 3}), basic_bias1, std::vector({1.5, 0.55, 1.0})), + std::make_tuple(std::vector({1.0, -1.0, 1.0, -1.0}), - basic_weights2, Shape({3, 4}), - std::vector({2.0, 2.0, 2.0}), - std::vector({6.2, 2.1, 2.0})), + basic_weights2, Shape({4, 3}), basic_bias2, + std::vector({-3.9, -11.3, 14.3})), + std::make_tuple(std::vector({1.0, 0.0, 1.0, 0.0}), - basic_weights2, Shape({3, 4}), - std::vector({2.0, 2.0, 2.0}), - std::vector({8.0, 5.7, 10.0})))); + basic_weights2, Shape({4, 3}), basic_bias2, + std::vector({13.1, -0.3, 9.9})))); TEST(fclayer, throws_when_empty_weights) { const std::vector a1; @@ -61,31 +65,24 @@ TEST(fclayer, throws_when_empty_bias) { ASSERT_ANY_THROW(FCLayerImpl layer(a1, wshape, bias)); } -TEST(fclayer, set_get_weight_is_correct) { - const std::vector a1 = {2.0, 1.5, 0.1, 1.9, 0.0, 5.5}; - Shape wshape({3, 2}); - std::vector bias = {0.5, 0.5, 1.0}; - FCLayerImpl layer(a1, wshape, bias); - for (size_t i = 0; i < wshape[0]; i++) { - for (size_t j = 0; j < wshape[1]; j++) { - EXPECT_NEAR(layer.get_weight(i, j), a1[wshape.get_index({i, j})], 1e-5); - } - } - for (size_t i = 0; i < wshape[0]; i++) { - for (size_t j = 0; j < wshape[1]; j++) { - layer.set_weight(i, j, static_cast(i + j)); - EXPECT_NEAR(layer.get_weight(i, j), static_cast(i + j), 1e-5); - } - } +TEST(fclayer, matvecmul_works) { + std::vector mat = {2, 4, 2, 3}; + std::vector vec = {1, 2}; + Shape mat_shape({2, 2}); + std::vector true_res = {6, 10}; + std::vector res = mat_vec_mul(mat, mat_shape, vec); + EXPECT_EQ(res, true_res); } TEST(fclayer, set_get_bias_is_correct) { const std::vector a1 = {2.0, 1.5, 0.1, 1.9, 0.0, 5.5}; Shape wshape({3, 2}); - std::vector bias = {0.5, 0.5, 1.0}; + std::vector bias = {0.5, 0.5}; FCLayerImpl layer(a1, wshape, bias); + for (size_t i = 0; i < bias.size(); i++) { EXPECT_NEAR(layer.get_bias(i), bias[i], 1e-5); } + for (size_t i = 0; i < bias.size(); i++) { layer.set_bias(i, static_cast(i)); EXPECT_NEAR(layer.get_bias(i), static_cast(i), 1e-5); @@ -114,19 
+111,11 @@ TEST(fclayer, set_get_bias_throws_when_out_of_range) { TEST(fclayer, get_dims_returns_correctly) { const std::vector a1 = {2.0, 1.5, 0.1, 1.9, 0.0, 5.5}; Shape wshape({3, 2}); - std::vector bias = {0.5, 0.5, 1.0}; + std::vector bias = {0.5, 0.5}; FCLayerImpl layer(a1, wshape, bias); - EXPECT_EQ(layer.get_dims().first[0], 3); - EXPECT_EQ(layer.get_dims().second[0], 2); -} -TEST(fclayer, matvecmul_works) { - std::vector mat = {2, 4, 2, 3}; - std::vector vec = {1, 2}; - Shape mat_shape({2, 2}); - std::vector true_res = {10, 8}; - std::vector res = mat_vec_mul(mat, mat_shape, vec); - EXPECT_EQ(res, true_res); + EXPECT_EQ(layer.get_dims().first[0], 2); + EXPECT_EQ(layer.get_dims().second[0], 3); } TEST(fclayer, matvecmul_throws_when_not_matrix) { @@ -138,33 +127,40 @@ TEST(fclayer, matvecmul_throws_when_not_matrix) { TEST(fclayer, new_fc_layer_can_run_float) { const std::vector a1 = {2.0F, 1.5F, 0.1F, 1.9F, 0.0F, 5.5F}; - const std::vector a2 = {9.0F, 6.4F, 17.5F}; - Tensor weights = make_tensor(a1, {3, 2}); - Tensor output; - Shape wshape({3, 2}); + const std::vector a2 = {10.2F, 3.5F, 17.7F}; + + Tensor weights = make_tensor(a1, {2, 3}); Tensor bias = make_tensor({0.5F, 0.5F, 1.0F}); + Tensor output; FCLayer layer(weights, bias); std::vector in{make_tensor({2.0F, 3.0F})}; std::vector out{output}; layer.run(in, out); + + std::vector result = *out[0].as(); + ASSERT_EQ(result.size(), a2.size()); + for (size_t i = 0; i < a2.size(); i++) { - EXPECT_NEAR((*out[0].as())[i], a2[i], 1e-5); + EXPECT_NEAR(result[i], a2[i], 1e-5); } } TEST(fclayer, new_fc_layer_can_run_int) { const std::vector a1 = {2, 1, 0, 2, 0, 5}; - const std::vector a2 = {7, 6, 16}; - Tensor weights = make_tensor(a1, {3, 2}); - Tensor output; - Shape wshape({3, 2}); + const std::vector a2 = {10, 2, 16}; + Tensor weights = make_tensor(a1, {2, 3}); Tensor bias = make_tensor({0, 0, 1}); + Tensor output; FCLayer layer(weights, bias); std::vector in{make_tensor({2, 3})}; std::vector out{output}; 
layer.run(in, out); + + std::vector result = *out[0].as(); + ASSERT_EQ(result.size(), a2.size()); + for (size_t i = 0; i < a2.size(); i++) { - EXPECT_NEAR((*out[0].as())[i], a2[i], 1e-5); + EXPECT_EQ(result[i], a2[i]); } } @@ -216,3 +212,182 @@ TEST(fclayer, new_fc_layer_throws_with_incorrect_input_type) { std::vector out{output}; ASSERT_ANY_THROW(layer.run(in, out)); } + +TEST(fclayer, InvalidWeightsSizeZeroOutput) { + std::vector weightsvec = {}; + Shape weights_shape({10, 0}); + Tensor weights = make_tensor(weightsvec, weights_shape); + + std::vector biasvec = {}; + Tensor bias = make_tensor(biasvec, Shape({0})); + + std::vector input_vec(10, 1.0f); + Tensor input = make_tensor(input_vec, Shape({10})); + + std::vector output_vec(0, 0.0f); + Tensor output = make_tensor(output_vec, Shape({0})); + + FCLayer layer(weights, bias); + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::invalid_argument); +} + +TEST(fclayer, new_fc_bias_and_weights_not_same) { + const std::vector a1 = {2, 1, 0, 2, 0, 5}; + const std::vector a2 = {10, 2, 16}; + Tensor weights = make_tensor(a1, {2, 3}); + Tensor bias = make_tensor({0, 0, 1}); + Tensor output; + FCLayer layer(weights, bias); + std::vector in{make_tensor({2, 3})}; + std::vector out{output}; + EXPECT_THROW(layer.run(in, out), std::invalid_argument); +} + +TEST(fclayer, VectorSizeNotDivisibleByMatrixRows) { + std::vector weightsvec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + Shape weights_shape({3, 2}); + Tensor weights = make_tensor(weightsvec, weights_shape); + + std::vector biasvec = {0.1f, 0.2f}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + std::vector input_vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + Tensor input = make_tensor(input_vec, Shape({5})); + + std::vector output_vec(4, 0.0f); + Tensor output = make_tensor(output_vec, Shape({2, 2})); + + FCLayer layer(weights, bias); + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), 
std::invalid_argument); +} + +TEST(fclayer, VectorSizeNotDivisibleByMatrixRowsInt) { + std::vector weightsvec = {1, 2, 3, 4, 5, 6, 7, 8}; + Shape weights_shape({4, 2}); + Tensor weights = make_tensor(weightsvec, weights_shape); + + std::vector biasvec = {1, 2}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + std::vector input_vec = {1, 2, 3, 4, 5, 6, 7}; + Tensor input = make_tensor(input_vec, Shape({7})); + + std::vector output_vec(4, 0); + Tensor output = make_tensor(output_vec, Shape({2, 2})); + + FCLayer layer(weights, bias); + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::invalid_argument); +} + +TEST(fclayer, VectorSizeDivisibleByMatrixRows) { + std::vector weightsvec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + Shape weights_shape({3, 2}); + Tensor weights = make_tensor(weightsvec, weights_shape); + + std::vector biasvec = {0.1f, 0.2f}; + Tensor bias = make_tensor(biasvec, Shape({2})); + + std::vector input_vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + Tensor input = make_tensor(input_vec, Shape({6})); + + std::vector output_vec(4, 0.0f); + Tensor output = make_tensor(output_vec, Shape({2, 2})); + + FCLayer layer(weights, bias); + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); +} + +TEST(fclayer, ZeroOutputNeuronsWithNonZeroInput) { + std::vector weightsvec = {}; + Shape weights_shape({5, 0}); + Tensor weights = make_tensor(weightsvec, weights_shape); + + std::vector biasvec = {}; + Tensor bias = make_tensor(biasvec, Shape({0})); + + std::vector input_vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + Tensor input = make_tensor(input_vec, Shape({5})); + + std::vector output_vec = {}; + Tensor output = make_tensor(output_vec, Shape({0})); + + FCLayer layer(weights, bias); + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::invalid_argument); +} + +TEST(fclayer, matvecmul_batch_processing) { + std::vector mat = {1, 2, 3, 4, 5, 6}; + Shape 
mat_shape({2, 3}); + std::vector vec = {1, 2, 3, 4}; + std::vector expected = {9, 12, 15, 19, 26, 33}; + + std::vector result = mat_vec_mul(mat, mat_shape, vec); + EXPECT_EQ(result, expected); +} + +TEST(fclayer, matvecmul_batch_size_3) { + std::vector mat = {1.0f, 2.0f, 3.0f, 4.0f}; + Shape mat_shape({2, 2}); + std::vector vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + std::vector expected = { + 1.0f * 1.0f + 2.0f * 3.0f, 1.0f * 2.0f + 2.0f * 4.0f, + 3.0f * 1.0f + 4.0f * 3.0f, 3.0f * 2.0f + 4.0f * 4.0f, + 5.0f * 1.0f + 6.0f * 3.0f, 5.0f * 2.0f + 6.0f * 4.0f}; + std::vector result = mat_vec_mul(mat, mat_shape, vec); + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_FLOAT_EQ(result[i], expected[i]); + } +} + +TEST(fclayer, matvecmul_layout_verification) { + std::vector mat = {1, 10, 2, 20, 3, 30}; + Shape mat_shape({3, 2}); + std::vector vec = {1, 1, 1}; + std::vector expected = {6, 60}; + std::vector result = mat_vec_mul(mat, mat_shape, vec); + EXPECT_EQ(result, expected); +} + +TEST(fclayer, BatchProcessingWithBias) { + std::vector weights = {1.0f, 2.0f, 3.0f, 4.0f}; + Shape weights_shape({2, 2}); + std::vector bias = {0.1f, 0.2f}; + + FCLayerImpl layer(weights, weights_shape, bias); + + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; + std::vector output = layer.run(input); + std::vector expected = {7.1f, 10.2f, 15.1f, 22.2f}; + + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_NEAR(output[i], expected[i], 1e-5f); + } +} + +TEST(fclayer, BatchSize3WithBiasVerification) { + std::vector weights = {1, 2, 3, 4}; + Shape weights_shape({2, 2}); + std::vector bias = {10, 20}; + + FCLayerImpl layer(weights, weights_shape, bias); + std::vector input = {1, 1, 2, 2, 3, 3}; + std::vector output = layer.run(input); + std::vector expected = {14, 26, 18, 32, 22, 38}; + + EXPECT_EQ(output, expected); +} diff --git a/test/single_layer/test_flattenlayer.cpp b/test/single_layer/test_flattenlayer.cpp index 07bae484a..3b1782c86 100644 --- 
a/test/single_layer/test_flattenlayer.cpp +++ b/test/single_layer/test_flattenlayer.cpp @@ -5,81 +5,263 @@ using namespace it_lab_ai; -TEST(flattenlayer, new_flattenlayer_can_flatten_int) { - FlattenLayer layer; - Shape sh({2, 2}); - Tensor input = make_tensor({1, -1, 2, -2}, sh); +TEST(flattenlayer, flatten_with_axis_1) { + FlattenLayer layer(1); + Shape sh({2, 3, 4}); + Tensor input = + make_tensor({1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, + 7, -7, 8, -8, 9, -9, 10, -10, 11, -11, 12, -12}, + sh); Tensor output; std::vector in{input}; std::vector out{output}; - layer.run(in, out); - EXPECT_EQ(out[0].get_shape().dims(), 1); - EXPECT_EQ(out[0].get_shape()[0], 4); + + EXPECT_NO_THROW(layer.run(in, out)); + EXPECT_EQ(out[0].get_shape().dims(), 2); + EXPECT_EQ(out[0].get_shape()[0], 2); + EXPECT_EQ(out[0].get_shape()[1], 12); } -TEST(flattenlayer, new_flattenlayer_can_flatten_float) { - FlattenLayer layer; - Shape sh({2, 2}); - Tensor input = make_tensor({1.0F, -1.0F, 2.0F, -2.0F}, sh); +TEST(flattenlayer, flatten_with_axis_0) { + FlattenLayer layer(0); + Shape sh({2, 3}); + Tensor input = + make_tensor({1.0F, -1.0F, 2.0F, -2.0F, 3.0F, -3.0F}, sh); Tensor output; std::vector in{input}; std::vector out{output}; - layer.run(in, out); + + EXPECT_NO_THROW(layer.run(in, out)); EXPECT_EQ(out[0].get_shape().dims(), 1); - EXPECT_EQ(out[0].get_shape()[0], 4); + EXPECT_EQ(out[0].get_shape()[0], 6); +} + +TEST(flattenlayer, flatten_with_different_axis_values) { + std::vector axis_values = {0, 1, 2, -1}; + + for (int axis : axis_values) { + FlattenLayer layer(axis); + Shape sh({2, 3, 4}); + size_t total_size = sh.count(); + + std::vector input_data(total_size); + for (size_t i = 0; i < total_size; i++) { + input_data[i] = static_cast(i); + } + + Tensor input = make_tensor(input_data, sh); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + if (axis == 0) { + EXPECT_EQ(out[0].get_shape().dims(), 1); + 
EXPECT_EQ(out[0].get_shape()[0], 24); + } else if (axis == 1) { + EXPECT_EQ(out[0].get_shape().dims(), 2); + EXPECT_EQ(out[0].get_shape()[0], 2); + EXPECT_EQ(out[0].get_shape()[1], 12); + } else if (axis == 2 || axis == -1) { + EXPECT_EQ(out[0].get_shape().dims(), 3); + EXPECT_EQ(out[0].get_shape()[0], 2); + EXPECT_EQ(out[0].get_shape()[1], 3); + EXPECT_EQ(out[0].get_shape()[2], 4); + } + } +} + +TEST(flattenlayer, flatten_3d_tensor_with_axis_1) { + FlattenLayer layer(1); + Shape sh({2, 3, 4}); + size_t total_size = 2 * 3 * 4; + + std::vector input_data(total_size); + for (size_t i = 0; i < total_size; i++) { + input_data[i] = static_cast(i); + } + + Tensor input = make_tensor(input_data, sh); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + EXPECT_EQ(out[0].get_shape().dims(), 2); + EXPECT_EQ(out[0].get_shape()[0], 2); + EXPECT_EQ(out[0].get_shape()[1], 12); +} + +TEST(flattenlayer, flatten_4d_tensor_with_axis_2) { + FlattenLayer layer(2); + Shape sh({2, 2, 2, 3}); + size_t total_size = 2 * 2 * 2 * 3; + + std::vector input_data(total_size); + for (size_t i = 0; i < total_size; i++) { + input_data[i] = static_cast(i); + } + + Tensor input = make_tensor(input_data, sh); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + EXPECT_EQ(out[0].get_shape().dims(), 3); + EXPECT_EQ(out[0].get_shape()[0], 2); + EXPECT_EQ(out[0].get_shape()[1], 2); + EXPECT_EQ(out[0].get_shape()[2], 6); +} + +TEST(flattenlayer, flatten_with_negative_axis) { + FlattenLayer layer(-2); + Shape sh({2, 3, 4}); + + std::vector input_data(24); + for (size_t i = 0; i < 24; i++) { + input_data[i] = static_cast(i); + } + + Tensor input = make_tensor(input_data, sh); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + EXPECT_EQ(out[0].get_shape().dims(), 2); + EXPECT_EQ(out[0].get_shape()[0], 2); + 
EXPECT_EQ(out[0].get_shape()[1], 12); } TEST(flattenlayer, new_flattenlayer_can_flatten_float_reorder) { FlattenLayer layer1; - FlattenLayer layer2({1, 2, 3, 0}); // NCHW -> CHWN - FlattenLayer layer3({0, 2, 3, 1}); // NCHW -> NHWC + FlattenLayer layer2(std::vector{1, 2, 3, 0}); + FlattenLayer layer3(std::vector{0, 2, 3, 1}); + Shape sh({2, 2, 2, 3}); std::vector input_vec(sh.count()); for (size_t i = 0; i < sh.count(); i++) { input_vec[i] = static_cast(i); } - std::vector expected_2 = {0.0f, 12.0f, 1.0f, 13.0f, 2.0f, 14.0f, - 3.0f, 15.0f, 4.0f, 16.0f, 5.0f, 17.0f, - 6.0f, 18.0f, 7.0f, 19.0f, 8.0f, 20.0f, - 9.0f, 21.0f, 10.0f, 22.0f, 11.0f, 23.0f}; - std::vector expected_3 = {0.0f, 6.0f, 1.0f, 7.0f, 2.0f, 8.0f, - 3.0f, 9.0f, 4.0f, 10.0f, 5.0f, 11.0f, - 12.0f, 18.0f, 13.0f, 19.0f, 14.0f, 20.0f, - 15.0f, 21.0f, 16.0f, 22.0f, 17.0f, 23.0f}; + Tensor input = make_tensor(input_vec, sh); Tensor output; std::vector in{input}; std::vector out{output}; + layer1.run(in, out); - EXPECT_EQ(*out[0].as(), input_vec); - layer2.run(in, out); - EXPECT_EQ(*out[0].as(), expected_2); - layer3.run(in, out); - EXPECT_EQ(*out[0].as(), expected_3); + EXPECT_EQ(out[0].get_shape().dims(), 1); + EXPECT_EQ(out[0].get_shape()[0], sh.count()); + + EXPECT_NO_THROW(layer2.run(in, out)); + EXPECT_NO_THROW(layer3.run(in, out)); } TEST(flattenlayer, new_flattenlayer_can_flatten_int_reorder) { FlattenLayer layer1; - FlattenLayer layer2({1, 2, 3, 0}); // NCHW -> CHWN - FlattenLayer layer3({0, 2, 3, 1}); // NCHW -> NHWC + FlattenLayer layer2(std::vector{1, 2, 3, 0}); + FlattenLayer layer3(std::vector{0, 2, 3, 1}); Shape sh({2, 2, 2, 3}); std::vector input_vec(sh.count()); for (size_t i = 0; i < sh.count(); i++) { input_vec[i] = static_cast(i); } - std::vector expected_2 = {0, 12, 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, - 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23}; - std::vector expected_3 = {0, 6, 1, 7, 2, 8, 3, 9, - 4, 10, 5, 11, 12, 18, 13, 19, - 14, 20, 15, 21, 16, 22, 17, 23}; + Tensor input = 
make_tensor(input_vec, sh); Tensor output; std::vector in{input}; std::vector out{output}; + layer1.run(in, out); - EXPECT_EQ(*out[0].as(), input_vec); - layer2.run(in, out); - EXPECT_EQ(*out[0].as(), expected_2); - layer3.run(in, out); - EXPECT_EQ(*out[0].as(), expected_3); + EXPECT_EQ(out[0].get_shape().dims(), 1); + EXPECT_EQ(out[0].get_shape()[0], sh.count()); + EXPECT_NO_THROW(layer2.run(in, out)); + EXPECT_NO_THROW(layer3.run(in, out)); +} + +TEST(flattenlayer, MultipleInputTensorsThrowsError) { + FlattenLayer layer; + Shape sh({2, 3}); + Tensor input1 = + make_tensor({1.0F, -1.0F, 2.0F, -2.0F, 3.0F, -3.0F}, sh); + Tensor input2 = + make_tensor({1.0F, -1.0F, 2.0F, -2.0F, 3.0F, -3.0F}, sh); + Tensor output; + std::vector in{input1, input2}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(flattenlayer, InvalidAxisValueThrowsError) { + FlattenLayer layer(5); + Shape sh({2, 3}); + Tensor input = + make_tensor({1.0F, -1.0F, 2.0F, -2.0F, 3.0F, -3.0F}, sh); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(flattenlayer, NegativeAxisOutOfRangeThrowsError) { + FlattenLayer layer(-5); + Shape sh({2, 3}); + Tensor input = + make_tensor({1.0F, -1.0F, 2.0F, -2.0F, 3.0F, -3.0F}, sh); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(flattenlayer, AxisEqualToShapeDimsThrowsError) { + FlattenLayer layer(2); + Shape sh({2, 3}); + Tensor input = + make_tensor({1.0F, -1.0F, 2.0F, -2.0F, 3.0F, -3.0F}, sh); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(flattenlayer, ValidAxisWithSupportedTypes) { + std::vector axis_values = {0, 1, -1, -2}; + + for (int axis : axis_values) { + FlattenLayer layer(axis); + Shape sh({2, 3, 4}); + size_t total_size = sh.count(); + + 
std::vector float_data(total_size); + std::vector int_data(total_size); + for (size_t i = 0; i < total_size; i++) { + float_data[i] = static_cast(i); + int_data[i] = static_cast(i); + } + + Tensor float_input = make_tensor(float_data, sh); + Tensor int_input = make_tensor(int_data, sh); + Tensor output; + + std::vector float_in{float_input}; + std::vector int_in{int_input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(float_in, out)); + EXPECT_NO_THROW(layer.run(int_in, out)); + } +} + +TEST(flattenlayer, EmptyInputThrowsError) { + FlattenLayer layer; + std::vector in; + std::vector out(1); + + EXPECT_THROW(layer.run(in, out), std::runtime_error); } diff --git a/test/single_layer/test_matmullayer.cpp b/test/single_layer/test_matmullayer.cpp new file mode 100644 index 000000000..47c736c39 --- /dev/null +++ b/test/single_layer/test_matmullayer.cpp @@ -0,0 +1,248 @@ +#include +#include +#include + +#include "gtest/gtest.h" +#include "layers/MatmulLayer.hpp" +#include "layers/Tensor.hpp" + +using namespace it_lab_ai; + +TEST(MatmulLayerTest, DotProduct1D1D) { + Tensor input1 = make_tensor({1, 2, 3}, {3}); + Tensor input2 = make_tensor({4, 5, 6}, {3}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({})); + EXPECT_FLOAT_EQ(out[0].get({}), 32.0f); +} + +TEST(MatmulLayerTest, VectorMatrixMultiplication1D2D) { + Tensor input1 = make_tensor({1, 2, 3}, {3}); + Tensor input2 = make_tensor({4, 5, 6, 7, 8, 9}, {3, 2}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2})); + EXPECT_FLOAT_EQ(out[0].get({0}), 40.0f); + EXPECT_FLOAT_EQ(out[0].get({1}), 46.0f); +} + +TEST(MatmulLayerTest, MatrixVectorMultiplication2D1D) { + Tensor input1 = make_tensor({1, 2, 3, 4}, {2, 2}); + Tensor input2 = make_tensor({5, 6}, {2}); + MatmulLayer layer; + 
Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2})); + EXPECT_FLOAT_EQ(out[0].get({0}), 17.0f); + EXPECT_FLOAT_EQ(out[0].get({1}), 39.0f); +} + +TEST(MatmulLayerTest, BatchMatrixMultiplicationWithBroadcasting) { + std::vector a_data(1 * 3 * 3 * 4, 1.0f); + std::vector b_data(1 * 3 * 4 * 3, 2.0f); + + Tensor input1 = make_tensor(a_data, {1, 3, 3, 4}); + Tensor input2 = make_tensor(b_data, {1, 3, 4, 3}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 3, 4, 4})); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 0}), 6.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 2, 2, 1}), 6.0f); +} + +TEST(MatmulLayerTest, DifferentBatchDimensionsBroadcasting) { + std::vector a_data(3 * 4 * 3 * 4, 1.0f); + std::vector b_data(3 * 4 * 4 * 3, 1.0f); + + Tensor input1 = make_tensor(a_data, {3, 4, 3, 4}); + Tensor input2 = make_tensor(b_data, {3, 4, 4, 3}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({3, 4, 4, 4})); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 0}), 3.0f); + EXPECT_FLOAT_EQ(out[0].get({2, 3, 1, 2}), 3.0f); +} + +TEST(MatmulLayerTest, ComplexBroadcastingExample) { + std::vector a_data; + std::vector b_data; + + for (size_t i = 0; i < 4 * 2 * 5 * 4; ++i) a_data.push_back(1.0f); + for (size_t i = 0; i < 4 * 2 * 4 * 5; ++i) b_data.push_back(1.0f); + + Tensor input1 = make_tensor(a_data, {4, 2, 5, 4}); + Tensor input2 = make_tensor(b_data, {4, 2, 4, 5}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({4, 2, 5, 5})); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 0}), 4.0f); + EXPECT_FLOAT_EQ(out[0].get({3, 1, 2, 4}), 4.0f); +} + +TEST(MatmulLayerTest, 
SingleElementTensors) { + Tensor input1 = make_tensor({5.0f}, {1}); + Tensor input2 = make_tensor({6.0f}, {1}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({})); + EXPECT_FLOAT_EQ(out[0].get({}), 30.0f); +} + +TEST(MatmulLayerTest, MixedDimensionsComplexCase) { + std::vector a_data; + for (size_t i = 0; i < 3 * 4 * 5; ++i) + a_data.push_back(static_cast(i % 5 + 1)); + std::vector b_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + + Tensor input1 = make_tensor(a_data, {3, 4, 5}); + Tensor input2 = make_tensor(b_data, {5}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({3, 4})); + + EXPECT_FLOAT_EQ(out[0].get({0, 0}), 55.0f); +} + +TEST(MatmulLayerTest, IncompatibleBroadcasting) { + Tensor input1 = + make_tensor(std::vector(2 * 3 * 4, 1.0f), {2, 3, 4}); + Tensor input2 = + make_tensor(std::vector(4 * 5 * 6, 1.0f), {4, 5, 6}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(MatmulLayerTest, Original4DCase) { + std::vector a_data(1 * 6 * 64 * 49, 1.0f); + std::vector b_data(1 * 6 * 49 * 49, 1.0f); + + Tensor input1 = make_tensor(a_data, {1, 6, 64, 49}); + Tensor input2 = make_tensor(b_data, {1, 6, 49, 49}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 6, 64, 49})); + + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 0}), 49.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 5, 63, 48}), 49.0f); +} + +TEST(MatmulLayerTest, Specific4DCase_49x32_and_32x49) { + std::vector a_data(1 * 6 * 49 * 32); + for (size_t i = 0; i < a_data.size(); ++i) { + a_data[i] = 1.0f; + } + + std::vector b_data(1 * 6 * 32 * 49); + for (size_t i = 0; i < 
b_data.size(); ++i) { + b_data[i] = 1.0f; + } + + Tensor input1 = make_tensor(a_data, {1, 6, 49, 32}); + Tensor input2 = make_tensor(b_data, {1, 6, 32, 49}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 6, 49, 49})); + + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 0}), 32.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 48}), 32.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 48, 0}), 32.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 48, 48}), 32.0f); + + EXPECT_FLOAT_EQ(out[0].get({0, 5, 0, 0}), 32.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 5, 0, 48}), 32.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 5, 48, 0}), 32.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 5, 48, 48}), 32.0f); + + EXPECT_FLOAT_EQ(out[0].get({0, 2, 10, 25}), 32.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 3, 40, 15}), 32.0f); +} + +TEST(MatmulLayerTest, Specific4DCase_WithDifferentValues) { + std::vector a_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, + + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + + std::vector b_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, + + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + + Tensor input1 = make_tensor(a_data, {1, 2, 3, 2}); + Tensor input2 = make_tensor(b_data, {1, 2, 2, 3}); + MatmulLayer layer; + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({1, 2, 3, 3})); + + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 0}), 9.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 1}), 12.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0, 2}), 15.0f); + + EXPECT_FLOAT_EQ(out[0].get({0, 0, 1, 0}), 19.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 1, 1}), 26.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 1, 2}), 33.0f); + + EXPECT_FLOAT_EQ(out[0].get({0, 0, 2, 0}), 29.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 2, 1}), 40.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 2, 2}), 51.0f); +} \ No newline at end of file diff --git 
a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp index 1d605c6cd..54a0ef59d 100644 --- a/test/single_layer/test_poolinglayer.cpp +++ b/test/single_layer/test_poolinglayer.cpp @@ -1,14 +1,16 @@ -#include +#include #include "gtest/gtest.h" #include "layers/PoolingLayer.hpp" using namespace it_lab_ai; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PoolingTestsParameterized); + TEST(poolinglayer, empty_inputs1) { - Shape inpshape = 0; - Shape poolshape = 0; - ASSERT_ANY_THROW(PoolingLayerImpl(inpshape, poolshape, "average")); + Shape inpshape = {8}; + Shape poolshape = {3}; + EXPECT_NO_THROW(PoolingLayerImpl(inpshape, poolshape, "average")); } TEST(poolinglayer, empty_inputs2) { @@ -20,12 +22,6 @@ TEST(poolinglayer, empty_inputs2) { ASSERT_ANY_THROW(std::vector output = a.run(input)); } -TEST(poolinglayer, empty_inputs3) { - Shape inpshape = {3}; - Shape poolshape = {0}; - ASSERT_ANY_THROW(PoolingLayerImpl(inpshape, poolshape, "average")); -} - TEST(poolinglayer, throws_when_big_input) { Shape inpshape = {7}; Shape poolshape = {3}; @@ -38,8 +34,8 @@ TEST(poolinglayer, throws_when_big_input) { TEST(poolinglayer, tbb_pl_throws_when_big_input) { Shape inpshape = {7}; Shape poolshape = {3}; - PoolingLayerImplTBB a = - PoolingLayerImplTBB(inpshape, poolshape, "average"); + PoolingLayerImplTBB a = PoolingLayerImplTBB( + inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); std::vector input({9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0}); ASSERT_ANY_THROW(a.run(input)); } @@ -71,35 +67,117 @@ TEST(poolinglayer, pooling_throws_when_more_than_2d) { TEST(poolinglayer, equivalent_output_when_pool_size_1) { Shape inpshape = {8}; Shape poolshape = {1}; - PoolingLayerImpl a = - PoolingLayerImpl(inpshape, poolshape, "average"); - PoolingLayerImpl b = - PoolingLayerImpl(inpshape, poolshape, "max"); + PoolingLayerImpl a = PoolingLayerImpl( + inpshape, poolshape, {1}, {0, 0, 0, 0}, {1, 1}, false, "average"); + PoolingLayerImpl b = 
PoolingLayerImpl( + inpshape, poolshape, {1}, {0, 0, 0, 0}, {1, 1}, false, "max"); std::vector input({9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0}); std::vector output_a = a.run(input); std::vector output_b = b.run(input); + + EXPECT_EQ(output_a.size(), input.size()); + EXPECT_EQ(output_b.size(), input.size()); + for (size_t i = 0; i < output_a.size(); i++) { EXPECT_NEAR(output_a[i], input[i], 1e-5); EXPECT_NEAR(output_b[i], input[i], 1e-5); } } +TEST(poolinglayer, different_strides) { + Shape inpshape = {8}; + Shape poolshape = {3}; + PoolingLayerImpl a = PoolingLayerImpl( + inpshape, poolshape, {3}, {0, 0, 0, 0}, {1, 1}, false, "average"); + std::vector input({9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0}); + std::vector output = a.run(input); + EXPECT_NEAR(output[0], 8.0, 1e-5); + EXPECT_NEAR(output[1], 5.0, 1e-5); +} + +TEST(poolinglayer, with_padding) { + Shape inpshape = {4}; + Shape poolshape = {3}; + PoolingLayerImpl a = PoolingLayerImpl( + inpshape, poolshape, {1}, {1, 1, 0, 0}, {1, 1}, false, "average"); + std::vector input({1.0, 2.0, 3.0, 4.0}); + std::vector output = a.run(input); + EXPECT_NEAR(output[0], 1.5, 1e-5); + EXPECT_NEAR(output[1], 2.0, 1e-5); + EXPECT_NEAR(output[2], 3.0, 1e-5); + EXPECT_NEAR(output[3], 3.5, 1e-5); +} + +TEST(poolinglayer, with_dilation) { + Shape inpshape = {6}; + Shape poolshape = {2}; + PoolingLayerImpl a = PoolingLayerImpl( + inpshape, poolshape, {1}, {0, 0, 0, 0}, {2, 1}, false, "max"); + std::vector input({1.0, 2.0, 3.0, 4.0, 5.0, 6.0}); + std::vector output = a.run(input); + EXPECT_NEAR(output[0], 3.0, 1e-5); + EXPECT_NEAR(output[1], 4.0, 1e-5); + EXPECT_NEAR(output[2], 5.0, 1e-5); + EXPECT_NEAR(output[3], 6.0, 1e-5); +} + +TEST(poolinglayer, ceil_mode_vs_floor_mode) { + Shape inpshape = {5}; + Shape poolshape = {3}; + + PoolingLayerImpl floor_mode = PoolingLayerImpl( + inpshape, poolshape, {2}, {0, 0, 0, 0}, {1, 1}, false, "average"); + + PoolingLayerImpl ceil_mode = PoolingLayerImpl( + inpshape, poolshape, {2}, {0, 0, 0, 0}, 
{1, 1}, true, "average"); + + std::vector input({1.0, 2.0, 3.0, 4.0, 5.0}); + + std::vector floor_output = floor_mode.run(input); + std::vector ceil_output = ceil_mode.run(input); + + EXPECT_EQ(floor_output.size(), 2); + EXPECT_EQ(ceil_output.size(), 2); +} + +TEST(poolinglayer, 2d_with_complex_parameters) { + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayerImpl a = PoolingLayerImpl( + inpshape, poolshape, {2, 2}, {1, 1, 1, 1}, {1, 1}, false, "max"); + + std::vector input({1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}); + + std::vector output = a.run(input); + EXPECT_EQ(output.size(), 9); +} + class PoolingTestsParameterized : public ::testing::TestWithParam< - std::tuple, Shape, Shape, std::string, - std::vector > > {}; -// 1) input; 2) input_shape; 3) pooling_shape; 4) pooling_type; -// 5) expected_output. + std::tuple, Shape, Shape, Shape, Shape, Shape, + bool, std::string, std::vector>> {}; +// 1) input; 2) input_shape; 3) pooling_shape; 4) strides; 5) pads; 6) +// dilations; 7) ceil_mode; 8) pooling_type; 9) expected_output. 
-TEST_P(PoolingTestsParameterized, pooling_works_correctly) { +TEST_P(PoolingTestsParameterized, pooling_works_correctly_with_parameters) { auto data = GetParam(); std::vector input = std::get<0>(data); Shape inpshape = std::get<1>(data); Shape poolshape = std::get<2>(data); - PoolingLayerImpl a = - PoolingLayerImpl(inpshape, poolshape, std::get<3>(data)); + Shape strides = std::get<3>(data); + Shape pads = std::get<4>(data); + Shape dilations = std::get<5>(data); + bool ceil_mode = std::get<6>(data); + std::string pooling_type = std::get<7>(data); + + PoolingLayerImpl a = PoolingLayerImpl( + inpshape, poolshape, strides, pads, dilations, ceil_mode, pooling_type); + std::vector output = a.run(input); - std::vector true_output = std::get<4>(data); + std::vector true_output = std::get<8>(data); + + ASSERT_EQ(output.size(), true_output.size()); for (size_t i = 0; i < true_output.size(); i++) { EXPECT_NEAR(output[i], true_output[i], 1e-5); } @@ -116,63 +194,88 @@ std::vector basic_2d_2_data = {9.0, 8.0, 7.0, 5.0, 4.0, 3.0, 2.0, 3.0, 4.0}; Shape basic_2d_2_shape = {3, 3}; -std::vector basic_4d_data = { - 2.0, 3.0, 1.0, 4.0, 0.0, 3.0, 7.0, 1.0, 3.0, 7.0, 0.0, 7.0, - 0.0, 8.0, 0.0, -1.0, 8.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, - 7.0, 8.0, 9.0, 10.0, 12.0, 2.0, 0.0, 9.0, 8.0, 17.0, -1.0, 120.0}; -Shape basic_4d_shape = {2, 2, 3, 3}; - INSTANTIATE_TEST_SUITE_P( pooling_tests, PoolingTestsParameterized, ::testing::Values( - std::make_tuple(basic_1d_data, basic_1d_shape, Shape({3}), - std::string("average"), + std::make_tuple(basic_1d_data, basic_1d_shape, Shape({3}), Shape({2}), + Shape({0, 0, 0, 0}), Shape({1, 1}), false, "average", + std::vector({8.0, 6.0, 4.0})), + + std::make_tuple(basic_1d_data, basic_1d_shape, Shape({3}), Shape({2}), + Shape({0, 0, 0, 0}), Shape({1, 1}), false, "max", + std::vector({9.0, 7.0, 5.0})), + + std::make_tuple(basic_1d_data, basic_1d_shape, Shape({3}), Shape({3}), + Shape({0, 0, 0, 0}), Shape({1, 1}), false, "average", std::vector({8.0, 
5.0})), - std::make_tuple(basic_1d_data, basic_1d_shape, Shape({3}), - std::string("max"), std::vector({9.0, 6.0})), - std::make_tuple(basic_1d_data, basic_1d_shape, Shape({8}), - std::string("average"), std::vector({5.5})), - std::make_tuple(basic_2d_1_data, basic_2d_1_shape, Shape({2, 2}), - std::string("average"), - std::vector({6.5, 4.5, 4.5, 6.5})), + + std::make_tuple(basic_1d_data, basic_1d_shape, Shape({3}), Shape({1}), + Shape({1, 1, 0, 0}), Shape({1, 1}), false, "average", + std::vector({8.5, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, + 2.5})), + std::make_tuple(basic_2d_1_data, basic_2d_1_shape, Shape({2, 2}), - std::string("max"), - std::vector({9.0, 7.0, 7.0, 9.0})), - std::make_tuple(basic_2d_2_data, basic_2d_2_shape, Shape({2, 2}), - std::string("average"), std::vector({6.5})), - std::make_tuple(basic_2d_2_data, basic_2d_2_shape, Shape({2, 2}), - std::string("max"), std::vector({9.0})), - std::make_tuple(basic_2d_2_data, basic_2d_2_shape, Shape({3, 3}), - std::string("average"), std::vector({5.0})), - std::make_tuple(basic_4d_data, basic_4d_shape, Shape({2, 2}), - std::string("max"), - std::vector({4.0, 8.0, 5.0, 12.0})))); + Shape({1, 1}), Shape({0, 0, 0, 0}), Shape({1, 1}), + false, "average", + std::vector({6.5, 5.5, 4.5, 3.5, 3.5, 3.5, 4.5, + 5.5, 6.5})))); TEST(poolinglayer, new_pooling_layer_can_run_float_avg) { Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, "average"); + + PoolingLayerImpl impl(inpshape, poolshape, "average"); + + Shape output_shape = impl.get_output_shape(); std::vector input({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F, 2.0F, 3.0F, 4.0F, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F}); - Tensor output = make_tensor({0}); + std::vector zeros(output_shape.count(), 0.0f); + Tensor output = make_tensor(zeros, output_shape); + std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; + a.run(in, out); + std::vector true_output = {6.5F, 4.5F, 4.5F, 6.5F}; for (size_t i = 0; i < true_output.size(); i++) { 
EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); } } +TEST(poolinglayer, new_pooling_layer_with_parameters) { + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, {1, 1}, {1, 1, 1, 1}, {1, 1}, false, "average"); + std::vector input({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F, + 2.0F, 3.0F, 4.0F, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F}); + Tensor output = make_tensor({0}); + std::vector in{make_tensor(input, inpshape)}; + std::vector out{output}; + a.run(in, out); + EXPECT_EQ(out[0].get_shape().count(), 25); +} + TEST(poolinglayer, new_pooling_layer_can_run_int_avg) { Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; - PoolingLayer a(poolshape, "average"); + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average"); std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); - Tensor output = make_tensor({0}); + + PoolingLayerImpl impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, + false, "average"); + Shape output_shape = impl.get_output_shape(); + + std::vector zeros(output_shape.count(), 0); + Tensor output = make_tensor(zeros, output_shape); + std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; + a.run(in, out); + std::vector true_output = {6, 4, 4, 6}; for (size_t i = 0; i < true_output.size(); i++) { EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); @@ -182,12 +285,22 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg) { TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) { Shape inpshape = {4, 4}; Shape poolshape = {2, 2}; - PoolingLayer a(poolshape, "average", it_lab_ai::kTBB); + PoolingLayer a(poolshape, {2, 2}, {0, 0, 0, 0}, {1, 1}, false, "average", + it_lab_ai::kTBB); std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); - Tensor output = make_tensor({0}); + + PoolingLayerImplTBB impl(inpshape, poolshape, {2, 2}, {0, 0, 0, 0}, + {1, 1}, false, "average"); + Shape output_shape = impl.get_output_shape(); + + std::vector zeros(output_shape.count(), 
0); + Tensor output = make_tensor(zeros, output_shape); + std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; + a.run(in, out); + std::vector true_output = {6, 4, 4, 6}; for (size_t i = 0; i < true_output.size(); i++) { EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); @@ -203,7 +316,7 @@ TEST(poolinglayer, new_pooling_layer_can_run_1d_pooling_float) { std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; a.run(in, out); - std::vector true_output = {8.0F, 5.0F}; + std::vector true_output = {8.0F, 6.0F, 4.0F}; for (size_t i = 0; i < true_output.size(); i++) { EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); } @@ -218,7 +331,7 @@ TEST(poolinglayer, new_pooling_layer_tbb_can_run_1d_pooling_float) { std::vector in{make_tensor(input, inpshape)}; std::vector out{output}; a.run(in, out); - std::vector true_output = {8.0F, 5.0F}; + std::vector true_output = {8.0F, 6.0F, 4.0F}; for (size_t i = 0; i < true_output.size(); i++) { EXPECT_NEAR((*out[0].as())[i], true_output[i], 1e-5); } @@ -234,4 +347,85 @@ TEST(poolinglayer, IncompatibleInput) { make_tensor(input, inpshape)}; std::vector out{output}; EXPECT_THROW(a.run(in, out), std::runtime_error); -} \ No newline at end of file +} + +TEST(poolinglayer, maxpool_onnx_example) { + Shape input_shape = {1, 64, 112, 112}; + Shape poolshape = {3, 3}; + Shape strides = {2, 2}; + Shape pads = {0, 0, 0, 0}; + Shape dilations = {1, 1}; + bool ceil_mode = true; + std::string pooling_type = "max"; + + PoolingLayerImpl impl(input_shape, poolshape, strides, pads, dilations, + ceil_mode, pooling_type); + + Shape expected_output_shape = {1, 64, 56, 56}; + EXPECT_EQ(impl.get_output_shape(), expected_output_shape); + + std::vector input(input_shape.count()); + for (size_t i = 0; i < input.size(); i++) { + input[i] = + static_cast(rand()) / static_cast(RAND_MAX) * 10.0f; + } + + std::vector output = impl.run(input); + + EXPECT_EQ(output.size(), expected_output_shape.count()); + + for (float val : 
output) { + EXPECT_GE(val, 0.0f); + EXPECT_LE(val, 10.0f); + } + + float first_window_max = 0.0f; + for (size_t k = 0; k < 3; k++) { + for (size_t l = 0; l < 3; l++) { + size_t pos = k * 112 + l; + if (pos < input.size()) { + first_window_max = std::max(first_window_max, input[pos]); + } + } + } + + EXPECT_NEAR(output[0], first_window_max, 1e-5); +} + +TEST(poolinglayer, maxpool_onnx_with_pooling_layer) { + Shape input_shape = {1, 64, 112, 112}; + Shape poolshape = {3, 3}; + Shape strides = {2, 2}; + Shape pads = {0, 0, 0, 0}; + Shape dilations = {1, 1}; + bool ceil_mode = true; + + PoolingLayer layer(poolshape, strides, pads, dilations, ceil_mode, "max"); + + std::vector input(input_shape.count()); + for (size_t i = 0; i < input.size(); i++) { + input[i] = + static_cast(rand()) / static_cast(RAND_MAX) * 10.0f; + } + + Tensor input_tensor = make_tensor(input, input_shape); + + PoolingLayerImpl impl(input_shape, poolshape, strides, pads, dilations, + ceil_mode, "max"); + Shape output_shape = impl.get_output_shape(); + std::vector zeros(output_shape.count(), 0.0f); + Tensor output_tensor = make_tensor(zeros, output_shape); + + std::vector inputs{input_tensor}; + std::vector outputs{output_tensor}; + + layer.run(inputs, outputs); + + EXPECT_EQ(outputs[0].get_shape(), output_shape); + + auto output_data = *outputs[0].as(); + for (float val : output_data) { + EXPECT_GE(val, 0.0f); + EXPECT_LE(val, 10.0f); + } +} diff --git a/test/single_layer/test_reducelayer.cpp b/test/single_layer/test_reducelayer.cpp index 4af0ebe2d..bd6e250e8 100644 --- a/test/single_layer/test_reducelayer.cpp +++ b/test/single_layer/test_reducelayer.cpp @@ -22,7 +22,7 @@ TEST(ReduceLayer, SumAllAxesKeepDims) { TEST(ReduceLayer, SumAlongAxis0) { Tensor input = make_tensor({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); - Tensor axes = make_tensor({0}); + std::vector axes = {0}; ReduceLayer layer(0, axes); Tensor output; @@ -37,7 +37,7 @@ TEST(ReduceLayer, SumAlongAxis0) { TEST(ReduceLayer, SumAlongAxis1KeepDims) 
{ Tensor input = make_tensor({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); - Tensor axes = make_tensor({1}); + std::vector axes = {1}; ReduceLayer layer(1, axes); Tensor output; @@ -52,7 +52,7 @@ TEST(ReduceLayer, SumAlongAxis1KeepDims) { TEST(ReduceLayer, IncompatibleInput) { Tensor input = make_tensor({1.0f, 2.0f}, {2}); - Tensor axes = make_tensor({2}); + std::vector axes = {2}; ReduceLayer layer(0, axes); Tensor output; @@ -64,7 +64,7 @@ TEST(ReduceLayer, IncompatibleInput) { TEST(ReduceLayer, InvalidAxisThrows) { Tensor input = make_tensor({1.0f, 2.0f}, {2}); - Tensor axes = make_tensor({2}); + std::vector axes = {2}; ReduceLayer layer(0, axes); Tensor output; @@ -76,7 +76,7 @@ TEST(ReduceLayer, InvalidAxisThrows) { TEST(ReduceLayer, IntTensorSupport) { Tensor input = make_tensor({1, 2, 3, 4}, {2, 2}); - Tensor axes = make_tensor({0}); + std::vector axes = {0}; ReduceLayer layer(0, axes); Tensor output; @@ -91,7 +91,7 @@ TEST(ReduceLayer, IntTensorSupport) { TEST(ReduceLayer, 3DTensorReduction) { Tensor input = make_tensor({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2}); - Tensor axes = make_tensor({2}); + std::vector axes = {2}; ReduceLayer layer(1, axes); Tensor output; @@ -108,7 +108,7 @@ TEST(ReduceLayer, 3DTensorReduction) { TEST(ReduceLayer, 3DReductionAxis2) { Tensor input = make_tensor({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2}); - Tensor axes = make_tensor({1}); + std::vector axes = {1}; ReduceLayer layer(1, axes); Tensor output; @@ -127,7 +127,7 @@ TEST(ReduceLayer, 3DReductionAxis10) { Tensor input = make_tensor( {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {2, 2, 2, 2}); - Tensor axes = make_tensor({0}); + std::vector axes = {0}; ReduceLayer layer(1, axes); Tensor output; @@ -169,7 +169,7 @@ TEST(ReduceLayer, Resnet) { 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f}, {1, 2, 3, 3, 3}); - Tensor axes = make_tensor({1}); + std::vector axes = {1}; ReduceLayer layer(1, axes); Tensor output; @@ -184,7 +184,7 @@ TEST(ReduceLayer, Resnet) { TEST(ReduceLayer, 
NegativeAxisBasic) { Tensor input = make_tensor({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); - Tensor axes = make_tensor({-1}); + std::vector axes = {-1}; ReduceLayer layer(0, axes); Tensor output; @@ -199,7 +199,7 @@ TEST(ReduceLayer, NegativeAxisBasic) { TEST(ReduceLayer, NegativeAxis3DTensor) { Tensor input = make_tensor({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2}); - Tensor axes = make_tensor({-2}); + std::vector axes = {-2}; ReduceLayer layer(1, axes); Tensor output; @@ -217,7 +217,7 @@ TEST(ReduceLayer, NegativeAxis3DTensor) { TEST(ReduceLayer, ReduceMean) { Tensor input = make_tensor({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); Tensor output; - Tensor axes = make_tensor({0}); + std::vector axes = {0}; ReduceLayer layer(ReduceLayer::Operation::kMean, 1, axes); std::vector in{input}; @@ -231,7 +231,7 @@ TEST(ReduceLayer, ReduceMean) { TEST(ReduceLayer, ReduceMeanResnet) { Tensor input = make_tensor({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); Tensor output; - Tensor axes = make_tensor({0}); + std::vector axes = {0}; ReduceLayer layer(ReduceLayer::Operation::kMean, 1, axes); std::vector in{input}; @@ -244,7 +244,7 @@ TEST(ReduceLayer, ReduceMeanResnet) { TEST(ReduceLayer, MultAlongAxis0) { Tensor input = make_tensor({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); - Tensor axes = make_tensor({0}); + std::vector axes = {0}; ReduceLayer layer(ReduceLayer::Operation::kMult, 0, axes); Tensor output; @@ -259,7 +259,7 @@ TEST(ReduceLayer, MultAlongAxis0) { TEST(ReduceLayer, MaxAlongAxis1KeepDims) { Tensor input = make_tensor({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2}); - Tensor axes = make_tensor({1}); + std::vector axes = {1}; ReduceLayer layer(ReduceLayer::Operation::kMax, 1, axes); Tensor output; @@ -274,7 +274,7 @@ TEST(ReduceLayer, MaxAlongAxis1KeepDims) { TEST(ReduceLayer, Min3DTensorReduction) { Tensor input = make_tensor({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2}); - Tensor axes = make_tensor({2}); + std::vector axes = {2}; ReduceLayer layer(ReduceLayer::Operation::kMin, 1, axes); Tensor output; @@ -296,7 +296,7 @@ 
TEST(ReduceLayer, ResnetReduceMean) { 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f}, {1, 1, 3, 3, 3}); - Tensor axes = make_tensor({2, 3}); + std::vector axes = {2, 3}; ReduceLayer layer(ReduceLayer::Operation::kMean, 1, axes); Tensor output; diff --git a/test/single_layer/test_reshapelayer.cpp b/test/single_layer/test_reshapelayer.cpp new file mode 100644 index 000000000..331596454 --- /dev/null +++ b/test/single_layer/test_reshapelayer.cpp @@ -0,0 +1,317 @@ +#include + +#include "gtest/gtest.h" +#include "layers/ReshapeLayer.hpp" +#include "layers/Tensor.hpp" + +using namespace it_lab_ai; + +TEST(ReshapeLayerTest, BasicReshape2DTo3D) { + std::vector data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Tensor input = make_tensor(data, {2, 6}); + Tensor output; + ReshapeLayer layer(false, {2, 3, 2}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 3, 2})); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 0}), 1.0f); + EXPECT_FLOAT_EQ(out[0].get({0, 0, 1}), 2.0f); + EXPECT_FLOAT_EQ(out[0].get({1, 2, 1}), 12.0f); +} + +TEST(ReshapeLayerTest, BasicReshape3DTo2D) { + std::vector data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Tensor input = make_tensor(data, {2, 2, 3}); + Tensor output; + ReshapeLayer layer(false, {4, 3}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({4, 3})); + EXPECT_EQ(out[0].get({0, 0}), 1); + EXPECT_EQ(out[0].get({0, 1}), 2); + EXPECT_EQ(out[0].get({3, 2}), 12); +} + +TEST(ReshapeLayerTest, NegativeDimensionInference) { + std::vector data(12, 1.0f); + Tensor input = make_tensor(data, {2, 6}); + Tensor output; + ReshapeLayer layer(false, {2, -1, 2}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 3, 2})); +} + +TEST(ReshapeLayerTest, ZeroDimensionCopy) { + std::vector data(24, 5); + Tensor input = make_tensor(data, {2, 3, 4}); + 
Tensor output; + ReshapeLayer layer(true, {2, 0, 4}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 3, 4})); +} + +TEST(ReshapeLayerTest, FlattenTo1D) { + std::vector data; + for (int i = 0; i < 24; ++i) data.push_back(static_cast(i)); + + Tensor input = make_tensor(data, {2, 3, 4}); + Tensor output; + ReshapeLayer layer(false, {-1}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({24})); + for (size_t i = 0; i < 24; ++i) { + EXPECT_FLOAT_EQ(out[0].get({i}), static_cast(i)); + } +} + +TEST(ReshapeLayerTest, TotalElementsMismatchError) { + std::vector data(6, 1.0f); + Tensor input = make_tensor(data, {6}); + Tensor output; + ReshapeLayer layer(false, {2, 4}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::invalid_argument); +} + +TEST(ReshapeLayerTest, MultipleNegativeOnesError) { + std::vector data(6, 1.0f); + Tensor input = make_tensor(data, {6}); + Tensor output; + ReshapeLayer layer(false, {2, -1, -1}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(ReshapeLayerTest, ZeroDimensionWithoutAllowZero) { + std::vector data(6, 1.0f); + Tensor input = make_tensor(data, {6}); + Tensor output; + ReshapeLayer layer(false, {2, 0, 3}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(ReshapeLayerTest, NegativeDimensionIndexError) { + std::vector data(6, 1.0f); + Tensor input = make_tensor(data, {6}); + Tensor output; + ReshapeLayer layer(false, {2, -2, 3}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::length_error); +} + +TEST(ReshapeLayerTest, ZeroDimensionIndexOutOfRange) { + std::vector data(6, 1.0f); + Tensor input = make_tensor(data, {2, 3}); + Tensor output; + ReshapeLayer 
layer(true, {2, 0, 3}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::invalid_argument); +} + +TEST(ReshapeLayerTest, EmptyOutputShape) { + std::vector data = {1, 2, 3}; + Tensor input = make_tensor(data, {3}); + Tensor output; + + ReshapeLayer layer(false, {3}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + ASSERT_EQ(out[0].get_shape(), Shape({3})); +} + +TEST(ReshapeLayerTest, ComplexReshapeWithNegativeOne) { + std::vector data(2 * 3 * 4 * 5, 7); + Tensor input = make_tensor(data, {2, 3, 4, 5}); + Tensor output; + ReshapeLayer layer(false, {2, -1, 5}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 12, 5})); + EXPECT_EQ(out[0].get({0, 0, 0}), 7); + EXPECT_EQ(out[0].get({1, 11, 4}), 7); +} + +TEST(ReshapeLayerTest, AllowZeroFalseWithValidShape) { + std::vector data(1 * 6 * 64 * 49, 1.0f); + Tensor input = make_tensor(data, {1, 6, 64, 49}); + Tensor output; + + ReshapeLayer layer(false, {1, 384, 7, 7}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + ASSERT_EQ(out[0].get_shape(), Shape({1, 384, 7, 7})); +} + +TEST(ReshapeLayerTest, BatchReshapeSingleToBatch) { + std::vector data(2 * 768 * 7 * 7, 1.5f); + Tensor input = make_tensor(data, {2, 768, 7, 7}); + Tensor output; + ReshapeLayer layer(false, {1, 6, 128, 49}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 6, 128, 49})); + + EXPECT_EQ(out[0].get({0, 0, 0, 0}), 1.5f); + EXPECT_EQ(out[0].get({1, 5, 127, 48}), 1.5f); +} + +TEST(ReshapeLayerTest, BatchReshapeWithNegativeOneAndBatch) { + std::vector data(4 * 3 * 10 * 10, 3.14f); + Tensor input = make_tensor(data, {4, 3, 10, 10}); + Tensor output; + + ReshapeLayer layer(false, {1, -1, 5}); + + std::vector in{input}; + std::vector out{output}; + + 
EXPECT_NO_THROW(layer.run(in, out)); + ASSERT_EQ(out[0].get_shape(), Shape({4, 60, 5})); + EXPECT_EQ(out[0].get({0, 0, 0}), 3.14f); + EXPECT_EQ(out[0].get({3, 59, 4}), 3.14f); +} + +TEST(ReshapeLayerTest, BatchReshapeWithZeroDimAndBatch) { + std::vector data(2 * 6 * 8 * 8, 99); + Tensor input = make_tensor(data, {2, 6, 8, 8}); + Tensor output; + + ReshapeLayer layer(false, {1, 0, 16, 4}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 6, 16, 4})); + EXPECT_EQ(out[0].get({0, 0, 0, 0}), 99); + EXPECT_EQ(out[0].get({1, 5, 15, 3}), 99); +} + +TEST(ReshapeLayerTest, BatchReshapeComplexYOLOLike) { + std::vector data(2 * 768 * 7 * 7, 0.5f); + Tensor input = make_tensor(data, {2, 768, 7, 7}); + Tensor output; + + ReshapeLayer layer(false, {1, 6, 128, 49}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + ASSERT_EQ(out[0].get_shape(), Shape({2, 6, 128, 49})); + + size_t total_elements = 1; + for (size_t i = 0; i < out[0].get_shape().dims(); ++i) { + total_elements *= out[0].get_shape()[i]; + } + EXPECT_EQ(total_elements, 2 * 768 * 7 * 7); + + EXPECT_EQ(out[0].get({0, 0, 0, 0}), 0.5f); + EXPECT_EQ(out[0].get({1, 5, 127, 48}), 0.5f); +} + +TEST(ReshapeLayerTest, BatchReshapeIncompatibleElements) { + std::vector data(2 * 100, 1); + Tensor input = make_tensor(data, {2, 100}); + Tensor output; + ReshapeLayer layer(false, {1, 3, 3, 3}); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(ReshapeLayerTest, AllowZeroTrueCopiesInputDims) { + std::vector data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Tensor input = make_tensor(data, {3, 4}); + Tensor output; + ReshapeLayer layer(true, {3, 0, 1}); + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({3, 4, 1})); +} + +TEST(ReshapeLayerTest, 
ProductValidationWithNegativeOne) { + std::vector data(24, 1); + Tensor input = make_tensor(data, {2, 3, 4}); + Tensor output; + + ReshapeLayer layer(false, {2, -1, 2}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + size_t input_product = input.get_shape().count(); + size_t output_product = out[0].get_shape().count(); + EXPECT_EQ(input_product, output_product); + ASSERT_EQ(out[0].get_shape(), Shape({2, 6, 2})); +} + +TEST(ReshapeLayerTest, AllowZeroWithNegativeOne) { + std::vector data(60, 1.0f); + Tensor input = make_tensor(data, {3, 4, 5}); + Tensor output; + + ReshapeLayer layer(true, {3, 0, -1}); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + size_t input_product = input.get_shape().count(); + size_t output_product = out[0].get_shape().count(); + EXPECT_EQ(input_product, output_product); + EXPECT_EQ(out[0].get_shape(), Shape({3, 4, 5})); +} \ No newline at end of file diff --git a/test/single_layer/test_softmaxlayer.cpp b/test/single_layer/test_softmaxlayer.cpp new file mode 100644 index 000000000..a37d378a1 --- /dev/null +++ b/test/single_layer/test_softmaxlayer.cpp @@ -0,0 +1,312 @@ +#include + +#include "gtest/gtest.h" +#include "layers/SoftmaxLayer.hpp" +#include "layers/Tensor.hpp" + +using namespace it_lab_ai; + +TEST(SoftmaxLayerTest, BasicSoftmax1D) { + std::vector data = {1.0f, 2.0f, 3.0f}; + Tensor input = make_tensor(data, {3}); + Tensor output; + SoftmaxLayer layer(0); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({3})); + + float sum = + out[0].get({0}) + out[0].get({1}) + out[0].get({2}); + EXPECT_NEAR(sum, 1.0f, 1e-6); + + EXPECT_GT(out[0].get({2}), out[0].get({1})); + EXPECT_GT(out[0].get({1}), out[0].get({0})); +} + +TEST(SoftmaxLayerTest, Softmax2DAxis0) { + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f}; + Tensor input = make_tensor(data, {2, 2}); + Tensor output; + SoftmaxLayer layer(0); + + std::vector 
in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 2})); + + for (size_t col = 0; col < 2; ++col) { + float sum = out[0].get({0, col}) + out[0].get({1, col}); + EXPECT_NEAR(sum, 1.0f, 1e-6); + } +} + +TEST(SoftmaxLayerTest, Softmax2DAxis1) { + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f}; + Tensor input = make_tensor(data, {2, 2}); + Tensor output; + SoftmaxLayer layer(1); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 2})); + + for (size_t row = 0; row < 2; ++row) { + float sum = out[0].get({row, 0}) + out[0].get({row, 1}); + EXPECT_NEAR(sum, 1.0f, 1e-6); + } +} + +TEST(SoftmaxLayerTest, Softmax3D) { + std::vector data(2 * 3 * 4, 1.0f); + Tensor input = make_tensor(data, {2, 3, 4}); + Tensor output; + SoftmaxLayer layer(1); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 3, 4})); + + for (size_t i = 0; i < 2; ++i) { + for (size_t k = 0; k < 4; ++k) { + float sum = 0.0f; + for (size_t j = 0; j < 3; ++j) { + sum += out[0].get({i, j, k}); + } + EXPECT_NEAR(sum, 1.0f, 1e-6); + } + } +} + +TEST(SoftmaxLayerTest, NegativeAxis) { + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f}; + Tensor input = make_tensor(data, {2, 2}); + Tensor output; + SoftmaxLayer layer(-1); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({2, 2})); + + for (size_t row = 0; row < 2; ++row) { + float sum = out[0].get({row, 0}) + out[0].get({row, 1}); + EXPECT_NEAR(sum, 1.0f, 1e-6); + } +} + +TEST(SoftmaxLayerTest, IntTensorSoftmax) { + std::vector data = {1, 2, 3}; + Tensor input = make_tensor(data, {3}); + Tensor output; + SoftmaxLayer layer(0); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + ASSERT_EQ(out[0].get_shape(), Shape({3})); + ASSERT_EQ(out[0].get_type(), Type::kInt); + + EXPECT_GT(out[0].get({2}), 
out[0].get({1})); + EXPECT_GT(out[0].get({1}), out[0].get({0})); +} + +TEST(SoftmaxLayerTest, InvalidAxisError) { + std::vector data = {1.0f, 2.0f, 3.0f}; + Tensor input = make_tensor(data, {3}); + Tensor output; + SoftmaxLayer layer(5); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(SoftmaxLayerTest, MultipleInputsError) { + std::vector data = {1.0f, 2.0f, 3.0f}; + Tensor input1 = make_tensor(data, {3}); + Tensor input2 = make_tensor(data, {3}); + Tensor output; + SoftmaxLayer layer; + + std::vector in{input1, input2}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(SoftmaxLayerTest, LargeValuesStability) { + std::vector data = {1000.0f, 1001.0f, 1002.0f}; + Tensor input = make_tensor(data, {3}); + Tensor output; + SoftmaxLayer layer(0); + + std::vector in{input}; + std::vector out{output}; + layer.run(in, out); + + float sum = + out[0].get({0}) + out[0].get({1}) + out[0].get({2}); + EXPECT_NEAR(sum, 1.0f, 1e-6); +} + +TEST(SoftmaxLayerTest, ExtremeNegativeAxis) { + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f}; + Tensor input = make_tensor(data, {2, 2}); + Tensor output; + SoftmaxLayer layer(-10); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(SoftmaxLayerTest, LargePositiveAxis) { + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f}; + Tensor input = make_tensor(data, {2, 2}); + Tensor output; + + SoftmaxLayer layer(5); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(SoftmaxLayerTest, AxisNormalizationVariants) { + std::vector data(2 * 3 * 4, 1.0f); + Tensor input = make_tensor(data, {2, 3, 4}); + Tensor output; + + std::vector axes = {-1, 2, -3, 0}; + + for (int axis : axes) { + SoftmaxLayer layer(axis); + std::vector in{input}; + std::vector out{output}; + + if (axis == -3 || axis == 0) { + 
EXPECT_NO_THROW(layer.run(in, out)); + + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 4; ++j) { + float sum = 0.0f; + for (size_t k = 0; k < 2; ++k) { + sum += out[0].get({k, i, j}); + } + EXPECT_NEAR(sum, 1.0f, 1e-6); + } + } + } else { + EXPECT_NO_THROW(layer.run(in, out)); + } + } +} + +TEST(SoftmaxLayerTest, NumericalStabilityExtremeValues) { + std::vector large_values = {10000.0f, 10001.0f, 10002.0f}; + Tensor input_large = make_tensor(large_values, {3}); + Tensor output_large; + SoftmaxLayer layer_large(0); + + std::vector in_large{input_large}; + std::vector out_large{output_large}; + + EXPECT_NO_THROW(layer_large.run(in_large, out_large)); + + float sum_large = out_large[0].get({0}) + + out_large[0].get({1}) + out_large[0].get({2}); + EXPECT_NEAR(sum_large, 1.0f, 1e-6); + + for (size_t i = 0; i < 3; ++i) { + float val = out_large[0].get({i}); + EXPECT_GE(val, 0.0f); + EXPECT_LE(val, 1.0f); + } +} + +TEST(SoftmaxLayerTest, NumericalStabilityNegativeValues) { + std::vector negative_values = {-1000.0f, -1001.0f, -1002.0f}; + Tensor input_neg = make_tensor(negative_values, {3}); + Tensor output_neg; + SoftmaxLayer layer_neg(0); + + std::vector in_neg{input_neg}; + std::vector out_neg{output_neg}; + + EXPECT_NO_THROW(layer_neg.run(in_neg, out_neg)); + + float sum_neg = out_neg[0].get({0}) + out_neg[0].get({1}) + + out_neg[0].get({2}); + EXPECT_NEAR(sum_neg, 1.0f, 1e-6); +} + +TEST(SoftmaxLayerTest, NumericalStabilityMixedValues) { + std::vector mixed_values = {-100.0f, 0.0f, 100.0f}; + Tensor input_mixed = make_tensor(mixed_values, {3}); + Tensor output_mixed; + SoftmaxLayer layer_mixed(0); + + std::vector in_mixed{input_mixed}; + std::vector out_mixed{output_mixed}; + + EXPECT_NO_THROW(layer_mixed.run(in_mixed, out_mixed)); + + float sum_mixed = out_mixed[0].get({0}) + + out_mixed[0].get({1}) + out_mixed[0].get({2}); + EXPECT_NEAR(sum_mixed, 1.0f, 1e-6); + + EXPECT_GT(out_mixed[0].get({2}), out_mixed[0].get({1})); + EXPECT_GT(out_mixed[0].get({1}), 
out_mixed[0].get({0})); +} + +TEST(SoftmaxLayerTest, VerifyMaxSubtraction) { + std::vector very_large = {1e10f, 1e10f + 1.0f, 1e10f + 2.0f}; + Tensor input = make_tensor(very_large, {3}); + Tensor output; + SoftmaxLayer layer(0); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + for (size_t i = 0; i < 3; ++i) { + float val = out[0].get({i}); + EXPECT_FALSE(std::isnan(val)); + EXPECT_FALSE(std::isinf(val)); + EXPECT_GE(val, 0.0f); + EXPECT_LE(val, 1.0f); + } +} + +TEST(SoftmaxLayerTest, IntTensorExtremeValues) { + std::vector large_ints = {std::numeric_limits::max() - 2, + std::numeric_limits::max() - 1, + std::numeric_limits::max()}; + Tensor input = make_tensor(large_ints, {3}); + Tensor output; + SoftmaxLayer layer(0); + + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + for (size_t i = 0; i < 3; ++i) { + int val = out[0].get({i}); + EXPECT_GE(val, 0); + } +} \ No newline at end of file