Skip to content

Commit 06c0d3a

Browse files
author
NeiroYT
committed
Changes
1 parent 156dacc commit 06c0d3a

8 files changed

Lines changed: 968 additions & 6 deletions

File tree

include/CMakeLists.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,17 @@ file(GLOB_RECURSE graphT_headers graph_transformations/*.h graph_transformations
55
set(GRAPHT_HEADERS "${graphT_headers}" PARENT_SCOPE)
66

77
file(GLOB_RECURSE layers_headers layers/*.h layers/*.hpp)
8-
set(LAYERS_HEADERS "${layers_headers}" PARENT_SCOPE)
8+
file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp)
9+
set(LAYERS_HEADERS "${layers_headers}" "${parallel_headers}" PARENT_SCOPE)
910

1011
file(GLOB_RECURSE layers_oneDNN_headers layers_oneDNN/*.h layers_oneDNN/*.hpp)
1112
set(LAYERS_ONEDNN_HEADERS "${layers_oneDNN_headers}" PARENT_SCOPE)
1213

14+
file(GLOB_RECURSE layers_fused_headers layers_fused/*.h layers_fused/*.hpp)
15+
set(LAYERS_FUSED_HEADERS "${layers_fused_headers}" PARENT_SCOPE)
16+
1317
file(GLOB_RECURSE perf_headers perf/*.h perf/*.hpp)
1418
set(PERF_HEADERS "${perf_headers}" PARENT_SCOPE)
1519

1620
file(GLOB_RECURSE reader_headers Weights_Reader/*.h Weights_Reader/*.hpp)
1721
set(READER_HEADERS "${reader_headers}" PARENT_SCOPE)
18-
19-
file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp)
20-
set(LAYERS_HEADERS "${parallel_headers}" PARENT_SCOPE)

include/layers/Layer.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ enum LayerType : uint8_t {
3434
kReshape,
3535
kSoftmax,
3636
kMatmul,
37-
kBatchNormalization
37+
kBatchNormalization,
38+
kConvRelu
3839
};
3940

4041
// Implementation selector stored as a single byte. The enumerator names
// suggest "default", TBB-based and STL-based variants -- NOTE(review): confirm
// against the code that consumes ImplType; its users are not visible here.
enum ImplType : uint8_t { kDefault, kTBB, kSTL };

include/layers_fused/ConvRelu.hpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#pragma once
2+
3+
#include <memory>
4+
#include <string>
5+
#include <vector>
6+
7+
#include "layers/Layer.hpp"
8+
#include "layers/Tensor.hpp"
9+
10+
namespace it_lab_ai {
11+
12+
template <typename T>
13+
void relu(Tensor& t) {
14+
Shape sh = t.get_shape();
15+
for (size_t i = 0; i < sh.count(); i++) {
16+
if ((*t.as<T>())[i] < 0) {
17+
(*t.as<T>())[i] = 0;
18+
}
19+
}
20+
}
21+
22+
class ConvReluLayer : Layer {
23+
private:
24+
size_t stride_;
25+
size_t pads_;
26+
size_t dilations_;
27+
std::shared_ptr<Tensor> kernel_;
28+
std::shared_ptr<Tensor> bias_;
29+
size_t group_;
30+
bool useLegacyImpl_;
31+
32+
public:
33+
ConvReluLayer() : Layer(kConvRelu), kernel_(nullptr), bias_(nullptr) {
34+
stride_ = 0;
35+
pads_ = 0;
36+
dilations_ = 0;
37+
}
38+
ConvReluLayer(size_t step, size_t pads, size_t dilations,
39+
const Tensor& kernel, const Tensor& bias = Tensor(),
40+
size_t group = 1, bool useLegacyImpl = false)
41+
: Layer(kConvRelu),
42+
kernel_(std::make_shared<Tensor>(kernel)),
43+
bias_(std::make_shared<Tensor>(bias)) {
44+
stride_ = step;
45+
pads_ = pads;
46+
group_ = group;
47+
dilations_ = dilations;
48+
useLegacyImpl_ = useLegacyImpl;
49+
}
50+
ConvReluLayer(size_t step, size_t pads, size_t dilations,
51+
std::shared_ptr<Tensor> kernel,
52+
std::shared_ptr<Tensor> bias = std::make_shared<Tensor>(),
53+
size_t group = 1, bool useLegacyImpl = false)
54+
: Layer(kConvRelu), kernel_(std::move(kernel)), bias_(std::move(bias)) {
55+
stride_ = step;
56+
pads_ = pads;
57+
group_ = group;
58+
dilations_ = dilations;
59+
useLegacyImpl_ = useLegacyImpl;
60+
}
61+
void run(const std::vector<Tensor>& input,
62+
std::vector<Tensor>& output) override;
63+
void run(const std::vector<Tensor>& input, std::vector<Tensor>& output,
64+
const RuntimeOptions& options) override;
65+
#ifdef ENABLE_STATISTIC_WEIGHTS
66+
Tensor get_weights() override { return *kernel_; }
67+
#endif
68+
};
69+
} // namespace it_lab_ai

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@ add_subdirectory(graph)
22
add_subdirectory(graph_transformations)
33
add_subdirectory(perf)
44
add_subdirectory(layers)
5+
add_subdirectory(layers_fused)
56
add_subdirectory(layers_oneDNN)
67
add_subdirectory(Weights_Reader)

src/layers_fused/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Static library holding the fused-layer implementations.
# Sources are collected recursively from this directory; the header list comes
# from the LAYERS_FUSED_HEADERS variable.
file(GLOB_RECURSE layers_fused_src *.cpp)
add_library(layers_fused_lib STATIC "${LAYERS_FUSED_HEADERS}" "${layers_fused_src}")

# Link dependencies, in the same order as before, all with PUBLIC visibility.
target_link_libraries(layers_fused_lib PUBLIC
    layers_lib
    TBB_unified
    OpenMP::OpenMP_CXX
    dnnl
    Kokkos_imported
)

src/layers_fused/ConvRelu.cpp

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#include "layers_fused/ConvRelu.hpp"
2+
#include "layers/ConvLayer.hpp"
3+
4+
namespace it_lab_ai {
5+
6+
void ConvReluLayer::run(const std::vector<Tensor>& input,
7+
std::vector<Tensor>& output) {
8+
RuntimeOptions default_options;
9+
run(input, output, default_options);
10+
}
11+
12+
void ConvReluLayer::run(const std::vector<Tensor>& input,
13+
std::vector<Tensor>& output,
14+
const RuntimeOptions& options) {
15+
if (kernel_ == nullptr || bias_ == nullptr) {
16+
throw std::runtime_error("ConvReluLayer: no weights or bias");
17+
}
18+
if (input.size() != 1) {
19+
throw std::runtime_error("ConvReluLayer: Input tensors not 1");
20+
}
21+
if (input[0].get_shape().dims() != 4) {
22+
throw std::out_of_range("input must be 4-dimensional");
23+
}
24+
25+
ParBackend backend = options.par_backend;
26+
27+
if (group_ > 1) {
28+
if (group_ == input[0].get_shape()[1] &&
29+
group_ == kernel_->get_shape()[0]) {
30+
switch (input[0].get_type()) {
31+
case Type::kFloat:
32+
DepthwiseConv4D<float>(input[0], *kernel_, *bias_, output[0], stride_,
33+
pads_, dilations_, backend);
34+
relu<float>(output[0]);
35+
break;
36+
case Type::kInt:
37+
DepthwiseConv4D<int>(input[0], *kernel_, *bias_, output[0], stride_,
38+
pads_, dilations_, backend);
39+
relu<int>(output[0]);
40+
break;
41+
default:
42+
throw std::runtime_error(
43+
"Unsupported type for depthwise convolution");
44+
}
45+
return;
46+
}
47+
}
48+
49+
switch (input[0].get_type()) {
50+
case Type::kInt: {
51+
if (kernel_->get_shape().dims() == 2) {
52+
if (dilations_ > 0) {
53+
dilations_--;
54+
}
55+
ConvImpl<int> used_impl(
56+
stride_, pads_, dilations_,
57+
static_cast<int>(
58+
input[0].get_shape()[input[0].get_shape().dims() - 1]),
59+
static_cast<int>(
60+
input[0].get_shape()[input[0].get_shape().dims() - 2]),
61+
static_cast<int>(
62+
input[0].get_shape()[input[0].get_shape().dims() - 3]),
63+
input[0].get_shape()[input[0].get_shape().dims() - 1] *
64+
input[0].get_shape()[input[0].get_shape().dims() - 2],
65+
bias_->empty() ? std::vector<int>() : *bias_->as<int>());
66+
auto sizeforshape = static_cast<size_t>(
67+
((static_cast<int>(
68+
input[0].get_shape()[input[0].get_shape().dims() - 1]) -
69+
1 -
70+
static_cast<int>(
71+
(1 + kernel_->get_shape()[kernel_->get_shape().dims() - 1]) *
72+
dilations_ +
73+
kernel_->get_shape()[kernel_->get_shape().dims() - 1] - 1)) /
74+
static_cast<int>(stride_)) +
75+
1);
76+
77+
Shape sh({1, 3, sizeforshape, sizeforshape});
78+
output[0] = make_tensor<int>(
79+
used_impl.run(
80+
*input[0].as<int>(),
81+
static_cast<int>(
82+
input[0].get_shape()[input[0].get_shape().dims() - 1]) +
83+
2 * static_cast<int>(pads_),
84+
static_cast<int>(
85+
input[0].get_shape()[input[0].get_shape().dims() - 2]) +
86+
2 * static_cast<int>(pads_),
87+
*kernel_->as<int>(),
88+
kernel_->get_shape()[kernel_->get_shape().dims() - 1],
89+
(1 + kernel_->get_shape()[kernel_->get_shape().dims() - 1]) *
90+
dilations_ +
91+
kernel_->get_shape()[kernel_->get_shape().dims() - 1],
92+
static_cast<int>(
93+
((1 +
94+
kernel_->get_shape()[kernel_->get_shape().dims() - 1]) *
95+
dilations_ +
96+
kernel_->get_shape()[kernel_->get_shape().dims() - 1] -
97+
1) /
98+
2)),
99+
sh);
100+
} else {
101+
Conv4D<int>(input[0], *kernel_, *bias_, output[0], stride_, pads_,
102+
group_, dilations_, backend);
103+
}
104+
relu<int>(output[0]);
105+
break;
106+
}
107+
case Type::kFloat: {
108+
if (kernel_->get_shape().dims() == 2) {
109+
if (dilations_ > 0) {
110+
dilations_--;
111+
}
112+
ConvImpl<float> used_impl(
113+
stride_, pads_, dilations_,
114+
static_cast<int>(
115+
input[0].get_shape()[input[0].get_shape().dims() - 1]),
116+
static_cast<int>(
117+
input[0].get_shape()[input[0].get_shape().dims() - 2]),
118+
static_cast<int>(
119+
input[0].get_shape()[input[0].get_shape().dims() - 3]),
120+
input[0].get_shape()[input[0].get_shape().dims() - 1] *
121+
input[0].get_shape()[input[0].get_shape().dims() - 2],
122+
bias_->empty() ? std::vector<float>() : *bias_->as<float>());
123+
auto sizeforshape = static_cast<size_t>(
124+
((static_cast<int>(
125+
input[0].get_shape()[input[0].get_shape().dims() - 1]) -
126+
1 -
127+
static_cast<int>(
128+
(1 + kernel_->get_shape()[kernel_->get_shape().dims() - 1]) *
129+
dilations_ +
130+
kernel_->get_shape()[kernel_->get_shape().dims() - 1] - 1)) /
131+
static_cast<int>(stride_)) +
132+
1);
133+
134+
Shape sh({1, 3, sizeforshape, sizeforshape});
135+
output[0] = make_tensor<float>(
136+
used_impl.run(
137+
*input[0].as<float>(),
138+
static_cast<int>(
139+
input[0].get_shape()[input[0].get_shape().dims() - 1]) +
140+
2 * static_cast<int>(pads_),
141+
static_cast<int>(
142+
input[0].get_shape()[input[0].get_shape().dims() - 2]) +
143+
2 * static_cast<int>(pads_),
144+
*kernel_->as<float>(),
145+
kernel_->get_shape()[kernel_->get_shape().dims() - 1],
146+
(1 + kernel_->get_shape()[kernel_->get_shape().dims() - 1]) *
147+
dilations_ +
148+
kernel_->get_shape()[kernel_->get_shape().dims() - 1],
149+
static_cast<int>(
150+
((1 +
151+
kernel_->get_shape()[kernel_->get_shape().dims() - 1]) *
152+
dilations_ +
153+
kernel_->get_shape()[kernel_->get_shape().dims() - 1] -
154+
1) /
155+
2)),
156+
sh);
157+
} else {
158+
if (useLegacyImpl_) {
159+
Conv4D_Legacy<float>(input[0], *kernel_, *bias_, output[0], stride_,
160+
pads_, dilations_, backend);
161+
} else {
162+
Conv4D<float>(input[0], *kernel_, *bias_, output[0], stride_, pads_,
163+
group_, dilations_, backend);
164+
}
165+
}
166+
relu<float>(output[0]);
167+
break;
168+
}
169+
default: {
170+
throw std::runtime_error("Unsupported tensor type");
171+
}
172+
}
173+
}
174+
175+
} // namespace it_lab_ai

test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ file(GLOB_RECURSE TEST_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
33
add_executable(run_test ${TEST_SRC_FILES})
44

55
target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX)
6-
target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib)
6+
target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib layers_fused_lib)
77
target_link_libraries(run_test PUBLIC gtest)
88
target_link_libraries(run_test PUBLIC ReadLib)
99
target_link_libraries(run_test PUBLIC reader_lib)

0 commit comments

Comments
 (0)