Skip to content

Commit 0a305bf

Browse files
committed
cpu: aarch64: restore reorder backport and fix SVE guard
1 parent 87f65fd commit 0a305bf

21 files changed

Lines changed: 4431 additions & 3782 deletions

src/common/memory_desc.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -281,6 +281,9 @@ status_t memory_desc_permute_axes(memory_desc_t &out_memory_desc,
281281
} // namespace impl
282282
} // namespace dnnl
283283

284+
// Preserve compatibility with code that still references the unqualified name.
285+
using dnnl::impl::dnnl_memory_extra_flag_none;
286+
284287
// Memory descriptor. The description is based on a number of dimensions,
285288
// dimensions themselves, plus information about elements type and memory
286289
// format. Additionally, contains format-specific descriptions of the data

src/common/memory_desc_wrapper.cpp

Lines changed: 26 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,11 @@
2828
namespace dnnl {
2929
namespace impl {
3030

31-
template<typename T>
32-
static status_t fill_blocked_impl(memory_desc_t &md, T&& perm, T&& inner_blks, T&& inner_idxs) {
31+
template <typename T>
32+
static status_t fill_blocked_impl(
33+
memory_desc_t &md, T &&perm, T &&inner_blks, T &&inner_idxs) {
3334
const bool ok = true && perm.size() == (size_t)md.ndims
34-
&& inner_blks.size() == inner_idxs.size();
35+
&& inner_blks.size() == inner_idxs.size();
3536
if (!ok) return status::invalid_arguments;
3637

3738
md.offset0 = 0;
@@ -88,12 +89,19 @@ status_t fill_blocked(memory_desc_t &md, std::initializer_list<int> perm,
8889
return fill_blocked_impl(md, perm, inner_blks, inner_idxs);
8990
}
9091

91-
status_t fill_blocked(memory_desc_t &md, std::vector<int>& perm,
92-
std::vector<int>& inner_blks,
93-
std::vector<int>& inner_idxs) {
92+
status_t fill_blocked(memory_desc_t &md, std::vector<int> &perm,
93+
std::vector<int> &inner_blks, std::vector<int> &inner_idxs) {
9494
return fill_blocked_impl(md, perm, inner_blks, inner_idxs);
9595
}
9696

97+
status_t fill_blocked(memory_desc_t &md, std::vector<dim_t> &perm,
98+
std::vector<dim_t> &inner_blks, std::vector<dim_t> &inner_idxs) {
99+
std::vector<int> perm_i(perm.begin(), perm.end());
100+
std::vector<int> inner_blks_i(inner_blks.begin(), inner_blks.end());
101+
std::vector<int> inner_idxs_i(inner_idxs.begin(), inner_idxs.end());
102+
return fill_blocked_impl(md, perm_i, inner_blks_i, inner_idxs_i);
103+
}
104+
97105
void memory_desc_wrapper::compute_strides_compat(dims_t *strides_compat) const {
98106

99107
if (ndims() == 0) return;
@@ -138,8 +146,8 @@ void memory_desc_wrapper::compute_strides_compat(dims_t *strides_compat) const {
138146
utils::array_copy(strides_compat[1], inner_strides, ndims());
139147
}
140148

141-
template<typename F, typename... Args>
142-
status_t process_tag(F f, format_tag_t tag, Args&&... args) {
149+
template <typename F, typename... Args>
150+
status_t process_tag(F f, format_tag_t tag, Args &&...args) {
143151
using namespace format_tag;
144152

145153
// VCHECK_MEMORY((memory_desc.ndims != 0), status::invalid_arguments,
@@ -1047,20 +1055,22 @@ status_t process_tag(F f, format_tag_t tag, Args&&... args) {
10471055
return status::invalid_arguments;
10481056
}
10491057

1050-
status_t memory_desc_wrapper::compute_blocking(memory_desc_t &memory_desc, format_tag_t tag) {
1051-
using fill_blocked_t = status_t(memory_desc_t&, std::initializer_list<int>, std::initializer_list<int>, std::initializer_list<int>);
1058+
status_t memory_desc_wrapper::compute_blocking(
1059+
memory_desc_t &memory_desc, format_tag_t tag) {
1060+
using fill_blocked_t = status_t(memory_desc_t &, std::initializer_list<int>,
1061+
std::initializer_list<int>, std::initializer_list<int>);
10521062
if (memory_desc.ndims == 0) return status::invalid_arguments;
10531063
return process_tag<fill_blocked_t>(fill_blocked, tag, memory_desc);
10541064
}
10551065

10561066
status_t memory_desc_wrapper::compute_blocking(format_tag_t tag,
1057-
std::vector<size_t> &perm,
1058-
std::vector<size_t> &inner_blks,
1059-
std::vector<size_t> &inner_idxs) {
1067+
std::vector<size_t> &perm, std::vector<size_t> &inner_blks,
1068+
std::vector<size_t> &inner_idxs) {
10601069

1061-
auto extract_data = [&](std::initializer_list<int> _perm,
1062-
std::initializer_list<int> _inner_blks,
1063-
std::initializer_list<int> _inner_idxs) -> status_t {
1070+
auto extract_data
1071+
= [&](std::initializer_list<int> _perm,
1072+
std::initializer_list<int> _inner_blks,
1073+
std::initializer_list<int> _inner_idxs) -> status_t {
10641074
perm = {_perm.begin(), _perm.end()};
10651075
inner_blks = {_inner_blks.begin(), _inner_blks.end()};
10661076
inner_idxs = {_inner_idxs.begin(), _inner_idxs.end()};

src/common/memory_desc_wrapper.hpp

Lines changed: 36 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,10 @@ namespace dnnl {
3333
namespace impl {
3434

3535
status_t fill_blocked(memory_desc_t &md, std::vector<int> &perm,
36-
std::vector<int> &inner_blks,
37-
std::vector<int> &inner_idxs);
36+
std::vector<int> &inner_blks, std::vector<int> &inner_idxs);
37+
38+
status_t fill_blocked(memory_desc_t &md, std::vector<dim_t> &perm,
39+
std::vector<dim_t> &inner_blks, std::vector<dim_t> &inner_idxs);
3840

3941
/** thin wrapper class over \struct memory_desc_t which allows easy
4042
* manipulations with underlying C structure, which is taken by reference */
@@ -323,13 +325,20 @@ struct memory_desc_wrapper : public c_compatible {
323325
const size_t metadata = padded_dims()[0] * padded_dims()[1] / 64
324326
* sizeof(uint64_t);
325327
using comp_tile_len_type = int;
326-
size_t comp_tile_data_size = ceil(static_cast<float>(padded_dims()[0] * padded_dims()[1])
327-
/ (64 * 64 * (64 / sizeof(comp_tile_len_type)))) * 64;
328-
return comp_tile_data_size + (padded_dims()[0] * padded_dims()[1] * data_type_size())
328+
size_t comp_tile_data_size
329+
= ceil(static_cast<float>(
330+
padded_dims()[0] * padded_dims()[1])
331+
/ (64 * 64
332+
* (64 / sizeof(comp_tile_len_type))))
333+
* 64;
334+
return comp_tile_data_size
335+
+ (padded_dims()[0] * padded_dims()[1]
336+
* data_type_size())
329337
+ metadata + 1000;
330-
// todo: [av] why 1000?
338+
// todo: [av] why 1000?
331339
} else {
332-
printf("encoding:%d\n", (int)sparse_desc().encoding), fflush(stdout);
340+
printf("encoding:%d\n", (int)sparse_desc().encoding),
341+
fflush(stdout);
333342
assert(!"unknown sparse encoding");
334343
return 0;
335344
}
@@ -446,7 +455,8 @@ struct memory_desc_wrapper : public c_compatible {
446455
if (utils::one_of(format_kind(), format_kind::undef, format_kind::any))
447456
return false;
448457
if (has_runtime_dims_or_strides() || has_broadcast()) return false;
449-
return utils::div_up(nelems(with_padding)* data_type_size(), sub_byte_data_type_multiplier())
458+
return utils::div_up(nelems(with_padding) * data_type_size(),
459+
sub_byte_data_type_multiplier())
450460
== size(0, /* include_additional_size = */ false);
451461
}
452462

@@ -530,8 +540,9 @@ struct memory_desc_wrapper : public c_compatible {
530540
* following statement might be true: lhs == rhs && !lhs.similar_to(rhs) */
531541
/* TODO: revise */
532542
bool similar_to(const memory_desc_wrapper &rhs, bool with_padding = true,
533-
bool with_data_type = true, int dim_start = 0, bool use_weak_cmp = false,
534-
bool check_off0 = false, uint64_t stride_mask = 0xffffffffffffffff) const;
543+
bool with_data_type = true, int dim_start = 0,
544+
bool use_weak_cmp = false, bool check_off0 = false,
545+
uint64_t stride_mask = 0xffffffffffffffff) const;
535546

536547
/** returns true if one memory can be reordered to another */
537548
bool consistent_with(const memory_desc_wrapper &rhs) const;
@@ -670,9 +681,8 @@ struct memory_desc_wrapper : public c_compatible {
670681
memory_desc_t &memory_desc, format_tag_t tag);
671682

672683
static status_t compute_blocking(format_tag_t tag,
673-
std::vector<size_t> &perm,
674-
std::vector<size_t> &inner_blks,
675-
std::vector<size_t> &inner_idxs);
684+
std::vector<size_t> &perm, std::vector<size_t> &inner_blks,
685+
std::vector<size_t> &inner_idxs);
676686

677687
private:
678688
/* TODO: put logical_offset in utils */
@@ -703,7 +713,8 @@ struct memory_desc_wrapper : public c_compatible {
703713
};
704714

705715
inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
706-
bool with_padding, bool with_data_type, int dim_start, bool use_weak_cmp, bool check_off0, uint64_t stride_mask) const {
716+
bool with_padding, bool with_data_type, int dim_start,
717+
bool use_weak_cmp, bool check_off0, uint64_t stride_mask) const {
707718
using namespace utils;
708719

709720
if (one_of(format_kind(), format_kind::undef, format_kind::any))
@@ -718,12 +729,16 @@ inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
718729
auto custom_cpm = use_weak_cmp ? array_cmp_weak : array_cmp<dnnl_dim_t>;
719730
auto cmp_strides = [&]() {
720731
if (0xffffffffffffffff == stride_mask) {
721-
return custom_cpm(blk.strides + ds, r_blk.strides + ds, ndims() - ds);
732+
return custom_cpm(
733+
blk.strides + ds, r_blk.strides + ds, ndims() - ds);
722734
} else {
723735
for (int i = 0; i < ndims(); ++i) {
724736
if (stride_mask & (1 << i)) {
725737
if (blk.strides[i] != r_blk.strides[i]
726-
&& IMPLICATION(use_weak_cmp, (blk.strides[i] != DNNL_RUNTIME_DIM_VAL && r_blk.strides[i] != DNNL_RUNTIME_DIM_VAL))) {
738+
&& IMPLICATION(use_weak_cmp,
739+
(blk.strides[i] != DNNL_RUNTIME_DIM_VAL
740+
&& r_blk.strides[i]
741+
!= DNNL_RUNTIME_DIM_VAL))) {
727742
return false;
728743
}
729744
}
@@ -736,8 +751,7 @@ inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
736751
&& format_kind() == rhs.format_kind()
737752
&& IMPLICATION(with_data_type, data_type() == rhs.data_type())
738753
&& custom_cpm(dims() + ds, rhs.dims() + ds, ndims() - ds)
739-
&& cmp_strides()
740-
&& blk.inner_nblks == r_blk.inner_nblks
754+
&& cmp_strides() && blk.inner_nblks == r_blk.inner_nblks
741755
&& array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks)
742756
&& array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks)
743757
&& IMPLICATION(with_padding,
@@ -746,7 +760,10 @@ inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
746760
rhs.padded_dims() + ds, ndims() - ds)
747761
&& custom_cpm(padded_offsets() + ds,
748762
rhs.padded_offsets() + ds, ndims() - ds))
749-
&& IMPLICATION(check_off0, (offset0() == DNNL_RUNTIME_DIM_VAL || rhs.offset0() ==DNNL_RUNTIME_DIM_VAL || offset0() == rhs.offset0()));
763+
&& IMPLICATION(check_off0,
764+
(offset0() == DNNL_RUNTIME_DIM_VAL
765+
|| rhs.offset0() == DNNL_RUNTIME_DIM_VAL
766+
|| offset0() == rhs.offset0()));
750767
}
751768

752769
inline bool memory_desc_wrapper::consistent_with(

src/cpu/aarch64/acl_reorder.hpp

Lines changed: 4 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -13,87 +13,9 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*******************************************************************************/
16-
#ifndef CPU_ACL_REORDER_HPP
17-
#define CPU_ACL_REORDER_HPP
16+
#ifndef CPU_AARCH64_ACL_REORDER_HPP
17+
#define CPU_AARCH64_ACL_REORDER_HPP
1818

19-
#include "arm_compute/core/Types.h"
20-
#include "common/utils.hpp"
21-
#include "cpu/acl/acl_utils.hpp"
22-
#include "cpu/aarch64/cpu_isa_traits.hpp"
23-
#include "cpu/reorder/cpu_reorder_pd.hpp"
19+
#include "cpu/aarch64/reorder/acl_reorder.hpp"
2420

25-
namespace dnnl {
26-
namespace impl {
27-
namespace cpu {
28-
namespace acl {
29-
30-
struct acl_reorder_obj_t {
31-
arm_compute::NEReorderLayer reorder;
32-
arm_compute::Tensor src_tensor;
33-
arm_compute::Tensor dst_tensor;
34-
arm_compute::WeightFormat src_wf;
35-
arm_compute::WeightFormat dst_wf;
36-
};
37-
38-
struct acl_reorder_conf_t {
39-
arm_compute::TensorInfo src_info;
40-
arm_compute::TensorInfo dst_info;
41-
arm_compute::WeightFormat src_wf = arm_compute::WeightFormat::OHWI;
42-
arm_compute::WeightFormat dst_wf = arm_compute::WeightFormat::OHWI;
43-
bool transpose;
44-
};
45-
46-
struct acl_reorder_resource_t : public resource_t {
47-
acl_reorder_resource_t()
48-
: acl_obj_(utils::make_unique<acl_reorder_obj_t>()) {}
49-
50-
status_t configure(const acl_reorder_conf_t &app);
51-
52-
acl_reorder_obj_t &get_acl_obj() const { return *acl_obj_; }
53-
DNNL_DISALLOW_COPY_AND_ASSIGN(acl_reorder_resource_t);
54-
55-
private:
56-
std::unique_ptr<acl_reorder_obj_t> acl_obj_;
57-
}; // acl_reorder_resource_t
58-
59-
struct acl_reorder_fwd_t : public primitive_t {
60-
using primitive_t::primitive_t;
61-
struct pd_t : public cpu_reorder_pd_t {
62-
63-
using cpu_reorder_pd_t::cpu_reorder_pd_t;
64-
65-
DECLARE_COMMON_PD_T("acl", acl_reorder_fwd_t);
66-
67-
static status_t create(reorder_pd_t **reorder_pd, engine_t *engine,
68-
const primitive_attr_t *attr, engine_t *src_engine,
69-
const memory_desc_t *src_md, engine_t *dst_engine,
70-
const memory_desc_t *dst_md);
71-
72-
friend dnnl::impl::impl_list_item_t;
73-
acl_reorder_conf_t app_;
74-
75-
}; // pd_t
76-
77-
acl_reorder_fwd_t(const pd_t *apd) : primitive_t(apd) {}
78-
79-
status_t create_resource(
80-
engine_t *engine, resource_mapper_t &mapper) const override;
81-
82-
status_t execute(const exec_ctx_t &ctx) const override;
83-
84-
private:
85-
// To guard the const execute_forward, the mutex must be 'mutable'
86-
mutable std::mutex mtx;
87-
status_t execute_forward(const exec_ctx_t &ctx) const;
88-
inline const pd_t *pd() const {
89-
return (const pd_t *)primitive_t::pd().get();
90-
}
91-
92-
}; // acl_reorder_fwd_t
93-
94-
} // namespace acl
95-
} // namespace cpu
96-
} // namespace impl
97-
} // namespace dnnl
98-
99-
#endif // CPU_ACL_REORDER_HPP
21+
#endif // CPU_AARCH64_ACL_REORDER_HPP

src/cpu/aarch64/jit_generator.hpp

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,12 @@
3636
#endif
3737

3838
#define DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_name) \
39-
const char *name() const override { return STRINGIFY(jit_name); } \
40-
const char *source_file() const override { return __FILE__; }
39+
const char *name() const override { \
40+
return STRINGIFY(jit_name); \
41+
} \
42+
const char *source_file() const override { \
43+
return __FILE__; \
44+
}
4145

4246
#define LD_MUL_VL(mn, op, mask, addr, off, size) \
4347
do { \
@@ -709,8 +713,9 @@ class jit_generator_t : public Xbyak_aarch64::CodeGenerator,
709713
jit_generator_t(void *code_ptr = nullptr, size_t code_size = MAX_CODE_SIZE,
710714
bool use_autogrow = true, cpu_isa_t max_cpu_isa = isa_all)
711715
: Xbyak_aarch64::CodeGenerator(code_size,
712-
(code_ptr == nullptr && use_autogrow) ? Xbyak_aarch64::AutoGrow
713-
: code_ptr)
716+
(code_ptr == nullptr && use_autogrow)
717+
? Xbyak_aarch64::AutoGrow
718+
: code_ptr)
714719
, max_cpu_isa_(max_cpu_isa) {}
715720
~jit_generator_t() override = default;
716721

@@ -762,6 +767,9 @@ class jit_generator_t : public Xbyak_aarch64::CodeGenerator,
762767
const uint8_t *jit_ker_ = nullptr;
763768
};
764769

770+
// Preserve compatibility with OpenVINO sources that still use the old name.
771+
using jit_generator = jit_generator_t;
772+
765773
} // namespace aarch64
766774
} // namespace cpu
767775
} // namespace impl

0 commit comments

Comments
 (0)