Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/common/memory_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ status_t memory_desc_permute_axes(memory_desc_t &out_memory_desc,
} // namespace impl
} // namespace dnnl

// Preserve compatibility with code that still references the unqualified name.
using dnnl::impl::dnnl_memory_extra_flag_none;

// Memory descriptor. The description is based on a number of dimensions,
// dimensions themselves, plus information about elements type and memory
// format. Additionally, contains format-specific descriptions of the data
Expand Down
42 changes: 26 additions & 16 deletions src/common/memory_desc_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@
namespace dnnl {
namespace impl {

template<typename T>
static status_t fill_blocked_impl(memory_desc_t &md, T&& perm, T&& inner_blks, T&& inner_idxs) {
template <typename T>
static status_t fill_blocked_impl(
memory_desc_t &md, T &&perm, T &&inner_blks, T &&inner_idxs) {
const bool ok = true && perm.size() == (size_t)md.ndims
&& inner_blks.size() == inner_idxs.size();
&& inner_blks.size() == inner_idxs.size();
if (!ok) return status::invalid_arguments;

md.offset0 = 0;
Expand Down Expand Up @@ -88,12 +89,19 @@ status_t fill_blocked(memory_desc_t &md, std::initializer_list<int> perm,
return fill_blocked_impl(md, perm, inner_blks, inner_idxs);
}

status_t fill_blocked(memory_desc_t &md, std::vector<int>& perm,
std::vector<int>& inner_blks,
std::vector<int>& inner_idxs) {
status_t fill_blocked(memory_desc_t &md, std::vector<int> &perm,
std::vector<int> &inner_blks, std::vector<int> &inner_idxs) {
return fill_blocked_impl(md, perm, inner_blks, inner_idxs);
}

// Overload of fill_blocked() for callers that hold their permutation and
// inner-block description as dim_t vectors. The values are converted to int
// and forwarded to fill_blocked_impl().
//
// Returns status::invalid_arguments if any element cannot be represented as
// an int (previously such values were silently truncated); otherwise returns
// whatever fill_blocked_impl() returns.
status_t fill_blocked(memory_desc_t &md, std::vector<dim_t> &perm,
        std::vector<dim_t> &inner_blks, std::vector<dim_t> &inner_idxs) {
    // Copies `src` into `dst` as int. Fails when a value does not survive the
    // round trip dim_t -> int -> dim_t, i.e. when narrowing would lose data.
    const auto narrow = [](const std::vector<dim_t> &src,
                                std::vector<int> &dst) -> bool {
        dst.reserve(src.size());
        for (const dim_t v : src) {
            const int as_int = static_cast<int>(v);
            if (static_cast<dim_t>(as_int) != v) return false;
            dst.push_back(as_int);
        }
        return true;
    };

    std::vector<int> perm_i;
    std::vector<int> inner_blks_i;
    std::vector<int> inner_idxs_i;
    const bool representable = narrow(perm, perm_i)
            && narrow(inner_blks, inner_blks_i)
            && narrow(inner_idxs, inner_idxs_i);
    if (!representable) return status::invalid_arguments;

    return fill_blocked_impl(md, perm_i, inner_blks_i, inner_idxs_i);
}

void memory_desc_wrapper::compute_strides_compat(dims_t *strides_compat) const {

if (ndims() == 0) return;
Expand Down Expand Up @@ -138,8 +146,8 @@ void memory_desc_wrapper::compute_strides_compat(dims_t *strides_compat) const {
utils::array_copy(strides_compat[1], inner_strides, ndims());
}

template<typename F, typename... Args>
status_t process_tag(F f, format_tag_t tag, Args&&... args) {
template <typename F, typename... Args>
status_t process_tag(F f, format_tag_t tag, Args &&...args) {
using namespace format_tag;

// VCHECK_MEMORY((memory_desc.ndims != 0), status::invalid_arguments,
Expand Down Expand Up @@ -1047,20 +1055,22 @@ status_t process_tag(F f, format_tag_t tag, Args&&... args) {
return status::invalid_arguments;
}

status_t memory_desc_wrapper::compute_blocking(memory_desc_t &memory_desc, format_tag_t tag) {
using fill_blocked_t = status_t(memory_desc_t&, std::initializer_list<int>, std::initializer_list<int>, std::initializer_list<int>);
status_t memory_desc_wrapper::compute_blocking(
memory_desc_t &memory_desc, format_tag_t tag) {
using fill_blocked_t = status_t(memory_desc_t &, std::initializer_list<int>,
std::initializer_list<int>, std::initializer_list<int>);
if (memory_desc.ndims == 0) return status::invalid_arguments;
return process_tag<fill_blocked_t>(fill_blocked, tag, memory_desc);
}

status_t memory_desc_wrapper::compute_blocking(format_tag_t tag,
std::vector<size_t> &perm,
std::vector<size_t> &inner_blks,
std::vector<size_t> &inner_idxs) {
std::vector<size_t> &perm, std::vector<size_t> &inner_blks,
std::vector<size_t> &inner_idxs) {

auto extract_data = [&](std::initializer_list<int> _perm,
std::initializer_list<int> _inner_blks,
std::initializer_list<int> _inner_idxs) -> status_t {
auto extract_data
= [&](std::initializer_list<int> _perm,
std::initializer_list<int> _inner_blks,
std::initializer_list<int> _inner_idxs) -> status_t {
perm = {_perm.begin(), _perm.end()};
inner_blks = {_inner_blks.begin(), _inner_blks.end()};
inner_idxs = {_inner_idxs.begin(), _inner_idxs.end()};
Expand Down
55 changes: 36 additions & 19 deletions src/common/memory_desc_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ namespace dnnl {
namespace impl {

status_t fill_blocked(memory_desc_t &md, std::vector<int> &perm,
std::vector<int> &inner_blks,
std::vector<int> &inner_idxs);
std::vector<int> &inner_blks, std::vector<int> &inner_idxs);

status_t fill_blocked(memory_desc_t &md, std::vector<dim_t> &perm,
std::vector<dim_t> &inner_blks, std::vector<dim_t> &inner_idxs);

/** thin wrapper class over \struct memory_desc_t which allows easy
* manipulations with underlying C structure, which is taken by reference */
Expand Down Expand Up @@ -323,13 +325,20 @@ struct memory_desc_wrapper : public c_compatible {
const size_t metadata = padded_dims()[0] * padded_dims()[1] / 64
* sizeof(uint64_t);
using comp_tile_len_type = int;
size_t comp_tile_data_size = ceil(static_cast<float>(padded_dims()[0] * padded_dims()[1])
/ (64 * 64 * (64 / sizeof(comp_tile_len_type)))) * 64;
return comp_tile_data_size + (padded_dims()[0] * padded_dims()[1] * data_type_size())
size_t comp_tile_data_size
= ceil(static_cast<float>(
padded_dims()[0] * padded_dims()[1])
/ (64 * 64
* (64 / sizeof(comp_tile_len_type))))
* 64;
return comp_tile_data_size
+ (padded_dims()[0] * padded_dims()[1]
* data_type_size())
+ metadata + 1000;
// todo: [av] why 1000?
// todo: [av] why 1000?
} else {
printf("encoding:%d\n", (int)sparse_desc().encoding), fflush(stdout);
printf("encoding:%d\n", (int)sparse_desc().encoding),
fflush(stdout);
assert(!"unknown sparse encoding");
return 0;
}
Expand Down Expand Up @@ -446,7 +455,8 @@ struct memory_desc_wrapper : public c_compatible {
if (utils::one_of(format_kind(), format_kind::undef, format_kind::any))
return false;
if (has_runtime_dims_or_strides() || has_broadcast()) return false;
return utils::div_up(nelems(with_padding)* data_type_size(), sub_byte_data_type_multiplier())
return utils::div_up(nelems(with_padding) * data_type_size(),
sub_byte_data_type_multiplier())
== size(0, /* include_additional_size = */ false);
}

Expand Down Expand Up @@ -530,8 +540,9 @@ struct memory_desc_wrapper : public c_compatible {
* following statement might be true: lhs == rhs && !lhs.similar_to(rhs) */
/* TODO: revise */
bool similar_to(const memory_desc_wrapper &rhs, bool with_padding = true,
bool with_data_type = true, int dim_start = 0, bool use_weak_cmp = false,
bool check_off0 = false, uint64_t stride_mask = 0xffffffffffffffff) const;
bool with_data_type = true, int dim_start = 0,
bool use_weak_cmp = false, bool check_off0 = false,
uint64_t stride_mask = 0xffffffffffffffff) const;

/** returns true if one memory can be reordered to another */
bool consistent_with(const memory_desc_wrapper &rhs) const;
Expand Down Expand Up @@ -670,9 +681,8 @@ struct memory_desc_wrapper : public c_compatible {
memory_desc_t &memory_desc, format_tag_t tag);

static status_t compute_blocking(format_tag_t tag,
std::vector<size_t> &perm,
std::vector<size_t> &inner_blks,
std::vector<size_t> &inner_idxs);
std::vector<size_t> &perm, std::vector<size_t> &inner_blks,
std::vector<size_t> &inner_idxs);

private:
/* TODO: put logical_offset in utils */
Expand Down Expand Up @@ -703,7 +713,8 @@ struct memory_desc_wrapper : public c_compatible {
};

inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
bool with_padding, bool with_data_type, int dim_start, bool use_weak_cmp, bool check_off0, uint64_t stride_mask) const {
bool with_padding, bool with_data_type, int dim_start,
bool use_weak_cmp, bool check_off0, uint64_t stride_mask) const {
using namespace utils;

if (one_of(format_kind(), format_kind::undef, format_kind::any))
Expand All @@ -718,12 +729,16 @@ inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
auto custom_cpm = use_weak_cmp ? array_cmp_weak : array_cmp<dnnl_dim_t>;
auto cmp_strides = [&]() {
if (0xffffffffffffffff == stride_mask) {
return custom_cpm(blk.strides + ds, r_blk.strides + ds, ndims() - ds);
return custom_cpm(
blk.strides + ds, r_blk.strides + ds, ndims() - ds);
} else {
for (int i = 0; i < ndims(); ++i) {
if (stride_mask & (1 << i)) {
if (blk.strides[i] != r_blk.strides[i]
&& IMPLICATION(use_weak_cmp, (blk.strides[i] != DNNL_RUNTIME_DIM_VAL && r_blk.strides[i] != DNNL_RUNTIME_DIM_VAL))) {
&& IMPLICATION(use_weak_cmp,
(blk.strides[i] != DNNL_RUNTIME_DIM_VAL
&& r_blk.strides[i]
!= DNNL_RUNTIME_DIM_VAL))) {
return false;
}
}
Expand All @@ -736,8 +751,7 @@ inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
&& format_kind() == rhs.format_kind()
&& IMPLICATION(with_data_type, data_type() == rhs.data_type())
&& custom_cpm(dims() + ds, rhs.dims() + ds, ndims() - ds)
&& cmp_strides()
&& blk.inner_nblks == r_blk.inner_nblks
&& cmp_strides() && blk.inner_nblks == r_blk.inner_nblks
&& array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks)
&& array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks)
&& IMPLICATION(with_padding,
Expand All @@ -746,7 +760,10 @@ inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
rhs.padded_dims() + ds, ndims() - ds)
&& custom_cpm(padded_offsets() + ds,
rhs.padded_offsets() + ds, ndims() - ds))
&& IMPLICATION(check_off0, (offset0() == DNNL_RUNTIME_DIM_VAL || rhs.offset0() ==DNNL_RUNTIME_DIM_VAL || offset0() == rhs.offset0()));
&& IMPLICATION(check_off0,
(offset0() == DNNL_RUNTIME_DIM_VAL
|| rhs.offset0() == DNNL_RUNTIME_DIM_VAL
|| offset0() == rhs.offset0()));
}

inline bool memory_desc_wrapper::consistent_with(
Expand Down
86 changes: 4 additions & 82 deletions src/cpu/aarch64/acl_reorder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,87 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#ifndef CPU_ACL_REORDER_HPP
#define CPU_ACL_REORDER_HPP
#ifndef CPU_AARCH64_ACL_REORDER_HPP
#define CPU_AARCH64_ACL_REORDER_HPP

#include "arm_compute/core/Types.h"
#include "common/utils.hpp"
#include "cpu/acl/acl_utils.hpp"
#include "cpu/aarch64/cpu_isa_traits.hpp"
#include "cpu/reorder/cpu_reorder_pd.hpp"
#include "cpu/aarch64/reorder/acl_reorder.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace acl {

// Bundles the Arm Compute Library objects used by one reorder: the reorder
// layer itself, its source and destination tensors, and the weight format
// recorded for each side.
struct acl_reorder_obj_t {
arm_compute::NEReorderLayer reorder;
arm_compute::Tensor src_tensor;
arm_compute::Tensor dst_tensor;
arm_compute::WeightFormat src_wf;
arm_compute::WeightFormat dst_wf;
};

// Configuration for an ACL reorder: tensor metadata and weight format for the
// source and destination, plus a transpose flag. Stored in the primitive
// descriptor and passed to acl_reorder_resource_t::configure().
struct acl_reorder_conf_t {
    arm_compute::TensorInfo src_info;
    arm_compute::TensorInfo dst_info;
    arm_compute::WeightFormat src_wf = arm_compute::WeightFormat::OHWI;
    arm_compute::WeightFormat dst_wf = arm_compute::WeightFormat::OHWI;
    // Initialised like the other members so that a default-constructed
    // configuration never carries an indeterminate value.
    // NOTE(review): presumably signals that the reorder also transposes the
    // data -- confirm against the code that sets it.
    bool transpose = false;
};

// Resource that exclusively owns one heap-allocated acl_reorder_obj_t.
// Copying and assignment are disabled via DNNL_DISALLOW_COPY_AND_ASSIGN.
struct acl_reorder_resource_t : public resource_t {
// Allocates a default-constructed acl_reorder_obj_t.
acl_reorder_resource_t()
: acl_obj_(utils::make_unique<acl_reorder_obj_t>()) {}

// Only declared here; presumably sets up the owned ACL objects from `app`
// (definition lives outside this header -- confirm there).
status_t configure(const acl_reorder_conf_t &app);

// Returns a non-const reference to the owned object even when called on a
// const resource, so callers can mutate the ACL objects through it.
acl_reorder_obj_t &get_acl_obj() const { return *acl_obj_; }
DNNL_DISALLOW_COPY_AND_ASSIGN(acl_reorder_resource_t);

private:
std::unique_ptr<acl_reorder_obj_t> acl_obj_;
}; // acl_reorder_resource_t

// Reorder primitive registered under the implementation name "acl".
// Only declarations are visible here; the member functions are defined
// outside this header.
struct acl_reorder_fwd_t : public primitive_t {
using primitive_t::primitive_t;
// Primitive descriptor for the ACL reorder; carries the acl_reorder_conf_t
// that describes the reorder to be performed.
struct pd_t : public cpu_reorder_pd_t {

using cpu_reorder_pd_t::cpu_reorder_pd_t;

DECLARE_COMMON_PD_T("acl", acl_reorder_fwd_t);

// Factory for the descriptor. Declared only; presumably validates the
// src/dst descriptors and fills `app_` -- confirm in the definition.
static status_t create(reorder_pd_t **reorder_pd, engine_t *engine,
const primitive_attr_t *attr, engine_t *src_engine,
const memory_desc_t *src_md, engine_t *dst_engine,
const memory_desc_t *dst_md);

friend dnnl::impl::impl_list_item_t;
// ACL reorder configuration held by this descriptor.
acl_reorder_conf_t app_;

}; // pd_t

acl_reorder_fwd_t(const pd_t *apd) : primitive_t(apd) {}

// Overrides primitive_t::create_resource; declared only in this header.
status_t create_resource(
engine_t *engine, resource_mapper_t &mapper) const override;

// Overrides primitive_t::execute; declared only in this header.
status_t execute(const exec_ctx_t &ctx) const override;

private:
// To guard the const execute_forward, the mutex must be 'mutable'
mutable std::mutex mtx;
status_t execute_forward(const exec_ctx_t &ctx) const;
// Returns the base-class descriptor pointer cast to this primitive's pd_t.
inline const pd_t *pd() const {
return (const pd_t *)primitive_t::pd().get();
}

}; // acl_reorder_fwd_t

} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl

#endif // CPU_ACL_REORDER_HPP
#endif // CPU_AARCH64_ACL_REORDER_HPP
16 changes: 12 additions & 4 deletions src/cpu/aarch64/jit_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@
#endif

#define DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_name) \
const char *name() const override { return STRINGIFY(jit_name); } \
const char *source_file() const override { return __FILE__; }
const char *name() const override { \
return STRINGIFY(jit_name); \
} \
const char *source_file() const override { \
return __FILE__; \
}

#define LD_MUL_VL(mn, op, mask, addr, off, size) \
do { \
Expand Down Expand Up @@ -709,8 +713,9 @@ class jit_generator_t : public Xbyak_aarch64::CodeGenerator,
jit_generator_t(void *code_ptr = nullptr, size_t code_size = MAX_CODE_SIZE,
bool use_autogrow = true, cpu_isa_t max_cpu_isa = isa_all)
: Xbyak_aarch64::CodeGenerator(code_size,
(code_ptr == nullptr && use_autogrow) ? Xbyak_aarch64::AutoGrow
: code_ptr)
(code_ptr == nullptr && use_autogrow)
? Xbyak_aarch64::AutoGrow
: code_ptr)
, max_cpu_isa_(max_cpu_isa) {}
~jit_generator_t() override = default;

Expand Down Expand Up @@ -762,6 +767,9 @@ class jit_generator_t : public Xbyak_aarch64::CodeGenerator,
const uint8_t *jit_ker_ = nullptr;
};

// Preserve compatibility with OpenVINO sources that still use the old name.
using jit_generator = jit_generator_t;

} // namespace aarch64
} // namespace cpu
} // namespace impl
Expand Down
Loading
Loading