Skip to content

Commit 508f5aa

Browse files
committed
Add load_unaligned and allow construction from unaligned scalar arrays
1 parent 064fb7e commit 508f5aa

3 files changed

Lines changed: 54 additions & 5 deletions

File tree

src/impl/vamp/vector/avx.hh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ namespace vamp
3131
return _mm256_load_ps(f);
3232
}
3333

34+
// Reads a VectorT from `f` with `_mm256_loadu_ps`; sibling of the `load` whose `_mm256_load_ps`
// body ends just above.
// The unnamed `unsigned int` template parameter is not used in the body.
// NOTE(review): presumably it only mirrors the `load<0>` call shape used by callers — confirm.
// NOTE(review): `f` presumably must reference a full register's worth of readable floats, with no
// alignment requirement — confirm against the intrinsic's documentation.
template <unsigned int = 0>
35+
inline static constexpr auto load_unaligned(const ScalarT *const f) noexcept -> VectorT
36+
{
37+
return _mm256_loadu_ps(f);
38+
}
39+
3440
template <unsigned int = 0>
3541
inline static constexpr auto store(ScalarT *f, VectorT v) noexcept -> void
3642
{
@@ -408,6 +414,12 @@ namespace vamp
408414

409415
// Reads a VectorT from `i` with `_mm256_load_si256`, after casting the scalar pointer to
// `const __m256i *`.
// This hunk gives `load` a new body: the `_mm256_loadu_si256` body that used to follow this
// signature now sits under `load_unaligned` directly below.
// NOTE(review): call sites that relied on the previous unaligned behaviour presumably need
// suitably aligned data now — confirm against callers and the intrinsic's documentation.
template <unsigned int = 0>
410416
inline static constexpr auto load(const ScalarT *const i) noexcept -> VectorT
417+
{
418+
return _mm256_load_si256((const __m256i *const)i);
419+
}
420+
421+
// Reads a VectorT from `i` with `_mm256_loadu_si256`, after casting the scalar pointer to
// `const __m256i *`.
// The body lines are unchanged context in this hunk; only the template header and signature
// above them are new, so this is the code that `load` used to run.
// The unnamed `unsigned int` template parameter is not used in the body.
template <unsigned int = 0>
422+
inline static constexpr auto load_unaligned(const ScalarT *const i) noexcept -> VectorT
411423
{
412424
return _mm256_loadu_si256((const __m256i *const)i);
413425
}

src/impl/vamp/vector/interface.hh

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ namespace vamp
5959
inline static constexpr std::size_t num_rows = Sig::num_rows;
6060
using DataT = typename Sig::DataT;
6161

62-
inline constexpr auto
63-
to_array() const noexcept -> std::array<typename S::ScalarT, num_scalars_rounded>
62+
inline constexpr auto to_array() const noexcept
63+
-> std::array<typename S::ScalarT, num_scalars_rounded>
6464
{
6565
alignas(S::Alignment) std::array<typename S::ScalarT, num_scalars_rounded> result = {};
6666
to_array(result);
@@ -718,9 +718,10 @@ namespace vamp
718718
return S::template constant<0>(s);
719719
}
720720

721+
// Fills this vector from `scalar_data` by calling load_vector with an index sequence of
// `num_vectors` entries.
// `is_aligned` defaults to true and is passed through to load_vector as its first template
// argument.
// Of the two load_vector calls below, the first is the line this hunk removes and the second
// is the line it adds.
template <bool is_aligned = true>
721722
inline constexpr void pack(const typename S::ScalarT *const scalar_data) noexcept
722723
{
723-
load_vector(scalar_data, std::make_index_sequence<num_vectors>());
724+
load_vector<is_aligned>(scalar_data, std::make_index_sequence<num_vectors>());
724725
}
725726

726727
template <auto fn, std::size_t stride = 1, std::size_t... I>
@@ -737,13 +738,21 @@ namespace vamp
737738
(..., fn(base + I * stride, std::get<I>(data)));
738739
}
739740

740-
template <std::size_t... I>
741+
// For every index in `I...`, loads one register from `scalar_array + I * S::VectorWidth` and
// assigns it to `std::get<I>(d()->data)`.
// `is_aligned` is resolved at compile time by `if constexpr`: true uses `S::load<0>`, false uses
// `S::load_unaligned<0>`.
// The unconditional fold expression right after the TODO is the line this hunk removes; the
// `if constexpr` / `else` that follows is what replaces it.
template <bool is_aligned, std::size_t... I>
741742
inline constexpr void
742743
load_vector(const typename S::ScalarT *const scalar_array, std::index_sequence<I...>) noexcept
743744
{
744745
// TODO: This might segfault if we had to over-allocate vectors and the scalar data isn't
745746
// full for the over-allocated size
746-
(..., (std::get<I>(d()->data) = S::template load<0>(scalar_array + I * S::VectorWidth)));
747+
if constexpr (is_aligned)
748+
{
749+
(..., (std::get<I>(d()->data) = S::template load<0>(scalar_array + I * S::VectorWidth)));
750+
}
751+
else
752+
{
753+
(...,
754+
(std::get<I>(d()->data) = S::template load_unaligned<0>(scalar_array + I * S::VectorWidth)));
755+
}
747756
}
748757

749758
template <std::size_t... I>
@@ -815,6 +824,20 @@ namespace vamp
815824
{
816825
}
817826

827+
// TODO: Enable unaligned load for other constructors too
// Constructs the vector from the raw scalars at `scalar_data`.
// `is_aligned` is a run-time flag: true calls Interface::pack with its default template argument
// (`is_aligned = true`), false calls `pack<false>`.
// The `template` keyword on the second call makes `pack<false>` parse as a template-id when
// `Interface` is a dependent type; without it `<` would be read as less-than.
828+
constexpr Vector(const typename S::ScalarT *const scalar_data, bool is_aligned) noexcept
829+
{
830+
// NOTE: assumes scalar_data holds a whole multiple of VectorWidth valid scalars
831+
if (is_aligned)
832+
{
833+
Interface::pack(scalar_data);
834+
}
835+
else
836+
{
837+
Interface::template pack<false>(scalar_data);
838+
}
839+
}
840+
818841
constexpr Vector(const typename S::ScalarT *const scalar_data) noexcept
819842
{
820843
// NOTE: assumes that scalar_data is a multiple of VectorWidth of valid data

src/impl/vamp/vector/neon.hh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ namespace vamp
3434
return vld1q_f32(f);
3535
}
3636

37+
// Reads a VectorT from `f` with `vld1q_f32` — the same intrinsic whose call closes the `load`
// body just above, so the two functions currently behave identically.
// The unnamed `unsigned int` template parameter is not used in the body.
template <unsigned int = 0>
38+
inline static auto load_unaligned(const ScalarT *const f) noexcept -> VectorT
39+
{
40+
// NOTE: The same instruction seems to do double-duty for ARM?
41+
return vld1q_f32(f);
42+
}
43+
3744
template <unsigned int = 0>
3845
inline static auto store(ScalarT *f, VectorT v) noexcept -> void
3946
{
@@ -490,6 +497,13 @@ namespace vamp
490497
return vld1q_s32((const int32_t *const)i);
491498
}
492499

500+
// Reads a VectorT from `i` with `vld1q_s32`, after casting the scalar pointer to
// `const int32_t *` — the same call that closes the `load` body just above, so the two functions
// currently behave identically.
// The unnamed `unsigned int` template parameter is not used in the body.
template <unsigned int = 0>
501+
inline static auto load_unaligned(const ScalarT *const i) noexcept -> VectorT
502+
{
503+
// NOTE: The same instruction seems to do double-duty for ARM?
504+
return vld1q_s32((const int32_t *const)i);
505+
}
506+
493507
template <unsigned int = 0>
494508
inline static auto store(ScalarT *i, VectorT v) noexcept -> void
495509
{

0 commit comments

Comments
 (0)