Skip to content

Commit c81919f

Browse files
committed
fix: reorder first() before get() in avx/avx512f
Under strict two-phase name lookup (Clang, recent GCC), the get() templates called first() before it was declared: ordinary unqualified lookup at the template definition point found nothing, and ADL could not help either, because the batch arguments live in namespace xsimd while first() lives in xsimd::kernel. Move the first() overloads ahead of get() so they are visible at template definition time.
1 parent 945fce0 commit c81919f

2 files changed

Lines changed: 80 additions & 80 deletions

File tree

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,46 @@ namespace xsimd
748748
return self - batch<T, A>(mask.data);
749749
}
750750

751+
// first
752+
template <class A>
753+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx>) noexcept
754+
{
755+
return _mm256_cvtss_f32(self);
756+
}
757+
758+
template <class A>
759+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx>) noexcept
760+
{
761+
return _mm256_cvtsd_f64(self);
762+
}
763+
764+
template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
765+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx>) noexcept
766+
{
767+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
768+
{
769+
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFF);
770+
}
771+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
772+
{
773+
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFFFF);
774+
}
775+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
776+
{
777+
return static_cast<T>(_mm256_cvtsi256_si32(self));
778+
}
779+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
780+
{
781+
batch<T, sse4_2> low = _mm256_castsi256_si128(self);
782+
return first(low, sse4_2 {});
783+
}
784+
else
785+
{
786+
assert(false && "unsupported arch/op combination");
787+
return {};
788+
}
789+
}
790+
751791
// get
752792
template <class A, size_t I>
753793
XSIMD_INLINE float get(batch<float, A> const& self, ::xsimd::index<I>, requires_arch<avx>) noexcept
@@ -2049,46 +2089,6 @@ namespace xsimd
20492089
return _mm256_insertf128_pd(lo, _mm256_castpd256_pd128(hi), 1);
20502090
}
20512091

2052-
// first
2053-
template <class A>
2054-
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx>) noexcept
2055-
{
2056-
return _mm256_cvtss_f32(self);
2057-
}
2058-
2059-
template <class A>
2060-
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx>) noexcept
2061-
{
2062-
return _mm256_cvtsd_f64(self);
2063-
}
2064-
2065-
template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
2066-
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx>) noexcept
2067-
{
2068-
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
2069-
{
2070-
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFF);
2071-
}
2072-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
2073-
{
2074-
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFFFF);
2075-
}
2076-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
2077-
{
2078-
return static_cast<T>(_mm256_cvtsi256_si32(self));
2079-
}
2080-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
2081-
{
2082-
batch<T, sse4_2> low = _mm256_castsi256_si128(self);
2083-
return first(low, sse4_2 {});
2084-
}
2085-
else
2086-
{
2087-
assert(false && "unsupported arch/op combination");
2088-
return {};
2089-
}
2090-
}
2091-
20922092
// widen
20932093
template <class A, class T>
20942094
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<avx>) noexcept

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,6 +1346,46 @@ namespace xsimd
13461346
}
13471347
}
13481348

1349+
// first
1350+
template <class A>
1351+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx512f>) noexcept
1352+
{
1353+
return _mm512_cvtss_f32(self);
1354+
}
1355+
1356+
template <class A>
1357+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx512f>) noexcept
1358+
{
1359+
return _mm512_cvtsd_f64(self);
1360+
}
1361+
1362+
template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
1363+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx512f>) noexcept
1364+
{
1365+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1366+
{
1367+
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)) & 0xFF);
1368+
}
1369+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1370+
{
1371+
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)) & 0xFFFF);
1372+
}
1373+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1374+
{
1375+
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)));
1376+
}
1377+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1378+
{
1379+
batch<T, sse4_2> low = _mm512_castsi512_si128(self);
1380+
return first(low, sse4_2 {});
1381+
}
1382+
else
1383+
{
1384+
assert(false && "unsupported arch/op combination");
1385+
return {};
1386+
}
1387+
}
1388+
13491389
// get: use valignd/valignq to rotate lane I into position 0 in a single op.
13501390
template <class A, size_t I>
13511391
XSIMD_INLINE float get(batch<float, A> const& self, ::xsimd::index<I>, requires_arch<avx512f>) noexcept
@@ -2804,46 +2844,6 @@ namespace xsimd
28042844
2));
28052845
}
28062846

2807-
// first
2808-
template <class A>
2809-
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx512f>) noexcept
2810-
{
2811-
return _mm512_cvtss_f32(self);
2812-
}
2813-
2814-
template <class A>
2815-
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx512f>) noexcept
2816-
{
2817-
return _mm512_cvtsd_f64(self);
2818-
}
2819-
2820-
template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
2821-
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx512f>) noexcept
2822-
{
2823-
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
2824-
{
2825-
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)) & 0xFF);
2826-
}
2827-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
2828-
{
2829-
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)) & 0xFFFF);
2830-
}
2831-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
2832-
{
2833-
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)));
2834-
}
2835-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
2836-
{
2837-
batch<T, sse4_2> low = _mm512_castsi512_si128(self);
2838-
return first(low, sse4_2 {});
2839-
}
2840-
else
2841-
{
2842-
assert(false && "unsupported arch/op combination");
2843-
return {};
2844-
}
2845-
}
2846-
28472847
// widen
28482848
template <class A, class T>
28492849
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<avx512f>) noexcept

0 commit comments

Comments
 (0)