Skip to content

Commit 6437cdd

Browse files
committed
enable MSVC cl.exe build changes
1 parent cb24d65 commit 6437cdd

1 file changed

Lines changed: 3 additions & 33 deletions

File tree

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 3 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -289,22 +289,7 @@ namespace xsimd
289289
}
290290

291291
#if defined(_MSC_VER) && defined(_M_ARM64)
292-
// -----------------------------------------------------------------------
293-
// C++14-compatible dispatch helpers for MSVC ARM64.
294-
//
295-
// On MSVC ARM64, all NEON types are the same underlying type (__n128),
296-
// so overload resolution on NEON types does not work and the existing
297-
// std::tuple-based dispatcher cannot be used. The original workaround
298-
// used `if constexpr` (C++17). The helpers below replace that with
299-
// std::enable_if overloads, which are valid C++14.
300-
//
301-
// Each helper is a function template parameterised on the *element* type
302-
// T. The correct intrinsic is selected at compile time via enable_if on
303-
// sizeof(T) and std::is_unsigned<T> / std::is_floating_point<T>.
304-
// -----------------------------------------------------------------------
305292
namespace detail {
306-
307-
// -- load (for set<integral>) --
308293
// msvc_arm64_load overload for 1-byte unsigned element types.
// Selected via enable_if on sizeof(T) == 1 && std::is_unsigned<T>; the
// pointer is reinterpreted as const uint8_t* and loaded with vld1q_u8.
// Returns the loaded register as __n128.
template <class T>
309294
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1 && std::is_unsigned<T>::value, __n128>::type
310295
msvc_arm64_load(const T* d) noexcept { return vld1q_u8(reinterpret_cast<const uint8_t*>(d)); }
@@ -330,7 +315,6 @@ namespace xsimd
330315
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8 && std::is_signed<T>::value, __n128>::type
331316
msvc_arm64_load(const T* d) noexcept { return vld1q_s64(reinterpret_cast<const int64_t*>(d)); }
332317

333-
// -- load_u (for set<batch_bool>) -- loads from unsigned element array
334318
// msvc_arm64_load_u overload for 1-byte element types (signedness of T is
// not part of the condition). The argument is a pointer to the unsigned
// counterpart of T (as_unsigned_integer_t<T>) and is loaded with vld1q_u8.
// NOTE(review): T only appears through as_unsigned_integer_t<T>, so callers
// presumably have to spell T explicitly -- confirm against the call sites.
template <class T>
335319
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1, __n128>::type
336320
msvc_arm64_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u8(reinterpret_cast<const uint8_t*>(d)); }
@@ -344,7 +328,6 @@ namespace xsimd
344328
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8, __n128>::type
345329
msvc_arm64_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u64(reinterpret_cast<const uint64_t*>(d)); }
346330

347-
// -- eq for batch_bool (unsigned comparison by size) --
348331
// msvc_arm64_eq_bool overload for 1-byte element types: compares the two
// registers with vceqq_u8. T is used only to pick the overload (it does not
// appear in the parameter list), so it must be supplied explicitly.
template <class T>
349332
XSIMD_INLINE typename std::enable_if<sizeof(T)==1, __n128>::type
350333
msvc_arm64_eq_bool(__n128 a, __n128 b) noexcept { return vceqq_u8(a,b); }
@@ -355,10 +338,8 @@ namespace xsimd
355338
XSIMD_INLINE typename std::enable_if<sizeof(T)==4, __n128>::type
356339
msvc_arm64_eq_bool(__n128 a, __n128 b) noexcept { return vceqq_u32(a,b); }
357340

358-
} // namespace detail (MSVC ARM64 helpers)
341+
}
359342

360-
// Macro to generate C++14 enable_if dispatch overloads for a full binary op
361-
// (all 9 NEON element types: u8,s8,u16,s16,u32,s32,u64,s64,f32).
362343
#define XSIMD_MSVC_ARM64_BINARY_FULL(fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, u64fn, s64fn, f32fn) \
363344
namespace detail { \
364345
template <class T> \
@@ -390,7 +371,6 @@ namespace xsimd
390371
fname(__n128 a, __n128 b) noexcept { return s64fn(a,b); } \
391372
}
392373

393-
// Macro for binary ops excluding int64 (u8,s8,u16,s16,u32,s32,f32).
394374
#define XSIMD_MSVC_ARM64_BINARY_EX64(fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, f32fn) \
395375
namespace detail { \
396376
template <class T> \
@@ -416,7 +396,6 @@ namespace xsimd
416396
fname(__n128 a, __n128 b) noexcept { return f32fn(a,b); } \
417397
}
418398

419-
// Macro for unsigned-only binary ops excluding int64 (u8,u16,u32).
420399
#define XSIMD_MSVC_ARM64_BINARY_UINT_EX64(fname, u8fn, u16fn, u32fn) \
421400
namespace detail { \
422401
template <class T> \
@@ -430,7 +409,6 @@ namespace xsimd
430409
fname(__n128 a, __n128 b) noexcept { return u32fn(a,b); } \
431410
}
432411

433-
// Macro for unary ops excluding int64 (u8,s8,u16,s16,u32,s32,f32).
434412
#define XSIMD_MSVC_ARM64_UNARY_EX64(fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, f32fn) \
435413
namespace detail { \
436414
template <class T> \
@@ -456,7 +434,6 @@ namespace xsimd
456434
fname(__n128 a) noexcept { return f32fn(a); } \
457435
}
458436

459-
// Macro for select (ternary: cond, a, b) — all 9 types.
460437
#define XSIMD_MSVC_ARM64_SELECT_FULL(fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, u64fn, s64fn, f32fn) \
461438
namespace detail { \
462439
template <class T> \
@@ -488,7 +465,6 @@ namespace xsimd
488465
fname(__n128 c, __n128 a, __n128 b) noexcept { return s64fn(c,a,b); } \
489466
}
490467

491-
// Macro for bitwise ops on batch_bool (unsigned only, all sizes).
492468
#define XSIMD_MSVC_ARM64_BINARY_UINT_ALL(fname, u8fn, u16fn, u32fn, u64fn) \
493469
namespace detail { \
494470
template <class T> \
@@ -505,7 +481,6 @@ namespace xsimd
505481
fname##_bool(__n128 a, __n128 b) noexcept { return u64fn(a,b); } \
506482
}
507483

508-
// Macro for bitwise unary ops on batch_bool (unsigned only, all sizes).
509484
#define XSIMD_MSVC_ARM64_UNARY_UINT_ALL(fname, u8fn, u16fn, u32fn, u64fn) \
510485
namespace detail { \
511486
template <class T> \
@@ -522,7 +497,6 @@ namespace xsimd
522497
fname##_bool(__n128 a) noexcept { return u64fn(a); } \
523498
}
524499

525-
// Generate all dispatch helpers used by the MSVC ARM64 paths below.
526500
// Instantiate the detail:: dispatch overloads for add, sadd and sub.
// Argument order after the name is u8, s8, u16, s16, u32, s32, u64, s64, f32.
// msvc_arm64_sadd maps the integer slots to the vqaddq_* intrinsics and the
// f32 slot to plain vaddq_f32.
XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_add, vaddq_u8, vaddq_s8, vaddq_u16, vaddq_s16, vaddq_u32, vaddq_s32, vaddq_u64, vaddq_s64, vaddq_f32)
527501
XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_sadd, vqaddq_u8, vqaddq_s8, vqaddq_u16, vqaddq_s16, vqaddq_u32, vqaddq_s32, vqaddq_u64, vqaddq_s64, vaddq_f32)
528502
XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_sub, vsubq_u8, vsubq_s8, vsubq_u16, vsubq_s16, vsubq_u32, vsubq_s32, vsubq_u64, vsubq_s64, vsubq_f32)
@@ -544,26 +518,22 @@ namespace detail {
544518
}
545519
// Instantiate detail::msvc_arm64_abs for u8, s8, u16, s16, u32, s32 and f32.
// Signed and float slots use vabsq_*; unsigned slots use the
// msvc_arm64_abs_u8/u16/u32 helpers.
// NOTE(review): those helpers are defined outside the visible hunk; they are
// presumably pass-through for unsigned values -- confirm.
XSIMD_MSVC_ARM64_UNARY_EX64(msvc_arm64_abs, msvc_arm64_abs_u8, vabsq_s8, msvc_arm64_abs_u16, vabsq_s16, msvc_arm64_abs_u32, vabsq_s32, vabsq_f32)
546520

547-
// bitwise ops on batch<T,A>
548521
// Bitwise and / or / xor / andn dispatch overloads for batch<T, A>.
// Signed slots reuse the unsigned intrinsic of the same width and the f32
// slot uses the u8 intrinsic; every argument and result is __n128 here.
XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_and, vandq_u8, vandq_u8, vandq_u16, vandq_u16, vandq_u32, vandq_u32, vandq_u64, vandq_u64, vandq_u8)
549522
XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_or, vorrq_u8, vorrq_u8, vorrq_u16, vorrq_u16, vorrq_u32, vorrq_u32, vorrq_u64, vorrq_u64, vorrq_u8)
550523
XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_xor, veorq_u8, veorq_u8, veorq_u16, veorq_u16, veorq_u32, veorq_u32, veorq_u64, veorq_u64, veorq_u8)
551524
XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_andn, vbicq_u8, vbicq_u8, vbicq_u16, vbicq_u16, vbicq_u32, vbicq_u32, vbicq_u64, vbicq_u64, vbicq_u8)
552-
// bitwise ops on batch_bool<T,A>
525+
553526
// Bitwise and / or / xor / andn dispatch overloads for batch_bool<T, A>.
// The macro appends "_bool" to the given name (fname##_bool), so these
// produce msvc_arm64_and_bool, msvc_arm64_or_bool, msvc_arm64_xor_bool and
// msvc_arm64_andn_bool, using unsigned intrinsics (u8, u16, u32, u64).
XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_and, vandq_u8, vandq_u16, vandq_u32, vandq_u64)
554527
XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_or, vorrq_u8, vorrq_u16, vorrq_u32, vorrq_u64)
555528
XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_xor, veorq_u8, veorq_u16, veorq_u32, veorq_u64)
556529
XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_andn, vbicq_u8, vbicq_u16, vbicq_u32, vbicq_u64)
557530
namespace detail {
558-
// On MSVC ARM64 all NEON types are __n128, so vmvnq_u32 works for any lane width.
559531
// Helper passed as the u64 slot of the "not" dispatch below: forwards to
// vmvnq_u32 on the __n128 argument.
XSIMD_INLINE __n128 msvc_arm64_not_u64_impl(__n128 a) noexcept { return vmvnq_u32(a); }
560532
} // namespace detail
561533
// Generates the msvc_arm64_not_bool overloads (the macro appends "_bool");
// the 64-bit slot uses the helper above instead of a vmvnq_* intrinsic.
XSIMD_MSVC_ARM64_UNARY_UINT_ALL(msvc_arm64_not, vmvnq_u8, vmvnq_u16, vmvnq_u32, msvc_arm64_not_u64_impl)
562534

563-
// select
564535
// Instantiate detail::msvc_arm64_select (cond, a, b) for all nine element
// types (u8, s8, u16, s16, u32, s32, u64, s64, f32) using the matching
// vbslq_* intrinsic for each.
XSIMD_MSVC_ARM64_SELECT_FULL(msvc_arm64_select, vbslq_u8, vbslq_s8, vbslq_u16, vbslq_s16, vbslq_u32, vbslq_s32, vbslq_u64, vbslq_s64, vbslq_f32)
565536

566-
// rotate_left (N is a compile-time constant)
567537
namespace detail {
568538
template <size_t N, class T>
569539
XSIMD_INLINE typename std::enable_if<sizeof(T)==1 && std::is_unsigned<T>::value, __n128>::type
@@ -592,7 +562,7 @@ namespace detail {
592562
// msvc_arm64_rotate_left overload for signed 8-byte element types.
// N is a compile-time rotation count; it is reduced with N % 2 before being
// passed as the vextq_s64 index, with the same register given as both
// operands. T only selects the overload and must be supplied explicitly.
template <size_t N, class T>
593563
XSIMD_INLINE typename std::enable_if<sizeof(T)==8 && std::is_signed<T>::value, __n128>::type
594564
msvc_arm64_rotate_left(__n128 a) noexcept { return vextq_s64(a, a, N % 2); }
595-
} // namespace detail
565+
}
596566
#endif
597567

598568
/*************

0 commit comments

Comments
 (0)