Skip to content

Commit 6d3a974

Browse files
committed
fix: centralize fast-math reassociation barriers
1 parent 3cea665 commit 6d3a974

16 files changed

Lines changed: 54 additions & 126 deletions

.github/workflows/windows.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,37 @@ jobs:
9292
run: |
9393
cd _build && ./test/test_xsimd
9494
95+
build-windows-clang-cl-fast-math:
96+
name: 'clang-cl 19 x64 /fp:fast'
97+
defaults:
98+
run:
99+
shell: bash {0}
100+
runs-on: windows-2025
101+
steps:
102+
- name: Setup compiler
103+
uses: ilammy/msvc-dev-cmd@v1
104+
with:
105+
arch: amd64
106+
- name: Setup LLVM 19
107+
run: |
108+
choco install llvm --version=19.1.7 -y --no-progress
109+
- name: Setup Ninja
110+
run: |
111+
python3 -m pip install --upgrade pip setuptools wheel
112+
python3 -m pip install ninja
113+
- name: Checkout xsimd
114+
uses: actions/checkout@v3
115+
- name: Setup
116+
run: |
117+
mkdir _build
118+
cd _build && cmake .. -DBUILD_TESTS=ON -DDOWNLOAD_DOCTEST=ON -DBUILD_BENCHMARK=OFF -DBUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_CXX_FLAGS="/fp:fast" -G Ninja
119+
- name: Build
120+
run: |
121+
cd _build && cmake --build .
122+
- name: Testing xsimd
123+
run: |
124+
cd _build && ./test/test_xsimd
125+
95126
build-windows-arm64:
96127
name: 'MSVC arm64'
97128
defaults:

include/xsimd/arch/common/xsimd_common_details.hpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -111,26 +111,20 @@ namespace xsimd
111111

112112
namespace detail
113113
{
114-
template <class A>
115-
XSIMD_INLINE memory_barrier_tag barrier_tag(A const&) noexcept
116-
{
117-
return {};
118-
}
119-
120114
template <class T>
121115
XSIMD_INLINE void reassociation_barrier(T& x, memory_barrier_tag) noexcept
122116
{
123-
#if XSIMD_WITH_GNU_INLINE_ASM
117+
#if XSIMD_WITH_INLINE_ASM
124118
__asm__ volatile("" : : "r"(&x) : "memory");
125119
#else
126120
(void)x;
127121
#endif
128122
}
129123

130124
template <class T, class A>
131-
XSIMD_INLINE void reassociation_barrier(T& x, A const& arch) noexcept
125+
XSIMD_INLINE void reassociation_barrier(T& x, A const&) noexcept
132126
{
133-
detail::reassociation_barrier(x, detail::barrier_tag(arch));
127+
detail::reassociation_barrier(x, memory_barrier_tag {});
134128
}
135129

136130
template <class F, class A, class T, class... Batches>

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,6 @@ namespace xsimd
3737

3838
namespace detail
3939
{
40-
XSIMD_INLINE x86_barrier_tag barrier_tag(avx const&) noexcept
41-
{
42-
return {};
43-
}
44-
4540
XSIMD_INLINE __m128i lower_half(__m256i self) noexcept
4641
{
4742
return _mm256_castsi256_si128(self);

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include <complex>
1616
#include <type_traits>
1717

18-
#include "./xsimd_common_fwd.hpp"
1918
#include "../types/xsimd_avx2_register.hpp"
2019
#include "../types/xsimd_batch_constant.hpp"
2120
#include "./utils/shifts.hpp"

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,6 @@ namespace xsimd
4242

4343
namespace detail
4444
{
45-
XSIMD_INLINE x86_barrier_tag barrier_tag(avx512f const&) noexcept
46-
{
47-
return {};
48-
}
49-
5045
XSIMD_INLINE __m256 lower_half(__m512 self) noexcept
5146
{
5247
return _mm512_castps512_ps256(self);

include/xsimd/arch/xsimd_common_fwd.hpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,30 +23,13 @@ namespace xsimd
2323
class batch;
2424
template <class T, class A>
2525
class batch_bool;
26-
2726
namespace kernel
2827
{
2928
namespace detail
3029
{
3130
struct memory_barrier_tag
3231
{
3332
};
34-
35-
struct x86_barrier_tag
36-
{
37-
};
38-
39-
struct arm_barrier_tag
40-
{
41-
};
42-
43-
struct vsx_barrier_tag
44-
{
45-
};
46-
47-
struct rvv_barrier_tag
48-
{
49-
};
5033
}
5134
}
5235
template <class T, class A, T... Vs>
@@ -127,9 +110,6 @@ namespace xsimd
127110
// Forward declarations for pack-level helpers
128111
namespace detail
129112
{
130-
template <class A>
131-
XSIMD_INLINE memory_barrier_tag barrier_tag(A const&) noexcept;
132-
133113
template <class T>
134114
XSIMD_INLINE void reassociation_barrier(T& x, memory_barrier_tag) noexcept;
135115

@@ -150,7 +130,6 @@ namespace xsimd
150130
XSIMD_INLINE constexpr bool is_only_from_lo(batch_constant<T, A, Vs...>) noexcept;
151131
template <typename T, class A, T... Vs>
152132
XSIMD_INLINE constexpr bool is_only_from_hi(batch_constant<T, A, Vs...>) noexcept;
153-
154133
}
155134
}
156135
}

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -159,21 +159,6 @@ namespace xsimd
159159

160160
namespace detail
161161
{
162-
XSIMD_INLINE arm_barrier_tag barrier_tag(neon const&) noexcept
163-
{
164-
return {};
165-
}
166-
167-
template <class T>
168-
XSIMD_INLINE void reassociation_barrier(T& x, arm_barrier_tag) noexcept
169-
{
170-
#if XSIMD_WITH_GNU_INLINE_ASM
171-
__asm__ volatile("" : "+w"(x));
172-
#else
173-
detail::reassociation_barrier(x, memory_barrier_tag {});
174-
#endif
175-
}
176-
177162
template <template <class> class return_type, class... T>
178163
struct neon_dispatcher_base
179164
{

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ namespace xsimd
709709
return vcvtnq_s32_f32(self);
710710
}
711711

712-
#if !XSIMD_WITH_GNU_INLINE_ASM
712+
#if !defined(__GNUC__)
713713
template <class A>
714714
XSIMD_INLINE batch<int64_t, A> nearbyint_as_int(batch<double, A> const& self,
715715
requires_arch<neon64>) noexcept

include/xsimd/arch/xsimd_rvv.hpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -289,21 +289,6 @@ namespace xsimd
289289
{
290290
namespace detail
291291
{
292-
XSIMD_INLINE rvv_barrier_tag barrier_tag(rvv const&) noexcept
293-
{
294-
return {};
295-
}
296-
297-
template <class T>
298-
XSIMD_INLINE void reassociation_barrier(T& x, rvv_barrier_tag) noexcept
299-
{
300-
#if XSIMD_WITH_GNU_INLINE_ASM
301-
__asm__ volatile("" : "+vr"(x));
302-
#else
303-
detail::reassociation_barrier(x, memory_barrier_tag {});
304-
#endif
305-
}
306-
307292
template <class T>
308293
using rvv_fix_char_t = types::detail::rvv_fix_char_t<T>;
309294
template <class T, size_t Width = XSIMD_RVV_BITS>

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,6 @@ namespace xsimd
3737

3838
namespace detail
3939
{
40-
XSIMD_INLINE x86_barrier_tag barrier_tag(sse2 const&) noexcept
41-
{
42-
return {};
43-
}
44-
45-
template <class T>
46-
XSIMD_INLINE void reassociation_barrier(T& x, x86_barrier_tag) noexcept
47-
{
48-
#if XSIMD_WITH_GNU_INLINE_ASM && XSIMD_TARGET_X86
49-
__asm__ volatile("" : "+x"(x));
50-
#else
51-
detail::reassociation_barrier(x, memory_barrier_tag {});
52-
#endif
53-
}
54-
5540
constexpr uint32_t shuffle(uint32_t w, uint32_t x, uint32_t y, uint32_t z)
5641
{
5742
return (z << 6) | (y << 4) | (x << 2) | w;
@@ -731,6 +716,8 @@ namespace xsimd
731716
__m128i mask = _mm_setr_epi16(0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000);
732717
__m128i xL = _mm_or_si128(_mm_and_si128(mask, x), _mm_andnot_si128(mask, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)))); // 2^52
733718
__m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(19342813118337666422669312.)); // 2^84 + 2^52
719+
// Prevent -ffast-math from reassociating (xH-C)+xL into xH+(xL-C).
720+
detail::reassociation_barrier(f, sse2 {});
734721
return _mm_add_pd(f, _mm_castsi128_pd(xL));
735722
}
736723

@@ -745,6 +732,8 @@ namespace xsimd
745732
__m128i mask = _mm_setr_epi16(0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000);
746733
__m128i xL = _mm_or_si128(_mm_and_si128(mask, x), _mm_andnot_si128(mask, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)))); // 2^52
747734
__m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(442726361368656609280.)); // 3*2^67 + 2^52
735+
// Prevent -ffast-math from reassociating (xH-C)+xL into xH+(xL-C).
736+
detail::reassociation_barrier(f, sse2 {});
748737
return _mm_add_pd(f, _mm_castsi128_pd(xL));
749738
}
750739

0 commit comments

Comments
 (0)