Skip to content

Commit c78168c

Browse files
Prevent invalid floating-point reassociation in the to_float implementation for avx2 under -ffast-math
Fix #1264
1 parent 548b05f commit c78168c

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -527,7 +527,12 @@ namespace xsimd
527527
__m256i mask = _mm256_setr_epi16(0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000,
528528
0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000);
529529
__m256i xL = _mm256_or_si256(_mm256_and_si256(mask, x), _mm256_andnot_si256(mask, _mm256_castpd_si256(_mm256_set1_pd(0x0010000000000000)))); // 2^52
530-
__m256d f = _mm256_sub_pd(_mm256_castsi256_pd(xH), _mm256_set1_pd(19342813118337666422669312.)); // 2^84 + 2^52
530+
#ifdef __FAST_MATH__
531+
volatile
532+
#endif
533+
// Under -ffast-math, volatile prevents the compiler from reassociating this subtraction with the add below (observed when converting from (-1, -1, ...))
534+
__m256d f
535+
= _mm256_sub_pd(_mm256_castsi256_pd(xH), _mm256_set1_pd(19342813118337666422669312.)); // 2^84 + 2^52
531536
return _mm256_add_pd(f, _mm256_castsi256_pd(xL));
532537
}
533538

@@ -542,7 +547,12 @@ namespace xsimd
542547
__m256i mask = _mm256_setr_epi16(0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000,
543548
0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000);
544549
__m256i xL = _mm256_or_si256(_mm256_and_si256(mask, x), _mm256_andnot_si256(mask, _mm256_castpd_si256(_mm256_set1_pd(0x0010000000000000)))); // 2^52
545-
__m256d f = _mm256_sub_pd(_mm256_castsi256_pd(xH), _mm256_set1_pd(442726361368656609280.)); // 3*2^67 + 2^52
550+
#ifdef __FAST_MATH__
551+
volatile
552+
#endif
553+
// Under -ffast-math, volatile prevents the compiler from reassociating this subtraction with the add below (observed when converting from (-1, -1, ...))
554+
__m256d f
555+
= _mm256_sub_pd(_mm256_castsi256_pd(xH), _mm256_set1_pd(442726361368656609280.)); // 3*2^67 + 2^52
546556
return _mm256_add_pd(f, _mm256_castsi256_pd(xL));
547557
}
548558
}

0 commit comments

Comments
 (0)