@@ -527,7 +527,12 @@ namespace xsimd
527527 __m256i mask = _mm256_setr_epi16 (0xFFFF , 0xFFFF , 0x0000 , 0x0000 , 0xFFFF , 0xFFFF , 0x0000 , 0x0000 ,
528528 0xFFFF , 0xFFFF , 0x0000 , 0x0000 , 0xFFFF , 0xFFFF , 0x0000 , 0x0000 );
529529 __m256i xL = _mm256_or_si256 (_mm256_and_si256 (mask, x), _mm256_andnot_si256 (mask, _mm256_castpd_si256 (_mm256_set1_pd (0x0010000000000000 )))); // 2^52
530- __m256d f = _mm256_sub_pd (_mm256_castsi256_pd (xH), _mm256_set1_pd (19342813118337666422669312 .)); // 2^84 + 2^52
530+ #ifdef __FAST_MATH__
531+ volatile
532+ #endif
533+ // Under -ffast-math, the volatile qualifier keeps the compiler from reassociating this subtraction with the add below, which matters when converting from (-1, -1, ...)
534+ __m256d f
535+ = _mm256_sub_pd (_mm256_castsi256_pd (xH), _mm256_set1_pd (19342813118337666422669312 .)); // 2^84 + 2^52
531536 return _mm256_add_pd (f, _mm256_castsi256_pd (xL));
532537 }
533538
@@ -542,7 +547,12 @@ namespace xsimd
542547 __m256i mask = _mm256_setr_epi16 (0xFFFF , 0xFFFF , 0xFFFF , 0x0000 , 0xFFFF , 0xFFFF , 0xFFFF , 0x0000 ,
543548 0xFFFF , 0xFFFF , 0xFFFF , 0x0000 , 0xFFFF , 0xFFFF , 0xFFFF , 0x0000 );
544549 __m256i xL = _mm256_or_si256 (_mm256_and_si256 (mask, x), _mm256_andnot_si256 (mask, _mm256_castpd_si256 (_mm256_set1_pd (0x0010000000000000 )))); // 2^52
545- __m256d f = _mm256_sub_pd (_mm256_castsi256_pd (xH), _mm256_set1_pd (442726361368656609280 .)); // 3*2^67 + 2^52
550+ #ifdef __FAST_MATH__
551+ volatile
552+ #endif
553+ // Under -ffast-math, the volatile qualifier keeps the compiler from reassociating this subtraction with the add below, which matters when converting from (-1, -1, ...)
554+ __m256d f
555+ = _mm256_sub_pd (_mm256_castsi256_pd (xH), _mm256_set1_pd (442726361368656609280 .)); // 3*2^67 + 2^52
546556 return _mm256_add_pd (f, _mm256_castsi256_pd (xL));
547557 }
548558 }
0 commit comments