Skip to content

Commit cf17859

Browse files
committed
Fix SQRTSS/SQRTSD semantics
1 parent e6ac7ca commit cf17859

1 file changed

Lines changed: 29 additions & 23 deletions

File tree

lib/Arch/X86/Semantics/SSE.cpp

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,31 +1682,35 @@ IF_AVX(DEF_ISEL(VMOVSHDUP_YMMqq_YMMqq) = MOVSHDUP<VV256W, V256>;)
16821682

16831683
namespace {
16841684

1685-
template <typename S1>
1686-
DEF_SEM(SQRTSS, V128W dst, S1 src1) {
1685+
template <typename D, typename S1, typename S2>
1686+
DEF_SEM(SQRTSS, D dst, S1 src1, S2 src2) {
1687+
1688+
// Extract a "single-precision" (32-bit) float from [31:0] of src2 vector:
1689+
auto src_float = FExtractV32(FReadV32(src2), 0);
16871690

1688-
// Extract a "single-precision" (32-bit) float from [31:0] of src1 vector:
1689-
auto src_float = FExtractV32(FReadV32(src1), 0);
1691+
// Initialize dest vector, while also copying src1[127:32] -> dst[127:32].
1692+
auto temp_vec = FReadV32(src1);
16901693

1691-
// Store the square root result in dest[32:0]:
1694+
// Store the square root result in dest[31:0]:
16921695
auto square_root = SquareRoot32(memory, state, src_float);
1693-
auto temp_vec = FReadV32(dst); // initialize a destination vector
16941696
temp_vec = FInsertV32(temp_vec, 0, square_root);
16951697

16961698
// Write out the result and return memory state:
16971699
FWriteV32(dst, temp_vec); // SSE: Writes to XMM, AVX: Zero-extends XMM.
16981700
return memory;
16991701
}
17001702

1701-
template <typename S1>
1702-
DEF_SEM(RSQRTSS, V128W dst, S1 src1) {
1703+
template <typename D, typename S1, typename S2>
1704+
DEF_SEM(RSQRTSS, D dst, S1 src1, S2 src2) {
17031705

1704-
// Extract a "single-precision" (32-bit) float from [31:0] of src1 vector:
1705-
auto src_float = FExtractV32(FReadV32(src1), 0);
1706+
// Extract a "single-precision" (32-bit) float from [31:0] of src2 vector:
1707+
auto src_float = FExtractV32(FReadV32(src2), 0);
17061708

1707-
// Store the square root result in dest[32:0]:
1709+
// Initialize dest vector, while also copying src1[127:32] -> dst[127:32].
1710+
auto temp_vec = FReadV32(src1);
1711+
1712+
// Store the reciprocal square root result in dest[31:0]:
17081713
auto square_root = SquareRoot32(memory, state, src_float);
1709-
auto temp_vec = FReadV32(dst); // initialize a destination vector
17101714
temp_vec = FInsertV32(temp_vec, 0, FDiv(1.0f, square_root));
17111715

17121716
// Write out the result and return memory state:
@@ -1753,8 +1757,8 @@ DEF_SEM(VRSQRTSS, D dst, S1 src1, S2 src2) {
17531757
#endif // HAS_FEATURE_AVX
17541758
} // namespace
17551759

1756-
DEF_ISEL(SQRTSS_XMMss_MEMss) = SQRTSS<MV32>;
1757-
DEF_ISEL(SQRTSS_XMMss_XMMss) = SQRTSS<V128>;
1760+
DEF_ISEL(SQRTSS_XMMss_MEMss) = SQRTSS<V128W, V128, MV32>;
1761+
DEF_ISEL(SQRTSS_XMMss_XMMss) = SQRTSS<V128W, V128, V128>;
17581762
IF_AVX(DEF_ISEL(VSQRTSS_XMMdq_XMMdq_MEMd) = VSQRTSS<VV128W, V128, MV32>;)
17591763
IF_AVX(DEF_ISEL(VSQRTSS_XMMdq_XMMdq_XMMd) = VSQRTSS<VV128W, V128, V128>;)
17601764
/*
@@ -1763,8 +1767,8 @@ IF_AVX(DEF_ISEL(VSQRTSS_XMMdq_XMMdq_XMMd) = VSQRTSS<VV128W, V128, V128>;)
17631767
4318 VSQRTSS VSQRTSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 AVX512 AVX512EVEX AVX512F_SCALAR ATTRIBUTES: DISP8_SCALAR MASKOP_EVEX MEMORY_FAULT_SUPPRESSION MXCSR SIMD_SCALAR
17641768
*/
17651769

1766-
DEF_ISEL(RSQRTSS_XMMss_MEMss) = RSQRTSS<MV32>;
1767-
DEF_ISEL(RSQRTSS_XMMss_XMMss) = RSQRTSS<V128>;
1770+
DEF_ISEL(RSQRTSS_XMMss_MEMss) = RSQRTSS<V128W, V128, MV32>;
1771+
DEF_ISEL(RSQRTSS_XMMss_XMMss) = RSQRTSS<V128W, V128, V128>;
17681772
IF_AVX(DEF_ISEL(VRSQRTSS_XMMdq_XMMdq_MEMd) = VRSQRTSS<VV128W, V128, MV32>;)
17691773
IF_AVX(DEF_ISEL(VRSQRTSS_XMMdq_XMMdq_XMMd) = VRSQRTSS<VV128W, V128, V128>;)
17701774

@@ -1801,15 +1805,17 @@ DEF_HELPER(SquareRoot64, float64_t src_float)->float64_t {
18011805
return square_root;
18021806
}
18031807

1804-
template <typename S1>
1805-
DEF_SEM(SQRTSD, V128W dst, S1 src1) {
1808+
template <typename D, typename S1, typename S2>
1809+
DEF_SEM(SQRTSD, D dst, S1 src1, S2 src2) {
1810+
1811+
// Extract a "double-precision" (64-bit) float from [63:0] of src2 vector:
1812+
auto src_float = FExtractV64(FReadV64(src2), 0);
18061813

1807-
// Extract a "double-precision" (64-bit) float from [63:0] of src1 vector:
1808-
auto src_float = FExtractV64(FReadV64(src1), 0);
1814+
// Initialize dest vector, while also copying src1[127:64] -> dst[127:64].
1815+
auto temp_vec = FReadV64(src1);
18091816

18101817
// Store the square root result in dest[63:0]:
18111818
auto square_root = SquareRoot64(memory, state, src_float);
1812-
auto temp_vec = FReadV64(dst); // initialize a destination vector
18131819
temp_vec = FInsertV64(temp_vec, 0, square_root);
18141820

18151821
// Write out the result and return memory state:
@@ -1839,8 +1845,8 @@ DEF_SEM(VSQRTSD, D dst, S1 src1, S2 src2) {
18391845

18401846
} // namespace
18411847

1842-
DEF_ISEL(SQRTSD_XMMsd_MEMsd) = SQRTSD<MV64>;
1843-
DEF_ISEL(SQRTSD_XMMsd_XMMsd) = SQRTSD<V128>;
1848+
DEF_ISEL(SQRTSD_XMMsd_MEMsd) = SQRTSD<V128W, V128, MV64>;
1849+
DEF_ISEL(SQRTSD_XMMsd_XMMsd) = SQRTSD<V128W, V128, V128>;
18441850
IF_AVX(DEF_ISEL(VSQRTSD_XMMdq_XMMdq_MEMq) = VSQRTSD<VV128W, V128, MV64>;)
18451851
IF_AVX(DEF_ISEL(VSQRTSD_XMMdq_XMMdq_XMMq) = VSQRTSD<VV128W, V128, V128>;)
18461852
/*

0 commit comments

Comments
 (0)