@@ -101,13 +101,16 @@ typedef enum {
101101 DEC ,
102102 JMP ,
103103 MOVSXD ,
104+ PUSHFQ ,
105+ POPFQ ,
104106 // FPU
105107 FSTP ,
106108 FSTP32 ,
107109 FLD ,
108110 FLD32 ,
109111 FLDCW ,
110112 // SSE
113+ MOVQ ,
111114 MOVSD ,
112115 MOVSS ,
113116 COMISD ,
@@ -121,6 +124,9 @@ typedef enum {
121124 MULSS ,
122125 DIVSS ,
123126 XORPS ,
127+ ANDPD ,
128+ ANDNPD ,
129+ ORPD ,
124130 XORPD ,
125131 CVTSI2SD ,
126132 CVTSI2SS ,
@@ -227,13 +233,16 @@ static opform OP_FORMS[] = {
227233 { "DEC" , IS_64 ? RM (0xFF ,1 ) : 0x48 , RM (0xFF ,1 ) },
228234 { "JMP" , RM (0xFF ,4 ) },
229235 { "MOVSXD" , 0x63 },
236+ { "PUSHFQ" , 0x9C },
237+ { "POPFQ" , 0x9D },
230238 // FPU
231239 { "FSTP" , 0 , RM (0xDD ,3 ) },
232240 { "FSTP32" , 0 , RM (0xD9 ,3 ) },
233241 { "FLD" , 0 , RM (0xDD ,0 ) },
234242 { "FLD32" , 0 , RM (0xD9 ,0 ) },
235243 { "FLDCW" , 0 , RM (0xD9 , 5 ) },
236244 // SSE
245+ { "MOVQ" , 0x660F6E },
237246 { "MOVSD" , 0xF20F10 , 0xF20F11 },
238247 { "MOVSS" , 0xF30F10 , 0xF30F11 },
239248 { "COMISD" , LONG_RM (0x660F2F ,1 ) },
@@ -247,6 +256,9 @@ static opform OP_FORMS[] = {
247256 { "MULSS" , 0xF30F59 },
248257 { "DIVSS" , 0xF30F5E },
249258 { "XORPS" , LONG_OP (0x0F57 ) },
259+ { "ANDPD" , 0x660F54 },
260+ { "ANDNPD" , 0x660F55 },
261+ { "ORPD" , 0x660F56 },
250262 { "XORPD" , 0x660F57 },
251263 { "CVTSI2SD" , 0xF20F2A },
252264 { "CVTSI2SS" , 0xF30F2A },
@@ -1089,6 +1101,13 @@ static void emit_cmov( code_ctx *ctx, ereg out, ereg r, int cond, emit_mode m )
10891101 MOD_RM (3 ,out ,r );
10901102}
10911103
1104+ static void emit_cset ( code_ctx * ctx , ereg out , int cond ) {
1105+ if ( (out & 8 ) ) B (0x41 );
1106+ B (0x0F );
1107+ B (cond + 0x10 );
1108+ MOD_RM (3 ,0 ,out );
1109+ }
1110+
10921111void hl_codegen_function ( jit_ctx * jit ) {
10931112 code_ctx * ctx = jit -> code ;
10941113 ctx -> flushed = false;
@@ -1500,7 +1519,33 @@ void hl_codegen_function( jit_ctx *jit ) {
15001519 {
15011520 int cond = get_cond_jump (ctx );
15021521 if ( !IS_REG (out ) ) jit_assert ();
1503- if ( IS_REG (e -> a ) ) {
1522+ if ( IS_FLOAT (e -> mode ) ) {
1523+ EMIT (PUSHFQ ,UNUSED ,UNUSED ,M_PTR );
1524+ // create a mask in RTMP to be used for xmm
1525+ emit_cset (ctx ,RTMP ,cond );
1526+ EMIT (MOVZX8 ,RTMP ,RTMP ,M_PTR );
1527+ EMIT (NEG ,RTMP ,UNUSED ,M_PTR );
1528+ // do dst := (mask & src) | (dst & ~mask)
1529+ ereg tmp = get_tmp (M_F64 );
1530+ EMIT (MOVQ ,tmp ,RTMP ,M_PTR );
1531+ EMIT (ANDNPD ,tmp ,out ,M_F64 );
1532+ EMIT (MOVQ ,out ,RTMP ,M_PTR );
1533+ if ( !IS_REG (e -> a ) ) {
1534+ // ANDNPD requires aligned address !
1535+ ereg tmp2 = out == MMX (0 ) ? MMX (1 ) : MMX (0 );
1536+ EMIT (SUB ,R (RSP ),MK_CONST (8 ),M_PTR );
1537+ EMIT (MOVSD ,REG_PTR (R (RSP )),tmp2 ,M_F64 );
1538+ EMIT (e -> mode == M_F32 ? MOVSS : MOVSD ,tmp2 ,e -> a ,e -> mode );
1539+ EMIT (ANDPD ,out ,tmp2 ,M_F64 );
1540+ EMIT (ORPD ,out ,tmp ,M_F64 );
1541+ EMIT (MOVSD ,tmp2 ,REG_PTR (R (RSP )),M_PTR );
1542+ EMIT (ADD ,R (RSP ),MK_CONST (8 ),M_PTR );
1543+ } else {
1544+ EMIT (ANDPD ,out ,e -> a ,M_F64 );
1545+ EMIT (ORPD ,out ,tmp ,M_F64 );
1546+ }
1547+ EMIT (POPFQ ,UNUSED ,UNUSED ,M_PTR );
1548+ } else if ( IS_REG (e -> a ) ) {
15041549 emit_cmov (ctx ,out ,e -> a ,cond ,M_PTR );
15051550 } else {
15061551 emit_mov (ctx ,RTMP ,e -> a ,e -> mode );
0 commit comments