Skip to content

Commit b4068e3

Browse files
committed
implemented CMOV for XMM registers
1 parent 9bcc474 commit b4068e3

1 file changed

Lines changed: 46 additions & 1 deletion

File tree

src/jit_x86_64.c

Lines changed: 46 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,13 +101,16 @@ typedef enum {
101101
DEC,
102102
JMP,
103103
MOVSXD,
104+
PUSHFQ,
105+
POPFQ,
104106
// FPU
105107
FSTP,
106108
FSTP32,
107109
FLD,
108110
FLD32,
109111
FLDCW,
110112
// SSE
113+
MOVQ,
111114
MOVSD,
112115
MOVSS,
113116
COMISD,
@@ -121,6 +124,9 @@ typedef enum {
121124
MULSS,
122125
DIVSS,
123126
XORPS,
127+
ANDPD,
128+
ANDNPD,
129+
ORPD,
124130
XORPD,
125131
CVTSI2SD,
126132
CVTSI2SS,
@@ -227,13 +233,16 @@ static opform OP_FORMS[] = {
227233
{ "DEC", IS_64 ? RM(0xFF,1) : 0x48, RM(0xFF,1) },
228234
{ "JMP", RM(0xFF,4) },
229235
{ "MOVSXD", 0x63 },
236+
{ "PUSHFQ", 0x9C },
237+
{ "POPFQ", 0x9D },
230238
// FPU
231239
{ "FSTP", 0, RM(0xDD,3) },
232240
{ "FSTP32", 0, RM(0xD9,3) },
233241
{ "FLD", 0, RM(0xDD,0) },
234242
{ "FLD32", 0, RM(0xD9,0) },
235243
{ "FLDCW", 0, RM(0xD9, 5) },
236244
// SSE
245+
{ "MOVQ", 0x660F6E },
237246
{ "MOVSD", 0xF20F10, 0xF20F11 },
238247
{ "MOVSS", 0xF30F10, 0xF30F11 },
239248
{ "COMISD", LONG_RM(0x660F2F,1) },
@@ -247,6 +256,9 @@ static opform OP_FORMS[] = {
247256
{ "MULSS", 0xF30F59 },
248257
{ "DIVSS", 0xF30F5E },
249258
{ "XORPS", LONG_OP(0x0F57) },
259+
{ "ANDPD", 0x660F54 },
260+
{ "ANDNPD", 0x660F55 },
261+
{ "ORPD", 0x660F56 },
250262
{ "XORPD", 0x660F57 },
251263
{ "CVTSI2SD", 0xF20F2A },
252264
{ "CVTSI2SS", 0xF30F2A },
@@ -1089,6 +1101,13 @@ static void emit_cmov( code_ctx *ctx, ereg out, ereg r, int cond, emit_mode m )
10891101
MOD_RM(3,out,r);
10901102
}
10911103

1104+
// Emits a two-byte-opcode instruction that targets the single register `out`:
//   [0x41 prefix if bit 3 of `out` is set] 0x0F (cond + 0x10) ModRM(mod=3, reg=0, rm=out)
// With `cond` holding a near-Jcc second opcode byte (0x80+cc) this yields SETcc
// (0x0F 0x90+cc) - presumably that is the encoding get_cond_jump() returns; TODO confirm.
//   ctx  : code context; not referenced by name here, presumably used by B / MOD_RM
//   out  : destination register
//   cond : condition opcode byte, 0x10 is added to form the emitted opcode
// NOTE(review): a prefix is only emitted for registers 8-15. On x86-64 a byte
// access to registers 4-7 without any REX prefix selects AH/CH/DH/BH, so confirm
// callers never pass those registers as `out`.
static void emit_cset( code_ctx *ctx, ereg out, int cond ) {
	int opcode = cond + 0x10;
	if( out & 8 ) {
		// REX.B : extends the ModRM rm field to reach the upper eight registers
		B(0x41);
	}
	// two-byte opcode escape followed by the condition-specific opcode byte
	B(0x0F);
	B(opcode);
	// register-direct form, the reg field is unused and left at 0
	MOD_RM(3,0,out);
}
1110+
10921111
void hl_codegen_function( jit_ctx *jit ) {
10931112
code_ctx *ctx = jit->code;
10941113
ctx->flushed = false;
@@ -1500,7 +1519,33 @@ void hl_codegen_function( jit_ctx *jit ) {
15001519
{
15011520
int cond = get_cond_jump(ctx);
15021521
if( !IS_REG(out) ) jit_assert();
1503-
if( IS_REG(e->a) ) {
1522+
if( IS_FLOAT(e->mode) ) {
1523+
EMIT(PUSHFQ,UNUSED,UNUSED,M_PTR);
1524+
// create a mask in RTMP to be used for xmm
1525+
emit_cset(ctx,RTMP,cond);
1526+
EMIT(MOVZX8,RTMP,RTMP,M_PTR);
1527+
EMIT(NEG,RTMP,UNUSED,M_PTR);
1528+
// do dst := (mask & src) | (dst & ~mask)
1529+
ereg tmp = get_tmp(M_F64);
1530+
EMIT(MOVQ,tmp,RTMP,M_PTR);
1531+
EMIT(ANDNPD,tmp,out,M_F64);
1532+
EMIT(MOVQ,out,RTMP,M_PTR);
1533+
if( !IS_REG(e->a) ) {
1534+
// ANDPD with a memory operand requires a 16-byte aligned address, so load the source into a scratch XMM register first
1535+
ereg tmp2 = out == MMX(0) ? MMX(1) : MMX(0);
1536+
EMIT(SUB,R(RSP),MK_CONST(8),M_PTR);
1537+
EMIT(MOVSD,REG_PTR(R(RSP)),tmp2,M_F64);
1538+
EMIT(e->mode == M_F32 ? MOVSS : MOVSD,tmp2,e->a,e->mode);
1539+
EMIT(ANDPD,out,tmp2,M_F64);
1540+
EMIT(ORPD,out,tmp,M_F64);
1541+
EMIT(MOVSD,tmp2,REG_PTR(R(RSP)),M_PTR);
1542+
EMIT(ADD,R(RSP),MK_CONST(8),M_PTR);
1543+
} else {
1544+
EMIT(ANDPD,out,e->a,M_F64);
1545+
EMIT(ORPD,out,tmp,M_F64);
1546+
}
1547+
EMIT(POPFQ,UNUSED,UNUSED,M_PTR);
1548+
} else if( IS_REG(e->a) ) {
15041549
emit_cmov(ctx,out,e->a,cond,M_PTR);
15051550
} else {
15061551
emit_mov(ctx,RTMP,e->a,e->mode);

0 commit comments

Comments
 (0)