From d0cb44077a3c1e78413e26cbf937bc7fd37cca90 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Thu, 25 Jun 2026 15:29:37 +0800 Subject: [PATCH] [LA64_DYNAREC] Added PCMPESTRM/PCMPISTRM opcodes --- src/dynarec/la64/dynarec_la64_660f.c | 109 +++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index eb457bce6d..4624cac050 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -1331,6 +1331,62 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV32w(x4, u8); CALL4(const_native_pclmul, -1, x1, x2, x3, x4); break; + case 0x60: + INST_NAME("PCMPESTRM Gx, Ex, Ib"); + nextop = F8; + GETG; + u8 = geted_ib(dyn, addr, ninst, nextop); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); + if (gd > 7) sse_reflect_reg(dyn, ninst, gd); + ADDI_D(x3, xEmu, offsetof(x64emu_t, xmm[gd])); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + if (ed > 7) sse_reflect_reg(dyn, ninst, ed); + ADDI_D(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); + } + SEXT_W(x2, xRDX); + SEXT_W(x4, xRAX); + u8 = F8; + MOV32w(x5, u8); + CALL6(const_sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0); + v0 = sse_get_reg_empty(dyn, ninst, x1, 0); + if (u8 & 0b1000000) { + v1 = fpu_get_scratch(dyn); + switch (u8 & 1) { + case 0b00: + VREPLGR2VR_B(v0, x1); + SRLI_D(x2, x1, 8); + VREPLGR2VR_B(v1, x2); + VEXTRINS_D(v0, v1, 0x10); + MOV64x(x2, 0x0001020304050607); + VREPLGR2VR_D(v1, x2); + VSLL_B(v0, v0, v1); + MOV32w(x2, 0x80); + VREPLGR2VR_B(v1, x2); + VAND_V(v0, v0, v1); + VSRAI_B(v0, v0, 7); + break; + case 0b01: + VREPLGR2VR_H(v0, x1); + MOV64x(x2, 0x000C000D000E000F); + VREPLGR2VR_D(v1, x2); + MOV64x(x2, 0x00080009000A000B); + VINSGR2VR_D(v1, x2, 1); + VSLL_H(v0, v0, v1); + MOV32w(x2, 0x80); + VREPLGR2VR_B(v1, x2); + VAND_V(v0, v0, v1); + VSRAI_H(v0, v0, 15); + break; + } + } else { + VXOR_V(v0, v0, v0); + VINSGR2VR_H(v0, x1, 0); + } + break; case 0x61: INST_NAME("PCMPESTRI Gx, Ex, Ib"); nextop = F8; @@ -1367,6 +1423,59 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int CTZ_W(xRCX, x1); } break; + case 0x62: + INST_NAME("PCMPISTRM Gx, Ex, Ib"); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); + nextop = F8; + GETG; + if (gd > 7) sse_reflect_reg(dyn, ninst, gd); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + if (ed > 7) sse_reflect_reg(dyn, ninst, ed); + ADDI_D(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); + } + ADDI_D(x2, xEmu, offsetof(x64emu_t, xmm[gd])); + u8 = F8; + MOV32w(x3, u8); + CALL4(const_sse42_compare_string_implicit_len, x1, ed, x2, x3, 0); + v0 = sse_get_reg_empty(dyn, ninst, x1, 0); + if (u8 & 0b1000000) { + v1 = fpu_get_scratch(dyn); + switch (u8 & 1) { + case 0b00: + VREPLGR2VR_B(v0, x1); + SRLI_D(x2, x1, 8); + VREPLGR2VR_B(v1, x2); + VEXTRINS_D(v0, v1, 0x10); + MOV64x(x2, 0x0001020304050607); + VREPLGR2VR_D(v1, x2); + VSLL_B(v0, v0, v1); + MOV32w(x2, 0x80); + VREPLGR2VR_B(v1, x2); + VAND_V(v0, v0, v1); + VSRAI_B(v0, v0, 7); + break; + case 0b01: + VREPLGR2VR_H(v0, x1); + MOV64x(x2, 0x000C000D000E000F); + VREPLGR2VR_D(v1, x2); + MOV64x(x2, 0x00080009000A000B); + VINSGR2VR_D(v1, x2, 1); + VSLL_H(v0, v0, v1); + MOV32w(x2, 0x80); + VREPLGR2VR_B(v1, x2); + VAND_V(v0, v0, v1); + VSRAI_H(v0, v0, 15); + break; + } + } else { + VXOR_V(v0, v0, v0); + VINSGR2VR_H(v0, x1, 0); + } + break; case 0x63: INST_NAME("PCMPISTRI Gx, Ex, Ib"); SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION);