@@ -123,6 +123,8 @@ DEF_ISEL(SETP_MEMb) = SETP<M8W>;
// Instruction selections for the byte-sized SETcc forms: each `_MEMb` form is
// bound to the `M8W` instantiation and each `_GPR8` form to the `R8W`
// instantiation of the matching semantics template.
123123DEF_ISEL (SETP_GPR8) = SETP<R8W>;
124124DEF_ISEL (SETZ_MEMb) = SETZ<M8W>;
125125DEF_ISEL (SETZ_GPR8) = SETZ<R8W>;
// The SETE_* selections are bound to the same SETZ semantics as the SETZ_*
// selections directly above.
126+ DEF_ISEL (SETE_MEMb) = SETZ<M8W>;
127+ DEF_ISEL (SETE_GPR8) = SETZ<R8W>;
126128DEF_ISEL (SETS_MEMb) = SETS<M8W>;
127129DEF_ISEL (SETS_GPR8) = SETS<R8W>;
128130DEF_ISEL (SETNO_MEMb) = SETNO<M8W>;
@@ -323,6 +325,159 @@ DEF_SEM(LZCNT, D dst, S src) {
323325 return memory;
324326}
325327
328+ template <typename T>
329+ ALWAYS_INLINE static T LowBitMask (T count, T bit_size) {
330+ if (UCmpEq (count, 0 )) {
331+ return Literal<T>(0 );
332+ }
333+ if (!UCmpLt (count, bit_size)) {
334+ return Maximize (Literal<T>(0 ));
335+ }
336+ return USub (UShl (Literal<T>(1 ), count), Literal<T>(1 ));
337+ }
338+
339+ template <typename T>
340+ ALWAYS_INLINE static T PopulationCount (T val) {
341+ auto count = Literal<T>(0 );
342+ for (unsigned i = 0 ; i < (sizeof (T) * 8U ); ++i) {
343+ count = UAdd (count, UAnd (UShr (val, Literal<T>(i)), Literal<T>(1 )));
344+ }
345+ return count;
346+ }
347+
348+ ALWAYS_INLINE static uint32_t CRC32Byte (uint32_t crc, uint8_t byte) {
349+ crc = UXor (crc, ZExtTo<uint32_t >(byte));
350+ for (unsigned i = 0 ; i < 8U ; ++i) {
351+ auto low_bit_set = UCmpNeq (UAnd (crc, Literal<uint32_t >(1 )), 0U );
352+ crc = UShr (crc, 1U );
353+ crc = Select (low_bit_set, UXor (crc, Literal<uint32_t >(0x82F63B78U )), crc);
354+ }
355+ return crc;
356+ }
357+
358+ template <typename T>
359+ ALWAYS_INLINE static uint32_t CRC32Value (uint32_t crc, T val) {
360+ for (unsigned i = 0 ; i < sizeof (T); ++i) {
361+ crc = CRC32Byte (crc,
362+ TruncTo<uint8_t >(UShr (val, Literal<T>(i * 8U ))));
363+ }
364+ return crc;
365+ }
366+
367+ template <typename D, typename S1, typename S2>
368+ DEF_SEM (BEXTR, D dst, S1 src1, S2 src2) {
369+ auto val = Read (src1);
370+ auto control = Read (src2);
371+ auto bit_size = BitSizeOf (src1);
372+ auto start = ZExtTo<S1>(TruncTo<uint8_t >(control));
373+ auto length =
374+ ZExtTo<S1>(TruncTo<uint8_t >(UShr (control, Literal<decltype (control)>(8 ))));
375+ auto res = Literal<S1>(0 );
376+
377+ if (UCmpLt (start, bit_size) && UCmpNeq (length, 0 )) {
378+ auto avail = USub (bit_size, start);
379+ auto use_len = Select (UCmpLt (avail, length), avail, length);
380+ auto mask = LowBitMask (use_len, bit_size);
381+ res = UAnd (UShr (val, start), mask);
382+ }
383+
384+ WriteZExt (dst, res);
385+ Write (FLAG_CF, false );
386+ Write (FLAG_OF, false );
387+ Write (FLAG_ZF, ZeroFlag (res));
388+ Write (FLAG_SF, false );
389+ UndefFlag (af);
390+ UndefFlag (pf);
391+ return memory;
392+ }
393+
394+ template <typename D, typename S1, typename S2>
395+ DEF_SEM (BZHI, D dst, S1 src1, S2 src2) {
396+ auto val = Read (src1);
397+ auto count = ZExtTo<S1>(TruncTo<uint8_t >(Read (src2)));
398+ auto bit_size = BitSizeOf (src1);
399+ auto out_of_range = !UCmpLt (count, bit_size);
400+ auto mask = LowBitMask (count, bit_size);
401+ auto res = Select (out_of_range, val, UAnd (val, mask));
402+
403+ WriteZExt (dst, res);
404+ Write (FLAG_CF, out_of_range);
405+ Write (FLAG_OF, false );
406+ Write (FLAG_ZF, ZeroFlag (res));
407+ Write (FLAG_SF, SignFlag (res));
408+ UndefFlag (af);
409+ UndefFlag (pf);
410+ return memory;
411+ }
412+
413+ template <typename D, typename S>
414+ DEF_SEM (POPCNT, D dst, S src) {
415+ auto val = Read (src);
416+ auto count = PopulationCount (val);
417+ WriteZExt (dst, count);
418+ Write (FLAG_CF, false );
419+ Write (FLAG_PF, false );
420+ Write (FLAG_AF, false );
421+ Write (FLAG_ZF, ZeroFlag (val));
422+ Write (FLAG_SF, false );
423+ Write (FLAG_OF, false );
424+ return memory;
425+ }
426+
427+ template <typename D, typename S1, typename S2>
428+ DEF_SEM (PDEP, D dst, S1 src1, S2 src2) {
429+ auto src = Read (src1);
430+ auto mask = Read (src2);
431+ auto res = Literal<S1>(0 );
432+ unsigned src_index = 0 ;
433+
434+ for (unsigned i = 0 ; i < (sizeof (decltype (src)) * 8U ); ++i) {
435+ auto mask_bit = UShl (Literal<S1>(1 ), Literal<decltype (src)>(i));
436+ if (UCmpNeq (UAnd (mask, mask_bit), 0 )) {
437+ auto src_bit =
438+ UAnd (UShr (src, Literal<decltype (src)>(src_index)), Literal<S1>(1 ));
439+ if (UCmpNeq (src_bit, 0 )) {
440+ res = UOr (res, mask_bit);
441+ }
442+ ++src_index;
443+ }
444+ }
445+
446+ WriteZExt (dst, res);
447+ return memory;
448+ }
449+
450+ template <typename D, typename S1, typename S2>
451+ DEF_SEM (PEXT, D dst, S1 src1, S2 src2) {
452+ auto src = Read (src1);
453+ auto mask = Read (src2);
454+ auto res = Literal<S1>(0 );
455+ unsigned dst_index = 0 ;
456+
457+ for (unsigned i = 0 ; i < (sizeof (decltype (src)) * 8U ); ++i) {
458+ auto mask_bit = UShl (Literal<S1>(1 ), Literal<decltype (src)>(i));
459+ if (UCmpNeq (UAnd (mask, mask_bit), 0 )) {
460+ auto src_bit = UAnd (UShr (src, Literal<decltype (src)>(i)), Literal<S1>(1 ));
461+ if (UCmpNeq (src_bit, 0 )) {
462+ res = UOr (res, UShl (Literal<S1>(1 ), Literal<decltype (src)>(dst_index)));
463+ }
464+ ++dst_index;
465+ }
466+ }
467+
468+ WriteZExt (dst, res);
469+ return memory;
470+ }
471+
472+ template <typename D, typename S1, typename S2>
473+ DEF_SEM (CRC32, D dst, S1 src1, S2 src2) {
474+ auto seed = TruncTo<uint32_t >(Read (src1));
475+ auto val = Read (src2);
476+ auto crc = CRC32Value (seed, val);
477+ WriteZExt (dst, crc);
478+ return memory;
479+ }
480+
326481} // namespace
327482
328483DEF_ISEL (BSWAP_GPRv_16) = BSWAP_16;
@@ -335,6 +490,40 @@ DEF_ISEL_RnW_Rn(TZCNT_GPRv_GPRv, TZCNT);
335490DEF_ISEL_RnW_Mn (LZCNT_GPRv_MEMv, LZCNT);
336491DEF_ISEL_RnW_Rn (LZCNT_GPRv_GPRv, LZCNT);
337492
// BEXTR: per the template arguments, the destination is a writable register,
// the first source is memory or a register, and the second source (read as the
// control word by the BEXTR semantics) is a register. The 64-bit forms are
// wrapped in IF_64BIT (presumably only emitted for 64-bit targets).
493+ DEF_ISEL (BEXTR_VGPR32d_MEMd_VGPR32d) = BEXTR<R32W, M32, R32>;
494+ DEF_ISEL (BEXTR_VGPR32d_VGPR32d_VGPR32d) = BEXTR<R32W, R32, R32>;
495+ IF_64BIT (DEF_ISEL(BEXTR_VGPR64q_MEMq_VGPR64q) = BEXTR<R64W, M64, R64>;)
496+ IF_64BIT (DEF_ISEL(BEXTR_VGPR64q_VGPR64q_VGPR64q) = BEXTR<R64W, R64, R64>;)
497+
// BZHI: same operand layout as BEXTR; the second source supplies the bit index.
498+ DEF_ISEL (BZHI_VGPR32d_MEMd_VGPR32d) = BZHI<R32W, M32, R32>;
499+ DEF_ISEL (BZHI_VGPR32d_VGPR32d_VGPR32d) = BZHI<R32W, R32, R32>;
500+ IF_64BIT (DEF_ISEL(BZHI_VGPR64q_MEMq_VGPR64q) = BZHI<R64W, M64, R64>;)
501+ IF_64BIT (DEF_ISEL(BZHI_VGPR64q_VGPR64q_VGPR64q) = BZHI<R64W, R64, R64>;)
502+
// POPCNT: bound through the same RnW_Mn / RnW_Rn selection macros as the LZCNT
// forms above.
503+ DEF_ISEL_RnW_Mn (POPCNT_GPRv_MEMv, POPCNT);
504+ DEF_ISEL_RnW_Rn (POPCNT_GPRv_GPRv, POPCNT);
505+
// PDEP: the first source is always a register; the second source (read as the
// mask by the PDEP semantics) is memory or a register.
506+ DEF_ISEL (PDEP_VGPR32d_VGPR32d_MEMd) = PDEP<R32W, R32, M32>;
507+ DEF_ISEL (PDEP_VGPR32d_VGPR32d_VGPR32d) = PDEP<R32W, R32, R32>;
508+ IF_64BIT (DEF_ISEL(PDEP_VGPR64q_VGPR64q_MEMq) = PDEP<R64W, R64, M64>;)
509+ IF_64BIT (DEF_ISEL(PDEP_VGPR64q_VGPR64q_VGPR64q) = PDEP<R64W, R64, R64>;)
510+
// PEXT: same operand layout as PDEP.
511+ DEF_ISEL (PEXT_VGPR32d_VGPR32d_MEMd) = PEXT<R32W, R32, M32>;
512+ DEF_ISEL (PEXT_VGPR32d_VGPR32d_VGPR32d) = PEXT<R32W, R32, R32>;
513+ IF_64BIT (DEF_ISEL(PEXT_VGPR64q_VGPR64q_MEMq) = PEXT<R64W, R64, M64>;)
514+ IF_64BIT (DEF_ISEL(PEXT_VGPR64q_VGPR64q_VGPR64q) = PEXT<R64W, R64, R64>;)
515+
// CRC32: the first source is the register supplying the running CRC and the
// second source is the data operand. The 8-, 16- and 32-bit data forms are all
// bound with a 32-bit destination and CRC register (R32W, R32); the _64 forms
// use 64-bit registers and are wrapped in IF_64BIT.
516+ DEF_ISEL (CRC32_GPRyy_MEMb_32) = CRC32<R32W, R32, M8>;
517+ IF_64BIT (DEF_ISEL(CRC32_GPRyy_MEMb_64) = CRC32<R64W, R64, M8>;)
518+ DEF_ISEL (CRC32_GPRyy_GPR8b_32) = CRC32<R32W, R32, R8>;
519+ IF_64BIT (DEF_ISEL(CRC32_GPRyy_GPR8b_64) = CRC32<R64W, R64, R8>;)
520+ DEF_ISEL (CRC32_GPRyy_MEMv_16) = CRC32<R32W, R32, M16>;
521+ DEF_ISEL (CRC32_GPRyy_MEMv_32) = CRC32<R32W, R32, M32>;
522+ IF_64BIT (DEF_ISEL(CRC32_GPRyy_MEMv_64) = CRC32<R64W, R64, M64>;)
523+ DEF_ISEL (CRC32_GPRyy_GPRv_16) = CRC32<R32W, R32, R16>;
524+ DEF_ISEL (CRC32_GPRyy_GPRv_32) = CRC32<R32W, R32, R32>;
525+ IF_64BIT (DEF_ISEL(CRC32_GPRyy_GPRv_64) = CRC32<R64W, R64, R64>;)
526+
338527namespace {
339528template <typename D, typename S>
340529DEF_SEM (BSR, D dst, S src) {
0 commit comments