@@ -761,9 +761,7 @@ simde_mm_move_ss (simde__m128 a, simde__m128 b) {
761761 a_ = simde__m128_to_private (a ),
762762 b_ = simde__m128_to_private (b );
763763
764- #if defined(SIMDE_SHUFFLE_VECTOR_ )
765- r_ .f32 = SIMDE_SHUFFLE_VECTOR_ (32 , 16 , a_ .f32 , b_ .f32 , 4 , 1 , 2 , 3 );
766- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE )
764+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE )
767765 r_ .neon_f32 = vsetq_lane_f32 (vgetq_lane_f32 (b_ .neon_f32 , 0 ), a_ .neon_f32 , 0 );
768766 #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE )
769767 static const SIMDE_POWER_ALTIVEC_VECTOR (unsigned int ) m = { ~0U , 0U , 0U , 0U };
@@ -772,6 +770,8 @@ simde_mm_move_ss (simde__m128 a, simde__m128 b) {
772770 r_ .wasm_v128 = wasm_i8x16_shuffle (b_ .wasm_v128 , a_ .wasm_v128 , 0 , 1 , 2 , 3 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 );
773771 #elif defined(SIMDE_LOONGARCH_LSX_NATIVE )
774772 r_ .lsx_i64 = __lsx_vextrins_w (a_ .lsx_i64 , b_ .lsx_i64 , 0 );
773+ #elif defined(SIMDE_SHUFFLE_VECTOR_ )
774+ r_ .f32 = SIMDE_SHUFFLE_VECTOR_ (32 , 16 , a_ .f32 , b_ .f32 , 4 , 1 , 2 , 3 );
775775 #else
776776 r_ .f32 [0 ] = b_ .f32 [0 ];
777777 r_ .f32 [1 ] = a_ .f32 [1 ];
@@ -3238,9 +3238,7 @@ simde_mm_movelh_ps (simde__m128 a, simde__m128 b) {
32383238 a_ = simde__m128_to_private (a ),
32393239 b_ = simde__m128_to_private (b );
32403240
3241- #if defined(SIMDE_SHUFFLE_VECTOR_ )
3242- r_ .f32 = SIMDE_SHUFFLE_VECTOR_ (32 , 16 , a_ .f32 , b_ .f32 , 0 , 1 , 4 , 5 );
3243- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE )
3241+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE )
32443242 float32x2_t a10 = vget_low_f32 (a_ .neon_f32 );
32453243 float32x2_t b10 = vget_low_f32 (b_ .neon_f32 );
32463244 r_ .neon_f32 = vcombine_f32 (a10 , b10 );
@@ -3249,6 +3247,8 @@ simde_mm_movelh_ps (simde__m128 a, simde__m128 b) {
32493247 vec_mergeh (a_ .altivec_i64 , b_ .altivec_i64 ));
32503248 #elif defined(SIMDE_LOONGARCH_LSX_NATIVE )
32513249 r_ .lsx_i64 = __lsx_vilvl_d (b_ .lsx_i64 , a_ .lsx_i64 );
3250+ #elif defined(SIMDE_SHUFFLE_VECTOR_ )
3251+ r_ .f32 = SIMDE_SHUFFLE_VECTOR_ (32 , 16 , a_ .f32 , b_ .f32 , 0 , 1 , 4 , 5 );
32523252 #else
32533253 r_ .f32 [0 ] = a_ .f32 [0 ];
32543254 r_ .f32 [1 ] = a_ .f32 [1 ];
@@ -4081,28 +4081,38 @@ simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8)
40814081}
40824082#if defined(SIMDE_X86_SSE_NATIVE ) && !defined(__PGI )
40834083# define simde_mm_shuffle_ps (a , b , imm8 ) _mm_shuffle_ps(a, b, imm8)
4084- #elif defined(SIMDE_SHUFFLE_VECTOR_ )
4084+ #elif defined(SIMDE_WASM_SIMD128_NATIVE )
40854085 #define simde_mm_shuffle_ps (a , b , imm8 ) (__extension__ ({ \
4086- simde__m128_from_private((simde__m128_private) { .f32 = \
4087- SIMDE_SHUFFLE_VECTOR_(32, 16, \
4088- simde__m128_to_private(a).f32 , \
4089- simde__m128_to_private(b).f32 , \
4090- (((imm8) ) & 3), \
4091- (((imm8) >> 2) & 3), \
4092- (((imm8) >> 4) & 3) + 4, \
4093- (((imm8) >> 6) & 3) + 4) }); }))
4086+ simde__m128_from_private((simde__m128_private) { .wasm_v128 = \
4087+ wasm_i32x4_shuffle( \
4088+ simde__m128_to_private(a).wasm_v128 , \
4089+ simde__m128_to_private(b).wasm_v128 , \
4090+ (((imm8) ) & 3), \
4091+ (((imm8) >> 2) & 3), \
4092+ (((imm8) >> 4) & 3) + 4, \
4093+ (((imm8) >> 6) & 3) + 4) }); }))
40944094#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE ) && defined(SIMDE_STATEMENT_EXPR_ )
40954095 #define simde_mm_shuffle_ps (a , b , imm8 ) \
40964096 (__extension__({ \
4097- float32x4_t simde_mm_shuffle_ps_a_ = simde__m128i_to_neon_f32 (a); \
4098- float32x4_t simde_mm_shuffle_ps_b_ = simde__m128i_to_neon_f32 (b); \
4097+ float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32 (a); \
4098+ float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32 (b); \
40994099 float32x4_t simde_mm_shuffle_ps_r_; \
41004100 \
41014101 simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \
41024102 simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \
41034103 simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \
41044104 vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \
41054105 }))
4106+ #elif defined(SIMDE_SHUFFLE_VECTOR_ )
4107+ #define simde_mm_shuffle_ps (a , b , imm8 ) (__extension__ ({ \
4108+ simde__m128_from_private((simde__m128_private) { .f32 = \
4109+ SIMDE_SHUFFLE_VECTOR_(32, 16, \
4110+ simde__m128_to_private(a).f32, \
4111+ simde__m128_to_private(b).f32, \
4112+ (((imm8) ) & 3), \
4113+ (((imm8) >> 2) & 3), \
4114+ (((imm8) >> 4) & 3) + 4, \
4115+ (((imm8) >> 6) & 3) + 4) }); }))
41064116#endif
41074117#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES )
41084118# define _mm_shuffle_ps (a , b , imm8 ) simde_mm_shuffle_ps((a), (b), imm8)
@@ -4675,6 +4685,8 @@ simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) {
46754685 r_ .neon_f32 = vcombine_f32 (result .val [0 ], result .val [1 ]);
46764686 #elif defined(SIMDE_LOONGARCH_LSX_NATIVE )
46774687 r_ .lsx_i64 = __lsx_vilvh_w (b_ .lsx_i64 , a_ .lsx_i64 );
4688+ #elif defined(SIMDE_WASM_SIMD128_NATIVE )
4689+ r_ .wasm_v128 = wasm_i32x4_shuffle (a_ .wasm_v128 , b_ .wasm_v128 , 2 , 6 , 3 , 7 );
46784690 #elif defined(SIMDE_SHUFFLE_VECTOR_ )
46794691 r_ .f32 = SIMDE_SHUFFLE_VECTOR_ (32 , 16 , a_ .f32 , b_ .f32 , 2 , 6 , 3 , 7 );
46804692 #else
@@ -4708,13 +4720,15 @@ simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) {
47084720 r_ .altivec_f32 = vec_mergeh (a_ .altivec_f32 , b_ .altivec_f32 );
47094721 #elif defined(SIMDE_LOONGARCH_LSX_NATIVE )
47104722 r_ .lsx_i64 = __lsx_vilvl_w (b_ .lsx_i64 , a_ .lsx_i64 );
4711- #elif defined(SIMDE_SHUFFLE_VECTOR_ )
4712- r_ .f32 = SIMDE_SHUFFLE_VECTOR_ ( 32 , 16 , a_ .f32 , b_ .f32 , 0 , 4 , 1 , 5 );
4723+ #elif defined(SIMDE_WASM_SIMD128_NATIVE )
4724+ r_ .wasm_v128 = wasm_i32x4_shuffle ( a_ .wasm_v128 , b_ .wasm_v128 , 0 , 4 , 1 , 5 );
47134725 #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE )
47144726 float32x2_t a1 = vget_low_f32 (a_ .neon_f32 );
47154727 float32x2_t b1 = vget_low_f32 (b_ .neon_f32 );
47164728 float32x2x2_t result = vzip_f32 (a1 , b1 );
47174729 r_ .neon_f32 = vcombine_f32 (result .val [0 ], result .val [1 ]);
4730+ #elif defined(SIMDE_SHUFFLE_VECTOR_ )
4731+ r_ .f32 = SIMDE_SHUFFLE_VECTOR_ (32 , 16 , a_ .f32 , b_ .f32 , 0 , 4 , 1 , 5 );
47184732 #else
47194733 r_ .f32 [0 ] = a_ .f32 [0 ];
47204734 r_ .f32 [1 ] = b_ .f32 [0 ];
0 commit comments