Skip to content

Commit b0dcc00

Browse files
committed
loongarch: Use intrinsics::simd for selected LSX/LASX intrinsics
This change migrates a subset of LSX/LASX intrinsics to the portable `intrinsics::simd` interface. Only straightforward mappings are converted in this patch. Intrinsics that require more complex transformations or currently result in suboptimal code generation are intentionally left unchanged and will be migrated incrementally in follow-up patches.
1 parent a690147 commit b0dcc00

11 files changed

Lines changed: 1951 additions & 4131 deletions

File tree

crates/core_arch/src/loongarch64/lasx/generated.rs

Lines changed: 203 additions & 2001 deletions
Large diffs are not rendered by default.

crates/core_arch/src/loongarch64/lasx/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ mod generated;
1616
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
1717
pub use self::generated::*;
1818

19+
// Declare the `portable` submodule; `#[rustfmt::skip]` keeps rustfmt from
// reformatting it.
#[rustfmt::skip]
20+
mod portable;
21+
22+
// Re-export every public item of `portable` from this module, behind the
// `stdarch_loongarch` unstable feature gate.
#[rustfmt::skip]
23+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
24+
pub use self::portable::*;
25+
1926
// Test module, compiled only for test builds.
#[rustfmt::skip]
2027
#[cfg(test)]
2128
mod tests;
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
//! LoongArch64 LASX intrinsics implemented on top of the `intrinsics::simd` operations.
2+
3+
use super::super::{simd::*, *};
4+
use crate::core_arch::simd::*;
5+
use crate::intrinsics::simd::*;
6+
use crate::mem::transmute;
7+
8+
// Unary operations, one invocation per lane type: `simd_ctpop` for `xvpcnt`,
// `simd_ctlz` for `xvclz`, `simd_neg` for `xvneg` and `simd_fsqrt` for `xvfsqrt`.
// NOTE(review): `impl_vv!` is defined outside this view; the arguments look like
// (target feature, intrinsic name, simd operation, public vector type,
// lane-typed vector) — confirm against the macro definition.
impl_vv!("lasx", lasx_xvpcnt_b, simd_ctpop, m256i, i8x32);
9+
impl_vv!("lasx", lasx_xvpcnt_h, simd_ctpop, m256i, i16x16);
10+
impl_vv!("lasx", lasx_xvpcnt_w, simd_ctpop, m256i, i32x8);
11+
impl_vv!("lasx", lasx_xvpcnt_d, simd_ctpop, m256i, i64x4);
12+
impl_vv!("lasx", lasx_xvclz_b, simd_ctlz, m256i, i8x32);
13+
impl_vv!("lasx", lasx_xvclz_h, simd_ctlz, m256i, i16x16);
14+
impl_vv!("lasx", lasx_xvclz_w, simd_ctlz, m256i, i32x8);
15+
impl_vv!("lasx", lasx_xvclz_d, simd_ctlz, m256i, i64x4);
16+
impl_vv!("lasx", lasx_xvneg_b, simd_neg, m256i, i8x32);
17+
impl_vv!("lasx", lasx_xvneg_h, simd_neg, m256i, i16x16);
18+
impl_vv!("lasx", lasx_xvneg_w, simd_neg, m256i, i32x8);
19+
impl_vv!("lasx", lasx_xvneg_d, simd_neg, m256i, i64x4);
20+
impl_vv!("lasx", lasx_xvfsqrt_s, simd_fsqrt, m256, f32x8);
21+
impl_vv!("lasx", lasx_xvfsqrt_d, simd_fsqrt, m256d, f64x4);
22+
23+
// `xvreplgr2vr_*`: each invocation pairs `simdl_splat` with a lane-typed vector
// and a scalar argument type.
// NOTE(review): the `_b`/`_h` variants take an `i32` scalar while the lanes are
// i8/i16 — presumably `simdl_splat` (defined elsewhere) narrows the scalar to
// the lane width before broadcasting; confirm.
impl_gv!("lasx", lasx_xvreplgr2vr_b, simdl_splat, m256i, i8x32, i32);
24+
impl_gv!("lasx", lasx_xvreplgr2vr_h, simdl_splat, m256i, i16x16, i32);
25+
impl_gv!("lasx", lasx_xvreplgr2vr_w, simdl_splat, m256i, i32x8, i32);
26+
impl_gv!("lasx", lasx_xvreplgr2vr_d, simdl_splat, m256i, i64x4, i64);
27+
28+
// `xvrepli_*`: `simdl_splat` again, this time driven by `impl_sv!`.
// NOTE(review): the trailing `10` is presumably the bit width of the signed
// immediate accepted by the intrinsic — confirm against `impl_sv!`.
impl_sv!("lasx", lasx_xvrepli_b, simdl_splat, m256i, i8x32, 10);
29+
impl_sv!("lasx", lasx_xvrepli_h, simdl_splat, m256i, i16x16, 10);
30+
impl_sv!("lasx", lasx_xvrepli_w, simdl_splat, m256i, i32x8, 10);
31+
impl_sv!("lasx", lasx_xvrepli_d, simdl_splat, m256i, i64x4, 10);
32+
33+
// Integer `xvadd_*` / `xvsub_*` mapped to `simd_add` / `simd_sub`, one
// invocation per lane width (b/h/w/d).
impl_vvv!("lasx", lasx_xvadd_b, simd_add, m256i, i8x32);
34+
impl_vvv!("lasx", lasx_xvadd_h, simd_add, m256i, i16x16);
35+
impl_vvv!("lasx", lasx_xvadd_w, simd_add, m256i, i32x8);
36+
impl_vvv!("lasx", lasx_xvadd_d, simd_add, m256i, i64x4);
37+
impl_vvv!("lasx", lasx_xvsub_b, simd_sub, m256i, i8x32);
38+
impl_vvv!("lasx", lasx_xvsub_h, simd_sub, m256i, i16x16);
39+
impl_vvv!("lasx", lasx_xvsub_w, simd_sub, m256i, i32x8);
40+
impl_vvv!("lasx", lasx_xvsub_d, simd_sub, m256i, i64x4);
41+
// `xvmax_*` / `xvmin_*` mapped to `simd_imax` / `simd_imin`. The same operation
// is used for both signednesses; the `_bu/_hu/_wu/_du` variants differ only in
// passing an unsigned lane-typed vector.
impl_vvv!("lasx", lasx_xvmax_b, simd_imax, m256i, i8x32);
42+
impl_vvv!("lasx", lasx_xvmax_h, simd_imax, m256i, i16x16);
43+
impl_vvv!("lasx", lasx_xvmax_w, simd_imax, m256i, i32x8);
44+
impl_vvv!("lasx", lasx_xvmax_d, simd_imax, m256i, i64x4);
45+
impl_vvv!("lasx", lasx_xvmax_bu, simd_imax, m256i, u8x32);
46+
impl_vvv!("lasx", lasx_xvmax_hu, simd_imax, m256i, u16x16);
47+
impl_vvv!("lasx", lasx_xvmax_wu, simd_imax, m256i, u32x8);
48+
impl_vvv!("lasx", lasx_xvmax_du, simd_imax, m256i, u64x4);
49+
impl_vvv!("lasx", lasx_xvmin_b, simd_imin, m256i, i8x32);
50+
impl_vvv!("lasx", lasx_xvmin_h, simd_imin, m256i, i16x16);
51+
impl_vvv!("lasx", lasx_xvmin_w, simd_imin, m256i, i32x8);
52+
impl_vvv!("lasx", lasx_xvmin_d, simd_imin, m256i, i64x4);
53+
impl_vvv!("lasx", lasx_xvmin_bu, simd_imin, m256i, u8x32);
54+
impl_vvv!("lasx", lasx_xvmin_hu, simd_imin, m256i, u16x16);
55+
impl_vvv!("lasx", lasx_xvmin_wu, simd_imin, m256i, u32x8);
56+
impl_vvv!("lasx", lasx_xvmin_du, simd_imin, m256i, u64x4);
57+
// Comparisons: `xvseq_*` -> `simd_eq`, `xvslt_*` -> `simd_lt`,
// `xvsle_*` -> `simd_le`. Signed and unsigned variants share the operation and
// differ only in the lane-typed vector passed to the macro.
// NOTE(review): the comparison result is exposed as `m256i`; how `impl_vvv!`
// converts the mask result is not visible here — confirm.
impl_vvv!("lasx", lasx_xvseq_b, simd_eq, m256i, i8x32);
58+
impl_vvv!("lasx", lasx_xvseq_h, simd_eq, m256i, i16x16);
59+
impl_vvv!("lasx", lasx_xvseq_w, simd_eq, m256i, i32x8);
60+
impl_vvv!("lasx", lasx_xvseq_d, simd_eq, m256i, i64x4);
61+
impl_vvv!("lasx", lasx_xvslt_b, simd_lt, m256i, i8x32);
62+
impl_vvv!("lasx", lasx_xvslt_h, simd_lt, m256i, i16x16);
63+
impl_vvv!("lasx", lasx_xvslt_w, simd_lt, m256i, i32x8);
64+
impl_vvv!("lasx", lasx_xvslt_d, simd_lt, m256i, i64x4);
65+
impl_vvv!("lasx", lasx_xvslt_bu, simd_lt, m256i, u8x32);
66+
impl_vvv!("lasx", lasx_xvslt_hu, simd_lt, m256i, u16x16);
67+
impl_vvv!("lasx", lasx_xvslt_wu, simd_lt, m256i, u32x8);
68+
impl_vvv!("lasx", lasx_xvslt_du, simd_lt, m256i, u64x4);
69+
impl_vvv!("lasx", lasx_xvsle_b, simd_le, m256i, i8x32);
70+
impl_vvv!("lasx", lasx_xvsle_h, simd_le, m256i, i16x16);
71+
impl_vvv!("lasx", lasx_xvsle_w, simd_le, m256i, i32x8);
72+
impl_vvv!("lasx", lasx_xvsle_d, simd_le, m256i, i64x4);
73+
impl_vvv!("lasx", lasx_xvsle_bu, simd_le, m256i, u8x32);
74+
impl_vvv!("lasx", lasx_xvsle_hu, simd_le, m256i, u16x16);
75+
impl_vvv!("lasx", lasx_xvsle_wu, simd_le, m256i, u32x8);
76+
impl_vvv!("lasx", lasx_xvsle_du, simd_le, m256i, u64x4);
77+
// Integer `xvmul_*` -> `simd_mul`, `xvdiv_*` -> `simd_div`, `xvmod_*` -> `simd_rem`.
// NOTE(review): `simd_div` and `simd_rem` are documented as undefined behavior
// when any divisor lane is zero (and, for signed lanes, for `MIN / -1`). How the
// wrappers generated by `impl_vvv!` handle those inputs is not visible here —
// confirm before relying on the div/mod intrinsics with arbitrary divisors.
impl_vvv!("lasx", lasx_xvmul_b, simd_mul, m256i, i8x32);
78+
impl_vvv!("lasx", lasx_xvmul_h, simd_mul, m256i, i16x16);
79+
impl_vvv!("lasx", lasx_xvmul_w, simd_mul, m256i, i32x8);
80+
impl_vvv!("lasx", lasx_xvmul_d, simd_mul, m256i, i64x4);
81+
impl_vvv!("lasx", lasx_xvdiv_b, simd_div, m256i, i8x32);
82+
impl_vvv!("lasx", lasx_xvdiv_h, simd_div, m256i, i16x16);
83+
impl_vvv!("lasx", lasx_xvdiv_w, simd_div, m256i, i32x8);
84+
impl_vvv!("lasx", lasx_xvdiv_d, simd_div, m256i, i64x4);
85+
impl_vvv!("lasx", lasx_xvdiv_bu, simd_div, m256i, u8x32);
86+
impl_vvv!("lasx", lasx_xvdiv_hu, simd_div, m256i, u16x16);
87+
impl_vvv!("lasx", lasx_xvdiv_wu, simd_div, m256i, u32x8);
88+
impl_vvv!("lasx", lasx_xvdiv_du, simd_div, m256i, u64x4);
89+
impl_vvv!("lasx", lasx_xvmod_b, simd_rem, m256i, i8x32);
90+
impl_vvv!("lasx", lasx_xvmod_h, simd_rem, m256i, i16x16);
91+
impl_vvv!("lasx", lasx_xvmod_w, simd_rem, m256i, i32x8);
92+
impl_vvv!("lasx", lasx_xvmod_d, simd_rem, m256i, i64x4);
93+
impl_vvv!("lasx", lasx_xvmod_bu, simd_rem, m256i, u8x32);
94+
impl_vvv!("lasx", lasx_xvmod_hu, simd_rem, m256i, u16x16);
95+
impl_vvv!("lasx", lasx_xvmod_wu, simd_rem, m256i, u32x8);
96+
impl_vvv!("lasx", lasx_xvmod_du, simd_rem, m256i, u64x4);
97+
// Whole-vector bitwise operations, all expressed over `u8x32`.
// `and`/`or`/`xor` use the `simd_*` operations directly; `andn`/`orn`/`nor` go
// through `simdl_*` helpers defined elsewhere.
// NOTE(review): which operand `simdl_andn` / `simdl_orn` complement is not
// visible here — confirm it matches the `xvandn.v` / `xvorn.v` instructions.
impl_vvv!("lasx", lasx_xvand_v, simd_and, m256i, u8x32);
98+
impl_vvv!("lasx", lasx_xvandn_v, simdl_andn, m256i, u8x32);
99+
impl_vvv!("lasx", lasx_xvor_v, simd_or, m256i, u8x32);
100+
impl_vvv!("lasx", lasx_xvorn_v, simdl_orn, m256i, u8x32);
101+
impl_vvv!("lasx", lasx_xvnor_v, simdl_nor, m256i, u8x32);
102+
impl_vvv!("lasx", lasx_xvxor_v, simd_xor, m256i, u8x32);
103+
// Floating-point add/sub/mul/div. The `_s` variants use `m256` with `f32x8`
// lanes, the `_d` variants use `m256d` with `f64x4` lanes.
impl_vvv!("lasx", lasx_xvfadd_s, simd_add, m256, f32x8);
104+
impl_vvv!("lasx", lasx_xvfadd_d, simd_add, m256d, f64x4);
105+
impl_vvv!("lasx", lasx_xvfsub_s, simd_sub, m256, f32x8);
106+
impl_vvv!("lasx", lasx_xvfsub_d, simd_sub, m256d, f64x4);
107+
impl_vvv!("lasx", lasx_xvfmul_s, simd_mul, m256, f32x8);
108+
impl_vvv!("lasx", lasx_xvfmul_d, simd_mul, m256d, f64x4);
109+
impl_vvv!("lasx", lasx_xvfdiv_s, simd_div, m256, f32x8);
110+
impl_vvv!("lasx", lasx_xvfdiv_d, simd_div, m256d, f64x4);
111+
// Shifts by a per-lane vector amount, routed through the `simdl_shl` /
// `simdl_shr` helpers rather than the raw `simd_shl` / `simd_shr`.
// `xvsra_*` and `xvsrl_*` both use `simdl_shr`; they differ only in passing
// signed vs unsigned lane types.
// NOTE(review): presumably the lane signedness selects arithmetic vs logical
// right shift and the helpers bound the shift amount to the lane width — both
// depend on definitions outside this view; confirm.
impl_vvv!("lasx", lasx_xvsll_b, simdl_shl, m256i, i8x32);
112+
impl_vvv!("lasx", lasx_xvsll_h, simdl_shl, m256i, i16x16);
113+
impl_vvv!("lasx", lasx_xvsll_w, simdl_shl, m256i, i32x8);
114+
impl_vvv!("lasx", lasx_xvsll_d, simdl_shl, m256i, i64x4);
115+
impl_vvv!("lasx", lasx_xvsra_b, simdl_shr, m256i, i8x32);
116+
impl_vvv!("lasx", lasx_xvsra_h, simdl_shr, m256i, i16x16);
117+
impl_vvv!("lasx", lasx_xvsra_w, simdl_shr, m256i, i32x8);
118+
impl_vvv!("lasx", lasx_xvsra_d, simdl_shr, m256i, i64x4);
119+
impl_vvv!("lasx", lasx_xvsrl_b, simdl_shr, m256i, u8x32);
120+
impl_vvv!("lasx", lasx_xvsrl_h, simdl_shr, m256i, u16x16);
121+
impl_vvv!("lasx", lasx_xvsrl_w, simdl_shr, m256i, u32x8);
122+
impl_vvv!("lasx", lasx_xvsrl_d, simdl_shr, m256i, u64x4);
123+
124+
// Shift-by-immediate forms, using `simd_shl` / `simd_shr` directly. As with the
// vector shifts, `xvsrai_*` passes signed lanes and `xvsrli_*` unsigned lanes.
// NOTE(review): these invocations pass no trailing bit-count argument, unlike
// the other `impl_vuv!` uses in this file — presumably the macro derives the
// immediate range from the lane width in this form; confirm.
impl_vuv!("lasx", lasx_xvslli_b, simd_shl, m256i, i8x32);
125+
impl_vuv!("lasx", lasx_xvslli_h, simd_shl, m256i, i16x16);
126+
impl_vuv!("lasx", lasx_xvslli_w, simd_shl, m256i, i32x8);
127+
impl_vuv!("lasx", lasx_xvslli_d, simd_shl, m256i, i64x4);
128+
impl_vuv!("lasx", lasx_xvsrai_b, simd_shr, m256i, i8x32);
129+
impl_vuv!("lasx", lasx_xvsrai_h, simd_shr, m256i, i16x16);
130+
impl_vuv!("lasx", lasx_xvsrai_w, simd_shr, m256i, i32x8);
131+
impl_vuv!("lasx", lasx_xvsrai_d, simd_shr, m256i, i64x4);
132+
impl_vuv!("lasx", lasx_xvsrli_b, simd_shr, m256i, u8x32);
133+
impl_vuv!("lasx", lasx_xvsrli_h, simd_shr, m256i, u16x16);
134+
impl_vuv!("lasx", lasx_xvsrli_w, simd_shr, m256i, u32x8);
135+
impl_vuv!("lasx", lasx_xvsrli_d, simd_shr, m256i, u64x4);
136+
// Vector-with-immediate operations on unsigned lanes: add, less-than,
// less-or-equal, max and min.
// NOTE(review): the trailing `5` is presumably the bit width of the unsigned
// immediate — confirm against `impl_vuv!`.
impl_vuv!("lasx", lasx_xvaddi_bu, simd_add, m256i, u8x32, 5);
137+
impl_vuv!("lasx", lasx_xvaddi_hu, simd_add, m256i, u16x16, 5);
138+
impl_vuv!("lasx", lasx_xvaddi_wu, simd_add, m256i, u32x8, 5);
139+
impl_vuv!("lasx", lasx_xvaddi_du, simd_add, m256i, u64x4, 5);
140+
impl_vuv!("lasx", lasx_xvslti_bu, simd_lt, m256i, u8x32, 5);
141+
impl_vuv!("lasx", lasx_xvslti_hu, simd_lt, m256i, u16x16, 5);
142+
impl_vuv!("lasx", lasx_xvslti_wu, simd_lt, m256i, u32x8, 5);
143+
impl_vuv!("lasx", lasx_xvslti_du, simd_lt, m256i, u64x4, 5);
144+
impl_vuv!("lasx", lasx_xvslei_bu, simd_le, m256i, u8x32, 5);
145+
impl_vuv!("lasx", lasx_xvslei_hu, simd_le, m256i, u16x16, 5);
146+
impl_vuv!("lasx", lasx_xvslei_wu, simd_le, m256i, u32x8, 5);
147+
impl_vuv!("lasx", lasx_xvslei_du, simd_le, m256i, u64x4, 5);
148+
impl_vuv!("lasx", lasx_xvmaxi_bu, simd_imax, m256i, u8x32, 5);
149+
impl_vuv!("lasx", lasx_xvmaxi_hu, simd_imax, m256i, u16x16, 5);
150+
impl_vuv!("lasx", lasx_xvmaxi_wu, simd_imax, m256i, u32x8, 5);
151+
impl_vuv!("lasx", lasx_xvmaxi_du, simd_imax, m256i, u64x4, 5);
152+
impl_vuv!("lasx", lasx_xvmini_bu, simd_imin, m256i, u8x32, 5);
153+
impl_vuv!("lasx", lasx_xvmini_hu, simd_imin, m256i, u16x16, 5);
154+
impl_vuv!("lasx", lasx_xvmini_wu, simd_imin, m256i, u32x8, 5);
155+
impl_vuv!("lasx", lasx_xvmini_du, simd_imin, m256i, u64x4, 5);
156+
157+
// `xvpickve2gr_*`: lane extraction via `simd_extract`, returning the scalar
// type given as the sixth argument.
// NOTE(review): the trailing argument is 3 for the 8-lane vectors and 2 for the
// 4-lane vectors, so it is presumably the bit width of the lane index — confirm
// against `impl_vug!`.
impl_vug!("lasx", lasx_xvpickve2gr_w, simd_extract, m256i, i32x8, i32, 3);
158+
impl_vug!("lasx", lasx_xvpickve2gr_d, simd_extract, m256i, i64x4, i64, 2);
159+
impl_vug!("lasx", lasx_xvpickve2gr_wu, simd_extract, m256i, u32x8, u32, 3);
160+
impl_vug!("lasx", lasx_xvpickve2gr_du, simd_extract, m256i, u64x4, u64, 2);
161+
162+
// Vector-with-immediate operations on signed lanes: equal, less-than,
// less-or-equal, max and min.
// NOTE(review): the trailing `5` is presumably the bit width of the signed
// immediate — confirm against `impl_vsv!`.
impl_vsv!("lasx", lasx_xvseqi_b, simd_eq, m256i, i8x32, 5);
163+
impl_vsv!("lasx", lasx_xvseqi_h, simd_eq, m256i, i16x16, 5);
164+
impl_vsv!("lasx", lasx_xvseqi_w, simd_eq, m256i, i32x8, 5);
165+
impl_vsv!("lasx", lasx_xvseqi_d, simd_eq, m256i, i64x4, 5);
166+
impl_vsv!("lasx", lasx_xvslti_b, simd_lt, m256i, i8x32, 5);
167+
impl_vsv!("lasx", lasx_xvslti_h, simd_lt, m256i, i16x16, 5);
168+
impl_vsv!("lasx", lasx_xvslti_w, simd_lt, m256i, i32x8, 5);
169+
impl_vsv!("lasx", lasx_xvslti_d, simd_lt, m256i, i64x4, 5);
170+
impl_vsv!("lasx", lasx_xvslei_b, simd_le, m256i, i8x32, 5);
171+
impl_vsv!("lasx", lasx_xvslei_h, simd_le, m256i, i16x16, 5);
172+
impl_vsv!("lasx", lasx_xvslei_w, simd_le, m256i, i32x8, 5);
173+
impl_vsv!("lasx", lasx_xvslei_d, simd_le, m256i, i64x4, 5);
174+
impl_vsv!("lasx", lasx_xvmaxi_b, simd_imax, m256i, i8x32, 5);
175+
impl_vsv!("lasx", lasx_xvmaxi_h, simd_imax, m256i, i16x16, 5);
176+
impl_vsv!("lasx", lasx_xvmaxi_w, simd_imax, m256i, i32x8, 5);
177+
impl_vsv!("lasx", lasx_xvmaxi_d, simd_imax, m256i, i64x4, 5);
178+
impl_vsv!("lasx", lasx_xvmini_b, simd_imin, m256i, i8x32, 5);
179+
impl_vsv!("lasx", lasx_xvmini_h, simd_imin, m256i, i16x16, 5);
180+
impl_vsv!("lasx", lasx_xvmini_w, simd_imin, m256i, i32x8, 5);
181+
impl_vsv!("lasx", lasx_xvmini_d, simd_imin, m256i, i64x4, 5);
182+
183+
// Three-operand multiply-add family. The integer `xvmadd_*` / `xvmsub_*` use the
// `simdl_madd` / `simdl_msub` helpers. Floating-point `xvfmadd_*` uses
// `simd_fma`, and `xvfmsub` / `xvfnmadd` / `xvfnmsub` use `simdl_fms` /
// `simdl_nfma` / `simdl_nfms`.
// NOTE(review): the `simdl_*` helpers are defined elsewhere; their operand
// order (which argument is the accumulator) and negation conventions are not
// visible here — confirm they match the corresponding instructions.
impl_vvvv!("lasx", lasx_xvmadd_b, simdl_madd, m256i, i8x32);
184+
impl_vvvv!("lasx", lasx_xvmadd_h, simdl_madd, m256i, i16x16);
185+
impl_vvvv!("lasx", lasx_xvmadd_w, simdl_madd, m256i, i32x8);
186+
impl_vvvv!("lasx", lasx_xvmadd_d, simdl_madd, m256i, i64x4);
187+
impl_vvvv!("lasx", lasx_xvmsub_b, simdl_msub, m256i, i8x32);
188+
impl_vvvv!("lasx", lasx_xvmsub_h, simdl_msub, m256i, i16x16);
189+
impl_vvvv!("lasx", lasx_xvmsub_w, simdl_msub, m256i, i32x8);
190+
impl_vvvv!("lasx", lasx_xvmsub_d, simdl_msub, m256i, i64x4);
191+
impl_vvvv!("lasx", lasx_xvfmadd_s, simd_fma, m256, f32x8);
192+
impl_vvvv!("lasx", lasx_xvfmadd_d, simd_fma, m256d, f64x4);
193+
impl_vvvv!("lasx", lasx_xvfmsub_s, simdl_fms, m256, f32x8);
194+
impl_vvvv!("lasx", lasx_xvfmsub_d, simdl_fms, m256d, f64x4);
195+
impl_vvvv!("lasx", lasx_xvfnmadd_s, simdl_nfma, m256, f32x8);
196+
impl_vvvv!("lasx", lasx_xvfnmadd_d, simdl_nfma, m256d, f64x4);
197+
impl_vvvv!("lasx", lasx_xvfnmsub_s, simdl_nfms, m256, f32x8);
198+
impl_vvvv!("lasx", lasx_xvfnmsub_d, simdl_nfms, m256d, f64x4);
199+
200+
// `xvinsgr2vr_*`: lane insertion via `simd_insert`, taking a scalar of the type
// given as the sixth argument.
// NOTE(review): the trailing argument (3 for 8 lanes, 2 for 4 lanes) is
// presumably the bit width of the lane index — confirm against `impl_vugv!`.
impl_vugv!("lasx", lasx_xvinsgr2vr_w, simd_insert, m256i, i32x8, i32, 3);
201+
impl_vugv!("lasx", lasx_xvinsgr2vr_d, simd_insert, m256i, i64x4, i64, 2);

0 commit comments

Comments
 (0)