@@ -9,10 +9,14 @@ mod x86;
99#[ cfg( target_arch = "aarch64" ) ]
1010mod aarch64;
1111
12+ #[ cfg( target_arch = "loongarch64" ) ]
13+ mod loongarch64;
14+
1215macro_rules! convert_fn {
13- (
14- if x86_feature( "f16c" ) { $f16c: expr } else if aarch64_feature( "fp16" ) { $aarch64: expr } else { $fallback: expr }
15- ) => {
16+ ( if x86_feature( "f16c" ) { $f16c: expr }
17+ else if aarch64_feature( "fp16" ) { $aarch64: expr }
18+ else if loongarch64_feature( "lsx" ) { $loongarch64: expr }
19+ else { $fallback: expr } ) => {
1620 cfg_if:: cfg_if! {
1721 // Use intrinsics directly when the target feature is enabled at compile time (the only hardware path under no_std)
1822 if #[ cfg( all(
@@ -29,6 +33,12 @@ macro_rules! convert_fn {
2933 ) ) ] {
3034 $aarch64
3135 }
36+ else if #[ cfg( all(
37+ target_arch = "loongarch64" ,
38+ target_feature = "lsx"
39+ ) ) ] {
40+ $loongarch64
41+ }
3242
3343 // Use CPU feature detection if using std
3444 else if #[ cfg( all(
@@ -55,6 +65,17 @@ macro_rules! convert_fn {
5565 $fallback
5666 }
5767 }
68+ else if #[ cfg( all(
69+ feature = "std" ,
70+ target_arch = "loongarch64" ,
71+ ) ) ] {
72+ use std:: arch:: is_loongarch_feature_detected;
73+ if is_loongarch_feature_detected!( "lsx" ) {
74+ $loongarch64
75+ } else {
76+ $fallback
77+ }
78+ }
5879
5980 // Fall back to software
6081 else {
@@ -71,6 +92,8 @@ pub(crate) fn f32_to_f16(f: f32) -> u16 {
7192 unsafe { x86:: f32_to_f16_x86_f16c( f) }
7293 } else if aarch64_feature( "fp16" ) {
7394 unsafe { aarch64:: f32_to_f16_fp16( f) }
95+ } else if loongarch64_feature( "lsx" ) {
96+ unsafe { loongarch64:: f32_to_f16_lsx( f) }
7497 } else {
7598 f32_to_f16_fallback( f)
7699 }
@@ -84,6 +107,8 @@ pub(crate) fn f64_to_f16(f: f64) -> u16 {
84107 unsafe { x86:: f64_to_f16_x86_f16c( f) }
85108 } else if aarch64_feature( "fp16" ) {
86109 unsafe { aarch64:: f64_to_f16_fp16( f) }
110+ } else if loongarch64_feature( "lsx" ) {
111+ f64_to_f16_fallback( f)
87112 } else {
88113 f64_to_f16_fallback( f)
89114 }
@@ -97,6 +122,8 @@ pub(crate) fn f16_to_f32(i: u16) -> f32 {
97122 unsafe { x86:: f16_to_f32_x86_f16c( i) }
98123 } else if aarch64_feature( "fp16" ) {
99124 unsafe { aarch64:: f16_to_f32_fp16( i) }
125+ } else if loongarch64_feature( "lsx" ) {
126+ unsafe { loongarch64:: f16_to_f32_lsx( i) }
100127 } else {
101128 f16_to_f32_fallback( i)
102129 }
@@ -110,6 +137,8 @@ pub(crate) fn f16_to_f64(i: u16) -> f64 {
110137 unsafe { x86:: f16_to_f64_x86_f16c( i) }
111138 } else if aarch64_feature( "fp16" ) {
112139 unsafe { aarch64:: f16_to_f64_fp16( i) }
140+ } else if loongarch64_feature( "lsx" ) {
141+ unsafe { loongarch64:: f16_to_f32_lsx( i) as f64 }
113142 } else {
114143 f16_to_f64_fallback( i)
115144 }
@@ -123,6 +152,8 @@ pub(crate) fn f32x4_to_f16x4(f: &[f32; 4]) -> [u16; 4] {
123152 unsafe { x86:: f32x4_to_f16x4_x86_f16c( f) }
124153 } else if aarch64_feature( "fp16" ) {
125154 unsafe { aarch64:: f32x4_to_f16x4_fp16( f) }
155+ } else if loongarch64_feature( "lsx" ) {
156+ unsafe { loongarch64:: f32x4_to_f16x4_lsx( f) }
126157 } else {
127158 f32x4_to_f16x4_fallback( f)
128159 }
@@ -136,6 +167,8 @@ pub(crate) fn f16x4_to_f32x4(i: &[u16; 4]) -> [f32; 4] {
136167 unsafe { x86:: f16x4_to_f32x4_x86_f16c( i) }
137168 } else if aarch64_feature( "fp16" ) {
138169 unsafe { aarch64:: f16x4_to_f32x4_fp16( i) }
170+ } else if loongarch64_feature( "lsx" ) {
171+ unsafe { loongarch64:: f16x4_to_f32x4_lsx( i) }
139172 } else {
140173 f16x4_to_f32x4_fallback( i)
141174 }
@@ -149,6 +182,8 @@ pub(crate) fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] {
149182 unsafe { x86:: f64x4_to_f16x4_x86_f16c( f) }
150183 } else if aarch64_feature( "fp16" ) {
151184 unsafe { aarch64:: f64x4_to_f16x4_fp16( f) }
185+ } else if loongarch64_feature( "lsx" ) {
186+ unsafe { loongarch64:: f64x4_to_f16x4_lsx( f) }
152187 } else {
153188 f64x4_to_f16x4_fallback( f)
154189 }
@@ -162,6 +197,8 @@ pub(crate) fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] {
162197 unsafe { x86:: f16x4_to_f64x4_x86_f16c( i) }
163198 } else if aarch64_feature( "fp16" ) {
164199 unsafe { aarch64:: f16x4_to_f64x4_fp16( i) }
200+ } else if loongarch64_feature( "lsx" ) {
201+ unsafe { loongarch64:: f16x4_to_f64x4_lsx( i) }
165202 } else {
166203 f16x4_to_f64x4_fallback( i)
167204 }
@@ -180,6 +217,13 @@ pub(crate) fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] {
180217 aarch64:: f32x4_to_f16x4_fp16) ;
181218 result
182219 }
220+ } else if loongarch64_feature( "lsx" ) {
221+ {
222+ let mut result = [ 0u16 ; 8 ] ;
223+ convert_chunked_slice_4( f. as_slice( ) , result. as_mut_slice( ) ,
224+ loongarch64:: f32x4_to_f16x4_lsx) ;
225+ result
226+ }
183227 } else {
184228 f32x8_to_f16x8_fallback( f)
185229 }
@@ -198,6 +242,13 @@ pub(crate) fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] {
198242 aarch64:: f16x4_to_f32x4_fp16) ;
199243 result
200244 }
245+ } else if loongarch64_feature( "lsx" ) {
246+ {
247+ let mut result = [ 0f32 ; 8 ] ;
248+ convert_chunked_slice_4( i. as_slice( ) , result. as_mut_slice( ) ,
249+ loongarch64:: f16x4_to_f32x4_lsx) ;
250+ result
251+ }
201252 } else {
202253 f16x8_to_f32x8_fallback( i)
203254 }
@@ -216,6 +267,13 @@ pub(crate) fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] {
216267 aarch64:: f64x4_to_f16x4_fp16) ;
217268 result
218269 }
270+ } else if loongarch64_feature( "lsx" ) {
271+ {
272+ let mut result = [ 0u16 ; 8 ] ;
273+ convert_chunked_slice_4( f. as_slice( ) , result. as_mut_slice( ) ,
274+ loongarch64:: f64x4_to_f16x4_lsx) ;
275+ result
276+ }
219277 } else {
220278 f64x8_to_f16x8_fallback( f)
221279 }
@@ -234,6 +292,13 @@ pub(crate) fn f16x8_to_f64x8(i: &[u16; 8]) -> [f64; 8] {
234292 aarch64:: f16x4_to_f64x4_fp16) ;
235293 result
236294 }
295+ } else if loongarch64_feature( "lsx" ) {
296+ {
297+ let mut result = [ 0f64 ; 8 ] ;
298+ convert_chunked_slice_4( i. as_slice( ) , result. as_mut_slice( ) ,
299+ loongarch64:: f16x4_to_f64x4_lsx) ;
300+ result
301+ }
237302 } else {
238303 f16x8_to_f64x8_fallback( i)
239304 }
@@ -248,6 +313,8 @@ pub(crate) fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) {
248313 x86:: f32x4_to_f16x4_x86_f16c)
249314 } else if aarch64_feature( "fp16" ) {
250315 convert_chunked_slice_4( src, dst, aarch64:: f32x4_to_f16x4_fp16)
316+ } else if loongarch64_feature( "lsx" ) {
317+ convert_chunked_slice_4( src, dst, loongarch64:: f32x4_to_f16x4_lsx)
251318 } else {
252319 slice_fallback( src, dst, f32_to_f16_fallback)
253320 }
@@ -262,6 +329,8 @@ pub(crate) fn f16_to_f32_slice(src: &[u16], dst: &mut [f32]) {
262329 x86:: f16x4_to_f32x4_x86_f16c)
263330 } else if aarch64_feature( "fp16" ) {
264331 convert_chunked_slice_4( src, dst, aarch64:: f16x4_to_f32x4_fp16)
332+ } else if loongarch64_feature( "lsx" ) {
333+ convert_chunked_slice_4( src, dst, loongarch64:: f16x4_to_f32x4_lsx)
265334 } else {
266335 slice_fallback( src, dst, f16_to_f32_fallback)
267336 }
@@ -276,6 +345,8 @@ pub(crate) fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) {
276345 x86:: f64x4_to_f16x4_x86_f16c)
277346 } else if aarch64_feature( "fp16" ) {
278347 convert_chunked_slice_4( src, dst, aarch64:: f64x4_to_f16x4_fp16)
348+ } else if loongarch64_feature( "lsx" ) {
349+ convert_chunked_slice_4( src, dst, loongarch64:: f64x4_to_f16x4_lsx)
279350 } else {
280351 slice_fallback( src, dst, f64_to_f16_fallback)
281352 }
@@ -290,6 +361,8 @@ pub(crate) fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) {
290361 x86:: f16x4_to_f64x4_x86_f16c)
291362 } else if aarch64_feature( "fp16" ) {
292363 convert_chunked_slice_4( src, dst, aarch64:: f16x4_to_f64x4_fp16)
364+ } else if loongarch64_feature( "lsx" ) {
365+ convert_chunked_slice_4( src, dst, loongarch64:: f16x4_to_f64x4_lsx)
293366 } else {
294367 slice_fallback( src, dst, f16_to_f64_fallback)
295368 }
0 commit comments