|
| 1 | +//! LoongArch64 LASX intrinsics - intrinsics::simd implementation |
| 2 | +
|
| 3 | +use super::super::{simd::*, *}; |
| 4 | +use crate::core_arch::simd::*; |
| 5 | +use crate::intrinsics::simd::*; |
| 6 | +use crate::mem::transmute; |
| 7 | + |
// Single-vector forms. Every `impl_vv!` line passes, in order: the feature
// string "lasx", the intrinsic name (`lasx_xv*`), the generic SIMD operation it
// is paired with, the public vector type (m256i / m256 / m256d) and the lane
// layout the operation is applied in.
// NOTE(review): `impl_vv!` is defined outside this file; presumably it emits one
// vector-in / vector-out function per line — confirm against the macro.
//
// xvpcnt.{b,h,w,d} -> simd_ctpop
| 8 | +impl_vv!("lasx", lasx_xvpcnt_b, simd_ctpop, m256i, i8x32); |
| 9 | +impl_vv!("lasx", lasx_xvpcnt_h, simd_ctpop, m256i, i16x16); |
| 10 | +impl_vv!("lasx", lasx_xvpcnt_w, simd_ctpop, m256i, i32x8); |
| 11 | +impl_vv!("lasx", lasx_xvpcnt_d, simd_ctpop, m256i, i64x4); |
// xvclz.{b,h,w,d} -> simd_ctlz
| 12 | +impl_vv!("lasx", lasx_xvclz_b, simd_ctlz, m256i, i8x32); |
| 13 | +impl_vv!("lasx", lasx_xvclz_h, simd_ctlz, m256i, i16x16); |
| 14 | +impl_vv!("lasx", lasx_xvclz_w, simd_ctlz, m256i, i32x8); |
| 15 | +impl_vv!("lasx", lasx_xvclz_d, simd_ctlz, m256i, i64x4); |
// xvneg.{b,h,w,d} -> simd_neg
| 16 | +impl_vv!("lasx", lasx_xvneg_b, simd_neg, m256i, i8x32); |
| 17 | +impl_vv!("lasx", lasx_xvneg_h, simd_neg, m256i, i16x16); |
| 18 | +impl_vv!("lasx", lasx_xvneg_w, simd_neg, m256i, i32x8); |
| 19 | +impl_vv!("lasx", lasx_xvneg_d, simd_neg, m256i, i64x4); |
// xvfsqrt.{s,d} -> simd_fsqrt; these are the only lines in this group that use
// the floating-point vector types (m256 with f32x8, m256d with f64x4).
| 20 | +impl_vv!("lasx", lasx_xvfsqrt_s, simd_fsqrt, m256, f32x8); |
| 21 | +impl_vv!("lasx", lasx_xvfsqrt_d, simd_fsqrt, m256d, f64x4); |
| 22 | + |
// xvreplgr2vr.{b,h,w,d} -> simdl_splat. Besides the usual feature string,
// intrinsic name, operation, vector type and lane layout, each line passes a
// trailing scalar type: i32 for the b/h/w variants and i64 for the d variant.
// NOTE(review): presumably that scalar is the type of the intrinsic's single
// (non-vector) argument — confirm against the `impl_gv!` definition, which is
// not visible here.
| 23 | +impl_gv!("lasx", lasx_xvreplgr2vr_b, simdl_splat, m256i, i8x32, i32); |
| 24 | +impl_gv!("lasx", lasx_xvreplgr2vr_h, simdl_splat, m256i, i16x16, i32); |
| 25 | +impl_gv!("lasx", lasx_xvreplgr2vr_w, simdl_splat, m256i, i32x8, i32); |
| 26 | +impl_gv!("lasx", lasx_xvreplgr2vr_d, simdl_splat, m256i, i64x4, i64); |
| 27 | + |
// xvrepli.{b,h,w,d} -> simdl_splat. Same operation as the xvreplgr2vr lines,
// but through `impl_sv!` and with a trailing literal 10 on every line.
// NOTE(review): the 10 is presumably the bit width of the immediate operand —
// confirm against the `impl_sv!` definition, which is not visible here.
| 28 | +impl_sv!("lasx", lasx_xvrepli_b, simdl_splat, m256i, i8x32, 10); |
| 29 | +impl_sv!("lasx", lasx_xvrepli_h, simdl_splat, m256i, i16x16, 10); |
| 30 | +impl_sv!("lasx", lasx_xvrepli_w, simdl_splat, m256i, i32x8, 10); |
| 31 | +impl_sv!("lasx", lasx_xvrepli_d, simdl_splat, m256i, i64x4, 10); |
| 32 | + |
// Vector/vector forms. Every `impl_vvv!` line passes the feature string "lasx",
// the intrinsic name, the paired SIMD operation, the public vector type and the
// lane layout. Signed and unsigned variants of an intrinsic share the same
// operation name and differ only in the lane layout (iNxM vs uNxM).
// NOTE(review): `impl_vvv!` is defined outside this file; presumably it emits a
// two-vector-in / one-vector-out function per line — confirm against the macro.
//
// xvadd / xvsub -> simd_add / simd_sub
| 33 | +impl_vvv!("lasx", lasx_xvadd_b, simd_add, m256i, i8x32); |
| 34 | +impl_vvv!("lasx", lasx_xvadd_h, simd_add, m256i, i16x16); |
| 35 | +impl_vvv!("lasx", lasx_xvadd_w, simd_add, m256i, i32x8); |
| 36 | +impl_vvv!("lasx", lasx_xvadd_d, simd_add, m256i, i64x4); |
| 37 | +impl_vvv!("lasx", lasx_xvsub_b, simd_sub, m256i, i8x32); |
| 38 | +impl_vvv!("lasx", lasx_xvsub_h, simd_sub, m256i, i16x16); |
| 39 | +impl_vvv!("lasx", lasx_xvsub_w, simd_sub, m256i, i32x8); |
| 40 | +impl_vvv!("lasx", lasx_xvsub_d, simd_sub, m256i, i64x4); |
// xvmax / xvmin -> simd_imax / simd_imin (signed layouts first, then the
// `_bu/_hu/_wu/_du` variants on unsigned layouts)
| 41 | +impl_vvv!("lasx", lasx_xvmax_b, simd_imax, m256i, i8x32); |
| 42 | +impl_vvv!("lasx", lasx_xvmax_h, simd_imax, m256i, i16x16); |
| 43 | +impl_vvv!("lasx", lasx_xvmax_w, simd_imax, m256i, i32x8); |
| 44 | +impl_vvv!("lasx", lasx_xvmax_d, simd_imax, m256i, i64x4); |
| 45 | +impl_vvv!("lasx", lasx_xvmax_bu, simd_imax, m256i, u8x32); |
| 46 | +impl_vvv!("lasx", lasx_xvmax_hu, simd_imax, m256i, u16x16); |
| 47 | +impl_vvv!("lasx", lasx_xvmax_wu, simd_imax, m256i, u32x8); |
| 48 | +impl_vvv!("lasx", lasx_xvmax_du, simd_imax, m256i, u64x4); |
| 49 | +impl_vvv!("lasx", lasx_xvmin_b, simd_imin, m256i, i8x32); |
| 50 | +impl_vvv!("lasx", lasx_xvmin_h, simd_imin, m256i, i16x16); |
| 51 | +impl_vvv!("lasx", lasx_xvmin_w, simd_imin, m256i, i32x8); |
| 52 | +impl_vvv!("lasx", lasx_xvmin_d, simd_imin, m256i, i64x4); |
| 53 | +impl_vvv!("lasx", lasx_xvmin_bu, simd_imin, m256i, u8x32); |
| 54 | +impl_vvv!("lasx", lasx_xvmin_hu, simd_imin, m256i, u16x16); |
| 55 | +impl_vvv!("lasx", lasx_xvmin_wu, simd_imin, m256i, u32x8); |
| 56 | +impl_vvv!("lasx", lasx_xvmin_du, simd_imin, m256i, u64x4); |
// Comparisons: xvseq -> simd_eq (signed layouts only), xvslt -> simd_lt and
// xvsle -> simd_le (signed and unsigned layouts)
| 57 | +impl_vvv!("lasx", lasx_xvseq_b, simd_eq, m256i, i8x32); |
| 58 | +impl_vvv!("lasx", lasx_xvseq_h, simd_eq, m256i, i16x16); |
| 59 | +impl_vvv!("lasx", lasx_xvseq_w, simd_eq, m256i, i32x8); |
| 60 | +impl_vvv!("lasx", lasx_xvseq_d, simd_eq, m256i, i64x4); |
| 61 | +impl_vvv!("lasx", lasx_xvslt_b, simd_lt, m256i, i8x32); |
| 62 | +impl_vvv!("lasx", lasx_xvslt_h, simd_lt, m256i, i16x16); |
| 63 | +impl_vvv!("lasx", lasx_xvslt_w, simd_lt, m256i, i32x8); |
| 64 | +impl_vvv!("lasx", lasx_xvslt_d, simd_lt, m256i, i64x4); |
| 65 | +impl_vvv!("lasx", lasx_xvslt_bu, simd_lt, m256i, u8x32); |
| 66 | +impl_vvv!("lasx", lasx_xvslt_hu, simd_lt, m256i, u16x16); |
| 67 | +impl_vvv!("lasx", lasx_xvslt_wu, simd_lt, m256i, u32x8); |
| 68 | +impl_vvv!("lasx", lasx_xvslt_du, simd_lt, m256i, u64x4); |
| 69 | +impl_vvv!("lasx", lasx_xvsle_b, simd_le, m256i, i8x32); |
| 70 | +impl_vvv!("lasx", lasx_xvsle_h, simd_le, m256i, i16x16); |
| 71 | +impl_vvv!("lasx", lasx_xvsle_w, simd_le, m256i, i32x8); |
| 72 | +impl_vvv!("lasx", lasx_xvsle_d, simd_le, m256i, i64x4); |
| 73 | +impl_vvv!("lasx", lasx_xvsle_bu, simd_le, m256i, u8x32); |
| 74 | +impl_vvv!("lasx", lasx_xvsle_hu, simd_le, m256i, u16x16); |
| 75 | +impl_vvv!("lasx", lasx_xvsle_wu, simd_le, m256i, u32x8); |
| 76 | +impl_vvv!("lasx", lasx_xvsle_du, simd_le, m256i, u64x4); |
// xvmul -> simd_mul (signed layouts only)
| 77 | +impl_vvv!("lasx", lasx_xvmul_b, simd_mul, m256i, i8x32); |
| 78 | +impl_vvv!("lasx", lasx_xvmul_h, simd_mul, m256i, i16x16); |
| 79 | +impl_vvv!("lasx", lasx_xvmul_w, simd_mul, m256i, i32x8); |
| 80 | +impl_vvv!("lasx", lasx_xvmul_d, simd_mul, m256i, i64x4); |
// xvdiv -> simd_div and xvmod -> simd_rem (signed and unsigned layouts)
// NOTE(review): zero-divisor behaviour of simd_div / simd_rem versus the
// hardware instructions is not visible from this file — verify.
| 81 | +impl_vvv!("lasx", lasx_xvdiv_b, simd_div, m256i, i8x32); |
| 82 | +impl_vvv!("lasx", lasx_xvdiv_h, simd_div, m256i, i16x16); |
| 83 | +impl_vvv!("lasx", lasx_xvdiv_w, simd_div, m256i, i32x8); |
| 84 | +impl_vvv!("lasx", lasx_xvdiv_d, simd_div, m256i, i64x4); |
| 85 | +impl_vvv!("lasx", lasx_xvdiv_bu, simd_div, m256i, u8x32); |
| 86 | +impl_vvv!("lasx", lasx_xvdiv_hu, simd_div, m256i, u16x16); |
| 87 | +impl_vvv!("lasx", lasx_xvdiv_wu, simd_div, m256i, u32x8); |
| 88 | +impl_vvv!("lasx", lasx_xvdiv_du, simd_div, m256i, u64x4); |
| 89 | +impl_vvv!("lasx", lasx_xvmod_b, simd_rem, m256i, i8x32); |
| 90 | +impl_vvv!("lasx", lasx_xvmod_h, simd_rem, m256i, i16x16); |
| 91 | +impl_vvv!("lasx", lasx_xvmod_w, simd_rem, m256i, i32x8); |
| 92 | +impl_vvv!("lasx", lasx_xvmod_d, simd_rem, m256i, i64x4); |
| 93 | +impl_vvv!("lasx", lasx_xvmod_bu, simd_rem, m256i, u8x32); |
| 94 | +impl_vvv!("lasx", lasx_xvmod_hu, simd_rem, m256i, u16x16); |
| 95 | +impl_vvv!("lasx", lasx_xvmod_wu, simd_rem, m256i, u32x8); |
| 96 | +impl_vvv!("lasx", lasx_xvmod_du, simd_rem, m256i, u64x4); |
// Whole-vector bitwise forms (`*_v`), all expressed over the u8x32 layout.
// and/or/xor use `simd_*` operations; andn/orn/nor use `simdl_*` ones.
| 97 | +impl_vvv!("lasx", lasx_xvand_v, simd_and, m256i, u8x32); |
| 98 | +impl_vvv!("lasx", lasx_xvandn_v, simdl_andn, m256i, u8x32); |
| 99 | +impl_vvv!("lasx", lasx_xvor_v, simd_or, m256i, u8x32); |
| 100 | +impl_vvv!("lasx", lasx_xvorn_v, simdl_orn, m256i, u8x32); |
| 101 | +impl_vvv!("lasx", lasx_xvnor_v, simdl_nor, m256i, u8x32); |
| 102 | +impl_vvv!("lasx", lasx_xvxor_v, simd_xor, m256i, u8x32); |
// Floating-point arithmetic: xvfadd/xvfsub/xvfmul/xvfdiv reuse simd_add,
// simd_sub, simd_mul and simd_div on the f32x8 (m256) and f64x4 (m256d) layouts.
| 103 | +impl_vvv!("lasx", lasx_xvfadd_s, simd_add, m256, f32x8); |
| 104 | +impl_vvv!("lasx", lasx_xvfadd_d, simd_add, m256d, f64x4); |
| 105 | +impl_vvv!("lasx", lasx_xvfsub_s, simd_sub, m256, f32x8); |
| 106 | +impl_vvv!("lasx", lasx_xvfsub_d, simd_sub, m256d, f64x4); |
| 107 | +impl_vvv!("lasx", lasx_xvfmul_s, simd_mul, m256, f32x8); |
| 108 | +impl_vvv!("lasx", lasx_xvfmul_d, simd_mul, m256d, f64x4); |
| 109 | +impl_vvv!("lasx", lasx_xvfdiv_s, simd_div, m256, f32x8); |
| 110 | +impl_vvv!("lasx", lasx_xvfdiv_d, simd_div, m256d, f64x4); |
// Shifts by a vector operand: xvsll -> simdl_shl on signed layouts; xvsra and
// xvsrl both -> simdl_shr, on signed and unsigned layouts respectively.
// NOTE(review): presumably the lane signedness is what selects arithmetic vs
// logical right shift, and `simdl_shl`/`simdl_shr` handle out-of-range shift
// amounts — neither helper is visible here; confirm.
| 111 | +impl_vvv!("lasx", lasx_xvsll_b, simdl_shl, m256i, i8x32); |
| 112 | +impl_vvv!("lasx", lasx_xvsll_h, simdl_shl, m256i, i16x16); |
| 113 | +impl_vvv!("lasx", lasx_xvsll_w, simdl_shl, m256i, i32x8); |
| 114 | +impl_vvv!("lasx", lasx_xvsll_d, simdl_shl, m256i, i64x4); |
| 115 | +impl_vvv!("lasx", lasx_xvsra_b, simdl_shr, m256i, i8x32); |
| 116 | +impl_vvv!("lasx", lasx_xvsra_h, simdl_shr, m256i, i16x16); |
| 117 | +impl_vvv!("lasx", lasx_xvsra_w, simdl_shr, m256i, i32x8); |
| 118 | +impl_vvv!("lasx", lasx_xvsra_d, simdl_shr, m256i, i64x4); |
| 119 | +impl_vvv!("lasx", lasx_xvsrl_b, simdl_shr, m256i, u8x32); |
| 120 | +impl_vvv!("lasx", lasx_xvsrl_h, simdl_shr, m256i, u16x16); |
| 121 | +impl_vvv!("lasx", lasx_xvsrl_w, simdl_shr, m256i, u32x8); |
| 122 | +impl_vvv!("lasx", lasx_xvsrl_d, simdl_shr, m256i, u64x4); |
| 123 | + |
// `impl_vuv!` forms (the `*i` intrinsic names). Two call shapes are used below:
// five arguments for the shifts, and six arguments (trailing literal 5) for
// everything else.
// NOTE(review): `impl_vuv!` is defined outside this file; presumably the
// trailing literal is the bit width of an unsigned immediate and the
// five-argument shape derives that width from the lane size — confirm.
//
// Immediate shifts: xvslli -> simd_shl, xvsrai -> simd_shr on signed layouts,
// xvsrli -> simd_shr on unsigned layouts. Note these use `simd_shl`/`simd_shr`,
// not the `simdl_shl`/`simdl_shr` used by the vector-operand shifts.
| 124 | +impl_vuv!("lasx", lasx_xvslli_b, simd_shl, m256i, i8x32); |
| 125 | +impl_vuv!("lasx", lasx_xvslli_h, simd_shl, m256i, i16x16); |
| 126 | +impl_vuv!("lasx", lasx_xvslli_w, simd_shl, m256i, i32x8); |
| 127 | +impl_vuv!("lasx", lasx_xvslli_d, simd_shl, m256i, i64x4); |
| 128 | +impl_vuv!("lasx", lasx_xvsrai_b, simd_shr, m256i, i8x32); |
| 129 | +impl_vuv!("lasx", lasx_xvsrai_h, simd_shr, m256i, i16x16); |
| 130 | +impl_vuv!("lasx", lasx_xvsrai_w, simd_shr, m256i, i32x8); |
| 131 | +impl_vuv!("lasx", lasx_xvsrai_d, simd_shr, m256i, i64x4); |
| 132 | +impl_vuv!("lasx", lasx_xvsrli_b, simd_shr, m256i, u8x32); |
| 133 | +impl_vuv!("lasx", lasx_xvsrli_h, simd_shr, m256i, u16x16); |
| 134 | +impl_vuv!("lasx", lasx_xvsrli_w, simd_shr, m256i, u32x8); |
| 135 | +impl_vuv!("lasx", lasx_xvsrli_d, simd_shr, m256i, u64x4); |
// xvaddi.{bu,hu,wu,du} -> simd_add, unsigned layouts, trailing 5
| 136 | +impl_vuv!("lasx", lasx_xvaddi_bu, simd_add, m256i, u8x32, 5); |
| 137 | +impl_vuv!("lasx", lasx_xvaddi_hu, simd_add, m256i, u16x16, 5); |
| 138 | +impl_vuv!("lasx", lasx_xvaddi_wu, simd_add, m256i, u32x8, 5); |
| 139 | +impl_vuv!("lasx", lasx_xvaddi_du, simd_add, m256i, u64x4, 5); |
// Unsigned compares against an immediate: xvslti -> simd_lt, xvslei -> simd_le
| 140 | +impl_vuv!("lasx", lasx_xvslti_bu, simd_lt, m256i, u8x32, 5); |
| 141 | +impl_vuv!("lasx", lasx_xvslti_hu, simd_lt, m256i, u16x16, 5); |
| 142 | +impl_vuv!("lasx", lasx_xvslti_wu, simd_lt, m256i, u32x8, 5); |
| 143 | +impl_vuv!("lasx", lasx_xvslti_du, simd_lt, m256i, u64x4, 5); |
| 144 | +impl_vuv!("lasx", lasx_xvslei_bu, simd_le, m256i, u8x32, 5); |
| 145 | +impl_vuv!("lasx", lasx_xvslei_hu, simd_le, m256i, u16x16, 5); |
| 146 | +impl_vuv!("lasx", lasx_xvslei_wu, simd_le, m256i, u32x8, 5); |
| 147 | +impl_vuv!("lasx", lasx_xvslei_du, simd_le, m256i, u64x4, 5); |
// Unsigned max/min against an immediate: xvmaxi -> simd_imax, xvmini -> simd_imin
| 148 | +impl_vuv!("lasx", lasx_xvmaxi_bu, simd_imax, m256i, u8x32, 5); |
| 149 | +impl_vuv!("lasx", lasx_xvmaxi_hu, simd_imax, m256i, u16x16, 5); |
| 150 | +impl_vuv!("lasx", lasx_xvmaxi_wu, simd_imax, m256i, u32x8, 5); |
| 151 | +impl_vuv!("lasx", lasx_xvmaxi_du, simd_imax, m256i, u64x4, 5); |
| 152 | +impl_vuv!("lasx", lasx_xvmini_bu, simd_imin, m256i, u8x32, 5); |
| 153 | +impl_vuv!("lasx", lasx_xvmini_hu, simd_imin, m256i, u16x16, 5); |
| 154 | +impl_vuv!("lasx", lasx_xvmini_wu, simd_imin, m256i, u32x8, 5); |
| 155 | +impl_vuv!("lasx", lasx_xvmini_du, simd_imin, m256i, u64x4, 5); |
| 156 | + |
// xvpickve2gr.{w,d,wu,du} -> simd_extract. Each line adds a scalar type that
// matches the lane element (i32/i64/u32/u64) and a trailing literal: 3 for the
// 8-lane layouts, 2 for the 4-lane layouts.
// NOTE(review): 3 and 2 are exactly the bit counts needed to index 8 and 4
// lanes, so the literal is presumably the lane-index immediate width — confirm
// against the `impl_vug!` definition, which is not visible here.
| 157 | +impl_vug!("lasx", lasx_xvpickve2gr_w, simd_extract, m256i, i32x8, i32, 3); |
| 158 | +impl_vug!("lasx", lasx_xvpickve2gr_d, simd_extract, m256i, i64x4, i64, 2); |
| 159 | +impl_vug!("lasx", lasx_xvpickve2gr_wu, simd_extract, m256i, u32x8, u32, 3); |
| 160 | +impl_vug!("lasx", lasx_xvpickve2gr_du, simd_extract, m256i, u64x4, u64, 2); |
| 161 | + |
// `impl_vsv!` forms: the signed-layout counterparts of the `_bu/_hu/_wu/_du`
// immediate intrinsics declared through `impl_vuv!`. Every line uses a signed
// lane layout and a trailing literal 5.
// NOTE(review): `impl_vsv!` is defined outside this file; presumably the 5 is
// the bit width of a signed immediate — confirm against the macro.
//
// xvseqi -> simd_eq
| 162 | +impl_vsv!("lasx", lasx_xvseqi_b, simd_eq, m256i, i8x32, 5); |
| 163 | +impl_vsv!("lasx", lasx_xvseqi_h, simd_eq, m256i, i16x16, 5); |
| 164 | +impl_vsv!("lasx", lasx_xvseqi_w, simd_eq, m256i, i32x8, 5); |
| 165 | +impl_vsv!("lasx", lasx_xvseqi_d, simd_eq, m256i, i64x4, 5); |
// xvslti -> simd_lt, xvslei -> simd_le
| 166 | +impl_vsv!("lasx", lasx_xvslti_b, simd_lt, m256i, i8x32, 5); |
| 167 | +impl_vsv!("lasx", lasx_xvslti_h, simd_lt, m256i, i16x16, 5); |
| 168 | +impl_vsv!("lasx", lasx_xvslti_w, simd_lt, m256i, i32x8, 5); |
| 169 | +impl_vsv!("lasx", lasx_xvslti_d, simd_lt, m256i, i64x4, 5); |
| 170 | +impl_vsv!("lasx", lasx_xvslei_b, simd_le, m256i, i8x32, 5); |
| 171 | +impl_vsv!("lasx", lasx_xvslei_h, simd_le, m256i, i16x16, 5); |
| 172 | +impl_vsv!("lasx", lasx_xvslei_w, simd_le, m256i, i32x8, 5); |
| 173 | +impl_vsv!("lasx", lasx_xvslei_d, simd_le, m256i, i64x4, 5); |
// xvmaxi -> simd_imax, xvmini -> simd_imin
| 174 | +impl_vsv!("lasx", lasx_xvmaxi_b, simd_imax, m256i, i8x32, 5); |
| 175 | +impl_vsv!("lasx", lasx_xvmaxi_h, simd_imax, m256i, i16x16, 5); |
| 176 | +impl_vsv!("lasx", lasx_xvmaxi_w, simd_imax, m256i, i32x8, 5); |
| 177 | +impl_vsv!("lasx", lasx_xvmaxi_d, simd_imax, m256i, i64x4, 5); |
| 178 | +impl_vsv!("lasx", lasx_xvmini_b, simd_imin, m256i, i8x32, 5); |
| 179 | +impl_vsv!("lasx", lasx_xvmini_h, simd_imin, m256i, i16x16, 5); |
| 180 | +impl_vsv!("lasx", lasx_xvmini_w, simd_imin, m256i, i32x8, 5); |
| 181 | +impl_vsv!("lasx", lasx_xvmini_d, simd_imin, m256i, i64x4, 5); |
| 182 | + |
// Multiply-accumulate family, declared through `impl_vvvv!` with the same five
// arguments as the other vector forms.
// NOTE(review): `impl_vvvv!` is defined outside this file; presumably it emits a
// three-vector-in / one-vector-out function per line. The operand order and
// sign conventions of the `simdl_*` helpers are not visible here — confirm
// them against the LASX instruction definitions.
//
// Integer: xvmadd -> simdl_madd, xvmsub -> simdl_msub (signed layouts)
| 183 | +impl_vvvv!("lasx", lasx_xvmadd_b, simdl_madd, m256i, i8x32); |
| 184 | +impl_vvvv!("lasx", lasx_xvmadd_h, simdl_madd, m256i, i16x16); |
| 185 | +impl_vvvv!("lasx", lasx_xvmadd_w, simdl_madd, m256i, i32x8); |
| 186 | +impl_vvvv!("lasx", lasx_xvmadd_d, simdl_madd, m256i, i64x4); |
| 187 | +impl_vvvv!("lasx", lasx_xvmsub_b, simdl_msub, m256i, i8x32); |
| 188 | +impl_vvvv!("lasx", lasx_xvmsub_h, simdl_msub, m256i, i16x16); |
| 189 | +impl_vvvv!("lasx", lasx_xvmsub_w, simdl_msub, m256i, i32x8); |
| 190 | +impl_vvvv!("lasx", lasx_xvmsub_d, simdl_msub, m256i, i64x4); |
// Floating point: xvfmadd is the only one paired with `simd_fma`; xvfmsub,
// xvfnmadd and xvfnmsub use `simdl_fms`, `simdl_nfma` and `simdl_nfms`.
| 191 | +impl_vvvv!("lasx", lasx_xvfmadd_s, simd_fma, m256, f32x8); |
| 192 | +impl_vvvv!("lasx", lasx_xvfmadd_d, simd_fma, m256d, f64x4); |
| 193 | +impl_vvvv!("lasx", lasx_xvfmsub_s, simdl_fms, m256, f32x8); |
| 194 | +impl_vvvv!("lasx", lasx_xvfmsub_d, simdl_fms, m256d, f64x4); |
| 195 | +impl_vvvv!("lasx", lasx_xvfnmadd_s, simdl_nfma, m256, f32x8); |
| 196 | +impl_vvvv!("lasx", lasx_xvfnmadd_d, simdl_nfma, m256d, f64x4); |
| 197 | +impl_vvvv!("lasx", lasx_xvfnmsub_s, simdl_nfms, m256, f32x8); |
| 198 | +impl_vvvv!("lasx", lasx_xvfnmsub_d, simdl_nfms, m256d, f64x4); |
| 199 | + |
// xvinsgr2vr.{w,d} -> simd_insert. As with the xvpickve2gr lines, each line
// carries the scalar element type (i32 / i64) and a trailing literal: 3 for the
// 8-lane i32x8 layout, 2 for the 4-lane i64x4 layout.
// NOTE(review): the literal is presumably the lane-index immediate width —
// confirm against the `impl_vugv!` definition, which is not visible here.
| 200 | +impl_vugv!("lasx", lasx_xvinsgr2vr_w, simd_insert, m256i, i32x8, i32, 3); |
| 201 | +impl_vugv!("lasx", lasx_xvinsgr2vr_d, simd_insert, m256i, i64x4, i64, 2); |
0 commit comments