Skip to content
7 changes: 1 addition & 6 deletions kernel/loongarch64/amax_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDINT INCX, 0(INCX)
#endif

xvxor.v VM0, VM0, VM0
bge $r0, N, .L999
bge $r0, INCX, .L999
li.d TEMP, 1
slli.d TEMP, TEMP, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
#ifdef DOUBLE
xvldrepl.d VM0, X, 0
#else
xvldrepl.w VM0, X, 0
#endif
XVFSUB VM0, VM0, VM0
bne INCX, TEMP, .L20

srai.d I, N, 4
Expand Down
22 changes: 10 additions & 12 deletions kernel/loongarch64/asum_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.d res1, VX2, res1
xvfadd.d res1, VX3, res1
#else
xvfadd.s res2, res1, res2
xvpickve.w VX1, res1, 1
xvpickve.w VX2, res1, 2
xvpickve.w VX3, res1, 3
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
xvpickve.w VX0, res2, 4
xvpickve.w VX1, res2, 5
xvpickve.w VX2, res2, 6
xvpickve.w VX3, res2, 7
xvpickve.w VX0, res1, 4
xvpickve.w VX1, res1, 5
xvpickve.w VX2, res1, 6
xvpickve.w VX3, res1, 7
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3

Expand Down Expand Up @@ -217,21 +216,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.d res1, VX2, res1
xvfadd.d res1, VX3, res1
#else
xvfadd.s res2, res1, res2
xvpickve.w VX1, res1, 1
xvpickve.w VX2, res1, 2
xvpickve.w VX3, res1, 3
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
xvpickve.w VX0, res2, 4
xvpickve.w VX1, res2, 5
xvpickve.w VX2, res2, 6
xvpickve.w VX3, res2, 7
xvpickve.w VX0, res1, 4
xvpickve.w VX1, res1, 5
xvpickve.w VX2, res1, 6
xvpickve.w VX3, res1, 7
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3

Expand Down
2 changes: 1 addition & 1 deletion kernel/loongarch64/cdot_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w x2, t2, 6
xvinsgr2vr.w x1, t3, 7
xvinsgr2vr.w x2, t4, 7
addi.d Y, Y, 8 * SIZE
addi.d Y, Y, 16 * SIZE
xvpickev.w x3, VX3, VX2
xvpickod.w x4, VX3, VX2
xvfmadd.s res1, x1, x3, res1
Expand Down
78 changes: 53 additions & 25 deletions kernel/loongarch64/cnrm2_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VX4 $xr21
#define res1 $xr19
#define res2 $xr20
#define RCP $f2
#define VALPHA $xr3

PROLOGUE

Expand All @@ -55,10 +57,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDINT INCX, 0(INCX)
#endif

xvxor.v res1, res1, res1
xvxor.v res2, res2, res2
bge $r0, N, .L999
beq $r0, INCX, .L999

addi.d $sp, $sp, -32
st.d $ra, $sp, 0
st.d N, $sp, 8
st.d X, $sp, 16
st.d INCX, $sp, 24
#ifdef DYNAMIC_ARCH
bl camax_k_LA264
#else
bl camax_k
#endif
ld.d $ra, $sp, 0
ld.d N, $sp, 8
ld.d X, $sp, 16
ld.d INCX, $sp, 24
addi.d $sp, $sp, 32

frecip.s RCP, $f0
vreplvei.w $vr3, $vr2, 0
xvpermi.d VALPHA, $xr3,0x00
xvxor.v res1, res1, res1
xvxor.v res2, res2, res2
fcmp.ceq.s $fcc0, $f0, $f19
bcnez $fcc0, .L999

li.d TEMP, SIZE
slli.d INCX, INCX, ZBASE_SHIFT
srai.d I, N, 2
Expand All @@ -67,13 +92,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.align 3

.L10:
xvld VX0, X, 0 * SIZE
xvfcvtl.d.s VX1, VX0
xvfcvth.d.s VX2, VX0
xvfmadd.d res1, VX1, VX1, res1
xvfmadd.d res2, VX2, VX2, res2
addi.d I, I, -1
addi.d X, X, 8 * SIZE

xvld VX0, X, 0 * SIZE
xvld VX1, X, 8 * SIZE
xvfmul.s VX0, VX0, VALPHA
xvfmul.s VX1, VX1, VALPHA
xvfmadd.s res1, VX0, VX0, res1
xvfmadd.s res2, VX1, VX1, res2

addi.d X, X, 16 * SIZE
blt $r0, I, .L10
.align 3
b .L996
Expand Down Expand Up @@ -103,22 +131,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VX0, t3, 6
xvinsgr2vr.w VX0, t4, 7
add.d X, X, INCX
xvfcvtl.d.s VX1, VX0
xvfcvth.d.s VX2, VX0
xvfmadd.d res1, VX1, VX1, res1
xvfmadd.d res2, VX2, VX2, res2
xvfmul.s VX0, VX0, VALPHA
xvfmadd.s res2, VX0, VX0, res2
addi.d I, I, -1
blt $r0, I, .L21
b .L996

.L996:
xvfadd.d res1, res1, res2
xvpickve.d VX1, res1, 1
xvpickve.d VX2, res1, 2
xvpickve.d VX3, res1, 3
xvfadd.d res1, VX1, res1
xvfadd.d res1, VX2, res1
xvfadd.d res1, VX3, res1
xvfadd.s res1, res1, res2
xvpermi.d VX1, res1, 0x4e
xvfadd.s res1, res1, VX1
vreplvei.w $vr17, $vr19, 1
vreplvei.w $vr18, $vr19, 2
vreplvei.w $vr21, $vr19, 3
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
xvfadd.s res1, VX4, res1
.align 3

.L997:
Expand All @@ -130,18 +158,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fld.s a1, X, 0 * SIZE
fld.s a2, X, 1 * SIZE
addi.d I, I, -1
fcvt.d.s a1, a1
fcvt.d.s a2, a2
fmadd.d res, a1, a1, res
fmadd.d res, a2, a2, res
fmul.s a1, a1, RCP
fmul.s a2, a2, RCP
fmadd.s res, a1, a1, res
fmadd.s res, a2, a2, res
add.d X, X, INCX
blt $r0, I, .L998
.align 3

.L999:
fsqrt.d res, res
fsqrt.s res, res
fmul.s $f0, res, $f0
move $r4, $r17
fcvt.s.d $f0, res
jirl $r0, $r1, 0x0

EPILOGUE
8 changes: 4 additions & 4 deletions kernel/loongarch64/copy_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d Y, Y, INCY
ST a2, Y, 0
add.d Y, Y, INCY
ST a3, X, 0
ST a3, Y, 0
add.d Y, Y, INCY
ST a4, X, 0
ST a4, Y, 0
add.d Y, Y, INCY
LD a1, X, 0
add.d X, X, INCX
Expand All @@ -276,9 +276,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d Y, Y, INCY
ST a2, Y, 0
add.d Y, Y, INCY
ST a3, X, 0
ST a3, Y, 0
add.d Y, Y, INCY
ST a4, X, 0
ST a4, Y, 0
add.d Y, Y, INCY
addi.d I, I, -1
blt $r0, I, .L222
Expand Down
Loading
Loading