Skip to content

Commit 3f80d12

Browse files
committed
optimize zgemm lsx kernel for 2k3000 cpu
1 parent 1bd74ad commit 3f80d12

4 files changed

Lines changed: 126 additions & 167 deletions

File tree

driver/others/parameter.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,17 @@ int get_L3_size() {
790790
return ((ret & 0xffff) + 1) * pow(2, ((ret >> 16) & 0xff)) * pow(2, ((ret >> 24) & 0x7f)) / 1024 / 1024; // MB
791791
}
792792

793+
/*
 * Execute the LoongArch `cpucfg` instruction with configuration word
 * index 0 and return the resulting 32-bit value unmodified.
 *
 * The LA264 branch of blas_set_parameter() compares the full return value
 * against 0x0014b020 to choose the 2K3000-specific ZGEMM blocking sizes.
 *
 * NOTE(review): presumably configuration word 0 holds the processor
 * identity (PRID), as the function name suggests -- confirm against the
 * LoongArch reference manual.
 */
int get_cpu_prid() {
794+
int ret = 0, id = 0x0; /* id selects which cpucfg word to read (word 0) */
795+
__asm__ volatile (
796+
"cpucfg %[ret], %[id]"
797+
: [ret]"=r"(ret)
798+
: [id]"r"(id)
799+
: "memory" /* NOTE(review): only register operands are used; the memory clobber looks unnecessary -- confirm */
800+
);
801+
return ret;
802+
}
803+
793804
void blas_set_parameter(void){
794805
#if defined(LA464)
795806
int L3_size = get_L3_size();
@@ -868,6 +879,18 @@ void blas_set_parameter(void){
868879
}
869880
}
870881
#endif
882+
#elif defined(LA264)
883+
int prid = get_cpu_prid();
884+
if (prid == 0x0014b020) { //2k3000
885+
886+
zgemm_p = 128;
887+
zgemm_q = 176;
888+
zgemm_r = 360;
889+
} else {
890+
zgemm_p = 64;
891+
zgemm_q = 120;
892+
zgemm_r = 4096;
893+
}
871894
#endif
872895
}
873896
#endif

kernel/loongarch64/zgemm_kernel_4x4_lsx.S

Lines changed: 32 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
271271
vld D4, B0, 0x30 // b3ri
272272
vld D0, A0, 0x00 // a0ri
273273

274-
vand.v D5, D0, D0
275-
vand.v D6, D0, D0
276-
vshuf4i.d D5, D0, 0x00 //a0rr
277-
vshuf4i.d D6, D0, 0x55 //a0ii
274+
vshuf4i.d D5, D0, 0x0a //a0rr
275+
vshuf4i.d D6, D0, 0x0f //a0ii
278276

279277
vpackev.d D7, D2, D1 //b0r b1r
280278
vpackod.d D8, D2, D1 //b0i b1i
@@ -294,10 +292,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
294292

295293
vld D0, A0, 0x10 // a1ri
296294

297-
vand.v D5, D0, D0
298-
vand.v D6, D0, D0
299-
vshuf4i.d D5, D0, 0x00 //a1rr
300-
vshuf4i.d D6, D0, 0x55 //a1ii
295+
vshuf4i.d D5, D0, 0x0a //a1rr
296+
vshuf4i.d D6, D0, 0x0f //a1ii
301297

302298
VMADD1 U4, D5, D7, U4 //01r 11r
303299
VMADD2 U5, D6, D7, U5 //01i 11i
@@ -311,10 +307,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
311307

312308
vld D0, A0, 0x20 // a2ri
313309

314-
vand.v D5, D0, D0
315-
vand.v D6, D0, D0
316-
vshuf4i.d D5, D0, 0x00 //a2rr
317-
vshuf4i.d D6, D0, 0x55 //a2ii
310+
vshuf4i.d D5, D0, 0x0a //a2rr
311+
vshuf4i.d D6, D0, 0x0f //a2ii
318312

319313
VMADD1 U8, D5, D7, U8 //02r 12r
320314
VMADD2 U9, D6, D7, U9 //02i 12i
@@ -328,10 +322,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
328322

329323
vld D0, A0, 0x30 // a3ri
330324

331-
vand.v D5, D0, D0
332-
vand.v D6, D0, D0
333-
vshuf4i.d D5, D0, 0x00 //a3rr
334-
vshuf4i.d D6, D0, 0x55 //a3ii
325+
vshuf4i.d D5, D0, 0x0a //a3rr
326+
vshuf4i.d D6, D0, 0x0f //a3ii
335327

336328
VMADD1 U12, D5, D7, U12 //03r 13r
337329
VMADD2 U13, D6, D7, U13 //03i 13i
@@ -523,70 +515,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
523515
vld D0, C0, 0x00 //c0: 0 1
524516
vld D1, C1, 0x00 //c1: 0 1
525517

526-
vst U0, C0, 0x00
527-
fld.d $f27, C0, 0x00
528-
fld.d $f27, C0, 0x08
529-
530-
vst U1, C0, 0x00
531-
fld.d $f27, C0, 0x00
532-
fld.d $f27, C0, 0x08
533-
534-
vst U2, C0, 0x00
535-
fld.d $f27, C0, 0x00
536-
fld.d $f27, C0, 0x08
537-
538-
vst U3, C0, 0x00
539-
fld.d $f27, C0, 0x00
540-
fld.d $f27, C0, 0x08
541-
542-
vst U4, C0, 0x00
543-
fld.d $f27, C0, 0x00
544-
fld.d $f27, C0, 0x08
545-
546-
vst U5, C0, 0x00
547-
fld.d $f27, C0, 0x00
548-
fld.d $f27, C0, 0x08
549-
550-
vst U6, C0, 0x00
551-
fld.d $f27, C0, 0x00
552-
fld.d $f27, C0, 0x08
553-
554-
vst U7, C0, 0x00
555-
fld.d $f27, C0, 0x00
556-
fld.d $f27, C0, 0x08
557-
558-
vst U8, C0, 0x00
559-
fld.d $f27, C0, 0x00
560-
fld.d $f27, C0, 0x08
561-
562-
vst U9, C0, 0x00
563-
fld.d $f27, C0, 0x00
564-
fld.d $f27, C0, 0x08
565-
566-
vst U10, C0, 0x00
567-
fld.d $f27, C0, 0x00
568-
fld.d $f27, C0, 0x08
569-
570-
vst U11, C0, 0x00
571-
fld.d $f27, C0, 0x00
572-
fld.d $f27, C0, 0x08
573-
574-
vst U12, C0, 0x00
575-
fld.d $f27, C0, 0x00
576-
fld.d $f27, C0, 0x08
577-
578-
vst U13, C0, 0x00
579-
fld.d $f27, C0, 0x00
580-
fld.d $f27, C0, 0x08
581-
582-
vst U14, C0, 0x00
583-
fld.d $f27, C0, 0x00
584-
fld.d $f27, C0, 0x08
585-
586-
vst U15, C0, 0x00
587-
fld.d $f27, C0, 0x00
588-
fld.d $f27, C0, 0x08
589-
590518
vpackev.d D2, D1, D0 //c0[0] c1[0]
591519
vpackod.d D3, D1, D0 //c0[1] c1[1]
592520

@@ -823,10 +751,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
823751
vld D4, B0, 0x30 // b3ri
824752
vld D0, A0, 0x00 // a0ri
825753

826-
vand.v D5, D0, D0
827-
vand.v D6, D0, D0
828-
vshuf4i.d D5, D0, 0x00 //a0rr
829-
vshuf4i.d D6, D0, 0x55 //a0ii
754+
vshuf4i.d D5, D0, 0x0a //a0rr
755+
vshuf4i.d D6, D0, 0x0f //a0ii
830756

831757
vpackev.d D7, D2, D1 //b0r b1r
832758
vpackod.d D8, D2, D1 //b0i b1i
@@ -846,10 +772,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
846772

847773
vld D0, A0, 0x10 // a1ri
848774

849-
vand.v D5, D0, D0
850-
vand.v D6, D0, D0
851-
vshuf4i.d D5, D0, 0x00 //a1rr
852-
vshuf4i.d D6, D0, 0x55 //a1ii
775+
vshuf4i.d D5, D0, 0x0a //a1rr
776+
vshuf4i.d D6, D0, 0x0f //a1ii
853777

854778
VMADD1 U4, D5, D7, U4 //01r 11r
855779
VMADD2 U5, D6, D7, U5 //01i 11i
@@ -1100,10 +1024,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11001024
vld D4, B0, 0x30 // b3ri
11011025
vld D0, A0, 0x00 // a0ri
11021026

1103-
vand.v D5, D0, D0
1104-
vand.v D6, D0, D0
1105-
vshuf4i.d D5, D0, 0x00 //a0rr
1106-
vshuf4i.d D6, D0, 0x55 //a0ii
1027+
vshuf4i.d D5, D0, 0x0a //a0rr
1028+
vshuf4i.d D6, D0, 0x0f //a0ii
11071029

11081030
vpackev.d D7, D2, D1 //b0r b1r
11091031
vpackod.d D8, D2, D1 //b0i b1i
@@ -1309,10 +1231,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13091231
vld D2, B0, 0x10 // b1ri
13101232
vld D0, A0, 0x00 // a0ri
13111233

1312-
vand.v D5, D0, D0
1313-
vand.v D6, D0, D0
1314-
vshuf4i.d D5, D0, 0x00 //a0rr
1315-
vshuf4i.d D6, D0, 0x55 //a0ii
1234+
vshuf4i.d D5, D0, 0x0a //a0rr
1235+
vshuf4i.d D6, D0, 0x0f //a0ii
13161236

13171237
vpackev.d D7, D2, D1 //b0r b1r
13181238
vpackod.d D8, D2, D1 //b0i b1i
@@ -1324,10 +1244,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13241244

13251245
vld D0, A0, 0x10 // a1ri
13261246

1327-
vand.v D5, D0, D0
1328-
vand.v D6, D0, D0
1329-
vshuf4i.d D5, D0, 0x00 //a1rr
1330-
vshuf4i.d D6, D0, 0x55 //a1ii
1247+
vshuf4i.d D5, D0, 0x0a //a1rr
1248+
vshuf4i.d D6, D0, 0x0f //a1ii
13311249

13321250
VMADD1 U2, D5, D7, U2 //01r 11r
13331251
VMADD2 U3, D6, D7, U3 //01i 11i
@@ -1336,10 +1254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13361254

13371255
vld D0, A0, 0x20 // a2ri
13381256

1339-
vand.v D5, D0, D0
1340-
vand.v D6, D0, D0
1341-
vshuf4i.d D5, D0, 0x00 //a2rr
1342-
vshuf4i.d D6, D0, 0x55 //a2ii
1257+
vshuf4i.d D5, D0, 0x0a //a2rr
1258+
vshuf4i.d D6, D0, 0x0f //a2ii
13431259

13441260
VMADD1 U4, D5, D7, U4 //02r 12r
13451261
VMADD2 U5, D6, D7, U5 //02i 12i
@@ -1348,10 +1264,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13481264

13491265
vld D0, A0, 0x30 // a3ri
13501266

1351-
vand.v D5, D0, D0
1352-
vand.v D6, D0, D0
1353-
vshuf4i.d D5, D0, 0x00 //a3rr
1354-
vshuf4i.d D6, D0, 0x55 //a3ii
1267+
vshuf4i.d D5, D0, 0x0a //a3rr
1268+
vshuf4i.d D6, D0, 0x0f //a3ii
13551269

13561270
VMADD1 U6, D5, D7, U6 //03r 13r
13571271
VMADD2 U7, D6, D7, U7 //03i 13i
@@ -1598,10 +1512,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15981512
vld D2, B0, 0x10 // b1ri
15991513
vld D0, A0, 0x00 // a0ri
16001514

1601-
vand.v D5, D0, D0
1602-
vand.v D6, D0, D0
1603-
vshuf4i.d D5, D0, 0x00 //a0rr
1604-
vshuf4i.d D6, D0, 0x55 //a0ii
1515+
vshuf4i.d D5, D0, 0x0a //a0rr
1516+
vshuf4i.d D6, D0, 0x0f //a0ii
16051517

16061518
vpackev.d D7, D2, D1 //b0r b1r
16071519
vpackod.d D8, D2, D1 //b0i b1i
@@ -1613,10 +1525,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16131525

16141526
vld D0, A0, 0x10 // a1ri
16151527

1616-
vand.v D5, D0, D0
1617-
vand.v D6, D0, D0
1618-
vshuf4i.d D5, D0, 0x00 //a1rr
1619-
vshuf4i.d D6, D0, 0x55 //a1ii
1528+
vshuf4i.d D5, D0, 0x0a //a1rr
1529+
vshuf4i.d D6, D0, 0x0f //a1ii
16201530

16211531
VMADD1 U2, D5, D7, U2 //01r 11r
16221532
VMADD2 U3, D6, D7, U3 //01i 11i
@@ -1775,10 +1685,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17751685
vld D2, B0, 0x10 // b1ri
17761686
vld D0, A0, 0x00 // a0ri
17771687

1778-
vand.v D5, D0, D0
1779-
vand.v D6, D0, D0
1780-
vshuf4i.d D5, D0, 0x00 //a0rr
1781-
vshuf4i.d D6, D0, 0x55 //a0ii
1688+
vshuf4i.d D5, D0, 0x0a //a0rr
1689+
vshuf4i.d D6, D0, 0x0f //a0ii
17821690

17831691
vpackev.d D7, D2, D1 //b0r b1r
17841692
vpackod.d D8, D2, D1 //b0i b1i
@@ -1930,10 +1838,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19301838
vpackev.d D5, D2, D0 //a0r a1r
19311839
vpackod.d D6, D2, D0 //a0i a1i
19321840

1933-
vand.v D7, D1, D1
1934-
vand.v D8, D1, D1
1935-
vshuf4i.d D7, D1, 0x00 //b0rr
1936-
vshuf4i.d D8, D1, 0x55 //b0ii
1841+
vshuf4i.d D7, D1, 0x0a //b0rr
1842+
vshuf4i.d D8, D1, 0x0f //b0ii
19371843

19381844
VMADD1 U0, D5, D7, U0 //00r 01r
19391845
VMADD2 U1, D6, D7, U1 //00i 01i
@@ -2108,10 +2014,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21082014
vpackev.d D5, D2, D0 //a0r a1r
21092015
vpackod.d D6, D2, D0 //a0i a1i
21102016

2111-
vand.v D7, D1, D1
2112-
vand.v D8, D1, D1
2113-
vshuf4i.d D7, D1, 0x00 //b0rr
2114-
vshuf4i.d D8, D1, 0x55 //b0ii
2017+
vshuf4i.d D7, D1, 0x0a //b0rr
2018+
vshuf4i.d D8, D1, 0x0f //b0ii
21152019

21162020
VMADD1 U0, D5, D7, U0 //00r 01r
21172021
VMADD2 U1, D6, D7, U1 //00i 01i

0 commit comments

Comments
 (0)