@@ -49,12 +49,33 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
4949{
5050 if (n < 0 ) return (0 );
5151
52- FLOAT * a_ptr , * y_ptr , temp ;
52+ FLOAT * a_ptr , * y_ptr , * a2_ptr , temp , temp2 ;
5353 BLASLONG i , j , vl ;
54- FLOAT_V_T va , vy ;
54+ FLOAT_V_T va , vy , va2 ;
5555
5656 if (inc_y == 1 ) {
57- for (j = 0 ; j < n ; j ++ ) {
57+ for (j = 0 ; j < (n >> 1 ); j ++ ) {
58+ temp = alpha * x [0 ];
59+ temp2 = alpha * x [inc_x ];
60+ y_ptr = y ;
61+ a_ptr = a ;
62+ a2_ptr = a + lda ;
63+ for (i = m ; i > 0 ; i -= vl ) {
64+ vl = VSETVL (i );
65+ vy = VLEV_FLOAT (y_ptr , vl );
66+ va = VLEV_FLOAT (a_ptr , vl );
67+ va2 = VLEV_FLOAT (a2_ptr , vl );
68+ vy = VFMACCVF_FLOAT (vy , temp , va , vl );
69+ vy = VFMACCVF_FLOAT (vy , temp2 , va2 , vl );
70+ VSEV_FLOAT (y_ptr , vy , vl );
71+ y_ptr += vl ;
72+ a_ptr += vl ;
73+ a2_ptr += vl ;
74+ }
75+ x += inc_x * 2 ;
76+ a += lda * 2 ;
77+ }
78+ if (n & 1 ) {
5879 temp = alpha * x [0 ];
5980 y_ptr = y ;
6081 a_ptr = a ;
@@ -67,12 +88,31 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
6788 y_ptr += vl ;
6889 a_ptr += vl ;
6990 }
70- x += inc_x ;
71- a += lda ;
7291 }
7392 } else {
7493 BLASLONG stride_y = inc_y * sizeof (FLOAT );
75- for (j = 0 ; j < n ; j ++ ) {
94+ for (j = 0 ; j < (n >> 1 ); j ++ ) {
95+ temp = alpha * x [0 ];
96+ temp2 = alpha * x [inc_x ];
97+ y_ptr = y ;
98+ a_ptr = a ;
99+ a2_ptr = a + lda ;
100+ for (i = m ; i > 0 ; i -= vl ) {
101+ vl = VSETVL (i );
102+ vy = VLSEV_FLOAT (y_ptr , stride_y , vl );
103+ va = VLEV_FLOAT (a_ptr , vl );
104+ va2 = VLEV_FLOAT (a2_ptr , vl );
105+ vy = VFMACCVF_FLOAT (vy , temp , va , vl );
106+ vy = VFMACCVF_FLOAT (vy , temp2 , va2 , vl );
107+ VSSEV_FLOAT (y_ptr , stride_y , vy , vl );
108+ y_ptr += vl * inc_y ;
109+ a_ptr += vl ;
110+ a2_ptr += vl ;
111+ }
112+ x += inc_x * 2 ;
113+ a += lda * 2 ;
114+ }
115+ if (n & 1 ) {
76116 temp = alpha * x [0 ];
77117 y_ptr = y ;
78118 a_ptr = a ;
@@ -85,8 +125,6 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
85125 y_ptr += vl * inc_y ;
86126 a_ptr += vl ;
87127 }
88- x += inc_x ;
89- a += lda ;
90128 }
91129 }
92130 return (0 );
0 commit comments