@@ -58,6 +58,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
5858 ix = 0 ;
5959 a_ptr = a ;
6060 if (inc_y == 1 ) {
61+ BLASLONG width = n / 3 ;
6162 BLASLONG width = n / 3 ;
6263 uint64_t sve_size = SV_COUNT ();
6364 svbool_t pg_true = SV_TRUE ();
@@ -68,8 +69,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
6869 FLOAT * a2_ptr = a + lda * width * 2 ;
6970
7071 for (j = 0 ; j < width ; j ++ ) {
71- i = 0 ;
72- while ( (i + sve_size * 1 - 1 ) < m ) {
72+
73+ for ( i = 0 ; (i + sve_size - 1 ) < m ; i += sve_size ) {
7374 ix = j * inc_x ;
7475
7576 SV_TYPE x0_vec = SV_DUP (alpha * x [ix + (inc_x * width * 0 )]);
@@ -86,8 +87,6 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
8687 y_vec = svmla_lane (y_vec , a02_vec , x2_vec , 0 );
8788
8889 svst1 (pg_true , y + i , y_vec );
89-
90- i += sve_size * 1 ;
9190 }
9291
9392 if (i < m ) {
@@ -117,29 +116,31 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
117116 a_ptr = a2_ptr ;
118117 for (j = width * 3 ; j < n ; j ++ ) {
119118 ix = j * inc_x ;
120- i = 0 ;
121- while ((i + sve_size * 1 - 1 ) < m ) {
119+ for (i = 0 ; (i + sve_size - 1 ) < m ; i += sve_size ) {
122120 SV_TYPE y_vec = svld1 (pg_true , y + i );
123121 SV_TYPE x_vec = SV_DUP (alpha * x [(ix )]);
124122 SV_TYPE a_vec = svld1 (pg_true , a_ptr + i );
125123 y_vec = svmla_x (pg_true , y_vec , a_vec , x_vec );
126124 svst1 (pg_true , y + i , y_vec );
127- i += sve_size * 1 ;
128125 }
129126
130127 if (i < m ) {
131128 SV_TYPE y_vec = svld1 (pg , y + i );
132129 SV_TYPE x_vec = SV_DUP (alpha * x [(ix )]);
133130 SV_TYPE a_vec = svld1 (pg , a_ptr + i );
134131 y_vec = svmla_m (pg , y_vec , a_vec , x_vec );
132+ y_vec = svmla_m (pg , y_vec , a_vec , x_vec );
135133 svst1 (pg , y + i , y_vec );
136134 }
137135
136+
138137 a_ptr += lda ;
139138 ix += inc_x ;
140139 }
141140
142141 return (0 );
142+
143+ return (0 );
143144 }
144145
145146 for (j = 0 ; j < n ; j ++ ) {
0 commit comments