@@ -111,7 +111,6 @@ return;
111111}
112112
113113__arm_new ("za" ) __arm_locally_streaming
114- __attribute__((visibility ("hidden" )))
115114static void sgemm_direct_alpha_beta_sme1_2VLx2VL (uint64_t m , uint64_t k , uint64_t n , const float * alpha ,\
116115 const float * ba , const float * restrict bb , const float * beta ,\
117116 float * restrict C ) {
@@ -177,11 +176,11 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict
177176 * of reading directly from vector (z) registers.
178177 * */
179178 asm volatile ("" : : :"p0" , "p1" , "p2" , "p3" , "p4" , "p5" , "p6" , "p7" ,
180- "p8" , "p9" , "p10" , "p11" , "p12" , "p13" , "p14" , "p15" ,
179+ "p8" , "p9" , "p10" , "p11" , "p12" , "p13" , "p14" , "p15" , "d8" , "d9" , "d10" , "d11" , "d12" , "d13" , "d14" , "d15" ,
181180 "z0" , "z1" , "z2" , "z3" , "z4" , "z5" , "z6" , "z7" ,
182181 "z8" , "z9" , "z10" , "z11" , "z12" , "z13" , "z14" , "z15" ,
183182 "z16" , "z17" , "z18" , "z19" , "z20" , "z21" , "z22" , "z23" ,
184- "z24" , "z25" , "z26" , "z27" , "z28" , "z29" , "z30" , "z31" );
183+ "z24" , "z25" , "z26" , "z27" , "z28" , "z29" , "z30" , "z31" , "za" );
185184
186185 /* Pre-process the left matrix to make it suitable for
187186 matrix sum of outer-product calculation
@@ -190,11 +189,11 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict
190189 SME1_PREPROCESS (M , K , A , A_mod );
191190
192191 asm volatile ("" : : :"p0" , "p1" , "p2" , "p3" , "p4" , "p5" , "p6" , "p7" ,
193- "p8" , "p9" , "p10" , "p11" , "p12" , "p13" , "p14" , "p15" ,
192+ "p8" , "p9" , "p10" , "p11" , "p12" , "p13" , "p14" , "p15" ,"d8" , "d9" , "d10" , "d11" , "d12" , "d13" , "d14" , "d15" ,
194193 "z0" , "z1" , "z2" , "z3" , "z4" , "z5" , "z6" , "z7" ,
195194 "z8" , "z9" , "z10" , "z11" , "z12" , "z13" , "z14" , "z15" ,
196195 "z16" , "z17" , "z18" , "z19" , "z20" , "z21" , "z22" , "z23" ,
197- "z24" , "z25" , "z26" , "z27" , "z28" , "z29" , "z30" , "z31" );
196+ "z24" , "z25" , "z26" , "z27" , "z28" , "z29" , "z30" , "z31" , "za" );
198197
199198 /* Calculate C = alpha*A*B + beta*C */
200199
@@ -210,4 +209,3 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict
210209 float beta , float * __restrict R , BLASLONG strideR ){fprintf (stderr ,"empty sgemm_direct_alpha_beta should not be called!!!\n" );}
211210
212211#endif
213-
0 commit comments