@@ -53,8 +53,16 @@ ifeq ($(ARCH), arm64)
5353USE_TRMM = 1
5454USE_DIRECT_SGEMM = 1
5555USE_DIRECT_SSYMM = 1
56- USE_DIRECT_STRMM = 1
5756USE_DIRECT_SSYRK = 1
57+ USE_DIRECT_STRMM = 1
58+ ifeq ($(CORE), ARMV9SME)
59+ USE_SME = 1
60+ endif
61+ ifeq ($(CORE), VORTEXM4)
62+ ifneq ($(C_COMPILER), GCC)
63+ USE_SME = 1
64+ endif
65+ endif
5866endif
5967
6068ifeq ($(ARCH), riscv64)
@@ -131,12 +139,6 @@ SGEMMDIRECTKERNEL = sgemm_direct_skylakex.c
131139SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c
132140endif
133141ifeq ($(ARCH), arm64)
134- ifeq ($(TARGET_CORE), ARMV9SME)
135- HAVE_SME = 1
136- endif
137- ifeq ($(TARGET_CORE), VORTEXM4)
138- HAVE_SME = 1
139- endif
140142SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c
141143SGEMMDIRECTKERNEL_ALPHA_BETA = sgemm_direct_alpha_beta_arm64_sme1.c
142144SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c
@@ -147,9 +149,6 @@ endif
147149ifdef USE_DIRECT_SSYMM
148150ifndef SSYMMDIRECTKERNEL_ALPHA_BETA
149151ifeq ($(ARCH), arm64)
150- ifeq ($(TARGET_CORE), ARMV9SME)
151- HAVE_SME = 1
152- endif
153152SSYMMDIRECTKERNEL_ALPHA_BETA = ssymm_direct_alpha_beta_arm64_sme1.c
154153endif
155154endif
@@ -158,9 +157,6 @@ endif
158157ifdef USE_DIRECT_STRMM
159158ifndef STRMMDIRECTKERNEL
160159ifeq ($(ARCH), arm64)
161- ifeq ($(TARGET_CORE), ARMV9SME)
162- HAVE_SME = 1
163- endif
164160STRMMDIRECTKERNEL = strmm_direct_arm64_sme1.c
165161endif
166162endif
@@ -169,9 +165,6 @@ endif
169165ifdef USE_DIRECT_SSYRK
170166ifndef SSYRKDIRECTKERNEL_ALPHA_BETA
171167ifeq ($(ARCH), arm64)
172- ifeq ($(TARGET_CORE), ARMV9SME)
173- HAVE_SME = 1
174- endif
175168SSYRKDIRECTKERNEL_ALPHA_BETA = ssyrk_direct_alpha_beta_arm64_sme1.c
176169endif
177170endif
@@ -252,7 +245,7 @@ SKERNELOBJS += \
252245 sgemm_direct_performant$(TSUFFIX).$(SUFFIX) \
253246 sgemm_direct$(TSUFFIX).$(SUFFIX) \
254247 sgemm_direct_alpha_beta$(TSUFFIX).$(SUFFIX)
255- ifdef HAVE_SME
248+ ifdef USE_SME
256249SKERNELOBJS += \
257250 sgemm_direct_sme1_2VLx2VL$(TSUFFIX).$(SUFFIX) \
258251 sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX)
@@ -280,8 +273,8 @@ endif
280273ifdef USE_DIRECT_SSYRK
281274ifeq ($(ARCH), arm64)
282275SKERNELOBJS += \
283- ssyrk_direct_alpha_betaUN$(TSUFFIX).$(SUFFIX) ssyrk_direct_alpha_betaUT$(TSUFFIX).$(SUFFIX) \
284- ssyrk_direct_alpha_betaLN$(TSUFFIX).$(SUFFIX) ssyrk_direct_alpha_betaLT$(TSUFFIX).$(SUFFIX)
276+ ssyrk_direct_alpha_betaUN$(TSUFFIX).$(SUFFIX) ssyrk_direct_alpha_betaUT$(TSUFFIX).$(SUFFIX) \
277+ ssyrk_direct_alpha_betaLN$(TSUFFIX).$(SUFFIX) ssyrk_direct_alpha_betaLT$(TSUFFIX).$(SUFFIX)
285278endif
286279endif
287280
@@ -1040,7 +1033,7 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
10401033 $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
10411034$(KDIR)sgemm_direct_alpha_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL_ALPHA_BETA)
10421035 $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
1043- ifdef HAVE_SME
1036+ ifdef USE_SME
10441037$(KDIR)sgemm_direct_sme1_2VLx2VL$(TSUFFIX).$(SUFFIX) :
10451038 $(CC) $(CFLAGS) -c $(KERNELDIR)/sgemm_direct_sme1_2VLx2VL.S -UDOUBLE -UCOMPLEX -o $@
10461039$(KDIR)sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX) :
@@ -1058,6 +1051,22 @@ $(KDIR)ssymm_direct_alpha_betaLL$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYMMDIREC
10581051endif
10591052endif
10601053
1054+ ifdef USE_DIRECT_SSYRK
1055+ ifeq ($(ARCH), arm64)
1056+ $(KDIR)ssyrk_direct_alpha_betaUN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1057+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DUPPER -UTRANSA $< -o $@
1058+
1059+ $(KDIR)ssyrk_direct_alpha_betaUT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1060+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DUPPER -DTRANSA $< -o $@
1061+
1062+ $(KDIR)ssyrk_direct_alpha_betaLN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1063+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UUPPER -UTRANSA $< -o $@
1064+
1065+ $(KDIR)ssyrk_direct_alpha_betaLT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1066+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UUPPER -DTRANSA $< -o $@
1067+ endif
1068+ endif
1069+
10611070ifeq ($(BUILD_BFLOAT16), 1)
10621071$(KDIR)bgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(BGEMMKERNEL)
10631072 $(CC) $(CFLAGS) -c -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX $< -o $@
@@ -1184,21 +1193,6 @@ $(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD
11841193$(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND)
11851194 $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@
11861195
1187- ifdef USE_DIRECT_SSYRK
1188- ifeq ($(ARCH), arm64)
1189- $(KDIR)ssyrk_direct_alpha_betaUN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1190- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DUPPER -UTRANSA $< -o $@
1191-
1192- $(KDIR)ssyrk_direct_alpha_betaUT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1193- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DUPPER -DTRANSA $< -o $@
1194-
1195- $(KDIR)ssyrk_direct_alpha_betaLN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1196- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UUPPER -UTRANSA $< -o $@
1197-
1198- $(KDIR)ssyrk_direct_alpha_betaLT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1199- $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UUPPER -DTRANSA $< -o $@
1200- endif
1201- endif
12021196
12031197ifdef USE_TRMM
12041198$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
0 commit comments