Skip to content

Commit 5b22140

Browse files
committed
Merge branch 'develop' of https://github.com/OpenMathLib/OpenBLAS into develop
2 parents b528b9b + 1bd74ad commit 5b22140

11 files changed

Lines changed: 31 additions & 1043 deletions

File tree

Makefile.install

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ PKG_EXTRALIB := $(EXTRALIB)
3232
ifeq ($(INTERFACE64),1)
3333
SUFFIX64=64
3434
endif
35-
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
35+
PKGFILE := $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc
3636

3737
ifeq ($(USE_OPENMP), 1)
3838
ifeq ($(C_COMPILER), PGI)
@@ -191,8 +191,6 @@ endif
191191
ifeq ($(INTERFACE64),1)
192192
SUFFIX64=64
193193
endif
194-
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
195-
196194
@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
197195
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
198196
@echo 'libprefix='$(LIBNAMEPREFIX) >> "$(PKGFILE)"

Makefile.system

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ EXTRALIB += -lm
477477
endif
478478

479479
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
480-
ifeq ($(ARCH), $(filter $(ARCH),arm arm64))
480+
ifeq ($(ARCH), $(filter $(ARCH),arm arm64 power))
481481
EXTRALIB += -lm
482482
endif
483483
endif

interface/trsm.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,12 @@ if (strcmp(gotoblas_corename(), "armv9sme") == 0
381381
#endif
382382
#endif
383383

384+
385+
386+
//end of the ifndef CBLAS ... else ... section
384387
#endif
385388

389+
if ((args.m == 0) || (args.n == 0)) return;
386390
IDEBUG_START;
387391

388392
FUNCTION_PROFILE_START();

kernel/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,11 @@ else ifeq ($(TARGET_CORE), ZEN)
9797
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
9898
else ifeq ($(TARGET_CORE), LOONGSON3R4)
9999
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
100-
else ifneq ($(filter NEOVERSEN2 NEOVERSEV1, $(TARGET_CORE)),)
100+
else ifneq ($(filter NEOVERSEN2 NEOVERSEV1 ARMV8SVE A64FX, $(TARGET_CORE)),)
101101
ifeq ($(C_COMPILER), PGI)
102102
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -Msve_intrinsics
103103
else
104-
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
104+
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=armv8.4-a+sve+bf16
105105
endif
106106
else
107107
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)

kernel/riscv64/dgemm_kernel_8x8_zvl256b.c

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,11 +1628,12 @@ static void NM_TAIL(BLASLONG K, BLASLONG M, const BLASLONG m_edge, const BLASLON
16281628
}
16291629
}
16301630
}
1631+
16311632
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, FLOAT* A, FLOAT* B, FLOAT* C, BLASLONG ldc)
16321633
{
16331634
if (K <= 0) return 0;
16341635
const BLASLONG m_edge = M & 7;
1635-
const bool S = (M == (ldc & 0x7));
1636+
const bool S = (ldc == m_edge);
16361637

16371638
// -- MAIN PASS
16381639

@@ -1689,24 +1690,6 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, FLOAT* A, FLOAT* B, F
16891690
resultEF = __riscv_vfmacc_vf_f64m2( resultEF, B7, A00, 8 );
16901691
}
16911692

1692-
// LMUL = 2 does worst here
1693-
vfloat64m1_t result0 = __riscv_vget_v_f64m2_f64m1(result01, 0);
1694-
vfloat64m1_t result1 = __riscv_vget_v_f64m2_f64m1(result01, 1);
1695-
vfloat64m1_t result2 = __riscv_vget_v_f64m2_f64m1(result23, 0);
1696-
vfloat64m1_t result3 = __riscv_vget_v_f64m2_f64m1(result23, 1);
1697-
vfloat64m1_t result4 = __riscv_vget_v_f64m2_f64m1(result45, 0);
1698-
vfloat64m1_t result5 = __riscv_vget_v_f64m2_f64m1(result45, 1);
1699-
vfloat64m1_t result6 = __riscv_vget_v_f64m2_f64m1(result67, 0);
1700-
vfloat64m1_t result7 = __riscv_vget_v_f64m2_f64m1(result67, 1);
1701-
vfloat64m1_t result8 = __riscv_vget_v_f64m2_f64m1(result89, 0);
1702-
vfloat64m1_t result9 = __riscv_vget_v_f64m2_f64m1(result89, 1);
1703-
vfloat64m1_t result10 = __riscv_vget_v_f64m2_f64m1(resultAB, 0);
1704-
vfloat64m1_t result11 = __riscv_vget_v_f64m2_f64m1(resultAB, 1);
1705-
vfloat64m1_t result12 = __riscv_vget_v_f64m2_f64m1(resultCD, 0);
1706-
vfloat64m1_t result13 = __riscv_vget_v_f64m2_f64m1(resultCD, 1);
1707-
vfloat64m1_t result14 = __riscv_vget_v_f64m2_f64m1(resultEF, 0);
1708-
vfloat64m1_t result15 = __riscv_vget_v_f64m2_f64m1(resultEF, 1);
1709-
17101693
FLOAT *C2 = C;
17111694

17121695
vfloat64m2_t c01 = __riscv_vle64_v_f64m2(C, 8); C += ldc;

kernel/riscv64/sgemm_kernel_16x8_zvl256b.c

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2137,7 +2137,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, FLOAT* A, FLOAT* B, F
21372137
{
21382138
if (K <= 0) return 0;
21392139
const BLASLONG m_edge = M & 15;
2140-
const bool S = (M == (ldc & 0xF));
2140+
const bool S = (ldc == m_edge);
21412141

21422142
// -- MAIN PASS
21432143

@@ -2194,24 +2194,6 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, FLOAT* A, FLOAT* B, F
21942194
resultEF = __riscv_vfmacc_vf_f32m2( resultEF, B7, A00, 16 );
21952195
}
21962196

2197-
// LMUL = 2 does worst here
2198-
vfloat32m1_t result0 = __riscv_vget_v_f32m2_f32m1(result01, 0);
2199-
vfloat32m1_t result1 = __riscv_vget_v_f32m2_f32m1(result01, 1);
2200-
vfloat32m1_t result2 = __riscv_vget_v_f32m2_f32m1(result23, 0);
2201-
vfloat32m1_t result3 = __riscv_vget_v_f32m2_f32m1(result23, 1);
2202-
vfloat32m1_t result4 = __riscv_vget_v_f32m2_f32m1(result45, 0);
2203-
vfloat32m1_t result5 = __riscv_vget_v_f32m2_f32m1(result45, 1);
2204-
vfloat32m1_t result6 = __riscv_vget_v_f32m2_f32m1(result67, 0);
2205-
vfloat32m1_t result7 = __riscv_vget_v_f32m2_f32m1(result67, 1);
2206-
vfloat32m1_t result8 = __riscv_vget_v_f32m2_f32m1(result89, 0);
2207-
vfloat32m1_t result9 = __riscv_vget_v_f32m2_f32m1(result89, 1);
2208-
vfloat32m1_t result10 = __riscv_vget_v_f32m2_f32m1(resultAB, 0);
2209-
vfloat32m1_t result11 = __riscv_vget_v_f32m2_f32m1(resultAB, 1);
2210-
vfloat32m1_t result12 = __riscv_vget_v_f32m2_f32m1(resultCD, 0);
2211-
vfloat32m1_t result13 = __riscv_vget_v_f32m2_f32m1(resultCD, 1);
2212-
vfloat32m1_t result14 = __riscv_vget_v_f32m2_f32m1(resultEF, 0);
2213-
vfloat32m1_t result15 = __riscv_vget_v_f32m2_f32m1(resultEF, 1);
2214-
22152197
FLOAT *C2 = C;
22162198

22172199
vfloat32m2_t c01 = __riscv_vle32_v_f32m2(C, 16); C += ldc;

0 commit comments

Comments
 (0)