Skip to content

Commit 556ffac

Browse files
authored
Merge branch 'OpenMathLib:develop' into gemmt_tests
2 parents e1a6703 + dbd5643 commit 556ffac

41 files changed

Lines changed: 2158 additions & 992 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/c910v.yml

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,39 @@ jobs:
8383
8484
- name: test
8585
run: |
86-
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
87-
qemu-riscv64 ./utest/openblas_utest
88-
qemu-riscv64 ./utest/openblas_utest_ext
86+
run_with_retry() {
87+
local cmd="$1"
88+
local time_out=10
89+
local retries=10
90+
local attempt=0
91+
92+
for ((i=1; i<=retries; i++)); do
93+
attempt=$((i))
94+
if timeout -s 12 --preserve-status $time_out $cmd; then
95+
echo "Command succeeded on attempt $i."
96+
return 0
97+
else
98+
local exit_code=$?
99+
if [ $exit_code -eq 140 ]; then
100+
echo "Attempt $i timed out (retrying...)"
101+
time_out=$((time_out + 5))
102+
else
103+
echo "Attempt $i failed with exit code $exit_code. Aborting workflow."
104+
exit $exit_code
105+
fi
106+
fi
107+
done
108+
echo "All $retries attempts failed, giving up."
109+
echo "Final failure was due to timeout."
110+
echo "Aborting workflow."
111+
exit $exit_code
112+
}
113+
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
114+
which qemu-riscv64
115+
export QEMU_BIN=$(which qemu-riscv64)
116+
run_with_retry "$QEMU_BIN ./utest/openblas_utest"
117+
run_with_retry "$QEMU_BIN ./utest/openblas_utest_ext"
118+
89119
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
90120
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
91121
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1

CMakeLists.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm th
6262

6363
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
6464
option(BUILD_STATIC_LIBS "Build static library" OFF)
65+
option(BUILD_SHARED_LIBS "Build shared library" OFF)
6566
if(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS)
6667
set(BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE)
6768
endif()
@@ -123,7 +124,12 @@ message(WARNING "CMake support is experimental. It does not yet support all buil
123124
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
124125
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
125126

126-
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE})
127+
string(FIND "${LIBNAMESUFFIX}" "${SUFFIX64_UNDERSCORE}" HAVE64)
128+
if (${HAVE64} GREATER -1)
129+
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX})
130+
else ()
131+
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE})
132+
endif ()
127133

128134
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
129135

@@ -716,4 +722,5 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
716722
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
717723
install(EXPORT "${PN}${SUFFIX64}Targets"
718724
NAMESPACE "${PN}${SUFFIX64}::"
719-
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
725+
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
726+

Makefile.system

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ SMALL_MATRIX_OPT = 1
276276
endif
277277
ifeq ($(ARCH), arm64)
278278
GEMM_GEMV_FORWARD = 1
279+
GEMM_GEMV_FORWARD_BF16 = 1
279280
endif
280281
ifeq ($(ARCH), riscv)
281282
GEMM_GEMV_FORWARD = 1

cmake/cc.cmake

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,9 @@ if (${CORE} STREQUAL NEOVERSEN1)
229229
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
230230
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-n1")
231231
elseif (${GCC_VERSION} VERSION_GREATER 9.4 OR ${GCC_VERSION} VERSION_EQUAL 9.4)
232-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve -mtune=neoverse-n1")
232+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a -mtune=neoverse-n1")
233233
else ()
234-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
234+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
235235
endif()
236236
endif ()
237237
endif ()
@@ -260,13 +260,13 @@ endif ()
260260

261261
if (${CORE} STREQUAL CORTEXA510)
262262
if (NOT DYNAMIC_ARCH)
263-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
263+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve")
264264
endif ()
265265
endif ()
266266

267267
if (${CORE} STREQUAL CORTEXA710)
268268
if (NOT DYNAMIC_ARCH)
269-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
269+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve")
270270
endif ()
271271
endif ()
272272

@@ -278,7 +278,7 @@ endif ()
278278

279279
if (${CORE} STREQUAL CORTEXX2)
280280
if (NOT DYNAMIC_ARCH)
281-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
281+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve")
282282
endif ()
283283
endif ()
284284

ctest/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ enable_language(Fortran)
66
endif()
77

88
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS")
9-
if (BINARY32 AND CMAKE_C_PLATFORM_ID MATCHES "MinGW" AND CMAKE_Fortran_COMPILER_VERSION VERSION_EQUAL 14.2)
9+
if (BINARY32 AND CMAKE_C_PLATFORM_ID MATCHES "MinGW" AND CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER 14.1)
1010
list(REMOVE_ITEM ${CMAKE_Fortran_FLAGS} -O3 -O2 -O1 -Os)
1111
set (CMAKE_Fortran_FLAGS_RELEASE "" CACHE STRING "" FORCE)
1212
endif()

driver/level3/level3_thread.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -851,9 +851,20 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
851851
/* The objective function comes from the sum of the partitions in m and n. */
852852
/* (n / nthreads_n) + (m / nthreads_m) */
853853
/* = (n * nthreads_m + m * nthreads_n) / (nthreads_n * nthreads_m) */
854-
while (nthreads_m % 2 == 0 && n * nthreads_m + m * nthreads_n > n * (nthreads_m / 2) + m * (nthreads_n * 2)) {
855-
nthreads_m /= 2;
856-
nthreads_n *= 2;
854+
BLASLONG cost = 0, div = 0;
855+
BLASLONG i;
856+
for (i = 1; i <= sqrt(nthreads_m); i++) {
857+
if (nthreads_m % i) continue;
858+
BLASLONG j = nthreads_m / i;
859+
BLASLONG cost_i = n * j + m * nthreads_n * i;
860+
BLASLONG cost_j = n * i + m * nthreads_n * j;
861+
if (cost == 0 ||
862+
cost_i < cost) {cost = cost_i; div = i;}
863+
if (cost_j < cost) {cost = cost_j; div = j;}
864+
}
865+
if (div > 1) {
866+
nthreads_m /= div;
867+
nthreads_n *= div;
857868
}
858869
}
859870

interface/gemm.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -417,21 +417,24 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
417417

418418
PRINT_DEBUG_CNAME;
419419

420-
#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && defined(USE_SGEMM_KERNEL_DIRECT)
421-
#if defined(DYNAMIC_ARCH) && defined(ARCH_x86)
422-
if (support_avx512() )
420+
#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
421+
#if defined(ARCH_x86) && (defined(USE_SGEMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH))
422+
#if defined(DYNAMIC_ARCH)
423+
if (support_avx512() )
424+
#endif
423425
if (beta == 0 && alpha == 1.0 && order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans && SGEMM_DIRECT_PERFORMANT(m,n,k)) {
424426
SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc);
425427
return;
426428
}
427429
#endif
428-
#if defined(DYNAMIC_ARCH) && defined(ARCH_ARM64)
429-
if (support_sme1()){
430+
#if defined(ARCH_ARM64) && (defined(USE_SGEMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH))
431+
#if defined(DYNAMIC_ARCH)
432+
if (support_sme1())
433+
#endif
430434
if (beta == 0 && alpha == 1.0 && order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans) {
431435
SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc);
432436
return;
433437
}
434-
}
435438
#endif
436439
#endif
437440

interface/zscal.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
9898
if (nthreads == 1) {
9999
#endif
100100

101-
SCAL_K(n, 0, 0, alpha[0], alpha[1], x, incx, NULL, 0, NULL, 0);
101+
SCAL_K(n, 0, 0, alpha[0], alpha[1], x, incx, NULL, 0, NULL, 1);
102102

103103
#ifdef SMP
104104
} else {
@@ -108,7 +108,7 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
108108
mode = BLAS_SINGLE | BLAS_COMPLEX;
109109
#endif
110110

111-
blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads);
111+
blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 1, (int (*)(void))SCAL_K, nthreads);
112112

113113
}
114114
#endif

interface/zsyr.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,12 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
116116

117117
#else
118118

119-
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLOAT *x, int incx, FLOAT *a, int lda) {
119+
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, void* valpha, FLOAT *x, int incx, FLOAT *a, int lda) {
120120

121121
FLOAT *buffer;
122122
int uplo;
123123
blasint info;
124-
FLOAT * ALPHA = &alpha;
124+
FLOAT * ALPHA = (FLOAT*)valpha;
125125
FLOAT alpha_r = ALPHA[0];
126126
FLOAT alpha_i = ALPHA[1];
127127
#ifdef SMP

kernel/arm/zscal.c

Lines changed: 40 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -27,65 +27,56 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2727

2828
/**************************************************************************************
2929
* 2013/09/14 Saar
30-
* BLASTEST float : OK
31-
* BLASTEST double : OK
32-
* CTEST : OK
33-
* TEST : OK
30+
* BLASTEST float : OK
31+
* BLASTEST double : OK
32+
* CTEST : OK
33+
* TEST : OK
3434
*
3535
**************************************************************************************/
3636

3737
#include "common.h"
3838

39+
// The c/zscal_k function is called not only by cblas_c/zscal but also by other upper-level interfaces.
40+
// In certain cases, the expected return values for cblas_c/zscal differ from those of other upper-level interfaces.
41+
// To handle this, we use the dummy2 parameter to differentiate between them.
3942
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
4043
{
41-
BLASLONG i=0;
42-
BLASLONG inc_x2;
43-
BLASLONG ip = 0;
44-
FLOAT temp;
44+
BLASLONG i = 0;
45+
BLASLONG inc_x2;
46+
BLASLONG ip = 0;
47+
FLOAT temp;
4548

46-
if ( (n <= 0) || (inc_x <= 0))
47-
return(0);
49+
if ((n <= 0) || (inc_x <= 0))
50+
return(0);
4851

52+
inc_x2 = 2 * inc_x;
53+
if (dummy2 == 0) {
54+
for (i = 0; i < n; i++)
55+
{
56+
if (da_r == 0.0 && da_i == 0.0)
57+
{
58+
x[ip] = 0.0;
59+
x[ip+1] = 0.0;
60+
}
61+
else
62+
{
63+
temp = da_r * x[ip] - da_i * x[ip+1];
64+
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
65+
x[ip] = temp;
66+
}
4967

50-
inc_x2 = 2 * inc_x;
51-
for ( i=0; i<n; i++ )
52-
{
53-
if ( da_r == 0.0 )
54-
{
55-
if ( da_i == 0.0 )
56-
{
57-
temp = 0.0;
58-
x[ip+1] = 0.0 ;
59-
}
60-
else
61-
{
62-
temp = - da_i * x[ip+1] ;
63-
if (isnan(x[ip]) || isinf(x[ip])) temp = NAN;
64-
if (!isinf(x[ip+1]))
65-
x[ip+1] = da_i * x[ip] ;
66-
else x[ip+1] = NAN;
67-
}
68-
}
69-
else
70-
{
71-
if ( da_i == 0.0 )
72-
{
73-
temp = da_r * x[ip] ;
74-
x[ip+1] = da_r * x[ip+1];
75-
}
76-
else
77-
{
78-
temp = da_r * x[ip] - da_i * x[ip+1] ;
79-
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
80-
}
81-
}
82-
x[ip] = temp;
68+
ip += inc_x2;
69+
}
70+
return(0);
71+
}
72+
for (i = 0; i < n; i++)
73+
{
74+
temp = da_r * x[ip] - da_i * x[ip+1];
75+
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
8376

84-
ip += inc_x2;
85-
}
86-
87-
return(0);
77+
x[ip] = temp;
78+
ip += inc_x2;
79+
}
8880

81+
return(0);
8982
}
90-
91-

0 commit comments

Comments
 (0)