Skip to content

Commit 4e0cf1e

Browse files
authored
Merge branch 'OpenMathLib:develop' into gemmt_tests
2 parents d7d6e6b + 39c90f9 commit 4e0cf1e

79 files changed

Lines changed: 3393 additions & 338 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/arm64_graviton.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,14 @@ jobs:
8888
run: |
8989
case "${{ matrix.build }}" in
9090
"make")
91-
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
91+
make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
9292
;;
9393
"cmake")
9494
mkdir build && cd build
9595
cmake -DDYNAMIC_ARCH=1 \
9696
-DNOFORTRAN=0 \
9797
-DBUILD_WITHOUT_LAPACK=0 \
98+
-DBUILD_BFLOAT16=1 \
9899
-DCMAKE_VERBOSE_MAKEFILE=ON \
99100
-DCMAKE_BUILD_TYPE=Release \
100101
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
name: Windows ARM64 CI
2+
3+
on:
4+
push:
5+
branches:
6+
- develop
7+
- release-**
8+
pull_request:
9+
branches:
10+
- develop
11+
- release-**
12+
13+
concurrency:
14+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
15+
cancel-in-progress: true
16+
17+
permissions:
18+
contents: read # to fetch code (actions/checkout)
19+
20+
jobs:
21+
build:
22+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
23+
runs-on: windows-11-arm
24+
steps:
25+
- name: Checkout repository
26+
uses: actions/checkout@v3
27+
28+
- name: Install LLVM for Win-ARM64
29+
shell: pwsh
30+
run: |
31+
Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.6/LLVM-20.1.6-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe
32+
Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait
33+
echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
34+
35+
- name: Install CMake and Ninja for Win-ARM64
36+
shell: pwsh
37+
run: |
38+
Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi
39+
Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait
40+
echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH
41+
42+
Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip
43+
Expand-Archive ninja-winarm64.zip -DestinationPath ninja
44+
Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32"
45+
46+
- name: Configure OpenBLAS
47+
shell: cmd
48+
run: |
49+
CALL "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsarm64.bat"
50+
mkdir build
51+
cd build
52+
cmake .. -G Ninja ^
53+
-DCMAKE_BUILD_TYPE=Release ^
54+
-DTARGET=ARMV8 ^
55+
-DBINARY=64 ^
56+
-DCMAKE_C_COMPILER=clang-cl ^
57+
-DCMAKE_Fortran_COMPILER=flang-new ^
58+
-DBUILD_SHARED_LIBS=ON ^
59+
-DCMAKE_SYSTEM_PROCESSOR=arm64 ^
60+
-DCMAKE_SYSTEM_NAME=Windows ^
61+
-DCMAKE_INSTALL_PREFIX=C:/opt
62+
63+
- name: Build OpenBLAS
64+
shell: cmd
65+
run: |
66+
cd build
67+
ninja -j16
68+
69+
- name: Install OpenBLAS
70+
shell: cmd
71+
run: |
72+
cd build
73+
cmake --install .
74+
75+
- name: Run ctests
76+
shell: pwsh
77+
run: |
78+
$env:PATH = "C:\opt\bin;$env:PATH"
79+
cd build
80+
ctest
81+
82+

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ lapack-3.4.1.tgz
1313
lapack-3.4.2
1414
lapack-3.4.2.tgz
1515
lapack-netlib/make.inc
16-
lapack-netlib/lapacke/include/lapacke_mangling.h
1716
lapack-netlib/SRC/la_constants.mod
17+
lapack-netlib/SRC/la_xisnan.mod
1818
lapack-netlib/TESTING/testing_results.txt
1919
lapack-netlib/INSTALL/test*
2020
lapack-netlib/TESTING/xeigtstc
@@ -81,7 +81,10 @@ test/ZBLAT2.SUMM
8181
test/ZBLAT3.SUMM
8282
test/ZBLAT3_3M.SUMM
8383
test/SHBLAT3.SUMM
84+
test/SBBLAT2.SUMM
8485
test/SBBLAT3.SUMM
86+
test/BBLAT2.SUMM
87+
test/BBLAT3.SUMM
8588
test/cblat1
8689
test/cblat2
8790
test/cblat3
@@ -96,6 +99,9 @@ test/sblat3
9699
test/sblat3_3m
97100
test/test_shgemm
98101
test/test_sbgemm
102+
test/test_sbgemv
103+
test/test_bgemm
104+
test/test_bgemv
99105
test/zblat1
100106
test/zblat2
101107
test/zblat3

CONTRIBUTORS.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,18 @@ In chronological order:
251251
* Ye Tao <ye.tao@arm.com>
252252
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1
253253
* [2025-02-27] Add sbgemv_n_neon kernel
254+
* [2025-05-17] Impl prototype of BGEMM interface
254255

255256
* Abhishek Kumar <https://github.com/abhishek-iitmadras>
256-
* [2025-04-22] Optimise dot kernel for NEOVERSE V1
257+
* [2025-04-22] Optimise dot kernel for NEOVERSE V1
258+
259+
* Sharif Inamdar <sharif.inamdar@arm.com>
260+
* [2025-06-05] Optimize gemv_n_sve_v1x3 kernel
261+
262+
* Guoyuan Li <https://github.com/guoyuanplct>
263+
* [2025-04-11] Optimise gemv kernel for RISCV64_ZVL256B
264+
* [2025-05-01] Optimise zgemv kernel for RISCV64_ZVL256B
265+
* [2025-05-17] Optimise omatcopy/zomatcopy kernel for RISCV64_ZVL256B
266+
* [2025-05-29] Optimise axpby kernel for RISCV64_ZVL256B
267+
* [2025-06-05] Optimise hbmv kernel for RISCV64_ZVL256B
268+

Makefile.system

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -276,14 +276,14 @@ SMALL_MATRIX_OPT = 1
276276
endif
277277
ifeq ($(ARCH), arm64)
278278
GEMM_GEMV_FORWARD = 1
279-
GEMM_GEMV_FORWARD_BF16 = 1
279+
SBGEMM_GEMV_FORWARD = 1
280280
endif
281281
ifeq ($(ARCH), riscv)
282282
GEMM_GEMV_FORWARD = 1
283283
endif
284284
ifeq ($(ARCH), power)
285285
GEMM_GEMV_FORWARD = 1
286-
GEMM_GEMV_FORWARD_BF16 = 1
286+
SBGEMM_GEMV_FORWARD = 1
287287
endif
288288

289289
ifeq ($(SMALL_MATRIX_OPT), 1)
@@ -293,8 +293,8 @@ ifneq ($(ONLY_CBLAS), 1)
293293
ifeq ($(GEMM_GEMV_FORWARD), 1)
294294
CCOMMON_OPT += -DGEMM_GEMV_FORWARD
295295
endif
296-
ifeq ($(GEMM_GEMV_FORWARD_BF16), 1)
297-
CCOMMON_OPT += -DGEMM_GEMV_FORWARD_BF16
296+
ifeq ($(SBGEMM_GEMV_FORWARD), 1)
297+
CCOMMON_OPT += -DSBGEMM_GEMV_FORWARD
298298
endif
299299
endif
300300

@@ -1905,6 +1905,8 @@ export BUILD_HFLOAT16
19051905
export NO_LSX
19061906
export NO_LASX
19071907

1908+
export BGEMM_UNROLL_M
1909+
export BGEMM_UNROLL_N
19081910
export SBGEMM_UNROLL_M
19091911
export SBGEMM_UNROLL_N
19101912
export SHGEMM_UNROLL_M

Makefile.tail

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
1+
###############################################################################
2+
# Copyright (c) 2025, The OpenBLAS Project
3+
# All rights reserved.
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are
6+
# met:
7+
# 1. Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# 2. Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# 3. Neither the name of the OpenBLAS project nor the names of
14+
# its contributors may be used to endorse or promote products
15+
# derived from this software without specific prior written permission.
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
# POSSIBILITY OF SUCH DAMAGE.
27+
###############################################################################
28+
29+
BBLASOBJS_P = $(BBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
130
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
231
SHBLASPBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
332
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
@@ -12,8 +41,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
1241

1342
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
1443

15-
BLASOBJS = $(SHBLASOBJS) $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
16-
BLASOBJS_P = $(SHBLASPBJS_P) $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
44+
BLASOBJS = $(SHBLASOBJS) $(BBLASOBJS) $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
45+
BLASOBJS_P = $(SHBLASPBJS_P) $(BBLASOBJS_P) $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
1746

1847
ifdef EXPRECISION
1948
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
@@ -26,6 +55,7 @@ BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
2655
endif
2756

2857
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHFLOAT16 -UDOUBLE -UCOMPLEX
58+
$(BBLASOBJS) $(BBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX
2959
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
3060
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
3161
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
@@ -36,6 +66,7 @@ $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
3666
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
3767

3868
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
69+
$(BBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
3970
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
4071
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
4172
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ For a general introduction to the BLAS routines, please refer to the extensive d
2929
We provide official binary packages for the following platforms:
3030

3131
* Windows x86/x86_64
32+
* Windows arm64 (woa)
3233

3334
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the [Releases section of the GitHub project page](https://github.com/OpenMathLib/OpenBLAS/releases).
3435

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ POWER7
5252
POWER8
5353
POWER9
5454
POWER10
55+
POWER11
5556
PPCG4
5657
PPC970
5758
PPC970MP

benchmark/Makefile

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,31 @@
1+
###############################################################################
2+
# Copyright (c) 2025, The OpenBLAS Project
3+
# All rights reserved.
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are
6+
# met:
7+
# 1. Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# 2. Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# 3. Neither the name of the OpenBLAS project nor the names of
14+
# its contributors may be used to endorse or promote products
15+
# derived from this software without specific prior written permission.
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
# POSSIBILITY OF SUCH DAMAGE.
27+
###############################################################################
28+
129
TOPDIR = ..
230
include $(TOPDIR)/Makefile.system
331

@@ -56,7 +84,7 @@ GOTO_LAPACK_TARGETS=
5684
endif
5785

5886
ifeq ($(BUILD_BFLOAT16),1)
59-
GOTO_BFLOAT_TARGETS=sbgemm.goto
87+
GOTO_BFLOAT_TARGETS=bgemm.goto sbgemm.goto
6088
else
6189
GOTO_BFLOAT_TARGETS=
6290
endif
@@ -635,6 +663,8 @@ zcholesky.essl : zcholesky.$(SUFFIX)
635663

636664
##################################### Sgemm ####################################################
637665
ifeq ($(BUILD_BFLOAT16),1)
666+
bgemm.goto : bgemm.$(SUFFIX) ../$(LIBNAME)
667+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
638668
sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
639669
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
640670
endif
@@ -2970,6 +3000,8 @@ zcholesky.$(SUFFIX) : cholesky.c
29703000
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
29713001

29723002
ifeq ($(BUILD_BFLOAT16),1)
3003+
bgemm.$(SUFFIX) : gemm.c
3004+
$(CC) $(CFLAGS) -c -DBFLOAT16 -DBGEMM -UCOMPLEX -UDOUBLE -o $(@F) $^
29733005
sbgemm.$(SUFFIX) : gemm.c
29743006
$(CC) $(CFLAGS) -c -DBFLOAT16 -UCOMPLEX -UDOUBLE -o $(@F) $^
29753007
endif

benchmark/gemm.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3333

3434
#ifdef DOUBLE
3535
#define GEMM BLASFUNC(dgemm)
36+
#elif defined(BFLOAT16) && defined(BGEMM)
37+
#define GEMM BLASFUNC(bgemm)
3638
#elif defined(BFLOAT16)
3739
#define GEMM BLASFUNC(sbgemm)
3840
#undef IFLOAT
@@ -60,8 +62,18 @@ int main(int argc, char *argv[]){
6062

6163
IFLOAT *a, *b;
6264
FLOAT *c;
65+
#ifdef BGEMM
66+
blasint one=1;
67+
blasint two=2;
68+
float alpha_in[] = {1.0, 0.0};
69+
float beta_in[] = {0.0, 0.0};
70+
FLOAT alpha[2], beta[2];
71+
sbstobf16_(&two, alpha_in, &one, alpha, &one);
72+
sbstobf16_(&two, beta_in, &one, beta, &one);
73+
#else
6374
FLOAT alpha[] = {1.0, 0.0};
6475
FLOAT beta [] = {0.0, 0.0};
76+
#endif
6577
char transa = 'N';
6678
char transb = 'N';
6779
blasint m, n, k, i, j, lda, ldb, ldc;

0 commit comments

Comments
 (0)