Skip to content

Commit eb30ec7

Browse files
authored
Merge branch 'develop' into sdk26
2 parents cb90a7e + b27a118 commit eb30ec7

214 files changed

Lines changed: 11526 additions & 1851 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/apple_m.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
elif [ "$RUNNER_OS" == "macOS" ]; then
4545
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
4646
brew reinstall gcc
47-
brew install coreutils cmake ccache
47+
brew install coreutils ccache
4848
brew install llvm
4949
else
5050
echo "::error::$RUNNER_OS not supported"
@@ -87,10 +87,16 @@ jobs:
8787
echo "max_size = 300M" > ~/.ccache/ccache.conf
8888
echo "compression = true" >> ~/.ccache/ccache.conf
8989
ccache -s
90+
91+
- name: Add gfortran runtime to link path
92+
if: matrix.build == 'make' && runner.os == 'macOS'
93+
run: |
94+
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
95+
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
96+
echo "LDFLAGS=-L/opt/homebrew/opt/llvm/lib -L$GFORTRAN_LIBDIR" >> $GITHUB_ENV
9097
9198
- name: Build OpenBLAS
9299
run: |
93-
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
94100
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
95101
export CC="/opt/homebrew/opt/llvm/bin/clang"
96102
case "${{ matrix.build }}" in

.github/workflows/arm64_graviton.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,14 @@ jobs:
8888
run: |
8989
case "${{ matrix.build }}" in
9090
"make")
91-
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
91+
make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
9292
;;
9393
"cmake")
9494
mkdir build && cd build
9595
cmake -DDYNAMIC_ARCH=1 \
9696
-DNOFORTRAN=0 \
9797
-DBUILD_WITHOUT_LAPACK=0 \
98+
-DBUILD_BFLOAT16=1 \
9899
-DCMAKE_VERBOSE_MAKEFILE=ON \
99100
-DCMAKE_BUILD_TYPE=Release \
100101
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \

.github/workflows/dynamic_arch.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
elif [ "$RUNNER_OS" == "macOS" ]; then
5050
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
5151
brew reinstall gcc
52-
brew install coreutils cmake ccache
52+
brew install coreutils ccache
5353
else
5454
echo "::error::$RUNNER_OS not supported"
5555
exit 1
@@ -89,6 +89,14 @@ jobs:
8989
echo "max_size = 300M" > ~/.ccache/ccache.conf
9090
echo "compression = true" >> ~/.ccache/ccache.conf
9191
ccache -s
92+
93+
- name: Add gfortran runtime to link path
94+
if: matrix.build == 'make' && runner.os == 'macOS'
95+
run: |
96+
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
97+
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
98+
# Preserve whatever LDFLAGS may already contain
99+
echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$GFORTRAN_LIBDIR" >> "$GITHUB_ENV"
92100
93101
- name: Build OpenBLAS
94102
run: |

.github/workflows/loongarch64_clang.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535

3636
- name: Install libffi6
3737
run: |
38-
wget http://ftp.ca.debian.org/debian/pool/main/libf/libffi/libffi6_3.2.1-9_amd64.deb
38+
wget https://download.nvidia.com/cumulus/apt.cumulusnetworks.com/pool/upstream/libf/libffi/libffi6_3.2.1-9_amd64.deb
3939
sudo dpkg -i libffi6_3.2.1-9_amd64.deb
4040
4141
- name: Install APT deps

.github/workflows/riscv64_vector.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
triple: riscv64-unknown-linux-gnu
1818
riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain
1919
riscv_gnu_toolchain_version: 13.2.0
20-
riscv_gnu_toolchain_nightly_download_path: /releases/download/2024.02.02/riscv64-glibc-ubuntu-22.04-llvm-nightly-2024.02.02-nightly.tar.gz
20+
riscv_gnu_toolchain_nightly_download_path: /releases/download/2025.08.29/riscv64-glibc-ubuntu-22.04-llvm-nightly-2025.08.29-nightly.tar.xz
2121
strategy:
2222
fail-fast: false
2323
matrix:
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
name: Windows ARM64 CI
2+
3+
on:
4+
push:
5+
branches:
6+
- develop
7+
pull_request:
8+
branches:
9+
- develop
10+
11+
concurrency:
12+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
13+
cancel-in-progress: true
14+
15+
permissions:
16+
contents: read # to fetch code (actions/checkout)
17+
18+
jobs:
19+
build:
20+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
21+
runs-on: windows-11-arm
22+
steps:
23+
- name: Checkout repository
24+
uses: actions/checkout@v3
25+
26+
- name: Install LLVM for Win-ARM64
27+
shell: pwsh
28+
run: |
29+
Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.8/LLVM-20.1.8-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe
30+
Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait
31+
echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
32+
#dir "C:\Program Files\LLVM\include\flang"
33+
#rmdir /Q /S "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Tools/Llvm/ARM64"
34+
35+
- name: Install CMake and Ninja for Win-ARM64
36+
shell: pwsh
37+
run: |
38+
Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi
39+
Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait
40+
echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH
41+
42+
Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip
43+
Expand-Archive ninja-winarm64.zip -DestinationPath ninja
44+
Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32"
45+
46+
- name: Configure OpenBLAS
47+
shell: cmd
48+
run: |
49+
CALL "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsarm64.bat"
50+
set PATH=C:\Program Files\LLVM\bin;%PATH%
51+
52+
mkdir build
53+
cd build
54+
cmake .. -G Ninja ^
55+
-DCMAKE_BUILD_TYPE=Release ^
56+
-DTARGET=ARMV8 ^
57+
-DBINARY=64 ^
58+
-DCMAKE_C_COMPILER=clang-cl ^
59+
-DCMAKE_Fortran_COMPILER=flang-new ^
60+
-DBUILD_SHARED_LIBS=ON ^
61+
-DCMAKE_SYSTEM_PROCESSOR=arm64 ^
62+
-DCMAKE_SYSTEM_NAME=Windows ^
63+
-DCMAKE_INSTALL_PREFIX=C:/opt
64+
65+
- name: Build OpenBLAS
66+
shell: cmd
67+
run: |
68+
cd build
69+
ninja -j16
70+
71+
- name: Install OpenBLAS
72+
shell: cmd
73+
run: |
74+
cd build
75+
cmake --install .
76+
77+
- name: Run ctests
78+
shell: pwsh
79+
run: |
80+
$env:PATH = "C:\opt\bin;$env:PATH"
81+
cd build
82+
ctest
83+
84+

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ lapack-3.4.1.tgz
1313
lapack-3.4.2
1414
lapack-3.4.2.tgz
1515
lapack-netlib/make.inc
16-
lapack-netlib/lapacke/include/lapacke_mangling.h
1716
lapack-netlib/SRC/la_constants.mod
17+
lapack-netlib/SRC/la_xisnan.mod
1818
lapack-netlib/TESTING/testing_results.txt
1919
lapack-netlib/INSTALL/test*
2020
lapack-netlib/TESTING/xeigtstc
@@ -81,7 +81,10 @@ test/ZBLAT2.SUMM
8181
test/ZBLAT3.SUMM
8282
test/ZBLAT3_3M.SUMM
8383
test/SHBLAT3.SUMM
84+
test/SBBLAT2.SUMM
8485
test/SBBLAT3.SUMM
86+
test/BBLAT2.SUMM
87+
test/BBLAT3.SUMM
8588
test/cblat1
8689
test/cblat2
8790
test/cblat3
@@ -96,6 +99,9 @@ test/sblat3
9699
test/sblat3_3m
97100
test/test_shgemm
98101
test/test_sbgemm
102+
test/test_sbgemv
103+
test/test_bgemm
104+
test/test_bgemv
99105
test/zblat1
100106
test/zblat2
101107
test/zblat3

CMakeLists.txt

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ project(OpenBLAS C ASM)
99

1010
set(OpenBLAS_MAJOR_VERSION 0)
1111
set(OpenBLAS_MINOR_VERSION 3)
12-
set(OpenBLAS_PATCH_VERSION 30)
12+
set(OpenBLAS_PATCH_VERSION 30.dev)
1313

1414
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1515

@@ -152,6 +152,9 @@ endif ()
152152
if (NOT DEFINED BUILD_BFLOAT16)
153153
set (BUILD_BFLOAT16 false)
154154
endif ()
155+
if (NOT DEFINED BUILD_HFLOAT16)
156+
set (BUILD_HFLOAT16 false)
157+
endif ()
155158
# set which float types we want to build for
156159
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
157160
# if none are defined, build for all
@@ -305,8 +308,8 @@ if (USE_OPENMP)
305308
endif()
306309
endif()
307310

308-
# Fix "Argument list too long" for macOS with Intel CPUs and DYNAMIC_ARCH turned on
309-
if(APPLE AND DYNAMIC_ARCH AND (NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64"))
311+
# Fix "Argument list too long" for macOS with POWERPC or Intel CPUs
312+
if(APPLE AND (NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64"))
310313
# Use response files
311314
set(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
312315
# Always build static library first
@@ -541,13 +544,13 @@ message(STATUS "adding postbuild instruction to rename syms")
541544
if (NOT USE_PERL)
542545
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
543546
COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
544-
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/${OpenBLAS_LIBNAME}.so
547+
COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/${OpenBLAS_LIBNAME}.so
545548
COMMENT "renaming symbols"
546549
)
547550
else()
548551
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
549552
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
550-
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
553+
COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
551554
COMMENT "renaming symbols"
552555
)
553556
endif()

CONTRIBUTORS.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,19 @@ In chronological order:
251251
* Ye Tao <ye.tao@arm.com>
252252
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1
253253
* [2025-02-27] Add sbgemv_n_neon kernel
254+
* [2025-05-17] Impl prototype of BGEMM interface
254255

255256
* Abhishek Kumar <https://github.com/abhishek-iitmadras>
256-
* [2025-04-22] Optimise dot kernel for NEOVERSE V1
257+
* [2025-04-22] Optimise dot kernel for NEOVERSE V1
258+
* [2025-07-23] ARM64-Enable bfloat16 kernels by default
259+
260+
* Sharif Inamdar <sharif.inamdar@arm.com>
261+
* [2025-06-05] Optimize gemv_n_sve_v1x3 kernel
262+
263+
* Guoyuan Li <https://github.com/guoyuanplct>
264+
* [2025-04-11] Optimise gemv kernel for RISCV64_ZVL256B
265+
* [2025-05-01] Optimise zgemv kernel for RISCV64_ZVL256B
266+
* [2025-05-17] Optimise omatcopy/zomatcopy kernel for RISCV64_ZVL256B
267+
* [2025-05-29] Optimise axpby kernel for RISCV64_ZVL256B
268+
* [2025-06-05] Optimise hbmv kernel for RISCV64_ZVL256B
269+

Jenkinsfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,10 @@ pipeline {
1010
sh 'make clean && make'
1111
}
1212
}
13+
stage('CMakeBuild') {
14+
steps {
15+
sh 'sudo apt update && sudo apt install cmake -y && make clean && rm -rf build && mkdir build && cd build && cmake -DDYNAMIC_ARCH=1 .. && make'
16+
}
17+
}
1318
}
1419
}

0 commit comments

Comments
 (0)