Skip to content

Commit 81b0d88

Browse files
ggml-cpu: Add IME2 Instruction Support for the SpacemiT Backend (ggml-org#22863)
1 parent 0f45f1a commit 81b0d88

21 files changed

Lines changed: 14764 additions & 3509 deletions

.github/workflows/build-cross.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,16 +301,17 @@ jobs:
301301
export RISCV_ROOT_PATH=${PWD}/spacemit_toolchain
302302
cmake -B build -DLLAMA_OPENSSL=OFF \
303303
-DCMAKE_BUILD_TYPE=Release \
304-
-DGGML_OPENMP=OFF \
305304
-DLLAMA_BUILD_EXAMPLES=ON \
305+
-DGGML_CPU_REPACK=OFF \
306306
-DLLAMA_BUILD_TOOLS=ON \
307307
-DLLAMA_BUILD_TESTS=OFF \
308308
-DGGML_CPU_RISCV64_SPACEMIT=ON \
309309
-DGGML_RVV=ON \
310+
-DGGML_RV_ZVFH=ON \
310311
-DGGML_RV_ZFH=ON \
311312
-DGGML_RV_ZICBOP=ON \
312313
-DGGML_RV_ZIHINTPAUSE=ON \
313-
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
314+
-DGGML_RV_ZBA=ON \
314315
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
315316
316317
cmake --build build --config Release -j $(nproc)

cmake/riscv64-spacemit-linux-gnu-gcc.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
2424
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
2525
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
2626
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
27-
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
28-
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CXX_FLAGS}")
27+
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zvfh_zba_zicbop -mabi=lp64d -fno-tree-vectorize -fno-tree-loop-vectorize ${CMAKE_C_FLAGS}")
28+
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zvfh_zba_zicbop -mabi=lp64d -fno-tree-vectorize -fno-tree-loop-vectorize ${CMAKE_CXX_FLAGS}")
2929
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")

docs/build-riscv64-spacemit.md

Lines changed: 53 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,20 @@ wget https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_6
99
~~~
1010

1111
2. Build
12-
Below is the build script: it requires utilizing RISC-V vector instructions for acceleration. Ensure the `GGML_CPU_RISCV64_SPACEMIT` compilation option is enabled. The currently supported optimization version is `RISCV64_SPACEMIT_IME1`, corresponding to the `RISCV64_SPACEMIT_IME_SPEC` compilation option. Compiler configurations are defined in the `riscv64-spacemit-linux-gnu-gcc.cmake` file. Please ensure you have installed the RISC-V compiler and set the environment variable via `export RISCV_ROOT_PATH={your_compiler_path}`.
12+
Below is the build script: it requires utilizing RISC-V vector instructions for acceleration. Ensure the `GGML_CPU_RISCV64_SPACEMIT` compilation option is enabled. The currently supported optimization versions are `RISCV64_SPACEMIT_IME1` and `RISCV64_SPACEMIT_IME2`, corresponding to the `RISCV64_SPACEMIT_IME_SPEC` compilation option. Compiler configurations are defined in the `riscv64-spacemit-linux-gnu-gcc.cmake` file. Please ensure you have installed the RISC-V compiler and set the environment variable via `export RISCV_ROOT_PATH={your_compiler_path}`.
1313
```bash
1414

1515
cmake -B build \
1616
-DCMAKE_BUILD_TYPE=Release \
1717
-DGGML_CPU_RISCV64_SPACEMIT=ON \
18+
-DGGML_CPU_REPACK=OFF \
1819
-DLLAMA_OPENSSL=OFF \
1920
-DGGML_RVV=ON \
21+
-DGGML_RV_ZVFH=ON \
2022
-DGGML_RV_ZFH=ON \
2123
-DGGML_RV_ZICBOP=ON \
2224
-DGGML_RV_ZIHINTPAUSE=ON \
23-
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
25+
-DGGML_RV_ZBA=ON \
2426
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
2527
-DCMAKE_INSTALL_PREFIX=build/installed
2628

@@ -47,8 +49,25 @@ export RISCV_ROOT_PATH_IME1={your RISC-V compiler path}
4749
4850
${QEMU_ROOT_PATH}/bin/qemu-riscv64 -L ${RISCV_ROOT_PATH_IME1}/sysroot -cpu max,vlen=256,elen=64,vext_spec=v1.0 ${PWD}/build/bin/llama-cli -m ${PWD}/models/Qwen2.5-0.5B-Instruct-Q4_0.gguf -t 1
4951
~~~
52+
53+
## Quantization Support For Matrix
54+
55+
| Quantization Type | X60 | A100 |
56+
| ---: | ---: | ---: |
57+
| Q2_K | | :heavy_check_mark: |
58+
| Q3_K | | :heavy_check_mark: |
59+
| Q4_0 | :heavy_check_mark: | :heavy_check_mark: |
60+
| Q4_1 | :heavy_check_mark: | :heavy_check_mark: |
61+
| Q4_K | :heavy_check_mark: | :heavy_check_mark: |
62+
| Q5_0 | | :heavy_check_mark: |
63+
| Q5_1 | | :heavy_check_mark: |
64+
| Q5_K | | :heavy_check_mark: |
65+
| Q6_K | | :heavy_check_mark: |
66+
| Q8_0 | | :heavy_check_mark: |
67+
68+
5069
## Performance
51-
#### Quantization Support For Matrix
70+
* Spacemit(R) X60
5271
~~~
5372
model name : Spacemit(R) X60
5473
isa : rv64imafdcv_zicbom_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zfhmin_zca_zcd_zba_zbb_zbc_zbs_zkt_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt
@@ -58,33 +77,34 @@ mvendorid : 0x710
5877
marchid : 0x8000000058000001
5978
~~~
6079

61-
Q4_0
62-
| Model | Size | Params | backend | threads | test | t/s |
63-
| -----------| -------- | ------ | ------- | ------- | ---- |------|
64-
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | pp512|64.12 ± 0.26|
65-
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | tg128|10.03 ± 0.01|
66-
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | pp512|24.16 ± 0.02|
67-
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | tg128|3.83 ± 0.06|
68-
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | pp512|12.08 ± 0.02|
69-
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | tg128|2.23 ± 0.02|
70-
71-
Q4_1
72-
| Model | Size | Params | backend | threads | test | t/s |
73-
| -----------| -------- | ------ | ------- | ------- | ---- |------|
74-
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | pp512|62.07 ± 0.12|
75-
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | tg128|9.91 ± 0.01|
76-
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | pp512|22.95 ± 0.25|
77-
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | tg128|4.01 ± 0.15|
78-
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | pp512|11.55 ± 0.16|
79-
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | tg128|2.25 ± 0.04|
80-
81-
82-
Q4_K
83-
| Model | Size | Params | backend | threads | test | t/s |
84-
| -----------| -------- | ------ | ------- | ------- | ---- |------|
85-
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | pp512|9.29 ± 0.05|
86-
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | tg128|5.67 ± 0.04|
87-
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | pp512|10.38 ± 0.10|
88-
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | tg128|3.17 ± 0.08|
89-
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | pp512|4.23 ± 0.04|
90-
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | tg128|1.73 ± 0.00|
80+
| model | size | params | backend | threads | n_ubatch | fa | mmap | test | t/s |
81+
| ------------------------------ | ---------: | ---------: | ---------- | ------: | -------: | -: | ---: | --------------: | -------------------: |
82+
| qwen35 2B Q4_1 | 1.19 GiB | 1.88 B | CPU | 4 | 128 | 1 | 0 | pp128 | 10.32 ± 0.02 |
83+
| qwen35 2B Q4_1 | 1.19 GiB | 1.88 B | CPU | 4 | 128 | 1 | 0 | tg128 | 3.07 ± 0.01 |
84+
| qwen3 0.6B Q4_0 | 358.78 MiB | 596.05 M | CPU | 4 | 128 | 1 | 0 | pp128 | 49.15 ± 0.25 |
85+
| qwen3 0.6B Q4_0 | 358.78 MiB | 596.05 M | CPU | 4 | 128 | 1 | 0 | tg128 | 11.73 ± 0.02 |
86+
87+
88+
* Spacemit(R) A100
89+
~~~
90+
model name : Spacemit(R) A100
91+
isa : rv64imafdcvh_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintntl_zihintpause_zihpm_zimop_zaamo_zalrsc_zawrs_zfa_zfh_zfhmin_zca_zcb_zcd_zcmop_zba_zbb_zbc_zbs_zkt_zvbb_zvbc_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkb_zvkg_zvkned_zvknha_zvknhb_zvksed_zvksh_zvkt_smaia_smstateen_ssaia_sscofpmf_sstc_svinval_svnapot_svpbmt_sdtrig
92+
mmu : sv39
93+
mvendorid : 0x710
94+
marchid : 0x8000000041000002
95+
mimpid : 0x10000000d5686200
96+
hart isa : rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintntl_zihintpause_zihpm_zimop_zaamo_zalrsc_zawrs_zfa_zfh_zfhmin_zca_zcb_zcd_zcmop_zba_zbb_zbc_zbs_zkt_zvbb_zvbc_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkb_zvkg_zvkned_zvknha_zvknhb_zvksed_zvksh_zvkt_smaia_smstateen_ssaia_sscofpmf_sstc_svinval_svnapot_svpbmt_sdtrig
97+
~~~
98+
99+
| model | size | params | backend | threads | n_ubatch | fa | mmap | test | t/s |
100+
| ------------------------------ | ---------: | ---------: | ---------- | ------: | -------: | -: | ---: | --------------: | -------------------: |
101+
| qwen3 0.6B Q4_0 | 358.78 MiB | 596.05 M | CPU | 8 | 128 | 1 | 0 | pp128 | 565.83 ± 0.31 |
102+
| qwen3 0.6B Q4_0 | 358.78 MiB | 596.05 M | CPU | 8 | 128 | 1 | 0 | tg128 | 55.77 ± 0.02 |
103+
| qwen3 4B Q4_0 | 2.21 GiB | 4.02 B | CPU | 8 | 128 | 1 | 0 | pp128 | 79.74 ± 0.04 |
104+
| qwen3 4B Q4_0 | 2.21 GiB | 4.02 B | CPU | 8 | 128 | 1 | 0 | tg128 | 11.29 ± 0.00 |
105+
| qwen3moe 30B.A3B Q4_0 | 16.18 GiB | 30.53 B | CPU | 8 | 128 | 1 | 0 | pp128 | 57.88 ± 0.31 |
106+
| qwen3moe 30B.A3B Q4_0 | 16.18 GiB | 30.53 B | CPU | 8 | 128 | 1 | 0 | tg128 | 12.79 ± 0.00 |
107+
| qwen35 2B Q4_1 | 1.19 GiB | 1.88 B | CPU | 8 | 128 | 1 | 0 | pp128 | 115.23 ± 0.04 |
108+
| qwen35 2B Q4_1 | 1.19 GiB | 1.88 B | CPU | 8 | 128 | 1 | 0 | tg128 | 16.49 ± 0.01 |
109+
| gemma4 E4B Q4_K - Medium | 4.76 GiB | 7.52 B | CPU | 8 | 128 | 1 | 0 | pp128 | 21.13 ± 0.01 |
110+
| gemma4 E4B Q4_K - Medium | 4.76 GiB | 7.52 B | CPU | 8 | 128 | 1 | 0 | tg128 | 5.66 ± 0.00 |

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,12 +450,22 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
450450
ggml-cpu/arch/riscv/repack.cpp
451451
)
452452
if (GGML_CPU_RISCV64_SPACEMIT)
453+
include(ggml-cpu/cmake/FindSMTIME.cmake)
453454
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT ${RISCV64_SPACEMIT_IME_SPEC})
454455
list(APPEND GGML_CPU_SOURCES
455456
ggml-cpu/spacemit/ime.cpp
456457
ggml-cpu/spacemit/ime.h
458+
ggml-cpu/spacemit/spine_mem_pool.cpp
459+
ggml-cpu/spacemit/spine_mem_pool.h
460+
ggml-cpu/spacemit/repack.cpp
461+
ggml-cpu/spacemit/repack.h
462+
ggml-cpu/spacemit/ime_env.cpp
463+
ggml-cpu/spacemit/ime_env.h
457464
ggml-cpu/spacemit/ime1_kernels.cpp
465+
ggml-cpu/spacemit/ime2_kernels.cpp
458466
ggml-cpu/spacemit/ime_kernels.h
467+
ggml-cpu/spacemit/rvv_kernels.cpp
468+
ggml-cpu/spacemit/rvv_kernels.h
459469
)
460470
endif()
461471
if(NOT GGML_CPU_ALL_VARIANTS)
@@ -485,6 +495,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
485495
if (GGML_RV_ZIHINTPAUSE)
486496
string(APPEND MARCH_STR "_zihintpause")
487497
endif()
498+
if (GGML_RV_ZBA)
499+
string(APPEND MARCH_STR "_zba")
500+
endif()
488501
if (GGML_CPU_RISCV64_SPACEMIT)
489502
# `xsmtvdotii' is only required for GCC >= 15.
490503
if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Probe the C compiler for SpacemiT IME instruction support and derive
# RISCV64_SPACEMIT_IME_SPEC from the results.
#
# Preconditions (otherwise this file does nothing):
#   CMAKE_SYSTEM_PROCESSOR      starts with "riscv"
#   GGML_CPU_RISCV64_SPACEMIT   is enabled
#
# Results:
#   SPACEMIT_RISCV_COMPILER_SUPPORT_*  one result variable per probed
#                                      instruction form (set by
#                                      check_c_source_compiles)
#   RISCV64_SPACEMIT_IME_SPEC          list of compile definitions:
#                                      RISCV64_SPACEMIT_IME1 and/or
#                                      RISCV64_SPACEMIT_IME2

# check_c_source_compiles() is provided by CheckCSourceCompiles; include it
# directly instead of relying on another module having pulled it in.
include(CheckCSourceCompiles)
include(CMakePushCheckState)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)" AND GGML_CPU_RISCV64_SPACEMIT)
    set(SMT_MARCH_STR "-march=rv64gcv_zfh_zvfh_zba_zicbop")
    # The `xsmtvdotii` extension name is only added for GCC >= 15.
    if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND
        CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 15)
        string(APPEND SMT_MARCH_STR "_xsmtvdotii")
    endif()

    # Save the caller's CMAKE_REQUIRED_* settings so they are restored after
    # the probes instead of being discarded.
    cmake_push_check_state()
    set(CMAKE_REQUIRED_FLAGS "${SMT_MARCH_STR}")

    # Each probe only asks whether the assembler accepts the instruction form;
    # nothing is executed.
    check_c_source_compiles("int main() {__asm__ volatile(\"vmadot v2, v0, v1\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_IME1)
    check_c_source_compiles("int main() {__asm__ volatile(\"vmadot v2, v0, v1, i4\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VMADOT_S4)
    check_c_source_compiles("int main() {__asm__ volatile(\"vmadot v2, v0, v1, i8\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VMADOT_S8)
    check_c_source_compiles("int main() {__asm__ volatile(\"vfwmadot v2, v0, v1, fp16\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VFWMADOT_FP16)
    check_c_source_compiles("int main() {__asm__ volatile(\"vmadot.hp v2, v0, v1, v0, 0, i4\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VFMADOT_S4)
    check_c_source_compiles("int main() {__asm__ volatile(\"vmadot.hp v2, v0, v1, v0, 0, i8\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VFMADOT_S8)
    check_c_source_compiles("int main() {__asm__ volatile(\"vmadot1 v2, v0, v1\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VMADOTN)
    check_c_source_compiles("int main() {__asm__ volatile(\"vpack.vv v2, v0, v1, 2\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VPACK)
    check_c_source_compiles("int main() {__asm__ volatile(\"vnspack.vv v2, v0, v1, 2\");}" SPACEMIT_RISCV_COMPILER_SUPPORT_VNPACK)

    cmake_pop_check_state()

    # Make sure the variable exists even when no probe succeeds; a value
    # already supplied by the caller is kept as the starting point.
    if (NOT DEFINED RISCV64_SPACEMIT_IME_SPEC)
        set(RISCV64_SPACEMIT_IME_SPEC "")
    endif()

    # IME1 replaces whatever value was there before.
    if (SPACEMIT_RISCV_COMPILER_SUPPORT_IME1)
        set(RISCV64_SPACEMIT_IME_SPEC "RISCV64_SPACEMIT_IME1")
    endif()

    # IME2 is added on top and needs the i4 vmadot form plus both pack
    # instructions.
    if (SPACEMIT_RISCV_COMPILER_SUPPORT_VMADOT_S4 AND
        SPACEMIT_RISCV_COMPILER_SUPPORT_VPACK AND
        SPACEMIT_RISCV_COMPILER_SUPPORT_VNPACK)
        list(APPEND RISCV64_SPACEMIT_IME_SPEC "RISCV64_SPACEMIT_IME2")
    endif()

    message(STATUS "RISCV64_SPACEMIT_IME_SPEC: ${RISCV64_SPACEMIT_IME_SPEC}")
endif()

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@
5050
#include "llamafile/sgemm.h"
5151
#endif
5252

53+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
54+
# include "spacemit/ime.h"
55+
#endif
56+
5357
// Note: once we move threading into a separate C++ file
5458
// will use std::hardware_destructive_interference_size instead of hardcoding it here
5559
// and we'll use C++ attribute syntax.
@@ -3011,7 +3015,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
30113015
const struct ggml_cgraph * cgraph = tp->cgraph;
30123016
const struct ggml_cplan * cplan = tp->cplan;
30133017

3018+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
3019+
ggml_backend_cpu_riscv64_spacemit_set_numa_thread_affinity(state->ith);
3020+
#else
30143021
set_numa_thread_affinity(state->ith);
3022+
#endif
30153023

30163024
struct ggml_compute_params params = {
30173025
/*.ith =*/ state->ith,
@@ -3068,6 +3076,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
30683076

30693077
ggml_barrier(state->threadpool);
30703078

3079+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
3080+
ggml_backend_cpu_riscv64_spacemit_clear_numa_thread_affinity_threaded(state->ith);
3081+
#endif
3082+
30713083
return 0;
30723084
}
30733085

0 commit comments

Comments
 (0)