Skip to content

Commit e1770af

Browse files
committed
Update speed benchmark results
1 parent e70da91 commit e1770af

3 files changed

Lines changed: 27 additions & 20 deletions

File tree

CMakeLists.txt

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native")
2424
add_subdirectory(ggml)
2525

2626
# vit executable
27-
add_executable(${PROJECT_NAME} vit.cpp)
27+
add_executable(${PROJECT_NAME} main.cpp vit.cpp)
2828
target_link_libraries(${PROJECT_NAME} PUBLIC ggml)
2929
target_include_directories(${PROJECT_NAME} PUBLIC .)
3030
target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_11)
@@ -33,4 +33,10 @@ target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_11)
3333
add_executable(quantize quantize.cpp)
3434
target_link_libraries(quantize PUBLIC ggml)
3535
target_include_directories(quantize PUBLIC .)
36-
target_compile_features(quantize PUBLIC cxx_std_11)
36+
target_compile_features(quantize PUBLIC cxx_std_11)
37+
38+
# benchmark executable
39+
add_executable(benchmark tests/benchmark.cpp vit.cpp)
40+
target_link_libraries(benchmark PUBLIC ggml)
41+
target_include_directories(benchmark PUBLIC .)
42+
target_compile_features(benchmark PUBLIC cxx_std_17)

README.md

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -240,29 +240,30 @@ Then you can use `tiny-ggml-model-f16-quant.gguf` just like the model in F16.
240240
### Results
241241

242242
Here are the benchmarks for the different models and quantizations on my machine.
243+
To estimate run times accurately, each benchmark was run 100 times:
243244

244245
| Model | Quantization | Speed (ms) | Mem (MB) |
245246
| :----: | :----------: | :-----------: | :---------------: |
246-
| tiny | q4_0 | 100 ms | 12 MB |
247-
| tiny | q4_1 | 102 ms | 12 MB |
247+
| tiny | q4_0 | 105 ms | 12 MB |
248+
| tiny | q4_1 | 97 ms | 12 MB |
248249
| tiny | q5_0 | 116 ms | 13 MB |
249250
| tiny | q5_1 | 112 ms | 13 MB |
250-
| tiny | q8_0 | 92 ms | 15 MB |
251-
| small | q4_0 | 261 ms | 23 MB |
252-
| small | q4_1 | 229 ms | 24 MB |
253-
| small | q5_0 | 291 ms | 25 MB |
254-
| small | q5_1 | 276 ms | 27 MB |
255-
| small | q8_0 | 232 ms | 33 MB |
256-
| base | q4_0 | 714 ms | 61 MB |
257-
| base | q4_1 | 657 ms | 66 MB |
258-
| base | q5_0 | 879 ms | 71 MB |
259-
| base | q5_1 | 838 ms | 76 MB |
260-
| base | q8_0 | 658 ms | 102 MB |
251+
| tiny | q8_0 | 90 ms | 15 MB |
252+
| small | q4_0 | 240 ms | 23 MB |
253+
| small | q4_1 | 224 ms | 24 MB |
254+
| small | q5_0 | 288 ms | 25 MB |
255+
| small | q5_1 | 277 ms | 27 MB |
256+
| small | q8_0 | 228 ms | 33 MB |
257+
| base | q4_0 | 704 ms | 61 MB |
258+
| base | q4_1 | 626 ms | 66 MB |
259+
| base | q5_0 | 851 ms | 71 MB |
260+
| base | q5_1 | 806 ms | 76 MB |
261+
| base | q8_0 | 659 ms | 102 MB |
261262
| large | q4_0 | 2189 ms | 181 MB |
262-
| large | q4_1 | 1935 ms | 199 MB |
263-
| large | q5_0 | 2708 ms | 217 MB |
264-
| large | q5_1 | 2560 ms | 235 MB |
265-
| large | q8_0 | 2042 ms | 325 MB |
263+
| large | q4_1 | 1919 ms | 199 MB |
264+
| large | q5_0 | 2676 ms | 217 MB |
265+
| large | q5_1 | 2547 ms | 235 MB |
266+
| large | q8_0 | 1994 ms | 325 MB |
266267

267268
## To-Do List
268269

scripts/benchmark.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ declare -A memory_results
1111
# defaults
1212
num_threads=4
1313
quantize_flag=0 # 0 for no quantization, 1 for quantization
14-
N=10 # number of times to run each model
14+
N=100 # number of times to run each model
1515

1616
if [ "$#" -ge 1 ]; then
1717
echo "num_threads=$1"

0 commit comments

Comments
 (0)