# Benchmark the ViT ggml models: convert each model, optionally quantize it,
# run inference N times and print a table of average speed and peak memory.
#
# Usage: <this script> [num_threads] [quantize_flag]
#   num_threads    threads passed to ./bin/vit via -t (default: 4)
#   quantize_flag  1 = benchmark every quantization type, anything else = f16 only
#                  (default: 0)
#
# Must be run from the repository root: it calls convert-pth-to-ggml.py there
# and expects the binaries under build/bin/.

# arrays
declare -a models=("tiny" "small" "base" "large")
# quant_names[i] is the label for the type id quant_ids[i] passed to ./bin/quantize
declare -a quant_names=("q4_0" "q4_1" "q5_0" "q5_1" "q8_0")
declare -a quant_ids=(2 3 6 7 8)
# associative arrays, keyed by "<model>" or "<model>,<quant_name>"
declare -A speed_results
declare -A memory_results

# defaults
num_threads=4
quantize_flag=0 # 0 for no quantization, 1 for quantization
N=10            # number of times to run each model

if [ "$#" -ge 1 ]; then
  echo "num_threads=$1"
  num_threads=$1
fi

if [ "$#" -ge 2 ]; then
  echo "quantize_flag=$2"
  quantize_flag=$2
fi

# Reject non-numeric arguments up front; otherwise they only surface later as
# "[: integer expression expected" or as a failing ./bin/vit invocation.
if ! [[ "$num_threads" =~ ^[1-9][0-9]*$ ]]; then
  echo "error: num_threads must be a positive integer, got '${num_threads}'" >&2
  exit 2
fi
if ! [[ "$quantize_flag" =~ ^[0-9]+$ ]]; then
  echo "error: quantize_flag must be 0 or 1, got '${quantize_flag}'" >&2
  exit 2
fi

#######################################
# Run ./bin/vit N times on one model file and record the averages.
# Must be called with build/ as the current directory.
# Globals:   N, num_threads (read); speed_results, memory_results (written)
# Arguments: $1 - path to the .gguf model file
#            $2 - key under which the results are stored
# Returns:   0 on success; 1 if a run fails (nothing is stored for the key)
#######################################
run_benchmark() {
  local model_file=$1 key=$2
  local mem_file run start end status peak_kb
  local total_ns=0 total_kb=0

  mem_file=$(mktemp) || {
    echo "error: could not create a temporary file" >&2
    return 1
  }

  for ((run = 1; run <= N; run++)); do
    start=$(date +%s%N)
    # GNU time's %M is the peak resident set size in KB; -o keeps it out of
    # the discarded program output.
    /usr/bin/time -f "%M" -o "$mem_file" \
      ./bin/vit -t "$num_threads" -m "$model_file" -i ../assets/tench.jpg \
      > /dev/null 2>&1
    status=$?
    end=$(date +%s%N)

    # When the command fails, time prepends a "Command exited with non-zero
    # status" line to the file, so only the last line can be the number.
    peak_kb=$(tail -n 1 "$mem_file")
    if ((status != 0)) || ! [[ "$peak_kb" =~ ^[0-9]+$ ]]; then
      echo "warning: run ${run}/${N} failed for ${key} (exit status ${status})" >&2
      rm -f -- "$mem_file"
      return 1
    fi

    total_ns=$((total_ns + end - start))
    total_kb=$((total_kb + peak_kb))
  done

  rm -f -- "$mem_file"

  speed_results["$key"]=$((total_ns / N / 1000000)) # ns -> ms
  memory_results["$key"]=$((total_kb / N / 1024))   # KB -> MB
}

for model in "${models[@]}"; do
  # convert the model to gguf
  model_name="vit_${model}_patch16_224.augreg_in21k_ft_in1k"
  echo "Converting model: ${model_name}"
  if ! python convert-pth-to-ggml.py --model_name "$model_name" --ftype 1 > /dev/null 2>&1; then
    # Without this check the previous model's ggml-model-f16.gguf would be
    # benchmarked and reported under this model's name.
    echo "warning: conversion failed for ${model_name}; skipping" >&2
    continue
  fi

  cd build/ || {
    echo "error: cannot cd to build/" >&2
    exit 1
  }

  if [ "$quantize_flag" -eq 1 ]; then
    # quantize the model, once per quantization type
    for idx in "${!quant_ids[@]}"; do
      q="${quant_names[$idx]}"
      q_index="${quant_ids[$idx]}"
      echo "Quantizing ... to ${q} ie ${q_index}"
      if ./bin/quantize ../ggml-model-f16.gguf ../ggml-model-f16-quant.gguf "$q_index" > /dev/null 2>&1; then
        # A failed benchmark has already printed a warning; move on.
        run_benchmark ../ggml-model-f16-quant.gguf "${model},${q}"
      else
        # Skip: the quant file on disk would be a stale one from an earlier type.
        echo "warning: quantization to ${q} failed for ${model}; skipping" >&2
      fi
    done
  else
    echo "No quantization ... for model $model"
    run_benchmark ../ggml-model-f16.gguf "$model"
  fi

  cd .. || exit 1
done

# kind of a poor man's table
if [ "$quantize_flag" -eq 1 ]; then
  echo "| Model | Quantization | Speed (ms) | Mem (MB) |"
  echo "| :----: | :----------: | :-----------: | :---------------: |"

  for model in "${models[@]}"; do
    for idx in "${!quant_ids[@]}"; do
      quant_name="${quant_names[$idx]}"
      key="${model},${quant_name}"
      # Only print combinations that were actually measured.
      if [[ -n "${speed_results[$key]+set}" ]]; then
        echo "| $model | $quant_name | ${speed_results[$key]} ms | ${memory_results[$key]} MB |"
      fi
    done
  done
else
  echo "| Model | Speed (ms) | Mem (MB) |"
  echo "| :----: | :-----------: | :---------------: |"

  for model in "${models[@]}"; do
    key="$model"
    # n/a marks a model whose conversion or benchmark failed.
    echo "| $model | ${speed_results[$key]:-n/a} ms | ${memory_results[$key]:-n/a} MB |"
  done
fi