Skip to content

Commit 9684f36

Browse files
committed
Improve the script by adding quants and cli args
1 parent af2036a commit 9684f36

1 file changed

Lines changed: 93 additions & 30 deletions

File tree

scripts/benchmark.sh

Lines changed: 93 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -2,48 +2,111 @@
22

33
# arrays
44
declare -a models=("tiny" "small" "base" "large")
5-
declare -a speed_results
6-
declare -a memory_results
5+
declare -a quant_names=("q4_0" "q4_1" "q5_0" "q5_1" "q8_0")
6+
declare -a quant_ids=(2 3 6 7 8)
7+
# associative array
8+
declare -A speed_results
9+
declare -A memory_results
10+
11+
# defaults
12+
num_threads=4
13+
quantize_flag=0 # 0 for no quantization, 1 for quantization
14+
N=10 # number of times to run each model
15+
16+
# Returns 0 when $1 is a base-10 integer greater than zero, non-zero otherwise.
is_positive_int() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  [ "$1" -gt 0 ] 2> /dev/null
}

# Optional first CLI argument: overrides the default num_threads.
# The value is later handed to ./bin/vit as its -t argument, so anything that
# is not a positive integer is rejected here instead of failing mid-benchmark.
if [ "$#" -ge 1 ]; then
  if ! is_positive_int "$1"; then
    echo "error: num_threads must be a positive integer, got '$1'" >&2
    exit 1
  fi
  echo "num_threads=$1"
  num_threads=$1
fi
20+
21+
# Returns 0 when $1 is exactly "0" or "1", non-zero otherwise.
is_binary_flag() {
  [ "$1" = "0" ] || [ "$1" = "1" ]
}

# Optional second CLI argument: overrides the default quantize_flag
# (0 = benchmark the f16 model only, 1 = benchmark every quantization).
# The flag is later compared with `-eq 1`; any other value would make that
# test error out or evaluate false and silently select the unquantized path,
# so it is rejected here.
if [ "$#" -ge 2 ]; then
  if ! is_binary_flag "$2"; then
    echo "error: quantize_flag must be 0 or 1, got '$2'" >&2
    exit 1
  fi
  echo "quantize_flag=$2"
  quantize_flag=$2
fi
25+
726

827
for model in "${models[@]}"; do
28+
# convert the model to gguf
929
echo "Converting model: vit_${model}_patch16_224.augreg_in21k_ft_in1k"
1030
python convert-pth-to-ggml.py --model_name "vit_${model}_patch16_224.augreg_in21k_ft_in1k" --ftype 1 > /dev/null 2>&1
1131

1232
cd build/ || exit
1333

14-
echo "Quantizing ..."
15-
./bin/quantize ../ggml-model-f16.gguf ../ggml-model-f16-quant.gguf 2 > /dev/null 2>&1
16-
17-
# run N times
18-
N=10
19-
sum=0
20-
mem_usage=0
21-
22-
for ((i=1; i<=N; i++)); do
23-
start=$(date +%s%N)
24-
/usr/bin/time -f "%M" -o mem.txt ./bin/vit -t 4 -m ../ggml-model-f16-quant.gguf -i ../assets/tench.jpg > /dev/null 2>&1
25-
end=$(date +%s%N)
26-
diff=$((end-start))
27-
sum=$((sum+diff))
28-
mem_usage=$(($mem_usage + $(cat mem.txt)))
29-
done
34+
# quantize the model
35+
if [ "$quantize_flag" -eq 1 ]; then
36+
for i in "${!quant_ids[@]}"; do
37+
q="${quant_names[$i]}"
38+
q_index="${quant_ids[$i]}"
39+
echo "Quantizing ... to ${q} ie ${q_index}"
40+
./bin/quantize ../ggml-model-f16.gguf ../ggml-model-f16-quant.gguf ${q_index} > /dev/null 2>&1
41+
42+
sum=0
43+
mem_usage=0
44+
45+
# Run the quantized model N times, accumulating wall-clock time (nanoseconds,
# from `date +%s%N`) in `sum` and the value GNU time reports for %M in
# `mem_usage`.
# The counter is named `run` so the enclosing loop's index `i` is not clobbered.
for ((run = 1; run <= N; run++)); do
  start=$(date +%s%N)
  # /usr/bin/time exits with the status of the command it ran. On a failed run
  # it also writes a status line into mem.txt ahead of the %M value, which
  # would break the arithmetic below, so stop instead of averaging bad data.
  if ! /usr/bin/time -f "%M" -o mem.txt ./bin/vit -t "$num_threads" \
    -m ../ggml-model-f16-quant.gguf -i ../assets/tench.jpg > /dev/null 2>&1; then
    echo "error: ./bin/vit failed on run ${run}; aborting benchmark" >&2
    exit 1
  fi
  end=$(date +%s%N)
  diff=$((end - start))
  sum=$((sum + diff))
  mem_usage=$((mem_usage + $(< mem.txt)))
done
53+
54+
avg_mem_usage=$(($mem_usage / N / 1024))
55+
avg_speed=$(($sum / N / 1000000))
3056

31-
avg_mem_usage=$(($mem_usage / N / 1024))
32-
avg_speed=$(($sum / N / 1000000))
57+
speed_results["$model,${q}"]=$avg_speed
58+
memory_results["$model,${q}"]=$avg_mem_usage
3359

34-
speed_results+=("$avg_speed")
35-
memory_results+=("$avg_mem_usage")
60+
rm mem.txt
61+
done
62+
else
63+
echo "No quantization ... for model $model"
64+
# run N times
65+
sum=0
66+
mem_usage=0
3667

37-
# del the mem file / back to parent
38-
rm mem.txt
68+
# Run the f16 (unquantized) model N times, accumulating wall-clock time
# (nanoseconds, from `date +%s%N`) in `sum` and the value GNU time reports
# for %M in `mem_usage`.
for ((run = 1; run <= N; run++)); do
  start=$(date +%s%N)
  # /usr/bin/time exits with the status of the command it ran. On a failed run
  # it also writes a status line into mem.txt ahead of the %M value, which
  # would break the arithmetic below, so stop instead of averaging bad data.
  if ! /usr/bin/time -f "%M" -o mem.txt ./bin/vit -t "$num_threads" \
    -m ../ggml-model-f16.gguf -i ../assets/tench.jpg > /dev/null 2>&1; then
    echo "error: ./bin/vit failed on run ${run}; aborting benchmark" >&2
    exit 1
  fi
  end=$(date +%s%N)
  diff=$((end - start))
  sum=$((sum + diff))
  mem_usage=$((mem_usage + $(< mem.txt)))
done
76+
77+
avg_mem_usage=$(($mem_usage / N / 1024))
78+
avg_speed=$(($sum / N / 1000000))
79+
80+
speed_results["$model"]=$avg_speed
81+
memory_results["$model"]=$avg_mem_usage
82+
83+
rm mem.txt
84+
fi
85+
3986
cd ..
87+
4088
done
4189

90+
# kind of a poor man's table
91+
if [ "$quantize_flag" -eq 1 ]; then
92+
echo "| Model | Quantization | Speed (ms) | Mem (MB) |"
93+
echo "| :----: | :----------: | :-----------: | :---------------: |"
4294

43-
# kind of poor man's table
44-
echo "| Model | Speed (ms) | Mem (MB) |"
45-
echo "| :----: | :-----------: | :---------------: |"
95+
# Emit one table row per (model, quantization) pair that has a recorded result.
# Results are keyed as "<model>,<quant_name>" in the associative arrays.
for model in "${models[@]}"; do
  for i in "${!quant_ids[@]}"; do
    quant_name="${quant_names[$i]}"
    key="$model,$quant_name"
    # Quote the subscripted name: left unquoted, `speed_results[...]` is a
    # glob pattern that pathname expansion may rewrite before -v sees it.
    if [ -v "speed_results[$key]" ]; then
      echo "| $model | $quant_name | ${speed_results[$key]} ms | ${memory_results[$key]} MB |"
    fi
  done
done
104+
else
105+
echo "| Model | Speed (ms) | Mem (MB) |"
106+
echo "| :----: | :-----------: | :---------------: |"
46107

47-
for i in "${!models[@]}"; do
48-
echo "| ${models[$i]} | ${speed_results[$i]} ms | ${memory_results[$i]} MB |"
49-
done
108+
# Emit one table row per model for the unquantized run; both result arrays
# are looked up by the bare model name.
for model in "${models[@]}"; do
  key=$model
  printf '| %s | %s ms | %s MB |\n' \
    "$model" "${speed_results[$key]}" "${memory_results[$key]}"
done
112+
fi

0 commit comments

Comments
 (0)