# Benchmark
#
# Runs the llama.cpp server benchmark job(s) defined under `jobs:`.
name: Benchmark

on:
  # Manual run. The selected series is compared against a hard-coded runner
  # label in each job's `if:` condition to decide whether that job runs.
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC64as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC48ads_A100_v4
          - Standard_ND96asr_A100_v4
          - Standard_NC40ads_H100_v5
          - Standard_NC80adis_H100_v5

  # Pushes that touch build files, native sources or the benchmark itself.
  push:
    branches:
      - master
      - hp/server/bench/workflow  # FIXME remove
    paths:
      # NOTE(review): this lists server.yml; if this workflow lives in a
      # different file, its own path should probably be listed too - confirm.
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      # `**` (instead of `**.*`) also matches files without an extension.
      - 'examples/server/bench/**'

  # Same path filter as `push`, applied to pull request updates.
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/bench/**'

  # Daily run at 02:04 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '04 2 * * *'
29+
# One live run per workflow and ref: starting a new run in the same group
# cancels the run that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
33+
jobs:
  # Builds the server with CUDA, runs the k6-driven benchmark through
  # examples/server/bench/bench.py and publishes the results.
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      # FIXME: found no way to avoid duplicating the runs-on label here.
      RUNNER_LABEL: Standard_NC4as_T4_v3
    # Run when: manually dispatched for this runner's series, on schedule,
    # on a pull request, or on a push to master.
    # The push payload has no `push` sub-object, so the push case is detected
    # through github.event_name / github.ref instead of github.event.push.ref.
    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request != '' || (github.event_name == 'push' && github.ref == 'refs/heads/master') }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      # Virtualenv lives in the bench directory; the "Server bench" step
      # re-activates it from there.
      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      # Download Prometheus, start it in the background and wait until it
      # accepts connections on port 9090.
      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          # Bounded wait (300 x 0.1s, about 30s) so a Prometheus that fails to
          # start fails the step instead of spinning until the job times out.
          ready=0
          for _ in $(seq 1 300); do
            if nc -z localhost 9090; then
              ready=1
              break
            fi
            sleep 0.1
          done
          if [ "$ready" -ne 1 ]; then
            echo "Prometheus did not start listening on port 9090" >&2
            exit 1
          fi

      # Unpack the k6 binary into the bench directory; it is referenced as
      # ./k6 by the "Server bench" step.
      - name: Install k6
        id: k6_installation
        run: |
          cd examples/server/bench
          wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
          tar xzf k6*.tar.gz --strip-components=1

      # Release build of the `server` target with cuBLAS enabled.
      # NOTE(review): CUDA architecture 75 presumably matches the GPU of the
      # runner selected in `runs-on` - confirm before reusing on other series.
      - name: Build
        id: cmake_build
        run: |
          set -eux
          mkdir build
          cd build
          cmake .. \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DLLAMA_CUBLAS=ON \
            -DCUDAToolkit_ROOT=/usr/local/cuda \
            -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
            -DCMAKE_CUDA_ARCHITECTURES=75 \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_ALL_WARNINGS=OFF \
            -DCMAKE_BUILD_TYPE=Release
          cmake --build . --config Release -j $(nproc) --target server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      # Run the benchmark, then export the variables bench.py wrote to
      # results.github.env so that later steps can read them via `env.*`.
      - name: Server bench
        id: server_bench
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          BENCH_K6_BIN_PATH=./k6 python bench.py \
            --runner-label ${{ env.RUNNER_LABEL }} \
            --name ${{ github.job }} \
            --branch ${{ github.ref_name }} \
            --commit ${{ github.sha }} \
            --scenario script.js \
            --duration 30s \
            --hf-repo ggml-org/models \
            --hf-file phi-2/ggml-model-q4_0.gguf \
            --model-path-prefix /models \
            --parallel 8 \
            -ngl 33 \
            --batch-size 2048 \
            --ubatch-size 256 \
            --ctx-size 16384 \
            --n-prompts 1000 \
            --max-prompt-tokens 1024 \
            --max-tokens 2048

          cat results.github.env >> "$GITHUB_ENV"

      # - name: Comment PR
      #   uses: mshick/add-pr-comment@v2
      #   id: comment_pr
      #   if: ${{ github.event.pull_request != '' }}
      #   with:
      #     message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
      #     message: |
      #       $BENCH_PR_COMMENT

      # `with:` values are not shell-expanded, so the result text must come
      # from the `env` context rather than a `$VAR` reference.
      # BENCH_RESULTS is presumably exported through results.github.env by
      # the previous step - verify against bench.py.
      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{ secrets.GITHUB_TOKEN }}
          context: ${{ github.job }}
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'

      # Attach the generated charts to the pull request run.
      # NOTE(review): the glob is relative to the workspace root while the
      # benchmark runs inside examples/server/bench - confirm where bench.py
      # writes its PNG files.
      # NOTE(review): confirm that 'success' is an annotationLevel value the
      # action accepts.
      - name: Upload results
        if: ${{ github.event.pull_request != '' }}
        uses: edunad/actions-image@v2.0.0
        with:
          path: '*.png'
          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
          title: |
            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
          annotationLevel: 'success'
# 0 commit comments