Skip to content

Commit 3de7c08

Browse files
committed
Add vLLM support for DocSum
Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
1 parent 1fcde6d commit 3de7c08

8 files changed

Lines changed: 61 additions & 4 deletions

File tree

helm-charts/docsum/Chart.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ dependencies:
99
- name: tgi
1010
version: 0-latest
1111
repository: "file://../common/tgi"
12+
condition: tgi.enabled
13+
- name: vllm
14+
version: 0-latest
15+
repository: "file://../common/vllm"
16+
condition: vllm.enabled
1217
- name: llm-uservice
1318
version: 0-latest
1419
repository: "file://../common/llm-uservice"

helm-charts/docsum/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@ export HFTOKEN="insert-your-huggingface-token-here"
1616
export MODELDIR="/mnt/opea-models"
1717
export MODELNAME="Intel/neural-chat-7b-v3-3"
1818
helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
19-
# To use Gaudi device
20-
# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-values.yaml
19+
# To use Gaudi device with TGI
20+
# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-tgi-values.yaml ...
21+
# To use Gaudi device with vLLM
22+
# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-vllm-values.yaml ...
2123
```
2224

2325
## Verify
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
gaudi-tgi-values.yaml

helm-charts/docsum/ci-gaudi-values.yaml

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
gaudi-vllm-values.yaml
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Accelerate inference in the heaviest components to improve performance
5+
# by overriding their subchart values
6+
7+
tgi:
8+
enabled: false
9+
10+
llm-uservice:
11+
image:
12+
repository: opea/llm-docsum-vllm
13+
tag: "latest"
14+
15+
vllm:
16+
enabled: true
17+
image:
18+
repository: opea/vllm-gaudi
19+
tag: "latest"
20+
resources:
21+
limits:
22+
habana.ai/gaudi: 1
23+
startupProbe:
24+
initialDelaySeconds: 5
25+
periodSeconds: 5
26+
timeoutSeconds: 1
27+
failureThreshold: 120
28+
readinessProbe:
29+
initialDelaySeconds: 5
30+
periodSeconds: 5
31+
timeoutSeconds: 1
32+
livenessProbe:
33+
initialDelaySeconds: 5
34+
periodSeconds: 5
35+
timeoutSeconds: 1
36+
37+
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
38+
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
39+
40+
extraCmdArgs: [
41+
"--tensor-parallel-size", "1",
42+
"--block-size", "128",
43+
"--max-num-seqs", "256",
44+
"--max-seq_len-to-capture", "2048"
45+
]

helm-charts/docsum/values.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,12 @@ llm-uservice:
6464
MAX_TOTAL_TOKENS: "2048"
6565
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
6666

67-
# To override values in subchart tgi
67+
# To override values in tgi/vllm subcharts
6868
tgi:
69+
enabled: true
70+
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
71+
vllm:
72+
enabled: false
6973
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
7074
MAX_INPUT_LENGTH: "1024"
7175
MAX_TOTAL_TOKENS: "2048"

0 commit comments

Comments
 (0)