File tree Expand file tree Collapse file tree 1 file changed +31
-0
lines changed
examples/llms/gpt-oss/amd Expand file tree Collapse file tree 1 file changed +31
-0
lines changed Original file line number Diff line number Diff line change 1+ type : service
2+ name : gpt-oss-120b
3+
4+ model : openai/gpt-oss-120b
5+
6+ env :
7+ - HF_TOKEN
8+ - MODEL=openai/gpt-oss-120b
9+ # Set to 1 to enable AITER, or 0 to disable it.
10+ - VLLM_ROCM_USE_AITER=1
11+ # To enable AITER Triton unified attention
12+ - VLLM_USE_AITER_UNIFIED_ATTENTION=1
13+ # Disabling AITER MHA below is required for AITER unified attention to take effect
14+ - VLLM_ROCM_USE_AITER_MHA=0
15+ image : rocm/vllm-dev:open-mi300-08052025
16+ commands :
17+ - |
18+ vllm serve $MODEL \
19+ --tensor-parallel-size $DSTACK_GPUS_NUM \
20+ --no-enable-prefix-caching \
21+ --disable-log-requests \
22+ --compilation-config '{"full_cuda_graph": true}'
23+ port : 8000
24+
25+ volumes :
26+ # Cache downloaded models
27+ - /root/.cache/huggingface:/root/.cache/huggingface
28+
29+ resources :
30+ gpu : MI300X:8
31+ shm_size : 32GB
You can’t perform that action at this time.
0 commit comments