Skip to content

Commit 71a6f5c

Browse files
[Examples] Added gpt-oss-120b for AMD
1 parent 95d844b commit 71a6f5c

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
# dstack service: serve openai/gpt-oss-120b with vLLM on 8x AMD MI300X.
type: service
name: gpt-oss-120b

model: openai/gpt-oss-120b

env:
  - HF_TOKEN
  - MODEL=openai/gpt-oss-120b
  # To enable AITER, set below to 1. Otherwise, set it to 0.
  - VLLM_ROCM_USE_AITER=1
  # To enable AITER Triton unified attention
  - VLLM_USE_AITER_UNIFIED_ATTENTION=1
  # below is required in order to enable AITER unified attention by disabling AITER MHA
  - VLLM_ROCM_USE_AITER_MHA=0
image: rocm/vllm-dev:open-mi300-08052025
commands:
  - |
    vllm serve $MODEL \
      --tensor-parallel-size $DSTACK_GPUS_NUM \
      --no-enable-prefix-caching \
      --disable-log-requests \
      --compilation-config '{"full_cuda_graph": true}'
port: 8000

volumes:
  # Cache downloaded models
  - /root/.cache/huggingface:/root/.cache/huggingface

resources:
  gpu: MI300X:8
  shm_size: 32GB

0 commit comments

Comments
 (0)