Skip to content

Commit fea78b2

Browse files
committed
Add AMD ROCm Docker support (RDNA3/RDNA4)
1 parent 02995ed commit fea78b2

3 files changed

Lines changed: 201 additions & 0 deletions

File tree

compose.rocm.yml

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
name: fish-speech-rocm
2+
3+
# AMD ROCm compose for Fish Speech (RDNA3 / RDNA4).
4+
# Place your checkpoints in ./checkpoints before running; that directory is bind-mounted at /app/checkpoints.
5+
#
6+
# docker compose -f compose.rocm.yml --profile webui up --build
7+
# docker compose -f compose.rocm.yml --profile server up --build
8+
9+
# Runtime settings shared by both services: checkpoint/reference bind mounts,
# ROCm environment defaults, and access to the AMD GPU device nodes.
x-rocm-runtime: &rocm-runtime
  volumes:
    - ./checkpoints:/app/checkpoints
    - ./references:/app/references
  environment:
    - ROCBLAS_USE_HIPBLASLT=0
    - COMPILE=${COMPILE:-1}
  devices:
    - /dev/kfd
    - /dev/dri
  group_add:
    - video
    - render
  shm_size: "16g"
  tty: true
  stdin_open: true

services:
  # Gradio WebUI; host port is ${GRADIO_PORT} (default 7860).
  webui:
    <<: *rocm-runtime
    build:
      context: .
      dockerfile: docker/Dockerfile.rocm
      target: webui
    image: fish-speech-webui:rocm
    profiles:
      - webui
    ports:
      - "${GRADIO_PORT:-7860}:7860"

  # API server; host port is ${API_PORT} (default 8080).
  server:
    <<: *rocm-runtime
    build:
      context: .
      dockerfile: docker/Dockerfile.rocm
      target: server
    image: fish-speech-server:rocm
    profiles:
      - server
    ports:
      - "${API_PORT:-8080}:8080"

docker/Dockerfile.rocm

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# docker/Dockerfile.rocm
2+
#
3+
# Fish Speech on AMD ROCm (RDNA3 / RDNA4).
4+
# The checkpoints are NOT bundled — mount them at /app/checkpoints.
5+
#
6+
# Build:
7+
# docker build -f docker/Dockerfile.rocm --target webui -t fish-speech-webui:rocm .
8+
# docker build -f docker/Dockerfile.rocm --target server -t fish-speech-server:rocm .
9+
#
10+
# Run (webui):
11+
# docker run --device=/dev/kfd --device=/dev/dri \
12+
# --group-add video --group-add render \
13+
# -e ROCBLAS_USE_HIPBLASLT=0 \
14+
# -v ./checkpoints:/app/checkpoints \
15+
# -p 7860:7860 fish-speech-webui:rocm
16+
17+
# Base image selection. Both values can be overridden with --build-arg;
# BASE_IMAGE defaults to the rocm/pytorch tag built from ROCM_VERSION.
ARG ROCM_VERSION=7.2.3
ARG BASE_IMAGE=rocm/pytorch:rocm${ROCM_VERSION}_ubuntu24.04_py3.12_pytorch_release_2.9.1

# Shared base stage: system packages, Python dependencies and the application.
FROM ${BASE_IMAGE} AS app-base

# Build-time only: keeps apt-get non-interactive in this stage's RUN steps
# without leaking the setting into the runtime environment of the images.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime defaults: no .pyc files, unbuffered Python output, and hipBLASLt
# disabled in rocBLAS (set by default for RDNA4 / gfx1201 stability).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    ROCBLAS_USE_HIPBLASLT=0
26+
27+
# System packages: build toolchain, git, ffmpeg, and the SoX / ALSA /
# PortAudio libraries and headers. The apt lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        ffmpeg \
        git \
        libasound-dev \
        libportaudio2 \
        libportaudiocpp0 \
        libsox-dev \
        portaudio19-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
33+
34+
WORKDIR /app

# Install third-party runtime dependencies BEFORE copying the source tree, so
# this expensive layer stays cached when only application code changes.
# torch/torchaudio are deliberately NOT listed — the ROCm base image already
# ships a gfx-tuned torch (2.9.1+rocm7.2.3).
#
# descript-audiotools pins protobuf<3.20, but fish-speech's generated proto
# code needs >=3.20, so protobuf is overridden as the last step of this layer
# (mirrors pyproject's uv override).
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir \
        numpy "transformers<=4.57.3" datasets lightning pytorch_lightning \
        hydra-core natsort einops librosa rich "gradio>5.0.0" wandb grpcio kui \
        uvicorn loguru loralib pyrootutils resampy "einx[torch]==0.2.2" zstandard \
        pydub "modelscope==1.17.1" "opencc-python-reimplemented==0.1.7" \
        silero-vad ormsgpack tiktoken "pydantic==2.9.2" cachetools \
        descript-audio-codec safetensors soundfile vector_quantize_pytorch \
    && pip install --no-cache-dir --no-build-isolation pyaudio \
    && pip install --no-cache-dir --no-deps --upgrade "protobuf>=4.25,<6.0"

COPY . /app

# Install the package itself with --no-deps so pip does not try to pull a
# CUDA/CPU torch. With --no-deps this step leaves the dependency set installed
# above (including the protobuf override) untouched.
RUN pip install --no-cache-dir --no-deps -e .
54+
55+
# Documented ports: 7860 (Gradio WebUI) and 8080 (API server).
EXPOSE 7860/tcp 8080/tcp

# Default for the COMPILE switch read by the start scripts: torch.compile is
# on by default (verified working on gfx1201/RDNA4). Set COMPILE=0 to disable.
ENV COMPILE="1"
60+
61+
##############################################################
62+
# Gradio WebUI
63+
##############################################################
64+
FROM app-base AS webui

# Bind address and port exported for Gradio; overridable with --build-arg at
# build time or -e at run time.
# NOTE(review): assumes tools/run_webui.py lets Gradio pick these up from the
# environment — confirm against the script.
ARG GRADIO_SERVER_NAME="0.0.0.0"
ARG GRADIO_SERVER_PORT=7860
ENV GRADIO_SERVER_NAME=${GRADIO_SERVER_NAME} \
    GRADIO_SERVER_PORT=${GRADIO_SERVER_PORT}

# Generate the entrypoint script. Each single-quoted printf argument becomes
# one line of /app/start_webui.sh, so nothing is expanded at build time.
# The script adds --compile when COMPILE is "1" or "true", and forwards any
# extra `docker run` arguments ("$@") to run_webui.py instead of dropping them.
RUN printf '%s\n' \
    '#!/bin/bash' \
    'set -e' \
    'ARGS=()' \
    'if [ "${COMPILE:-0}" = "1" ] || [ "${COMPILE:-}" = "true" ]; then ARGS+=(--compile); fi' \
    'exec python tools/run_webui.py \' \
    '    --llama-checkpoint-path checkpoints/s2-pro \' \
    '    --decoder-checkpoint-path checkpoints/s2-pro/codec.pth \' \
    '    --decoder-config-name modded_dac_vq "${ARGS[@]}" "$@"' \
    > /app/start_webui.sh && chmod +x /app/start_webui.sh

ENTRYPOINT ["/app/start_webui.sh"]
83+
84+
##############################################################
85+
# API Server
86+
##############################################################
87+
FROM app-base AS server

# Bind address and port for the API server; overridable with --build-arg at
# build time or -e at run time. The start script below reads them.
ARG API_SERVER_NAME="0.0.0.0"
ARG API_SERVER_PORT=8080
ENV API_SERVER_NAME=${API_SERVER_NAME} \
    API_SERVER_PORT=${API_SERVER_PORT}

# Generate the entrypoint script. Each single-quoted printf argument becomes
# one line of /app/start_server.sh, so nothing is expanded at build time.
# The listen address comes from API_SERVER_NAME / API_SERVER_PORT (falling
# back to 0.0.0.0:8080) rather than being hard-coded, --compile is added when
# COMPILE is "1" or "true", and extra `docker run` arguments ("$@") are
# forwarded to api_server.py.
RUN printf '%s\n' \
    '#!/bin/bash' \
    'set -e' \
    'ARGS=()' \
    'if [ "${COMPILE:-0}" = "1" ] || [ "${COMPILE:-}" = "true" ]; then ARGS+=(--compile); fi' \
    'exec python tools/api_server.py \' \
    '    --listen "${API_SERVER_NAME:-0.0.0.0}:${API_SERVER_PORT:-8080}" \' \
    '    --llama-checkpoint-path checkpoints/s2-pro \' \
    '    --decoder-checkpoint-path checkpoints/s2-pro/codec.pth \' \
    '    --decoder-config-name modded_dac_vq "${ARGS[@]}" "$@"' \
    > /app/start_server.sh && chmod +x /app/start_server.sh

ENTRYPOINT ["/app/start_server.sh"]

docs/en/install.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,3 +189,40 @@ Both methods require mounting these directories:
189189

190190
!!! warning
191191
GPU support in the images above requires the NVIDIA Docker runtime (for AMD GPUs, see the ROCm section below). For CPU-only deployment, remove the `--gpus all` flag and use CPU images.
192+
193+
### AMD ROCm support
194+
195+
Fish Speech runs on AMD GPUs via ROCm. The ROCm image is based on the official `rocm/pytorch` image, which already ships a gfx-tuned PyTorch, so no separate torch install is needed. Verified on RDNA4 (Radeon AI PRO R9700 / gfx1201) with ROCm 7.2.3; RDNA3 (gfx1100/gfx1101) should also work.
196+
197+
**Prerequisites:**
198+
199+
- AMD GPU with ROCm support (RDNA3 / RDNA4)
200+
- ROCm drivers installed on the host
201+
- Docker, with the host's ROCm device nodes (`/dev/kfd` and `/dev/dri`) passed through to the container
202+
203+
**Using Docker Compose:**
204+
205+
```bash
206+
# WebUI
207+
docker compose -f compose.rocm.yml --profile webui up --build
208+
209+
# API server
210+
docker compose -f compose.rocm.yml --profile server up --build
211+
```
212+
213+
**Manual build and run:**
214+
215+
```bash
216+
docker build -f docker/Dockerfile.rocm --target webui -t fish-speech-webui:rocm .
217+
218+
docker run \
219+
--device=/dev/kfd --device=/dev/dri \
220+
--group-add video --group-add render \
221+
-e ROCBLAS_USE_HIPBLASLT=0 \
222+
-v ./checkpoints:/app/checkpoints \
223+
-p 7860:7860 \
224+
fish-speech-webui:rocm
225+
```
226+
227+
!!! note
228+
`ROCBLAS_USE_HIPBLASLT=0` is set by default for RDNA4 (gfx1201) stability; RDNA3 users may not need it. Fish Speech uses `scaled_dot_product_attention`, which dispatches to ROCm's AOTriton flash-attention backend automatically — no custom kernel build is required. The first run is slower while MIOpen auto-tunes kernels. `torch.compile` is enabled by default (`COMPILE=1`); set `COMPILE=0` to disable.

0 commit comments

Comments
 (0)