Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# syntax=docker/dockerfile:1
# VoxCPM2 – Docker image with CUDA 13.0 (native sm_120 / Blackwell support)
# Pattern adapted from suite-redazione/tada-tts (proven on RTX 5070)
# Requires: nvidia-container-toolkit on the host
FROM nvidia/cuda:13.0.1-cudnn-devel-ubuntu22.04

# Build-time only — keeps apt/dpkg non-interactive during image build without
# leaking DEBIAN_FRONTEND into the container's runtime environment (an ENV
# here would persist and can confuse debconf in derived images / exec shells).
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment: unbuffered logs for `docker logs`, and all model/weight
# caches consolidated under /cache so a single volume mount persists them.
ENV PYTHONUNBUFFERED=1 \
    TOKENIZERS_PARALLELISM=false \
    HF_HOME=/cache/huggingface \
    MODELSCOPE_CACHE=/cache/modelscope \
    TORCH_HOME=/cache/torch

# ── System packages ───────────────────────────────────────────────────────────
# Two-phase install: first the tooling needed to add the deadsnakes PPA
# (Python 3.11 is not in stock Ubuntu 22.04 repos), then the real dependency
# set. `update` + `install` stay in one layer to avoid the stale-apt-cache bug;
# package names are sorted for diffability.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ffmpeg \
    git \
    libgomp1 \
    libsndfile1 \
    libsndfile1-dev \
    python3.11 \
    python3.11-dev \
    python3.11-venv \
    # cuBLAS 12 compat — some deps (funasr, ctranslate2) ship CUDA 12 binaries
    libcublas-12-8 \
    && rm -rf /var/lib/apt/lists/*

# Virtualenv with Python 3.11 — isolated from the system interpreter
RUN python3.11 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# setuptools>=78 required for PEP 639 SPDX license strings
RUN pip install --no-cache-dir --upgrade "pip>=24" "setuptools>=78" wheel

# ── Application ───────────────────────────────────────────────────────────────
WORKDIR /app
COPY . .

# Install voxcpm and all its deps (may pull CPU torch as transitive dep — OK)
RUN pip install --no-cache-dir -e .

# Force-reinstall the cu130 torch stack last so CUDA versions always win.
# CUDA 13.0 wheels include Triton with native sm_120 (Blackwell) support.
RUN pip install --no-cache-dir --force-reinstall \
    --index-url https://download.pytorch.org/whl/cu130 \
    torch torchaudio torchcodec

# ── Runtime ───────────────────────────────────────────────────────────────────
EXPOSE 8808

# Long start-period: first boot downloads multi-GB model weights into /cache.
# NOTE(review): assumes app.py serves HTTP on "/" at 8808 — confirm endpoint,
# or point this at the app's dedicated health route if it has one.
HEALTHCHECK --interval=30s --timeout=5s --start-period=180s --retries=3 \
  CMD curl -fsS http://localhost:8808/ || exit 1

CMD ["python", "app.py", "--port", "8808"]
54 changes: 54 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Compose file for the VoxCPM demo service.
# Requires nvidia-container-toolkit on the host and a Compose v2 release that
# honors `deploy.resources` device reservations outside Swarm.
services:
  voxcpm:
    build:
      context: .
      dockerfile: Dockerfile
    image: voxcpm:latest
    container_name: voxcpm-demo

    # ── GPU access ─────────────────────────────────────────────────────────────
    # Compose-spec device reservation; exposes every host NVIDIA GPU to the
    # container with compute capabilities.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # ── Ports ──────────────────────────────────────────────────────────────────
    # Host 8808 → container 8808 (the port app.py binds; see the Dockerfile CMD).
    ports:
      - "8808:8808"

    # ── Persistent volumes ─────────────────────────────────────────────────────
    # Model weights (~4-6 GB) are cached here so they survive container restarts.
    # /cache is the parent of HF_HOME / MODELSCOPE_CACHE / TORCH_HOME below.
    volumes:
      - voxcpm-cache:/cache
      # Optional: mount a local output directory
      # - ./outputs:/app/outputs

    # ── Environment (set inside the container) ─────────────────────────────────
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - HF_HOME=/cache/huggingface
      - MODELSCOPE_CACHE=/cache/modelscope
      - TORCH_HOME=/cache/torch
      - TRITON_CACHE_DIR=/cache/triton
      - TORCHINDUCTOR_CACHE_DIR=/cache/inductor
      # Fallback: uncomment if Triton still crashes (eager mode, ~2x slower)
      # - TORCHDYNAMO_DISABLE=1
      # NOTE(review): the `command` below interpolates VOXCPM_MODEL_ID on the
      # HOST (shell env or .env file) at `docker compose up` time — it does NOT
      # read this container environment block. To select a model, export the
      # variable on the host or add it to .env; uncommenting the line here only
      # sets it inside the container (default: openbmb/VoxCPM2).
      # - VOXCPM_MODEL_ID=openbmb/VoxCPM2

    # ── Restart policy ─────────────────────────────────────────────────────────
    restart: unless-stopped

    # ── Custom command (supports VOXCPM_MODEL_ID from the host env) ────────────
    # `${VAR:+...}` expands to the `--model-id` flag only when VOXCPM_MODEL_ID
    # is set and non-empty; otherwise it expands to nothing and app.py uses its
    # built-in default. NOTE(review): the nested `$VOXCPM_MODEL_ID` inside the
    # `:+` branch needs a recent Compose v2 interpolation engine — verify with
    # `docker compose config` before relying on it.
    command: >
      python app.py
      --port 8808
      ${VOXCPM_MODEL_ID:+--model-id $VOXCPM_MODEL_ID}

volumes:
  voxcpm-cache:
    driver: local