diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b7d8036
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,59 @@
+# VoxCPM2 – Docker image with CUDA 13.0 (native sm_120 / Blackwell support)
+# Pattern adapted from suite-redazione/tada-tts (proven on RTX 5070)
+# Requires: nvidia-container-toolkit on the host
+FROM nvidia/cuda:13.0.1-cudnn-devel-ubuntu22.04
+
+# Build-time only: stops apt from prompting without persisting into the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV TOKENIZERS_PARALLELISM=false
+# Hugging Face, ModelScope and torch caches all point below /cache.
+ENV HF_HOME=/cache/huggingface
+ENV MODELSCOPE_CACHE=/cache/modelscope
+ENV TORCH_HOME=/cache/torch
+
+# ── System packages ───────────────────────────────────────────────────────────
+# The deadsnakes PPA is added before the python3.11 packages are installed.
+# -y answers the PPA confirmation so the step never waits for input.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    curl \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update && apt-get install -y --no-install-recommends \
+    python3.11 \
+    python3.11-dev \
+    python3.11-venv \
+    build-essential \
+    git \
+    ffmpeg \
+    libsndfile1 \
+    libsndfile1-dev \
+    libgomp1 \
+    # cuBLAS 12 compat — some deps (funasr, ctranslate2) ship CUDA 12 binaries
+    libcublas-12-8 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Python 3.11 virtualenv — isolated from the system interpreter
+RUN python3.11 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# setuptools>=78 required for PEP 639 SPDX license strings
+RUN pip install --no-cache-dir --upgrade "pip>=24" "setuptools>=78" wheel
+
+# ── Application ───────────────────────────────────────────────────────────────
+WORKDIR /app
+COPY . .
+
+# Install voxcpm and all its deps (may pull CPU torch as transitive dep — OK)
+RUN pip install --no-cache-dir -e .
+
+# Force-reinstall the cu130 torch stack last so CUDA versions always win.
+# CUDA 13.0 wheels include Triton with native sm_120 (Blackwell) support.
+RUN pip install --force-reinstall --no-cache-dir \
+    --index-url=https://download.pytorch.org/whl/cu130 \
+    torch torchaudio torchcodec
+
+# ── Runtime ───────────────────────────────────────────────────────────────────
+EXPOSE 8808
+
+CMD ["python", "app.py", "--port", "8808"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..4c91435
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,56 @@
+services:
+  voxcpm:
+    build:
+      dockerfile: Dockerfile
+      context: .
+    container_name: voxcpm-demo
+    image: voxcpm:latest
+
+    # ── GPU access ─────────────────────────────────────────────────────────────
+    # Reserves every NVIDIA GPU for this service.
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities:
+                - gpu
+
+    # ── Ports ──────────────────────────────────────────────────────────────────
+    ports:
+      - "8808:8808"
+
+    # ── Persistent volumes ─────────────────────────────────────────────────────
+    # Downloaded model weights (~4-6 GB) live in this volume, so restarting the container keeps them.
+    volumes:
+      - voxcpm-cache:/cache
+      # Optional: mount a local output directory
+      # - ./outputs:/app/outputs
+
+    # ── Environment ────────────────────────────────────────────────────────────
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - HF_HOME=/cache/huggingface
+      - MODELSCOPE_CACHE=/cache/modelscope
+      - TORCH_HOME=/cache/torch
+      - TRITON_CACHE_DIR=/cache/triton
+      - TORCHINDUCTOR_CACHE_DIR=/cache/inductor
+      # Fallback: uncomment if Triton still crashes (eager mode, ~2x slower)
+      # - TORCHDYNAMO_DISABLE=1
+      # Optional model override (default: openbmb/VoxCPM2). Inherited from the environment Compose runs in; write VOXCPM_MODEL_ID=<id> to pin one here.
+      - VOXCPM_MODEL_ID
+
+    # ── Restart policy ─────────────────────────────────────────────────────────
+    restart: unless-stopped
+
+    # ── Custom entrypoint (supports VOXCPM_MODEL_ID env var) ──────────────────
+    # The flag is built by the container's shell from the container environment;
+    # "$$" keeps Compose from interpolating it on the host. --model-id is passed
+    # only when VOXCPM_MODEL_ID is set and non-empty; exec replaces sh with python.
+    command: ["sh", "-c", 'exec python app.py --port 8808 $${VOXCPM_MODEL_ID:+--model-id "$$VOXCPM_MODEL_ID"}']
+
+volumes:
+  voxcpm-cache:
+    driver: local