# Source: nomadkaraoke/python-audio-separator, Dockerfile.cloudrun at commit 92fc053e9ca73cb486a9d34bd70d069c6ae357f3
# Audio Separator API - Cloud Run GPU Deployment
# Optimized for NVIDIA L4 GPU on Google Cloud Run
#
# Models are baked into the image for zero cold-start latency.
# To update models, rebuild the image.
#
# Build: docker build -f Dockerfile.cloudrun -t audio-separator-cloudrun .
# Run:   docker run --gpus all -p 8080:8080 audio-separator-cloudrun

# Base image: NVIDIA CUDA 12.6.3 "runtime" variant on Ubuntu 22.04.
# NOTE(review): pinned by tag only; consider pinning by digest for fully reproducible builds.
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Prevent interactive prompts during package installation.
# Declared as ARG rather than ENV: the value is still exported to every RUN
# step in this stage, but it is not baked into the runtime environment of the
# final image, where it would affect any apt/debconf use inside the container.
ARG DEBIAN_FRONTEND=noninteractive

# System packages, installed in one layer:
#   1. software-properties-common supplies add-apt-repository.
#   2. The deadsnakes PPA provides Python 3.12 (onnxruntime-gpu requires >= 3.11).
#   3. Everything else, listed alphabetically:
#        - build-essential, gcc, g++, pkg-config: compiling Python C extensions
#        - curl: utility
#        - ffmpeg, sox and the lib* / *-dev packages: audio tooling and libraries
#        - python3.12, python3.12-dev, python3.12-venv: the interpreter itself
# The apt lists are removed in the same layer, and the final two commands
# print the Python and FFmpeg versions so a broken install fails the build.
RUN apt-get update \
    && apt-get install -y --no-install-recommends software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        g++ \
        gcc \
        libasound2-dev \
        libjack-dev \
        libportaudio2 \
        libpulse-dev \
        libsamplerate0 \
        libsamplerate0-dev \
        libsndfile1 \
        libsndfile1-dev \
        libsox-dev \
        pkg-config \
        portaudio19-dev \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
        sox \
    && rm -rf /var/lib/apt/lists/* \
    && python3.12 --version \
    && ffmpeg -version

# Make Python 3.12 the default `python3` / `python`, then bootstrap pip.
#
# get-pip.py is downloaded to a file instead of being piped into the
# interpreter: the default /bin/sh has no pipefail, so with `curl | python`
# a failed download would hand Python empty or partial input and the failure
# would only surface later with a misleading error. `-f` makes curl exit
# non-zero on HTTP errors rather than saving an error page, and `-L` follows
# redirects. The script is removed in the same layer, and --no-cache-dir keeps
# pip's download cache out of the image.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && curl -fsSL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python3.12 /tmp/get-pip.py --no-cache-dir \
    && rm -f /tmp/get-pip.py \
    && python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# PyTorch (CUDA 12.6 build) must be installed BEFORE audio-separator[gpu].
# Otherwise `pip install ".[gpu]"` resolves torch from PyPI, which yields the
# default CPU-only wheel, and Separator silently falls back to CPU (~10× slower).
# With torch already present, audio-separator[gpu] treats the requirement as satisfied.
#
# Why cu126: Cloud Run L4 GPUs ship NVIDIA driver 570, which supports up to
# CUDA 12.8. cu130 would fail with "NVIDIA driver is too old".
RUN pip install \
        --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu126 \
        torch==2.6.0+cu126 \
        torchvision==0.21.0+cu126

# Install audio-separator with GPU support and API dependencies.
#
# The build context is bind-mounted for this step only instead of being COPY'd
# into the image. A COPY creates its own layer, and deleting the files in a
# later RUN does not remove them from that layer, so the previous
# `COPY . ... && rm -rf` pattern still shipped the whole source tree.
# `rw` lets the build backend write scratch files under the mount; those
# writes are discarded when the step finishes.
# Requires BuildKit (RUN --mount), the default builder in current Docker.
RUN --mount=type=bind,source=.,target=/tmp/audio-separator-src,rw \
    cd /tmp/audio-separator-src \
    && pip install --no-cache-dir ".[gpu]" \
    && pip install --no-cache-dir \
        "fastapi>=0.104.0" \
        "uvicorn[standard]>=0.24.0" \
        "python-multipart>=0.0.6" \
        "filetype>=1.2.0" \
        "google-cloud-storage>=2.0.0" \
        "google-cloud-firestore>=2.0.0"

# Register the CUDA library directory with the dynamic linker:
# append it to an ld.so.conf.d drop-in, then rebuild the linker cache.
RUN printf '%s\n' '/usr/local/cuda/lib64' >> /etc/ld.so.conf.d/cuda.conf \
    && ldconfig

# Runtime environment:
#   PYTHONUNBUFFERED  - unbuffered stdout/stderr so logs appear immediately
#   PORT              - port number (matches the EXPOSE below)
#   MODEL_DIR         - directory holding the baked-in models
#   STORAGE_DIR       - scratch storage root
#   PATH / LD_LIBRARY_PATH - CUDA binaries and libraries are prepended to the
#                            values already set before this instruction
ENV PYTHONUNBUFFERED=1 \
    PORT=8080 \
    MODEL_DIR=/models \
    STORAGE_DIR=/tmp/storage \
    PATH=/usr/local/cuda/bin:${PATH} \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

# Pre-create the model directory and the storage output directory
# (-p also creates the intermediate /tmp/storage).
RUN mkdir -p /models \
    && mkdir -p /tmp/storage/outputs

# Bake the ensemble preset models into the image so they do not have to be
# downloaded on cold start. These are the models used by the default presets
# (instrumental_clean + karaoke); total size is roughly 1-1.5 GB.
# The download script is copied in, executed, and removed again; the final
# `ls` prints the contents of /models/ into the build log.
COPY scripts/download_preset_models.py /tmp/download_preset_models.py
RUN python3 /tmp/download_preset_models.py \
    && rm /tmp/download_preset_models.py \
    && ls -lh /models/

# Document the listening port (Cloud Run's default). EXPOSE is metadata only
# and does not publish the port by itself.
EXPOSE 8080/tcp

# Health check for container orchestration.
# The probe follows $PORT (falling back to 8080) instead of hard-coding the
# port, so it keeps working when PORT is overridden at run time. Shell-form CMD
# is used on purpose: it runs under /bin/sh -c, which expands the variable
# when the probe runs.
# NOTE(review): assumes the API server binds to $PORT - confirm in deploy_cloudrun.
# -f: fail on HTTP errors; -sS: no progress meter, but still print errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -fsS "http://localhost:${PORT:-8080}/health" || exit 1

# Run the API server.
# Exec (JSON-array) form: the Python process is started directly, without a
# wrapping shell, so it receives stop signals itself.
CMD ["python3", "-m", "audio_separator.remote.deploy_cloudrun"]