Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Dockerfile.cloudrun
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 \
&& python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# Install PyTorch with CUDA 12.6 support BEFORE audio-separator[gpu].
# Without this, `pip install ".[gpu]"` pulls the default CPU-only PyTorch wheel
# from PyPI and Separator silently falls back to CPU (~10× slower).
# Cloud Run L4 GPUs have NVIDIA driver 570 (supports up to CUDA 12.8), so cu126
# works. cu130 would fail with "NVIDIA driver is too old".
# Installing torch first means audio-separator[gpu] sees it already satisfied.
RUN pip install --no-cache-dir \
torch==2.6.0+cu126 \
torchvision==0.21.0+cu126 \
--index-url https://download.pytorch.org/whl/cu126

# Install audio-separator with GPU support and API dependencies
COPY . /tmp/audio-separator-src
RUN cd /tmp/audio-separator-src \
Expand Down
17 changes: 17 additions & 0 deletions audio_separator/remote/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,22 @@ audio-separator-remote separate audio.wav \
--vr_aggression 10
```

**Large files (>30 MiB):**

When the deployment runs on Cloud Run, request bodies are capped at 32 MiB. For inputs larger than 30 MiB (a threshold that leaves headroom for request overhead), the CLI automatically uploads the file to GCS first and tells the server to fetch it from `gs://...`, bypassing the limit. This is transparent — the same `separate` command works for any file size:

```bash
# Same command, file size detected automatically
audio-separator-remote separate big_song.wav --preset vocal_balanced
```

Requirements when the GCS path activates:
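- The `google-cloud-storage` Python package installed alongside the CLI (`pip install google-cloud-storage`)
- Application Default Credentials on the machine running the CLI (`gcloud auth application-default login`)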
- Write permission on the input bucket (defaults to `nomadkaraoke-audio-separator-outputs`)
- The Cloud Run service account needs read permission on the same bucket (it already does for the default bucket)

Override the bucket with `--gcs-bucket my-bucket` or by setting `AUDIO_SEPARATOR_GCS_INPUT_BUCKET`. Uploaded inputs are deleted after the job finishes (success or failure); the bucket's lifecycle policy is the safety net if cleanup fails.

**Check job status:**

```bash
Expand Down Expand Up @@ -236,6 +252,7 @@ audio-separator-remote --version
**Global Options:**

- `--api_url`: Override the API URL
- `--gcs-bucket`: Bucket used for the >30 MiB upload fallback (env: `AUDIO_SEPARATOR_GCS_INPUT_BUCKET`, default: `nomadkaraoke-audio-separator-outputs`)
- `--timeout`: Set timeout for polling (default: 600 seconds)
- `--poll_interval`: Set polling interval (default: 10 seconds)
- `--debug`: Enable debug logging
Expand Down
98 changes: 94 additions & 4 deletions audio_separator/remote/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,66 @@
import os
import sys
import time
import uuid
from importlib import metadata

from audio_separator.remote import AudioSeparatorAPIClient

# Cloud Run hard-limits request bodies to 32 MiB. Use 30 MiB threshold so a
# little request overhead won't push us over. Larger files go via GCS.
GCS_UPLOAD_THRESHOLD_BYTES = 30 * 1024 * 1024
DEFAULT_GCS_INPUT_BUCKET = "nomadkaraoke-audio-separator-outputs"
GCS_INPUT_PREFIX = "cli-uploads"


def upload_to_gcs(file_path: str, bucket_name: str, logger: logging.Logger) -> str:
    """Copy a local file into a GCS bucket and return the object's gs:// URI.

    The object is stored under ``GCS_INPUT_PREFIX`` with a random UUID
    prepended to the file's base name, so repeated uploads of the same file
    do not overwrite each other.

    Args:
        file_path: Path of the local file to upload.
        bucket_name: Name of the destination bucket.
        logger: Logger that receives the progress messages.

    Returns:
        The ``gs://<bucket>/<object>`` URI of the uploaded object.

    Raises:
        RuntimeError: If the ``google-cloud-storage`` package is not installed.
    """
    # Imported lazily so the dependency is only needed on the large-file path.
    try:
        from google.cloud import storage as gcs
    except ImportError as import_error:
        threshold_mib = GCS_UPLOAD_THRESHOLD_BYTES // (1024 * 1024)
        raise RuntimeError(
            f"google-cloud-storage is required to upload files larger than {threshold_mib} MiB. "
            "Install it with: pip install google-cloud-storage"
        ) from import_error

    object_name = f"{GCS_INPUT_PREFIX}/{uuid.uuid4()}-{os.path.basename(file_path)}"
    destination = f"gs://{bucket_name}/{object_name}"

    mebibytes = os.path.getsize(file_path) / (1024 * 1024)
    logger.info(f"Uploading {mebibytes:.1f} MiB to {destination} (server fetches from GCS, bypasses Cloud Run 32 MiB limit)")

    target_blob = gcs.Client().bucket(bucket_name).blob(object_name)
    target_blob.upload_from_filename(file_path)

    logger.info(f"Upload complete: {destination}")
    return destination


def delete_from_gcs(gcs_uri: str, logger: logging.Logger) -> None:
    """Best-effort delete of a GCS object; logs a warning instead of raising.

    Args:
        gcs_uri: Object location in the form ``gs://<bucket>/<object path>``.
        logger: Logger that receives the success or failure message.

    Any failure (malformed URI, missing ``google-cloud-storage`` package, or
    an error raised by the storage client) is caught and reported as a
    warning, so callers can invoke this from a ``finally`` block safely.
    """
    scheme = "gs://"
    try:
        # Validate before touching GCS: blindly slicing off the first five
        # characters would turn e.g. "s3://bucket/key" into a delete against
        # an unrelated GCS bucket named "bucket".
        if not gcs_uri.startswith(scheme):
            raise ValueError(f"expected a {scheme}<bucket>/<object> URI")

        bucket_name, _, blob_path = gcs_uri[len(scheme):].partition("/")
        if not bucket_name or not blob_path:
            raise ValueError(f"expected a {scheme}<bucket>/<object> URI with both bucket and object path")

        # Imported lazily so the dependency is only needed on the large-file path.
        from google.cloud import storage

        storage.Client().bucket(bucket_name).blob(blob_path).delete()
        logger.info(f"Cleaned up uploaded input: {gcs_uri}")
    except Exception as e:
        # Deliberately broad: cleanup must never mask the result of the job itself.
        logger.warning(f"Failed to delete {gcs_uri}: {e} (bucket lifecycle will reclaim it)")


def main():
"""Main entry point for the remote CLI."""
Expand Down Expand Up @@ -104,6 +160,13 @@ def main():
parser.add_argument("-d", "--debug", action="store_true", help="Enable debug logging")
parser.add_argument("--log_level", default="info", help="Log level (default: info)")
parser.add_argument("--api_url", help="API URL (overrides AUDIO_SEPARATOR_API_URL env var)")
parser.add_argument(
"--gcs-bucket",
help=(
f"GCS bucket for uploading files >{GCS_UPLOAD_THRESHOLD_BYTES // (1024 * 1024)} MiB "
f"(overrides AUDIO_SEPARATOR_GCS_INPUT_BUCKET env var, default: {DEFAULT_GCS_INPUT_BUCKET})"
),
)

args = parser.parse_args()

Expand Down Expand Up @@ -145,9 +208,12 @@ def main():
# Create API client
api_client = AudioSeparatorAPIClient(api_url, logger)

# Resolve GCS bucket for large-file uploads
gcs_bucket = args.gcs_bucket or os.environ.get("AUDIO_SEPARATOR_GCS_INPUT_BUCKET", DEFAULT_GCS_INPUT_BUCKET)

# Handle commands
if args.command == "separate":
handle_separate_command(args, api_client, logger)
handle_separate_command(args, api_client, logger, gcs_bucket)
elif args.command == "status":
handle_status_command(args, api_client, logger)
elif args.command == "models":
Expand All @@ -159,14 +225,35 @@ def main():
sys.exit(1)


def handle_separate_command(args, api_client: AudioSeparatorAPIClient, logger: logging.Logger):
def handle_separate_command(args, api_client: AudioSeparatorAPIClient, logger: logging.Logger, gcs_bucket: str):
"""Handle the separate command."""
for audio_file in args.audio_files:
logger.info(f"Uploading '{audio_file}' to audio separator...")
logger.info(f"Processing '{audio_file}'...")

# Decide upload path: small files go via multipart POST, large files via GCS
# to bypass the Cloud Run 32 MiB request body limit.
uploaded_gcs_uri = None
try:
file_size = os.path.getsize(audio_file)
use_gcs = file_size > GCS_UPLOAD_THRESHOLD_BYTES
except OSError as e:
logger.error(f"❌ Cannot read '{audio_file}': {e}")
continue

try:
if use_gcs:
logger.info(
f"File is {file_size / (1024 * 1024):.1f} MiB (>{GCS_UPLOAD_THRESHOLD_BYTES // (1024 * 1024)} MiB), "
"uploading via GCS"
)
uploaded_gcs_uri = upload_to_gcs(audio_file, gcs_bucket, logger)
source_kwargs = {"file_path": None, "gcs_uri": uploaded_gcs_uri}
else:
source_kwargs = {"file_path": audio_file, "gcs_uri": None}

# Prepare parameters for separation
kwargs = {
**source_kwargs,
"model": args.model,
"models": args.models,
"preset": args.preset,
Expand Down Expand Up @@ -213,7 +300,7 @@ def handle_separate_command(args, api_client: AudioSeparatorAPIClient, logger: l
}

# Use the convenience method that handles everything
result = api_client.separate_audio_and_wait(audio_file, **kwargs)
result = api_client.separate_audio_and_wait(**kwargs)

if result["status"] == "completed":
if "downloaded_files" in result:
Expand All @@ -227,6 +314,9 @@ def handle_separate_command(args, api_client: AudioSeparatorAPIClient, logger: l

except Exception as e:
logger.error(f"❌ Error processing '{audio_file}': {e}")
finally:
if uploaded_gcs_uri:
delete_from_gcs(uploaded_gcs_uri, logger)


def handle_status_command(args, api_client: AudioSeparatorAPIClient, logger: logging.Logger):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "audio-separator"
version = "0.44.1"
version = "0.44.2"
description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_remote_api_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def test_cli_separate_command_integration(self, mock_client_class, test_audio_fi
logger = Mock()

# Execute the command
handle_separate_command(args, mock_client, logger)
handle_separate_command(args, mock_client, logger, "test-bucket")

# Verify the API client method was called
mock_client.separate_audio_and_wait.assert_called_once()
Expand Down
Loading
Loading