feat: add GCS URI support to /separate endpoint

beveradb · claude · beveradb · commit 1a8fa1e82df7 · 2026-03-25T12:06:40.000-04:00
Allow callers to pass a gcs_uri (gs://bucket/path) instead of uploading
the audio file as a multipart POST body. This avoids Cloud Run's 32MB
request body limit for large FLAC files.

- Server: new gcs_uri parameter on /separate, downloads from GCS directly
- Client: separate_audio() and separate_audio_and_wait() accept gcs_uri
- Backward compatible: file upload still works when gcs_uri is not set
- Version bump to 0.44.0

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/audio_separator/remote/api_client.py b/audio_separator/remote/api_client.py
@@ -29,10 +29,11 @@ def __init__(self, api_url: str, logger: logging.Logger):
 
     def separate_audio(
         self,
-        file_path: str,
+        file_path: Optional[str] = None,
         model: Optional[str] = None,
         models: Optional[List[str]] = None,
         preset: Optional[str] = None,
+        gcs_uri: Optional[str] = None,
         # Output parameters
         output_format: str = "flac",
         output_bitrate: Optional[str] = None,
@@ -70,13 +71,28 @@ def separate_audio(
         mdxc_batch_size: int = 1,
         mdxc_pitch_shift: int = 0,
     ) -> dict:
-        """Submit audio separation job (asynchronous processing)."""
-        if not os.path.exists(file_path):
-            raise FileNotFoundError(f"Audio file not found: {file_path}")
+        """Submit audio separation job (asynchronous processing).
+
+        Provide either file_path (uploads file) or gcs_uri (server fetches from GCS).
+        """
+        if not file_path and not gcs_uri:
+            raise ValueError("Must provide either file_path or gcs_uri")
+        if file_path and gcs_uri:
+            raise ValueError("Provide either file_path or gcs_uri, not both")
+
+        files = {}
+        file_handle = None
+        if file_path:
+            if not os.path.exists(file_path):
+                raise FileNotFoundError(f"Audio file not found: {file_path}")
+            file_handle = open(file_path, "rb")
+            files = {"file": (os.path.basename(file_path), file_handle)}
 
-        files = {"file": (os.path.basename(file_path), open(file_path, "rb"))}
         data = {}
 
+        if gcs_uri:
+            data["gcs_uri"] = gcs_uri
+
         # Handle model/preset parameters
         if preset:
             data["preset"] = preset
@@ -133,21 +149,28 @@ def separate_audio(
 
         try:
             # Increase timeout for large files (5 minutes)
-            response = self.session.post(f"{self.api_url}/separate", files=files, data=data, timeout=300)
+            response = self.session.post(
+                f"{self.api_url}/separate",
+                files=files if files else None,
+                data=data,
+                timeout=300,
+            )
             response.raise_for_status()
             return response.json()
         except requests.RequestException as e:
             self.logger.error(f"Separation request failed: {e}")
             raise
         finally:
-            files["file"][1].close()
+            if file_handle:
+                file_handle.close()
 
     def separate_audio_and_wait(
         self,
-        file_path: str,
+        file_path: Optional[str] = None,
         model: Optional[str] = None,
         models: Optional[List[str]] = None,
         preset: Optional[str] = None,
+        gcs_uri: Optional[str] = None,
         timeout: int = 600,
         poll_interval: int = 10,
         download: bool = True,
@@ -192,9 +215,10 @@ def separate_audio_and_wait(
         and optionally download the result files.
 
         Args:
-            file_path: Path to the audio file to separate
+            file_path: Path to the audio file to separate (or None if using gcs_uri)
             model: Single model to use for separation (for backwards compatibility)
             models: List of models to use for separation
+            gcs_uri: GCS URI (gs://bucket/path) - server fetches directly from GCS
             timeout: Maximum time to wait for completion in seconds (default: 600)
             poll_interval: How often to check status in seconds (default: 10)
             download: Whether to automatically download result files (default: True)
@@ -216,13 +240,15 @@ def separate_audio_and_wait(
             models_desc = f"preset:{preset}"
         else:
             models_desc = models or ([model] if model else ["default"])
-        self.logger.info(f"Submitting separation job for '{file_path}' with {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})")
+        source_desc = gcs_uri if gcs_uri else file_path
+        self.logger.info(f"Submitting separation job for '{source_desc}' with {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})")
 
         result = self.separate_audio(
             file_path,
             model,
             models,
             preset,
+            gcs_uri,
             output_format,
             output_bitrate,
             normalization_threshold,
diff --git a/audio_separator/remote/deploy_cloudrun.py b/audio_separator/remote/deploy_cloudrun.py
@@ -61,6 +61,37 @@ def generate_file_hash(filename: str) -> str:
     return hashlib.sha256(filename.encode("utf-8")).hexdigest()[:16]
 
 
+def download_from_gcs(gcs_uri: str) -> tuple[bytes, str]:
+    """Download an audio file from GCS into memory.
+
+    Args:
+        gcs_uri: GCS URI in the format gs://bucket/path/to/file
+
+    Returns:
+        Tuple of (file_bytes, filename), filename being the blob's basename
+
+    Raises:
+        ValueError: If gcs_uri lacks the gs:// prefix, a bucket or a file name
+    """
+    from google.cloud import storage
+
+    if not gcs_uri.startswith("gs://"):
+        raise ValueError(f"Invalid GCS URI (must start with gs://): {gcs_uri}")
+
+    # partition() never raises, so "gs://bucket" fails the check below
+    # with a clear message instead of a bare "substring not found"
+    bucket_name, _, blob_path = gcs_uri[len("gs://"):].partition("/")
+    filename = os.path.basename(blob_path)
+    if not bucket_name or not filename:
+        raise ValueError(f"Invalid GCS URI (expected gs://bucket/path/to/file): {gcs_uri}")
+
+    logger.info(f"Downloading from GCS: bucket={bucket_name}, path={blob_path}")
+    audio_bytes = storage.Client().bucket(bucket_name).blob(blob_path).download_as_bytes()
+    logger.info(f"Downloaded {len(audio_bytes)} bytes from GCS")
+
+    return audio_bytes, filename
+
+
 try:
     AUDIO_SEPARATOR_VERSION = version("audio-separator")
 except Exception:
@@ -335,7 +366,8 @@ def render(self, content: typing.Any) -> bytes:
 
 @web_app.post("/separate")
 async def separate_audio(
-    file: UploadFile = File(..., description="Audio file to separate"),
+    file: Optional[UploadFile] = File(None, description="Audio file to separate"),
+    gcs_uri: Optional[str] = Form(None, description="GCS URI (gs://bucket/path) to fetch audio from instead of uploading"),
     model: Optional[str] = Form(None, description="Single model to use for separation"),
     models: Optional[str] = Form(None, description='JSON list of models, e.g. ["model1.ckpt", "model2.onnx"]'),
     preset: Optional[str] = Form(None, description="Ensemble preset name (e.g. instrumental_clean, karaoke)"),
@@ -376,9 +408,14 @@ async def separate_audio(
     mdxc_batch_size: int = Form(1),
     mdxc_pitch_shift: int = Form(0),
 ) -> dict:
-    """Upload an audio file and separate it into stems."""
-    if not file.filename:
-        raise HTTPException(status_code=400, detail="No file provided")
+    """Upload an audio file (or provide a GCS URI) and separate it into stems."""
+    # Validate: must provide exactly one of file or gcs_uri
+    has_file = file is not None and file.filename
+    has_gcs = gcs_uri is not None and gcs_uri.strip()
+    if not has_file and not has_gcs:
+        raise HTTPException(status_code=400, detail="Must provide either a file upload or gcs_uri parameter")
+    if has_file and has_gcs:
+        raise HTTPException(status_code=400, detail="Provide either file upload or gcs_uri, not both")
 
     try:
         # Parse models parameter
@@ -403,15 +440,24 @@ async def separate_audio(
             except json.JSONDecodeError as e:
                 raise HTTPException(status_code=400, detail=f"Invalid JSON in custom_output_names parameter: {e}")
 
-        audio_data = await file.read()
+        # Get audio data from file upload or GCS
+        if has_gcs:
+            try:
+                audio_data, filename = download_from_gcs(gcs_uri.strip())
+            except Exception as e:
+                raise HTTPException(status_code=400, detail=f"Failed to download from GCS: {e}")
+        else:
+            audio_data = await file.read()
+            filename = file.filename
+
         task_id = str(uuid.uuid4())
 
         # Set initial status
         job_status_store[task_id] = {
             "task_id": task_id,
             "status": "submitted",
             "progress": 0,
-            "original_filename": file.filename,
+            "original_filename": filename,
             "models_used": [f"preset:{preset}"] if preset else (models_list or ["default"]),
             "total_models": 1 if preset else (len(models_list) if models_list else 1),
             "current_model_index": 0,
@@ -425,7 +471,7 @@ async def separate_audio(
             None,
             lambda: separate_audio_sync(
                 audio_data,
-                file.filename,
+                filename,
                 task_id,
                 models_list,
                 preset,
@@ -608,7 +654,7 @@ async def root() -> dict:
             "All MDX, VR, Demucs, and MDXC architectures supported",
         ],
         "endpoints": {
-            "POST /separate": "Upload and separate audio file (supports presets, multiple models, all parameters)",
+            "POST /separate": "Separate audio file via upload or GCS URI (supports presets, multiple models, all parameters)",
             "GET /status/{task_id}": "Get job status and progress",
             "GET /download/{task_id}/{file_hash}": "Download separated file using hash identifier",
             "GET /presets": "List available ensemble presets",
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "audio-separator"
-version = "0.43.1"
+version = "0.44.0"
 description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
diff --git a/tests/unit/test_remote_api_client.py b/tests/unit/test_remote_api_client.py
@@ -517,3 +517,60 @@ def test_separate_audio_and_wait_with_hash_format(self, mock_sleep, mock_downloa
         ]
         actual_calls = [call.args for call in mock_download_hash.call_args_list]
         assert actual_calls == expected_calls
+
+    @patch("requests.Session.post")
+    def test_separate_audio_with_gcs_uri(self, mock_post, api_client):
+        """Test audio separation using GCS URI instead of file upload."""
+        mock_response = Mock()
+        mock_response.json.return_value = {
+            "task_id": "test-task-gcs",
+            "status": "submitted",
+        }
+        mock_response.raise_for_status.return_value = None
+        mock_post.return_value = mock_response
+
+        result = api_client.separate_audio(
+            gcs_uri="gs://my-bucket/path/to/audio.flac",
+            preset="instrumental_clean",
+        )
+
+        assert result["task_id"] == "test-task-gcs"
+
+        # Verify gcs_uri was sent in form data, no file upload
+        call_args = mock_post.call_args
+        assert call_args[1]["files"] is None
+        assert call_args[1]["data"]["gcs_uri"] == "gs://my-bucket/path/to/audio.flac"
+
+    def test_separate_audio_requires_file_or_gcs_uri(self, api_client):
+        """Test that either file_path or gcs_uri must be provided."""
+        with pytest.raises(ValueError, match="Must provide either"):
+            api_client.separate_audio()
+
+    def test_separate_audio_rejects_both_file_and_gcs_uri(self, api_client, mock_audio_file):
+        """Test that providing both file_path and gcs_uri raises an error."""
+        with pytest.raises(ValueError, match="not both"):
+            api_client.separate_audio(
+                file_path=mock_audio_file,
+                gcs_uri="gs://bucket/file.flac",
+            )
+
+    @patch.object(AudioSeparatorAPIClient, "separate_audio")
+    @patch.object(AudioSeparatorAPIClient, "get_job_status")
+    @patch("time.sleep")
+    def test_separate_audio_and_wait_with_gcs_uri(self, mock_sleep, mock_status, mock_separate, api_client):
+        """Test separate_audio_and_wait with GCS URI."""
+        mock_separate.return_value = {"task_id": "test-task-gcs"}
+        mock_status.side_effect = [
+            {"status": "completed", "files": {"hash1": "output.flac"}},
+        ]
+
+        result = api_client.separate_audio_and_wait(
+            gcs_uri="gs://my-bucket/audio.flac",
+            preset="instrumental_clean",
+            download=False,
+        )
+
+        assert result["status"] == "completed"
+        # Verify gcs_uri was passed through to separate_audio
+        call_args = mock_separate.call_args
+        assert call_args[0][4] == "gs://my-bucket/audio.flac"  # positional arg for gcs_uri