Skip to content

Commit b8ded33

Browse files
beveradb and claude authored
fix: synchronous endpoint + no semaphore, let Cloud Run scale like Modal (#282)
The fire-and-forget + semaphore design caused all jobs to queue on one instance. Cloud Run couldn't see background threads as "busy", so it never scaled to new instances.

Fix: make the endpoint synchronous (await the executor) with concurrency=1. Cloud Run sees each request as active during processing and scales to new GPU instances for concurrent jobs — matching Modal's .spawn().

Increase the client POST timeout to 1800s to match.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 349ba98 commit b8ded33

2 files changed

Lines changed: 12 additions & 21 deletions

File tree

audio_separator/remote/api_client.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ def separate_audio(
148148
data["custom_output_names"] = json.dumps(custom_output_names)
149149

150150
try:
151-
# Server returns immediately with task_id; 60s is generous for submission
151+
# Server processes synchronously; 1800s matches Cloud Run request timeout.
152152
# When using gcs_uri (no file upload), we still need multipart/form-data
153153
# encoding because FastAPI requires it for endpoints with File()/Form() params.
154154
# Passing a dummy empty file field forces requests to use multipart encoding.
@@ -158,7 +158,7 @@ def separate_audio(
158158
f"{self.api_url}/separate",
159159
files=files,
160160
data=data,
161-
timeout=60,
161+
timeout=1800,
162162
)
163163
response.raise_for_status()
164164
return response.json()

audio_separator/remote/deploy_cloudrun.py

Lines changed: 10 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,6 @@
5555
models_ready = False
5656

5757
# --- Async job infrastructure ---
58-
gpu_semaphore = threading.Semaphore(1)
5958

6059
OUTPUT_BUCKET = os.environ.get("OUTPUT_BUCKET", "nomadkaraoke-audio-separator-outputs")
6160
GCP_PROJECT = os.environ.get("GCP_PROJECT", "nomadkaraoke")
@@ -231,11 +230,8 @@ def update_status(status: str, progress: int = 0, error: str = None, files: dict
231230
except Exception as e:
232231
logger.warning(f"[{task_id}] Failed to update Firestore status: {e}")
233232

234-
# Wait for GPU availability
235-
update_status("queued", 0)
236-
logger.info(f"[{task_id}] Waiting for GPU semaphore...")
237-
gpu_semaphore.acquire()
238-
logger.info(f"[{task_id}] GPU semaphore acquired, starting separation")
233+
update_status("processing", 0)
234+
logger.info(f"[{task_id}] Starting separation")
239235
try:
240236
os.makedirs(f"{STORAGE_DIR}/outputs/{task_id}", exist_ok=True)
241237
output_dir = f"{STORAGE_DIR}/outputs/{task_id}"
@@ -379,8 +375,7 @@ def update_status(status: str, progress: int = 0, error: str = None, files: dict
379375
return {"task_id": task_id, "status": "error", "error": str(e), "models_used": models_used}
380376

381377
finally:
382-
gpu_semaphore.release()
383-
logger.info(f"[{task_id}] GPU semaphore released")
378+
logger.info(f"[{task_id}] Separation finished, cleaning up local files")
384379
# Clean up local files (outputs are in GCS now)
385380
output_dir = f"{STORAGE_DIR}/outputs/{task_id}"
386381
if os.path.exists(output_dir):
@@ -507,9 +502,12 @@ async def separate_audio(
507502
"instance_id": instance_id,
508503
})
509504

510-
# Fire-and-forget: run separation in background thread
505+
# Run separation synchronously — Cloud Run keeps this request active,
506+
# which lets the autoscaler know this instance is busy and route new
507+
# requests to new instances (with concurrency=1).
508+
# This matches Modal's .spawn() pattern: each job gets its own GPU instance.
511509
loop = asyncio.get_event_loop()
512-
loop.run_in_executor(
510+
result = await loop.run_in_executor(
513511
None,
514512
lambda: separate_audio_sync(
515513
audio_data,
@@ -551,15 +549,8 @@ async def separate_audio(
551549
),
552550
)
553551

554-
# Return immediately — client polls /status/{task_id}
555-
return {
556-
"task_id": task_id,
557-
"status": "submitted",
558-
"progress": 0,
559-
"original_filename": filename,
560-
"models_used": [f"preset:{preset}"] if preset else (models_list or ["default"]),
561-
"total_models": 1 if preset else (len(models_list) if models_list else 1),
562-
}
552+
# Return the completed/error result (Firestore + GCS already updated by separate_audio_sync)
553+
return result
563554

564555
except HTTPException:
565556
raise

0 commit comments

Comments
 (0)