From e83741e99a6b8e737cc5c17f95052d2f50f42afc Mon Sep 17 00:00:00 2001
From: Andrew Beveridge <andrew@beveridge.uk>
Date: Mon, 23 Mar 2026 10:37:49 -0400
Subject: [PATCH 1/2] fix: ensemble separation ignores custom_output_names,
 misclassifies stems

When using ensemble presets with custom_output_names, intermediate per-model
separations received custom names that replaced the _(StemType)_ filename
markers. This broke stem type classification (regex extraction), causing all
stems to be labeled "Unknown"/"Other" and custom_output_names to not match.

Fix: pass None to _separate_file for intermediate ensemble files (matching
how _process_with_chunking already works), apply custom_output_names only
to the final ensembled output.

Bumps version to 0.43.1.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 audio_separator/separator/separator.py |   7 +-
 pyproject.toml                         |   2 +-
 tests/reproduce_ensemble_bug.py        | 113 +++++++++++++++++++++++++
 tests/unit/test_stem_naming.py         |  66 +++++++++++++++
 4 files changed, 185 insertions(+), 3 deletions(-)
 create mode 100644 tests/reproduce_ensemble_bug.py

diff --git a/audio_separator/separator/separator.py b/audio_separator/separator/separator.py
index b2c4002..8302488 100644
--- a/audio_separator/separator/separator.py
+++ b/audio_separator/separator/separator.py
@@ -1279,8 +1279,11 @@ def _separate_ensemble(self, audio_file_path, custom_output_names=None):
                         self.model_instance.output_dir = temp_dir
 
                     try:
-                        # Perform separation
-                        model_stems = self._separate_file(path, custom_output_names)
+                        # Perform separation WITHOUT custom_output_names for intermediate files.
+                        # Intermediate stems must use the default "base_(StemType)_model.ext" naming
+                        # so the regex below can extract stem types for classification.
+                        # custom_output_names is applied later to the final ensembled output.
+                        model_stems = self._separate_file(path, None)
 
                         # Extract and normalize stem names from this model's outputs
                         model_stem_names = []
diff --git a/pyproject.toml b/pyproject.toml
index f99f809..c668678 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "audio-separator"
-version = "0.43.0"
+version = "0.43.1"
 description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
diff --git a/tests/reproduce_ensemble_bug.py b/tests/reproduce_ensemble_bug.py
new file mode 100644
index 0000000..c9f720f
--- /dev/null
+++ b/tests/reproduce_ensemble_bug.py
@@ -0,0 +1,113 @@
+"""
+Reproduce the ensemble + custom_output_names bug against the live API.
+
+This script simulates exactly what karaoke-gen's audio_processor does:
+1. Call the API with preset=instrumental_clean and custom_output_names
+2. Download the results
+3. Check if the expected filenames exist
+
+Expected behavior (fixed): files named job123_mixed_vocals.flac and job123_mixed_instrumental.flac
+Bug behavior (current prod): files named with original filename + _(Unknown)_ or _(Other)_
+
+Usage:
+    AUDIO_SEPARATOR_API_URL=<url> python tests/reproduce_ensemble_bug.py
+"""
+import json
+import os
+import sys
+import tempfile
+
+# Add the repo to path so we can import the API client
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from audio_separator.remote.api_client import AudioSeparatorAPIClient
+
+
+def main():
+    api_url = os.environ.get("AUDIO_SEPARATOR_API_URL")
+    if not api_url:
+        print("ERROR: Set AUDIO_SEPARATOR_API_URL environment variable")
+        sys.exit(1)
+
+    test_audio = os.path.join(os.path.dirname(os.path.abspath(__file__)), "inputs", "under_pressure_harmonies.flac")
+    if not os.path.exists(test_audio):
+        print(f"ERROR: Test audio file not found: {test_audio}")
+        sys.exit(1)
+
+    with tempfile.TemporaryDirectory(prefix="ensemble_bug_test_") as output_dir:
+        print(f"API URL: {api_url}")
+        print(f"Output dir: {output_dir}")
+        print()
+
+        import logging
+        logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
+        logger = logging.getLogger("test")
+
+        client = AudioSeparatorAPIClient(api_url, logger)
+
+        # This is exactly what karaoke-gen does in _process_audio_separation_remote
+        file_prefix = "job123"  # Simulates job_id-based prefix
+        custom_output_names = {
+            "Vocals": f"{file_prefix}_mixed_vocals",
+            "Instrumental": f"{file_prefix}_mixed_instrumental",
+        }
+
+        print("=" * 60)
+        print("TEST: Preset + custom_output_names (reproduces karaoke-gen bug)")
+        print(f"  preset: instrumental_clean")
+        print(f"  custom_output_names: {custom_output_names}")
+        print("=" * 60)
+        print()
+
+        result = client.separate_audio_and_wait(
+            test_audio,
+            preset="instrumental_clean",
+            timeout=600,
+            poll_interval=10,
+            download=True,
+            output_dir=output_dir,
+            output_format="flac",
+            custom_output_names=custom_output_names,
+        )
+
+        print()
+        print("=" * 60)
+        print("RESULTS")
+        print("=" * 60)
+        print(f"Status: {result.get('status')}")
+        print(f"Downloaded files: {result.get('downloaded_files', [])}")
+        print()
+
+        # List what's actually in the output dir
+        actual_files = os.listdir(output_dir)
+        print(f"Files in output dir: {actual_files}")
+        print()
+
+        # Check for expected files
+        fmt = "flac"
+        expected_vocals = f"{file_prefix}_mixed_vocals.{fmt}"
+        expected_instrumental = f"{file_prefix}_mixed_instrumental.{fmt}"
+
+        vocals_exists = os.path.exists(os.path.join(output_dir, expected_vocals))
+        instrumental_exists = os.path.exists(os.path.join(output_dir, expected_instrumental))
+
+        print("EXPECTED FILE CHECK:")
+        print(f"  {expected_vocals}: {'FOUND' if vocals_exists else 'MISSING'}")
+        print(f"  {expected_instrumental}: {'FOUND' if instrumental_exists else 'MISSING'}")
+        print()
+
+        if vocals_exists and instrumental_exists:
+            print("RESULT: PASS - custom_output_names working correctly")
+            return 0
+        else:
+            print("RESULT: FAIL - custom_output_names NOT applied (bug reproduced)")
+            print()
+            print("Actual files downloaded:")
+            for f in actual_files:
+                size = os.path.getsize(os.path.join(output_dir, f))
+                print(f"  {f} ({size / 1024:.1f} KB)")
+            return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/unit/test_stem_naming.py b/tests/unit/test_stem_naming.py
index 7b1af75..183eec4 100644
--- a/tests/unit/test_stem_naming.py
+++ b/tests/unit/test_stem_naming.py
@@ -161,3 +161,69 @@ def test_custom_ensemble_slug_generation(self):
         assert "Inst_HQ_5" in filename
         assert "karaoke_aufr" in filename
         assert filename.startswith("mardy20s_(Vocals)_custom_ensemble_")
+
+
+class TestEnsembleCustomOutputNames:
+    """Test that custom_output_names works correctly with ensemble separation."""
+
+    def test_custom_output_names_not_passed_to_intermediate_separation(self):
+        """Intermediate per-model separations must NOT receive custom_output_names.
+
+        custom_output_names replaces the default '_(StemType)_model' naming, which
+        removes the _(StemType)_ markers needed by _separate_ensemble to classify
+        stems. custom_output_names should only be applied to the final ensembled output.
+        """
+        import re
+        from unittest.mock import patch, MagicMock, call
+        from audio_separator.separator.separator import Separator
+
+        sep = Separator(
+            log_level=logging.WARNING,
+            model_file_dir="/tmp/models",
+            output_dir="/tmp/output",
+            output_format="flac",
+        )
+        sep.model_filenames = ["model_a.ckpt", "model_b.ckpt"]
+        sep.model_filename = ["model_a.ckpt", "model_b.ckpt"]
+        sep.ensemble_algorithm = "uvr_max_spec"
+        sep.ensemble_weights = None
+        sep.ensemble_preset = "test_preset"
+        sep.sample_rate = 44100
+
+        custom_names = {"Vocals": "job123_mixed_vocals", "Instrumental": "job123_mixed_instrumental"}
+
+        with patch.object(sep, '_separate_file') as mock_separate, \
+             patch.object(sep, 'load_model'), \
+             patch('audio_separator.separator.separator.Ensembler') as MockEnsembler, \
+             patch('audio_separator.separator.separator.librosa') as mock_librosa, \
+             patch('audio_separator.separator.separator.np') as mock_np:
+
+            # Mock _separate_file to return files with proper _(StemType)_ naming
+            mock_separate.side_effect = [
+                ["/tmp/ensemble/song_(Vocals)_model_a.flac", "/tmp/ensemble/song_(Instrumental)_model_a.flac"],
+                ["/tmp/ensemble/song_(Vocals)_model_b.flac", "/tmp/ensemble/song_(Instrumental)_model_b.flac"],
+            ]
+
+            # Mock librosa and numpy for ensembling
+            mock_wav = MagicMock()
+            mock_wav.ndim = 2
+            mock_wav.shape = (2, 44100)
+            mock_librosa.load.return_value = (mock_wav, 44100)
+            mock_np.asfortranarray.return_value = mock_wav
+
+            mock_ensembler = MagicMock()
+            mock_ensembler.ensemble.return_value = mock_wav
+            MockEnsembler.return_value = mock_ensembler
+
+            # Mock model_instance for write_audio
+            sep.model_instance = MagicMock()
+            sep.model_instance.output_dir = "/tmp/output"
+
+            sep._separate_ensemble("/tmp/song.flac", custom_output_names=custom_names)
+
+            # Key assertion: _separate_file must be called with None, not custom_names
+            for call_args in mock_separate.call_args_list:
+                assert call_args[0][1] is None, (
+                    f"_separate_file was called with custom_output_names={call_args[0][1]!r} "
+                    f"but should be None for intermediate ensemble files"
+                )

From 812934997ae51d327c308499d730ab09e9fdf2af Mon Sep 17 00:00:00 2001
From: Andrew Beveridge <andrew@beveridge.uk>
Date: Mon, 23 Mar 2026 11:13:57 -0400
Subject: [PATCH 2/2] docs: add CI GPU runner infrastructure documentation

Documents how the auto-scaling GPU runner system works, including
architecture, troubleshooting steps, and the critical requirement
to update branch protection rules when renaming integration test jobs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/CI-GPU-RUNNERS.md | 165 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 docs/CI-GPU-RUNNERS.md

diff --git a/docs/CI-GPU-RUNNERS.md b/docs/CI-GPU-RUNNERS.md
new file mode 100644
index 0000000..ea1e261
--- /dev/null
+++ b/docs/CI-GPU-RUNNERS.md
@@ -0,0 +1,165 @@
+# CI GPU Runner Infrastructure
+
+This document explains how the GPU-based integration test infrastructure works for this repo.
+
+## Overview
+
+Integration tests require GPU hardware to run ML model inference. GPU VMs are expensive (~$1.62/hr for 3x T4), so they auto-scale to zero when idle. The system automatically starts runners when CI jobs need them; a scheduled check every 15 minutes stops any runner that has been idle for more than 15 minutes while no jobs are pending.
+
+## Architecture
+
+```
+GitHub webhook (workflow_job.queued)
+    │
+    ▼
+Cloud Function (github-runner-manager)
+    │
+    ├── Job has "gpu" label? → Start GPU runners (3x n1-standard-4 + T4)
+    ├── Job has "self-hosted" label? → Start CPU runners
+    └── Neither? → Ignore
+
+Cloud Scheduler (every 15 min)
+    │
+    ▼
+Cloud Function (?action=check_idle)
+    │
+    └── No pending jobs + runner idle > 15 min? → Stop runner
+```
+
+### Components
+
+| Component | Location | Purpose |
+|-----------|----------|---------|
+| Cloud Function | `karaoke-gen/infrastructure/functions/runner_manager/main.py` | Starts/stops runner VMs based on demand |
+| Pulumi module | `karaoke-gen/infrastructure/modules/runner_manager.py` | Deploys the function, scheduler, and IAM |
+| GPU VM definitions | `karaoke-gen/infrastructure/compute/github_runners.py` | 3x n1-standard-4 with T4 GPU |
+| GPU startup script | `karaoke-gen/infrastructure/compute/startup_scripts/github_runner_gpu.sh` | Installs NVIDIA drivers, Python, registers runner |
+| Config | `karaoke-gen/infrastructure/config.py` | Runner count, labels, idle timeout |
+| GitHub webhook | Org-level (`nomadkaraoke`) | Sends `workflow_job` events to Cloud Function |
+
+### GPU Runner VMs
+
+- **Count**: 3 (configurable via `NUM_GPU_RUNNERS` in config.py)
+- **Machine type**: n1-standard-4 (4 vCPU, 15GB RAM) + 1x NVIDIA T4
+- **Zone**: us-central1-a
+- **Labels**: `self-hosted, linux, x64, gcp, gpu`
+- **Startup time**: ~15-20 min (NVIDIA driver install, Python build, model download)
+- **Model cache**: ~14GB of ML models pre-downloaded to `/opt/audio-separator-models/`
+
+### Required GitHub Branch Protection Checks
+
+The `Protect main` ruleset (ID: 529535) requires these checks to pass before merge:
+
+- `unit-tests` — from `run-unit-tests.yaml` (runs on GitHub-hosted runners)
+- `ensemble-presets` — from `run-integration-tests.yaml` (runs on GPU runners)
+- `core-models` — from `run-integration-tests.yaml` (runs on GPU runners)
+- `stems-and-quality` — from `run-integration-tests.yaml` (runs on GPU runners)
+
+**IMPORTANT**: If integration test job names change (e.g., splitting or renaming jobs), you MUST update the ruleset to match. The ruleset is configured at:
+https://github.com/nomadkaraoke/python-audio-separator/settings/rules/529535
+
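+To update via API (the request body below defines the ruleset's full rule list, so replace `JOB_NAME_HERE` and add one `required_status_checks` entry for every check that must stay required):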
+```bash
+gh api repos/nomadkaraoke/python-audio-separator/rulesets/529535 \
+  --method PUT --input - <<'EOF'
+{
+  "name": "Protect main",
+  "enforcement": "active",
+  "target": "branch",
+  "conditions": {"ref_name": {"include": ["~DEFAULT_BRANCH"], "exclude": []}},
+  "rules": [
+    {"type": "deletion"},
+    {"type": "pull_request", "parameters": {
+      "required_approving_review_count": 0,
+      "allowed_merge_methods": ["squash"]
+    }},
+    {"type": "required_status_checks", "parameters": {
+      "required_status_checks": [
+        {"context": "unit-tests", "integration_id": 15368},
+        {"context": "JOB_NAME_HERE", "integration_id": 15368}
+      ]
+    }}
+  ]
+}
+EOF
+```
+
+## Troubleshooting
+
+### Integration tests stuck in "queued"
+
+**Symptoms**: PR checks show `pending` for `ensemble-presets`, `core-models`, `stems-and-quality`.
+
+**Diagnosis steps**:
+
+1. Check if GPU runners are online:
+   ```bash
+   gh api orgs/nomadkaraoke/actions/runners \
+     --jq '.runners[] | select(.labels[].name == "gpu") | {name, status, busy}'
+   ```
+
+2. Check if GPU VMs exist:
+   ```bash
+   gcloud compute instances list --project=nomadkaraoke --filter="name~gpu"
+   ```
+
+3. Check Cloud Function logs for webhook delivery:
+   ```bash
+   gcloud logging read 'resource.labels.service_name="github-runner-manager"' \
+     --project=nomadkaraoke --limit=20 \
+     --format="value(timestamp,textPayload,jsonPayload.message)"
+   ```
+
+4. Check GPU runner startup logs (if VMs are RUNNING but GitHub shows offline):
+   ```bash
+   gcloud compute ssh github-gpu-runner-1 --zone=us-central1-a --project=nomadkaraoke \
+     --command="tail -50 /var/log/github-runner-startup.log"
+   ```
+
+### GPU VMs don't exist
+
+If `gcloud compute instances list` shows no GPU runners but Pulumi state thinks they exist:
+
+```bash
+# 1. Remove stale state (from karaoke-gen/infrastructure/ dir)
+pulumi state delete "urn:pulumi:prod::karaoke-gen-infrastructure::gcp:compute/instance:Instance::github-gpu-runner-1" --target-dependents --yes
+pulumi state delete "urn:pulumi:prod::karaoke-gen-infrastructure::gcp:compute/instance:Instance::github-gpu-runner-2" --target-dependents --yes
+pulumi state delete "urn:pulumi:prod::karaoke-gen-infrastructure::gcp:compute/instance:Instance::github-gpu-runner-3" --target-dependents --yes
+
+# 2. Recreate
+pulumi up --yes
+
+# 3. Re-import dependent resources that got removed (runner-manager function, IAM, scheduler)
+# Check `pulumi preview` for what needs importing
+```
+
+### GPU runner startup fails (NVIDIA driver issues)
+
+The startup script handles kernel header mismatches by upgrading the kernel and rebooting once. If the runner still fails:
+
+```bash
+# SSH in and check
+gcloud compute ssh github-gpu-runner-1 --zone=us-central1-a --project=nomadkaraoke \
+  --command="nvidia-smi; dkms status; uname -r"
+```
+
+See `karaoke-gen` memory file `project_gpu_runner_drivers.md` for known issues.
+
+### Webhook not firing
+
+Check the org-level webhook configuration:
+```bash
+gh api orgs/nomadkaraoke/hooks \
+  --jq '.[] | select(.events[] == "workflow_job") | {id, active, config: {url: .config.url}}'
+```
+
+The webhook URL should point to: `https://us-central1-nomadkaraoke.cloudfunctions.net/github-runner-manager`
+
+## Cost
+
+| Scenario | Cost |
+|----------|------|
+| Per GPU runner hour | ~$0.54/hr (n1-standard-4 + T4) |
+| 3 runners × 15 min CI run | ~$0.41 |
+| Idle (scale to zero) | $0 |
+| Typical daily cost (5 PRs) | ~$2 |