From e83741e99a6b8e737cc5c17f95052d2f50f42afc Mon Sep 17 00:00:00 2001 From: Andrew Beveridge Date: Mon, 23 Mar 2026 10:37:49 -0400 Subject: [PATCH 1/2] fix: ensemble separation ignores custom_output_names, misclassifies stems When using ensemble presets with custom_output_names, intermediate per-model separations received custom names that replaced the _(StemType)_ filename markers. This broke stem type classification (regex extraction), causing all stems to be labeled "Unknown"/"Other" and custom_output_names to not match. Fix: pass None to _separate_file for intermediate ensemble files (matching how _process_with_chunking already works), apply custom_output_names only to the final ensembled output. Bumps version to 0.43.1. Co-Authored-By: Claude Opus 4.6 (1M context) --- audio_separator/separator/separator.py | 7 +- pyproject.toml | 2 +- tests/reproduce_ensemble_bug.py | 113 +++++++++++++++++++++++++ tests/unit/test_stem_naming.py | 66 +++++++++++++++ 4 files changed, 185 insertions(+), 3 deletions(-) create mode 100644 tests/reproduce_ensemble_bug.py diff --git a/audio_separator/separator/separator.py b/audio_separator/separator/separator.py index b2c4002..8302488 100644 --- a/audio_separator/separator/separator.py +++ b/audio_separator/separator/separator.py @@ -1279,8 +1279,11 @@ def _separate_ensemble(self, audio_file_path, custom_output_names=None): self.model_instance.output_dir = temp_dir try: - # Perform separation - model_stems = self._separate_file(path, custom_output_names) + # Perform separation WITHOUT custom_output_names for intermediate files. + # Intermediate stems must use the default "base_(StemType)_model.ext" naming + # so the regex below can extract stem types for classification. + # custom_output_names is applied later to the final ensembled output. 
+ model_stems = self._separate_file(path, None) # Extract and normalize stem names from this model's outputs model_stem_names = [] diff --git a/pyproject.toml b/pyproject.toml index f99f809..c668678 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "audio-separator" -version = "0.43.0" +version = "0.43.1" description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07" authors = ["Andrew Beveridge "] license = "MIT" diff --git a/tests/reproduce_ensemble_bug.py b/tests/reproduce_ensemble_bug.py new file mode 100644 index 0000000..c9f720f --- /dev/null +++ b/tests/reproduce_ensemble_bug.py @@ -0,0 +1,113 @@ +""" +Reproduce the ensemble + custom_output_names bug against the live API. + +This script simulates exactly what karaoke-gen's audio_processor does: +1. Call the API with preset=instrumental_clean and custom_output_names +2. Download the results +3. Check if the expected filenames exist + +Expected behavior (fixed): files named job123_mixed_vocals.flac and job123_mixed_instrumental.flac +Bug behavior (current prod): files named with original filename + _(Unknown)_ or _(Other)_ + +Usage: + AUDIO_SEPARATOR_API_URL=URL python tests/reproduce_ensemble_bug.py +""" +import json +import os +import sys +import tempfile + +# Add the repo to path so we can import the API client +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from audio_separator.remote.api_client import AudioSeparatorAPIClient + + +def main(): + api_url = os.environ.get("AUDIO_SEPARATOR_API_URL") + if not api_url: + print("ERROR: Set AUDIO_SEPARATOR_API_URL environment variable") + sys.exit(1) + + test_audio = os.path.join(os.path.dirname(os.path.abspath(__file__)), "inputs", "under_pressure_harmonies.flac") + if not os.path.exists(test_audio): + print(f"ERROR: Test audio file not found: {test_audio}") + sys.exit(1) + + with 
tempfile.TemporaryDirectory(prefix="ensemble_bug_test_") as output_dir: + print(f"API URL: {api_url}") + print(f"Output dir: {output_dir}") + print() + + import logging + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s") + logger = logging.getLogger("test") + + client = AudioSeparatorAPIClient(api_url, logger) + + # This is exactly what karaoke-gen does in _process_audio_separation_remote + file_prefix = "job123" # Simulates job_id-based prefix + custom_output_names = { + "Vocals": f"{file_prefix}_mixed_vocals", + "Instrumental": f"{file_prefix}_mixed_instrumental", + } + + print("=" * 60) + print("TEST: Preset + custom_output_names (reproduces karaoke-gen bug)") + print(f" preset: instrumental_clean") + print(f" custom_output_names: {custom_output_names}") + print("=" * 60) + print() + + result = client.separate_audio_and_wait( + test_audio, + preset="instrumental_clean", + timeout=600, + poll_interval=10, + download=True, + output_dir=output_dir, + output_format="flac", + custom_output_names=custom_output_names, + ) + + print() + print("=" * 60) + print("RESULTS") + print("=" * 60) + print(f"Status: {result.get('status')}") + print(f"Downloaded files: {result.get('downloaded_files', [])}") + print() + + # List what's actually in the output dir + actual_files = os.listdir(output_dir) + print(f"Files in output dir: {actual_files}") + print() + + # Check for expected files + fmt = "flac" + expected_vocals = f"{file_prefix}_mixed_vocals.{fmt}" + expected_instrumental = f"{file_prefix}_mixed_instrumental.{fmt}" + + vocals_exists = os.path.exists(os.path.join(output_dir, expected_vocals)) + instrumental_exists = os.path.exists(os.path.join(output_dir, expected_instrumental)) + + print("EXPECTED FILE CHECK:") + print(f" {expected_vocals}: {'FOUND' if vocals_exists else 'MISSING'}") + print(f" {expected_instrumental}: {'FOUND' if instrumental_exists else 'MISSING'}") + print() + + if vocals_exists and instrumental_exists: + 
print("RESULT: PASS - custom_output_names working correctly") + return 0 + else: + print("RESULT: FAIL - custom_output_names NOT applied (bug reproduced)") + print() + print("Actual files downloaded:") + for f in actual_files: + size = os.path.getsize(os.path.join(output_dir, f)) + print(f" {f} ({size / 1024:.1f} KB)") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/unit/test_stem_naming.py b/tests/unit/test_stem_naming.py index 7b1af75..183eec4 100644 --- a/tests/unit/test_stem_naming.py +++ b/tests/unit/test_stem_naming.py @@ -161,3 +161,69 @@ def test_custom_ensemble_slug_generation(self): assert "Inst_HQ_5" in filename assert "karaoke_aufr" in filename assert filename.startswith("mardy20s_(Vocals)_custom_ensemble_") + + +class TestEnsembleCustomOutputNames: + """Test that custom_output_names works correctly with ensemble separation.""" + + def test_custom_output_names_not_passed_to_intermediate_separation(self): + """Intermediate per-model separations must NOT receive custom_output_names. + + custom_output_names replaces the default '_(StemType)_model' naming, which + removes the _(StemType)_ markers needed by _separate_ensemble to classify + stems. custom_output_names should only be applied to the final ensembled output. 
+ """ + import re + from unittest.mock import patch, MagicMock, call + from audio_separator.separator.separator import Separator + + sep = Separator( + log_level=logging.WARNING, + model_file_dir="/tmp/models", + output_dir="/tmp/output", + output_format="flac", + ) + sep.model_filenames = ["model_a.ckpt", "model_b.ckpt"] + sep.model_filename = ["model_a.ckpt", "model_b.ckpt"] + sep.ensemble_algorithm = "uvr_max_spec" + sep.ensemble_weights = None + sep.ensemble_preset = "test_preset" + sep.sample_rate = 44100 + + custom_names = {"Vocals": "job123_mixed_vocals", "Instrumental": "job123_mixed_instrumental"} + + with patch.object(sep, '_separate_file') as mock_separate, \ + patch.object(sep, 'load_model'), \ + patch('audio_separator.separator.separator.Ensembler') as MockEnsembler, \ + patch('audio_separator.separator.separator.librosa') as mock_librosa, \ + patch('audio_separator.separator.separator.np') as mock_np: + + # Mock _separate_file to return files with proper _(StemType)_ naming + mock_separate.side_effect = [ + ["/tmp/ensemble/song_(Vocals)_model_a.flac", "/tmp/ensemble/song_(Instrumental)_model_a.flac"], + ["/tmp/ensemble/song_(Vocals)_model_b.flac", "/tmp/ensemble/song_(Instrumental)_model_b.flac"], + ] + + # Mock librosa and numpy for ensembling + mock_wav = MagicMock() + mock_wav.ndim = 2 + mock_wav.shape = (2, 44100) + mock_librosa.load.return_value = (mock_wav, 44100) + mock_np.asfortranarray.return_value = mock_wav + + mock_ensembler = MagicMock() + mock_ensembler.ensemble.return_value = mock_wav + MockEnsembler.return_value = mock_ensembler + + # Mock model_instance for write_audio + sep.model_instance = MagicMock() + sep.model_instance.output_dir = "/tmp/output" + + sep._separate_ensemble("/tmp/song.flac", custom_output_names=custom_names) + + # Key assertion: _separate_file must be called with None, not custom_names + for call_args in mock_separate.call_args_list: + assert call_args[0][1] is None, ( + f"_separate_file was called with 
custom_output_names={call_args[0][1]!r} " + f"but should be None for intermediate ensemble files" + ) From 812934997ae51d327c308499d730ab09e9fdf2af Mon Sep 17 00:00:00 2001 From: Andrew Beveridge Date: Mon, 23 Mar 2026 11:13:57 -0400 Subject: [PATCH 2/2] docs: add CI GPU runner infrastructure documentation Documents how the auto-scaling GPU runner system works, including architecture, troubleshooting steps, and the critical requirement to update branch protection rules when renaming integration test jobs. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/CI-GPU-RUNNERS.md | 165 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 docs/CI-GPU-RUNNERS.md diff --git a/docs/CI-GPU-RUNNERS.md b/docs/CI-GPU-RUNNERS.md new file mode 100644 index 0000000..ea1e261 --- /dev/null +++ b/docs/CI-GPU-RUNNERS.md @@ -0,0 +1,165 @@ +# CI GPU Runner Infrastructure + +This document explains how the GPU-based integration test infrastructure works for this repo. + +## Overview + +Integration tests require GPU hardware to run ML model inference. GPU VMs are expensive (~$1.62/hr for 3x T4), so they auto-scale to zero when idle. The system automatically starts runners when CI jobs need them and stops them after 15 minutes of inactivity. + +## Architecture + +``` +GitHub webhook (workflow_job.queued) + │ + ▼ +Cloud Function (github-runner-manager) + │ + ├── Job has "gpu" label? → Start GPU runners (3x n1-standard-4 + T4) + ├── Job has "self-hosted" label? → Start CPU runners + └── Neither? → Ignore + +Cloud Scheduler (every 15 min) + │ + ▼ +Cloud Function (?action=check_idle) + │ + └── No pending jobs + runner idle > 15 min? 
→ Stop runner +``` + +### Components + +| Component | Location | Purpose | +|-----------|----------|---------| +| Cloud Function | `karaoke-gen/infrastructure/functions/runner_manager/main.py` | Starts/stops runner VMs based on demand | +| Pulumi module | `karaoke-gen/infrastructure/modules/runner_manager.py` | Deploys the function, scheduler, and IAM | +| GPU VM definitions | `karaoke-gen/infrastructure/compute/github_runners.py` | 3x n1-standard-4 with T4 GPU | +| GPU startup script | `karaoke-gen/infrastructure/compute/startup_scripts/github_runner_gpu.sh` | Installs NVIDIA drivers, Python, registers runner | +| Config | `karaoke-gen/infrastructure/config.py` | Runner count, labels, idle timeout | +| GitHub webhook | Org-level (`nomadkaraoke`) | Sends `workflow_job` events to Cloud Function | + +### GPU Runner VMs + +- **Count**: 3 (configurable via `NUM_GPU_RUNNERS` in config.py) +- **Machine type**: n1-standard-4 (4 vCPU, 15GB RAM) + 1x NVIDIA T4 +- **Zone**: us-central1-a +- **Labels**: `self-hosted, linux, x64, gcp, gpu` +- **Startup time**: ~15-20 min (NVIDIA driver install, Python build, model download) +- **Model cache**: ~14GB of ML models pre-downloaded to `/opt/audio-separator-models/` + +### Required GitHub Branch Protection Checks + +The `Protect main` ruleset (ID: 529535) requires these checks to pass before merge: + +- `unit-tests` — from `run-unit-tests.yaml` (runs on GitHub-hosted runners) +- `ensemble-presets` — from `run-integration-tests.yaml` (runs on GPU runners) +- `core-models` — from `run-integration-tests.yaml` (runs on GPU runners) +- `stems-and-quality` — from `run-integration-tests.yaml` (runs on GPU runners) + +**IMPORTANT**: If integration test job names change (e.g., splitting or renaming jobs), you MUST update the ruleset to match. 
The ruleset is configured at: +https://github.com/nomadkaraoke/python-audio-separator/settings/rules/529535 + +To update via API: +```bash +gh api repos/nomadkaraoke/python-audio-separator/rulesets/529535 \ + --method PUT --input - <<'EOF' +{ + "name": "Protect main", + "enforcement": "active", + "target": "branch", + "conditions": {"ref_name": {"include": ["~DEFAULT_BRANCH"], "exclude": []}}, + "rules": [ + {"type": "deletion"}, + {"type": "pull_request", "parameters": { + "required_approving_review_count": 0, + "allowed_merge_methods": ["squash"] + }}, + {"type": "required_status_checks", "parameters": { + "required_status_checks": [ + {"context": "unit-tests", "integration_id": 15368}, + {"context": "JOB_NAME_HERE", "integration_id": 15368} + ] + }} + ] +} +EOF +``` + +## Troubleshooting + +### Integration tests stuck in "queued" + +**Symptoms**: PR checks show `pending` for `ensemble-presets`, `core-models`, `stems-and-quality`. + +**Diagnosis steps**: + +1. Check if GPU runners are online: + ```bash + gh api orgs/nomadkaraoke/actions/runners \ + --jq '.runners[] | select(.labels[].name == "gpu") | {name, status, busy}' + ``` + +2. Check if GPU VMs exist: + ```bash + gcloud compute instances list --project=nomadkaraoke --filter="name~gpu" + ``` + +3. Check Cloud Function logs for webhook delivery: + ```bash + gcloud logging read 'resource.labels.service_name="github-runner-manager"' \ + --project=nomadkaraoke --limit=20 \ + --format="value(timestamp,textPayload,jsonPayload.message)" + ``` + +4. Check GPU runner startup logs (if VMs are RUNNING but GitHub shows offline): + ```bash + gcloud compute ssh github-gpu-runner-1 --zone=us-central1-a --project=nomadkaraoke \ + --command="tail -50 /var/log/github-runner-startup.log" + ``` + +### GPU VMs don't exist + +If `gcloud compute instances list` shows no GPU runners but Pulumi state thinks they exist: + +```bash +# 1. 
Remove stale state (from karaoke-gen/infrastructure/ dir) +pulumi state delete "urn:pulumi:prod::karaoke-gen-infrastructure::gcp:compute/instance:Instance::github-gpu-runner-1" --target-dependents --yes +pulumi state delete "urn:pulumi:prod::karaoke-gen-infrastructure::gcp:compute/instance:Instance::github-gpu-runner-2" --target-dependents --yes +pulumi state delete "urn:pulumi:prod::karaoke-gen-infrastructure::gcp:compute/instance:Instance::github-gpu-runner-3" --target-dependents --yes + +# 2. Recreate +pulumi up --yes + +# 3. Re-import dependent resources that got removed (runner-manager function, IAM, scheduler) +# Check `pulumi preview` for what needs importing +``` + +### GPU runner startup fails (NVIDIA driver issues) + +The startup script handles kernel header mismatches by upgrading the kernel and rebooting once. If the runner still fails: + +```bash +# SSH in and check +gcloud compute ssh github-gpu-runner-1 --zone=us-central1-a --project=nomadkaraoke \ + --command="nvidia-smi; dkms status; uname -r" +``` + +See `karaoke-gen` memory file `project_gpu_runner_drivers.md` for known issues. + +### Webhook not firing + +Check the org-level webhook configuration: +```bash +gh api orgs/nomadkaraoke/hooks \ + --jq '.[] | select(.events[] == "workflow_job") | {id, active, config: {url: .config.url}}' +``` + +The webhook URL should point to: `https://us-central1-nomadkaraoke.cloudfunctions.net/github-runner-manager` + +## Cost + +| Scenario | Cost | +|----------|------| +| Per GPU runner hour | ~$0.54/hr (n1-standard-4 + T4) | +| 3 runners × 15 min CI run | ~$0.41 | +| Idle (scale to zero) | $0 | +| Typical daily cost (5 PRs) | ~$2 |