Skip to content

Commit 4549b04

Browse files
beveradbclaude
andauthored
fix: prevent chunk overwrite with custom output names, clarify demucs defaults (#270)
Fix chunking bug where custom_output_names passed to _separate_file caused chunks to overwrite each other (fixes #259). Custom names are now only applied to the final merged output. Adds regression test with multi-chunk + multi-stem scenario that verifies _separate_file is called without custom names. Also clarifies demucs_params segment_size documentation: "Default" maps to 40 for older Demucs models and 10 for Demucs v4/htdemucs. Incorporates work from PR #260 (@Kulin-Soni) and PR #264 (@Madduri-Ganesh) with additional test coverage. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 73061f0 commit 4549b04

3 files changed

Lines changed: 40 additions & 12 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ You can also rename specific stems:
618618
- **`use_autocast`:** (Optional) Flag to use PyTorch autocast for faster inference. Do not use for CPU inference. `Default: False`
619619
- **`mdx_params`:** (Optional) MDX Architecture Specific Attributes & Defaults. `Default: {"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False}`
620620
- **`vr_params`:** (Optional) VR Architecture Specific Attributes & Defaults. `Default: {"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False}`
621-
- **`demucs_params`:** (Optional) Demucs Architecture Specific Attributes & Defaults. `Default: {"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True}`
621+
- **`demucs_params`:** (Optional) Demucs Architecture Specific Attributes & Defaults. `Default: {"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True}` _(Note: `segment_size` "Default" uses the model's internal default, typically 40 for older Demucs models and 10 for Demucs v4/htdemucs)_
622622
- **`mdxc_params`:** (Optional) MDXC Architecture Specific Attributes & Defaults. `Default: {"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0}`
623623
- **`ensemble_algorithm`:** (Optional) Algorithm to use for ensembling multiple models. `Default: 'avg_wave'`
624624
- **`ensemble_weights`:** (Optional) Weights for each model in the ensemble. `Default: None` (equal weights)

audio_separator/separator/separator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,7 @@ def _process_with_chunking(self, audio_file_path, custom_output_names=None):
10881088
self.model_instance.output_dir = temp_dir
10891089

10901090
try:
1091-
output_files = self._separate_file(chunk_path, custom_output_names)
1091+
output_files = self._separate_file(chunk_path)
10921092

10931093
# Dynamically group chunks by stem name
10941094
for stem_path in output_files:

tests/unit/test_separator_chunking.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ def test_state_restoration_after_chunking(self, mock_chunker_class):
318318

319319
# Track state changes during _separate_file call
320320
state_during_processing = {}
321-
def track_state(chunk_path, custom_names):
321+
def track_state(chunk_path, custom_names=None):
322322
state_during_processing['chunk_duration'] = separator.chunk_duration
323323
state_during_processing['output_dir'] = separator.output_dir
324324
state_during_processing['model_output_dir'] = separator.model_instance.output_dir
@@ -500,8 +500,13 @@ def test_audio_chunker_initialization(self, mock_chunker_class):
500500
mock_chunker_class.assert_called_once_with(15.0, separator.logger)
501501

502502
@patch('audio_separator.separator.audio_chunking.AudioChunker')
503-
def test_custom_output_names_parameter(self, mock_chunker_class):
504-
"""Test that custom_output_names parameter is handled correctly."""
503+
def test_custom_output_names_applied_to_merged_output(self, mock_chunker_class):
504+
"""Test that custom_output_names are applied to final merged output, not per-chunk.
505+
506+
Regression test for issue #259: when custom_output_names were passed to
507+
_separate_file for each chunk, chunks would get the same custom name and
508+
overwrite each other, producing corrupted output.
509+
"""
505510
# Setup mock separator
506511
separator = Mock(spec=Separator)
507512
separator.logger = self.logger
@@ -511,18 +516,25 @@ def test_custom_output_names_parameter(self, mock_chunker_class):
511516
separator.model_instance = Mock()
512517
separator.model_instance.output_dir = self.temp_dir
513518

514-
# Mock chunker behavior
519+
# Mock chunker behavior - multiple chunks to reproduce the overwrite bug
515520
mock_chunker = Mock()
516521
mock_chunker_class.return_value = mock_chunker
517522
mock_chunker.split_audio.return_value = [
518523
os.path.join(self.temp_dir, "chunk_0000.wav"),
524+
os.path.join(self.temp_dir, "chunk_0001.wav"),
525+
os.path.join(self.temp_dir, "chunk_0002.wav"),
519526
]
520527

521-
separator._separate_file = Mock(return_value=[
522-
os.path.join(self.temp_dir, "chunk_0000_(Vocals).wav"),
528+
separator._separate_file = Mock(side_effect=[
529+
[os.path.join(self.temp_dir, "chunk_0000_(Vocals).wav"),
530+
os.path.join(self.temp_dir, "chunk_0000_(Instrumental).wav")],
531+
[os.path.join(self.temp_dir, "chunk_0001_(Vocals).wav"),
532+
os.path.join(self.temp_dir, "chunk_0001_(Instrumental).wav")],
533+
[os.path.join(self.temp_dir, "chunk_0002_(Vocals).wav"),
534+
os.path.join(self.temp_dir, "chunk_0002_(Instrumental).wav")],
523535
])
524536

525-
custom_names = {"Vocals": "my_custom_vocals"}
537+
custom_names = {"Vocals": "my_custom_vocals", "Instrumental": "my_custom_instrumental"}
526538

527539
# Import and call the actual method
528540
from audio_separator.separator.separator import Separator as RealSeparator
@@ -532,9 +544,25 @@ def test_custom_output_names_parameter(self, mock_chunker_class):
532544
custom_output_names=custom_names
533545
)
534546

535-
# Verify custom name was used in output
536-
assert len(result) == 1
537-
assert "my_custom_vocals" in os.path.basename(result[0])
547+
# Verify _separate_file was called WITHOUT custom_output_names for each chunk
548+
# This is the core of the fix: chunks must use default naming to avoid overwrites
549+
for call_args in separator._separate_file.call_args_list:
550+
args, kwargs = call_args
551+
assert len(args) == 1, f"_separate_file should be called with only chunk_path, got args: {args}"
552+
assert "custom_output_names" not in kwargs, \
553+
f"_separate_file should not receive custom_output_names, got kwargs: {kwargs}"
554+
555+
# Verify custom names were applied to the final merged output
556+
assert len(result) == 2
557+
output_basenames = [os.path.basename(path) for path in result]
558+
assert any("my_custom_instrumental" in name for name in output_basenames)
559+
assert any("my_custom_vocals" in name for name in output_basenames)
560+
561+
# Verify all 3 chunks were processed for each stem
562+
assert mock_chunker.merge_chunks.call_count == 2
563+
for merge_call in mock_chunker.merge_chunks.call_args_list:
564+
chunk_list = merge_call[0][0]
565+
assert len(chunk_list) == 3, f"Each stem should merge 3 chunks, got {len(chunk_list)}"
538566

539567

540568
class TestSeparatorChunkingEdgeCases:

0 commit comments

Comments
 (0)