Skip to content

Commit d78c851

Browse files
authored
Add WebM audio format support for reading and writing (#635)
Enable processing of .webm audio files/blobs (commonly produced by browser MediaRecorder APIs) by routing them through the existing ffmpeg decode/encode path. No new dependencies required.
1 parent e42e143 commit d78c851

2 files changed

Lines changed: 64 additions & 10 deletions

File tree

mlx_audio/audio_io.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
33
This module provides functions for reading and writing audio files.
44
- Reading: Uses miniaudio to support WAV, MP3, FLAC, and Vorbis formats.
5-
Uses ffmpeg for M4A/AAC format support.
6-
- Writing: Uses miniaudio for WAV and ffmpeg for MP3, FLAC, OGG, Opus, and Vorbis encoding.
5+
Uses ffmpeg for M4A/AAC, OGG, Opus, and WebM format support.
6+
- Writing: Uses miniaudio for WAV and ffmpeg for MP3, FLAC, OGG, Opus, Vorbis, and WebM encoding.
77
"""
88

99
import io
@@ -23,6 +23,7 @@
2323
"vorbis": "vorbis",
2424
"m4a": "m4a",
2525
"aac": "m4a",
26+
"webm": "webm",
2627
}
2728

2829
# Sample format mapping
@@ -46,6 +47,9 @@ def _detect_format_from_bytes(data: bytes) -> str:
4647
elif data[4:8] == b"ftyp":
4748
# M4A/MP4/AAC container format
4849
return "m4a"
50+
elif data[:4] == b"\x1a\x45\xdf\xa3":
51+
# WebM/Matroska container (EBML header)
52+
return "webm"
4953
else:
5054
raise ValueError("Unable to detect audio format from bytes")
5155

@@ -72,7 +76,7 @@ def _decode_ffmpeg(
7276
" ffmpeg not found!\n"
7377
"========================================\n"
7478
"\n"
75-
"ffmpeg is required for M4A/AAC audio decoding.\n"
79+
"ffmpeg is required for M4A/AAC/WebM audio decoding.\n"
7680
"\n"
7781
"Install ffmpeg:\n"
7882
" macOS: brew install ffmpeg\n"
@@ -182,7 +186,7 @@ def read(
182186
always_2d: bool = False,
183187
dtype: str = "float64",
184188
) -> Tuple[np.ndarray, int]:
185-
"""Read an audio file using miniaudio (or ffmpeg for M4A/AAC).
189+
"""Read an audio file using miniaudio (or ffmpeg for M4A/AAC/OGG/Opus/WebM).
186190
187191
Args:
188192
file: Path to the audio file or a BytesIO object.
@@ -197,13 +201,17 @@ def read(
197201
use_ffmpeg = False
198202
if isinstance(file, (str, Path)):
199203
ext = Path(file).suffix.lstrip(".").lower()
200-
if ext in ("m4a", "aac", "ogg", "opus"):
204+
if ext in ("m4a", "aac", "ogg", "opus", "webm"):
201205
use_ffmpeg = True
202206
elif isinstance(file, io.BytesIO):
203207
file.seek(0)
204208
header = file.read(12)
205209
file.seek(0)
206-
if header[4:8] == b"ftyp" or header[:4] == b"OggS":
210+
if (
211+
header[4:8] == b"ftyp"
212+
or header[:4] == b"OggS"
213+
or header[:4] == b"\x1a\x45\xdf\xa3"
214+
):
207215
use_ffmpeg = True
208216

209217
if use_ffmpeg:
@@ -297,7 +305,7 @@ def _get_ffmpeg_path() -> str:
297305
" ffmpeg not found!\n"
298306
"========================================\n"
299307
"\n"
300-
"ffmpeg is required for MP3/FLAC encoding and M4A/AAC decoding.\n"
308+
"ffmpeg is required for MP3/FLAC/WebM encoding and M4A/AAC/WebM decoding.\n"
301309
"\n"
302310
"Install ffmpeg:\n"
303311
" macOS: brew install ffmpeg\n"
@@ -353,6 +361,8 @@ def _encode_ffmpeg(
353361
cmd.extend(["-b:a", bitrate])
354362
elif format == "opus":
355363
cmd.extend(["-c:a", "libopus", "-b:a", bitrate])
364+
elif format == "webm":
365+
cmd.extend(["-c:a", "libopus", "-b:a", bitrate])
356366
elif format in ("ogg", "vorbis"):
357367
# Use FLAC codec in OGG container for maximum compatibility
358368
# Native vorbis encoder has limitations (experimental, stereo-only)
@@ -400,12 +410,12 @@ def write(
400410
data: Audio data as numpy array. Shape can be (samples,) for mono
401411
or (samples, channels) for multi-channel.
402412
samplerate: Sample rate in Hz.
403-
format: Output format. Supports 'wav', 'flac', 'mp3', 'ogg', 'opus', 'vorbis'.
413+
format: Output format. Supports 'wav', 'flac', 'mp3', 'ogg', 'opus', 'vorbis', 'webm'.
404414
If None, inferred from file extension.
405415
406416
Note:
407417
WAV uses miniaudio for encoding.
408-
MP3, FLAC, OGG, Opus, and Vorbis use ffmpeg (must be installed: brew install ffmpeg).
418+
MP3, FLAC, OGG, Opus, Vorbis, and WebM use ffmpeg (must be installed: brew install ffmpeg).
409419
"""
410420
import miniaudio
411421

@@ -488,7 +498,7 @@ def write(
488498
else:
489499
miniaudio.wav_write_file(str(file), sound)
490500

491-
elif format in ("flac", "mp3", "ogg", "opus", "vorbis"):
501+
elif format in ("flac", "mp3", "ogg", "opus", "vorbis", "webm"):
492502
# Check for ffmpeg early to provide a clear error message
493503
if not _check_ffmpeg_available():
494504
import warnings

mlx_audio/tests/test_audio_io.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,36 @@ def test_write_read_vorbis(self, sample_audio_mono, tmp_path):
141141
tolerance = max(data.shape[0] * 0.2, samplerate * 0.5)
142142
assert abs(read_data.shape[0] - data.shape[0]) < tolerance
143143

144+
@pytest.mark.skipif(not FFMPEG_AVAILABLE, reason="ffmpeg not installed")
145+
def test_write_read_webm(self, sample_audio_mono, tmp_path):
146+
"""Test writing and reading WebM file."""
147+
data, samplerate = sample_audio_mono
148+
output_file = tmp_path / "test.webm"
149+
150+
write(output_file, data, samplerate, format="webm")
151+
assert output_file.exists()
152+
assert output_file.stat().st_size > 0
153+
154+
# Verify we can read it back via ffmpeg
155+
# Note: WebM with Opus internally uses 48 kHz, so reading may return a different sample rate than was written
156+
read_data, read_samplerate = read(output_file)
157+
assert read_data.shape[0] > 0 # Just verify we got data
158+
159+
@pytest.mark.skipif(not FFMPEG_AVAILABLE, reason="ffmpeg not installed")
160+
def test_write_read_webm_stereo(self, sample_audio_stereo, tmp_path):
161+
"""Test writing and reading stereo WebM file."""
162+
data, samplerate = sample_audio_stereo
163+
output_file = tmp_path / "test_stereo.webm"
164+
165+
write(output_file, data, samplerate, format="webm")
166+
assert output_file.exists()
167+
assert output_file.stat().st_size > 0
168+
169+
read_data, read_samplerate = read(output_file)
170+
assert read_data.shape[0] > 0
171+
# WebM/Opus may change the channel count; just verify that data is returned
172+
assert read_data.ndim >= 1
173+
144174
@pytest.mark.skipif(not FFMPEG_AVAILABLE, reason="ffmpeg not installed")
145175
def test_write_bytesio_ogg(self, sample_audio_mono):
146176
"""Test writing OGG to BytesIO."""
@@ -169,6 +199,20 @@ def test_write_bytesio_opus(self, sample_audio_stereo):
169199
read_data, read_samplerate = read(buffer)
170200
assert read_data.shape[0] > 0 # Just verify we got data
171201

202+
@pytest.mark.skipif(not FFMPEG_AVAILABLE, reason="ffmpeg not installed")
203+
def test_write_bytesio_webm(self, sample_audio_mono):
204+
"""Test writing WebM to BytesIO and reading it back (simulates browser blob)."""
205+
data, samplerate = sample_audio_mono
206+
buffer = io.BytesIO()
207+
208+
write(buffer, data, samplerate, format="webm")
209+
assert buffer.getvalue() # Should have content
210+
211+
# Verify we can read it back (this is the browser blob path)
212+
buffer.seek(0)
213+
read_data, read_samplerate = read(buffer)
214+
assert read_data.shape[0] > 0 # Just verify we got data
215+
172216
@pytest.mark.skipif(not FFMPEG_AVAILABLE, reason="ffmpeg not installed")
173217
def test_format_inference_from_extension(self, sample_audio_mono, tmp_path):
174218
"""Test format inference from file extension."""

0 commit comments

Comments
 (0)