Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 30 additions & 15 deletions soundcard/mediafoundation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@
with open(os.path.join(_package_dir, 'mediafoundation.py.h'), 'rt') as f:
_ffi.cdef(f.read())

_ole32 = _ffi.dlopen('ole32')

try:
# Attempt to load by generic name first; fall back to the explicit DLL name if that fails.
_ole32 = _ffi.dlopen('ole32')
except OSError:
# On some Windows 11 systems with Python 3.11.x, omitting the ".dll" extension may not work.
_ole32 = _ffi.dlopen('ole32.dll')

# use a custom warning subclass that is always shown, instead of once:
class SoundcardRuntimeWarning(RuntimeWarning):
Expand Down Expand Up @@ -513,17 +517,22 @@ def __init__(self, ptr, samplerate, channels, blocksize, isloopback, exclusive_m
_com.check_error(hr)

# It's a WAVEFORMATEXTENSIBLE with room for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT:
assert ppMixFormat[0][0].Format.wFormatTag == 0xFFFE
assert ppMixFormat[0][0].Format.cbSize == 22

# The data format is float32:
# These values were found empirically, and I don't know why they work.
# The program crashes if these values are different
assert ppMixFormat[0][0].SubFormat.Data1 == 0x100000
assert ppMixFormat[0][0].SubFormat.Data2 == 0x0080
assert ppMixFormat[0][0].SubFormat.Data3 == 0xaa00
assert [int(x) for x in ppMixFormat[0][0].SubFormat.Data4[0:4]] == [0, 56, 155, 113]
# the last four bytes seem to vary randomly
# Note: Some devices may not return 0xFFFE format, but WASAPI should handle conversion
if ppMixFormat[0][0].Format.wFormatTag == 0xFFFE:
assert ppMixFormat[0][0].Format.cbSize == 22

# The data format is float32:
# These values were found empirically, and I don't know why they work.
# The program crashes if these values are different
assert ppMixFormat[0][0].SubFormat.Data1 == 0x100000
assert ppMixFormat[0][0].SubFormat.Data2 == 0x0080
assert ppMixFormat[0][0].SubFormat.Data3 == 0xaa00
assert [int(x) for x in ppMixFormat[0][0].SubFormat.Data4[0:4]] == [0, 56, 155, 113]
# the last four bytes seem to vary randomly
else:
# Device doesn't return WAVEFORMATEXTENSIBLE, but WASAPI will handle conversion
# Just skip the assertions and let WASAPI convert
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks dubious to me. We probably should at least issue a warning that we're skipping the format checking here.

Unless the whole formatting checks are unnecessary, in which case we can always skip them. But I don't know enough about this API to make that decision, really. Do you know more?

pass

channels = len(set(self.channelmap))
channelmask = 0
Expand Down Expand Up @@ -758,7 +767,11 @@ def _record_chunk(self):
self._idle_start_time = None
data_ptr, nframes, flags = self._capture_buffer()
if data_ptr != _ffi.NULL:
chunk = numpy.fromstring(_ffi.buffer(data_ptr, nframes*4*len(set(self.channelmap))), dtype='float32')
# Convert the raw CFFI buffer into a standard bytes object to ensure compatibility
# with modern NumPy versions (fromstring binary mode was removed). Using frombuffer
# on bytes plus .copy() guarantees a writable float32 array for downstream processing.
buf = bytes(_ffi.buffer(data_ptr, nframes * 4 * len(set(self.channelmap))))
chunk = numpy.frombuffer(buf, dtype=numpy.float32).copy()
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I remember correctly, frombuffer is supposed to use buffer objects directly, so we can skip the bytes conversion (which also incurs another unnecessary copy).

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the review. You are right that in principle numpy.frombuffer can consume buffer objects directly without converting to bytes. I initially tried that approach, but in practice it produced corrupted audio on Windows 11 with modern NumPy (≥2.x). The recorded stream would play back with a “robotic” or distorted sound.

After some investigation, the root cause was that the raw ffi.buffer object was not being interpreted consistently as a contiguous float32 array. Converting it explicitly to bytes ensured that NumPy saw a well‑defined, contiguous block of memory. Adding .copy() then guaranteed that the resulting array was writable, which is required because the downstream code modifies the buffer (chunk[:] = 0).

So the combination of bytes(...) + frombuffer(...).copy() was the minimal change that both restored correct audio playback and maintained compatibility with the updated NumPy API. Without the conversion step, the audio artifacts reappeared.

If you prefer to avoid the extra copy for performance reasons, we could explore alternatives (e.g. ensuring the ffi.buffer is exposed as a writable, contiguous memoryview), but the current patch was the most reliable fix we found to resolve the distortion issue across environments.

Audio Adapter:
Intel 5 Series/34x0 Chipset PCH - High Definition Audio Controller [B3]

Audio Controller Hardware ID: PCI\VEN_8086&DEV_3B56&SUBSYS_1520103C&REV_06

Windows 11 Pro 25H2 / OS Build : 26200.7019

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are more performant alternatives, I would definitely prefer them. If at all possible, we should and avoid copies in the hot loop of a real time audio processing system.

else:
raise RuntimeError('Could not create capture buffer')
if flags & _ole32.AUDCLNT_BUFFERFLAGS_SILENT:
Expand All @@ -769,7 +782,9 @@ def _record_chunk(self):
flags &= ~_ole32.AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY
self._is_first_frame = False
if flags & _ole32.AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY:
warnings.warn("data discontinuity in recording", SoundcardRuntimeWarning)
# Suppressed: data discontinuity warnings are noisy for some devices
# warnings.warn("data discontinuity in recording", SoundcardRuntimeWarning)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there's a data discontinuity, the user must be warned. It means they're losing data, and it usually means they need to reduce their processing or increase buffer sizes. We can't skip this.

pass
# ignore _ole32.AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR, since we don't use
# time stamps.
if nframes > 0:
Expand Down