Skip to content

Commit 3bbf641

Browse files
Merge pull request #1373 from CapSoftware/instant-mono-fix
feat: Add channel downmixing for microphone audio frames
2 parents 14cb1b8 + 9fe8c3d commit 3bbf641

File tree

2 files changed

+194
-15
lines changed

2 files changed

+194
-15
lines changed

crates/recording/src/sources/audio_mixer.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,7 @@ impl AudioMixerBuilder {
114114
let mut amix = filter_graph.add(
115115
&ffmpeg::filter::find("amix").expect("Failed to find amix filter"),
116116
"amix",
117-
&format!(
118-
"inputs={}:duration=first:dropout_transition=0",
119-
abuffers.len()
120-
),
117+
&format!("inputs={}:duration=longest", abuffers.len()),
121118
)?;
122119

123120
let aformat_args = format!(
@@ -388,16 +385,21 @@ impl AudioMixer {
388385
let elapsed = Duration::from_secs_f64(self.samples_out as f64 / output_rate);
389386
let timestamp = start.instant() + start_timestamp.duration_since(start) + elapsed;
390387

391-
self.samples_out += filtered.samples();
388+
let frame_samples = filtered.samples();
389+
let mut frame = AudioFrame::new(filtered, Timestamp::Instant(timestamp));
392390

393-
if self
394-
.output
395-
.try_send(AudioFrame::new(filtered, Timestamp::Instant(timestamp)))
396-
.is_err()
397-
{
398-
return Err(());
391+
loop {
392+
match self.output.try_send(frame) {
393+
Ok(()) => break,
394+
Err(err) if err.is_full() => {
395+
frame = err.into_inner();
396+
std::thread::sleep(Duration::from_millis(1));
397+
}
398+
Err(_) => return Err(()),
399+
}
399400
}
400401

402+
self.samples_out += frame_samples;
401403
filtered = ffmpeg::frame::Audio::empty();
402404
}
403405

crates/recording/src/sources/microphone.rs

Lines changed: 181 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@ use crate::{
44
};
55
use anyhow::anyhow;
66
use cap_media_info::AudioInfo;
7+
use cpal::SampleFormat;
78
use futures::{SinkExt, channel::mpsc};
8-
use std::sync::Arc;
9+
use std::{borrow::Cow, sync::Arc};
10+
11+
const MICROPHONE_TARGET_CHANNELS: u16 = 1;
912

1013
pub struct Microphone {
1114
info: AudioInfo,
@@ -25,7 +28,10 @@ impl AudioSource for Microphone {
2528
Self: Sized,
2629
{
2730
async move {
28-
let audio_info = feed_lock.audio_info();
31+
let source_info = feed_lock.audio_info();
32+
let audio_info = source_info.with_max_channels(MICROPHONE_TARGET_CHANNELS);
33+
let source_channels = source_info.channels;
34+
let target_channels = audio_info.channels;
2935
let (tx, rx) = flume::bounded(8);
3036

3137
feed_lock
@@ -35,17 +41,24 @@ impl AudioSource for Microphone {
3541

3642
tokio::spawn(async move {
3743
while let Ok(frame) = rx.recv_async().await {
44+
let packed = maybe_downmix_channels(
45+
&frame.data,
46+
frame.format,
47+
source_channels,
48+
target_channels,
49+
);
50+
3851
let _ = audio_tx
3952
.send(AudioFrame::new(
40-
audio_info.wrap_frame_with_max_channels(&frame.data, 2),
53+
audio_info.wrap_frame(packed.as_ref()),
4154
frame.timestamp,
4255
))
4356
.await;
4457
}
4558
});
4659

4760
Ok(Self {
48-
info: audio_info.with_max_channels(2),
61+
info: audio_info,
4962
_lock: feed_lock,
5063
})
5164
}
@@ -55,3 +68,167 @@ impl AudioSource for Microphone {
5568
self.info
5669
}
5770
}
71+
72+
fn maybe_downmix_channels<'a>(
73+
data: &'a [u8],
74+
format: SampleFormat,
75+
source_channels: usize,
76+
target_channels: usize,
77+
) -> Cow<'a, [u8]> {
78+
if target_channels == 0 || source_channels == 0 || target_channels >= source_channels {
79+
return Cow::Borrowed(data);
80+
}
81+
82+
if target_channels == 1 {
83+
if let Some(samples) = downmix_to_mono(data, format, source_channels) {
84+
Cow::Owned(samples)
85+
} else {
86+
Cow::Borrowed(data)
87+
}
88+
} else {
89+
Cow::Borrowed(data)
90+
}
91+
}
92+
93+
fn downmix_to_mono(data: &[u8], format: SampleFormat, source_channels: usize) -> Option<Vec<u8>> {
94+
let sample_size = sample_format_size(format)?;
95+
96+
let frame_size = sample_size.checked_mul(source_channels)?;
97+
if frame_size == 0 || !data.len().is_multiple_of(frame_size) {
98+
return None;
99+
}
100+
101+
let frame_count = data.len() / frame_size;
102+
let mut out = vec![0u8; frame_count * sample_size];
103+
104+
for (frame_idx, frame) in data.chunks(frame_size).enumerate() {
105+
let mono = average_frame_sample(format, frame, sample_size, source_channels)?;
106+
let start = frame_idx * sample_size;
107+
write_sample_from_f64(format, mono, &mut out[start..start + sample_size]);
108+
}
109+
110+
Some(out)
111+
}
112+
113+
fn sample_format_size(format: SampleFormat) -> Option<usize> {
114+
Some(match format {
115+
SampleFormat::U8 => 1,
116+
SampleFormat::I16 => 2,
117+
SampleFormat::I32 => 4,
118+
SampleFormat::I64 => 8,
119+
SampleFormat::F32 => 4,
120+
SampleFormat::F64 => 8,
121+
_ => return None,
122+
})
123+
}
124+
125+
fn average_frame_sample(
126+
format: SampleFormat,
127+
frame: &[u8],
128+
sample_size: usize,
129+
channels: usize,
130+
) -> Option<f64> {
131+
let mut sum = 0.0;
132+
for ch in 0..channels {
133+
let start = ch * sample_size;
134+
let end = start + sample_size;
135+
sum += sample_to_f64(format, &frame[start..end])?;
136+
}
137+
138+
Some(sum / channels as f64)
139+
}
140+
141+
fn sample_to_f64(format: SampleFormat, bytes: &[u8]) -> Option<f64> {
142+
match format {
143+
SampleFormat::U8 => bytes.first().copied().map(|v| v as f64),
144+
SampleFormat::I16 => {
145+
let mut buf = [0u8; 2];
146+
buf.copy_from_slice(bytes);
147+
Some(i16::from_ne_bytes(buf) as f64)
148+
}
149+
SampleFormat::I32 => {
150+
let mut buf = [0u8; 4];
151+
buf.copy_from_slice(bytes);
152+
Some(i32::from_ne_bytes(buf) as f64)
153+
}
154+
SampleFormat::I64 => {
155+
let mut buf = [0u8; 8];
156+
buf.copy_from_slice(bytes);
157+
Some(i64::from_ne_bytes(buf) as f64)
158+
}
159+
SampleFormat::F32 => {
160+
let mut buf = [0u8; 4];
161+
buf.copy_from_slice(bytes);
162+
Some(f32::from_ne_bytes(buf) as f64)
163+
}
164+
SampleFormat::F64 => {
165+
let mut buf = [0u8; 8];
166+
buf.copy_from_slice(bytes);
167+
Some(f64::from_ne_bytes(buf))
168+
}
169+
_ => None,
170+
}
171+
}
172+
173+
fn write_sample_from_f64(format: SampleFormat, value: f64, out: &mut [u8]) {
174+
match format {
175+
SampleFormat::U8 => {
176+
let sample = value.round().clamp(u8::MIN as f64, u8::MAX as f64) as u8;
177+
out[0] = sample;
178+
}
179+
SampleFormat::I16 => {
180+
let sample = value.round().clamp(i16::MIN as f64, i16::MAX as f64) as i16;
181+
out.copy_from_slice(&sample.to_ne_bytes());
182+
}
183+
SampleFormat::I32 => {
184+
let sample = value.round().clamp(i32::MIN as f64, i32::MAX as f64) as i32;
185+
out.copy_from_slice(&sample.to_ne_bytes());
186+
}
187+
SampleFormat::I64 => {
188+
let sample = value.round().clamp(i64::MIN as f64, i64::MAX as f64) as i64;
189+
out.copy_from_slice(&sample.to_ne_bytes());
190+
}
191+
SampleFormat::F32 => {
192+
let sample = value as f32;
193+
out.copy_from_slice(&sample.to_ne_bytes());
194+
}
195+
SampleFormat::F64 => {
196+
out.copy_from_slice(&value.to_ne_bytes());
197+
}
198+
_ => {}
199+
}
200+
}
201+
202+
#[cfg(test)]
mod tests {
    use super::*;

    /// Packs interleaved `f32` samples into native-endian bytes.
    fn f32_bytes(samples: &[f32]) -> Vec<u8> {
        samples.iter().flat_map(|s| s.to_ne_bytes()).collect()
    }

    /// Packs interleaved `i16` samples into native-endian bytes.
    fn i16_bytes(samples: &[i16]) -> Vec<u8> {
        samples.iter().flat_map(|s| s.to_ne_bytes()).collect()
    }

    #[test]
    fn downmixes_stereo_f32_to_mono() {
        let frames = [(0.5f32, -0.25f32), (1.0f32, 1.0f32)];
        let mut data = Vec::new();

        for (left, right) in frames {
            data.extend_from_slice(&left.to_ne_bytes());
            data.extend_from_slice(&right.to_ne_bytes());
        }

        let downmixed = maybe_downmix_channels(&data, SampleFormat::F32, 2, 1);
        let owned = downmixed.into_owned();
        assert_eq!(owned.len(), frames.len() * std::mem::size_of::<f32>());

        let first = f32::from_ne_bytes(owned[0..4].try_into().unwrap());
        let second = f32::from_ne_bytes(owned[4..8].try_into().unwrap());

        assert!((first - 0.125).abs() < f32::EPSILON);
        assert!((second - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn leaves_mono_buffers_untouched() {
        let sample = 0.75f32;
        let data = sample.to_ne_bytes().to_vec();
        let result = maybe_downmix_channels(&data, SampleFormat::F32, 1, 1);
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    #[test]
    fn downmixes_stereo_i16_to_mono() {
        // Covers the integer path: plain average, saturation at full scale, and a tie
        // (-3.5) that rounds away from zero.
        let data = i16_bytes(&[100, -50, i16::MAX, i16::MAX, -3, -4]);

        let downmixed = maybe_downmix_channels(&data, SampleFormat::I16, 2, 1);
        assert!(matches!(downmixed, Cow::Owned(_)));
        assert_eq!(downmixed.as_ref(), i16_bytes(&[25, i16::MAX, -4]).as_slice());
    }

    #[test]
    fn passes_through_buffers_that_cannot_be_downmixed() {
        // Six bytes is not a whole number of 8-byte stereo f32 frames.
        let ragged = [0u8; 6];
        let result = maybe_downmix_channels(&ragged, SampleFormat::F32, 2, 1);
        assert!(matches!(result, Cow::Borrowed(_)));

        // Only reductions to mono are implemented; 4 -> 2 is handed back unchanged.
        let quad = f32_bytes(&[0.1, 0.2, 0.3, 0.4]);
        let result = maybe_downmix_channels(&quad, SampleFormat::F32, 4, 2);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), quad.as_slice());
    }
}

0 commit comments

Comments
 (0)