Skip to content

Commit b72f410

Browse files
authored
Merge pull request #85 from pragmatrix/vl-noise-suppression
Interface Voice Live NoiseReductionType, so that everything is serialized in camelCase
2 parents 97a3269 + d6bef63 commit b72f410

2 files changed

Lines changed: 34 additions & 3 deletions

File tree

services/microsoft-voice-live/src/client.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use context_switch_core::{
2020
Input, OutputPath, ThresholdLevel, audio,
2121
};
2222

23-
use crate::transcribe::{Params, ServiceOutputEvent};
23+
use crate::transcribe::{NoiseReduction, NoiseReductionType, Params, ServiceOutputEvent};
2424
use crate::transcription_state::TranscriptionState;
2525

2626
pub struct Client {
@@ -114,7 +114,7 @@ impl Client {
114114
async fn send_session_update(&mut self, params: &Params) -> Result<()> {
115115
let session = types::VoiceLiveSession {
116116
input_audio_sampling_rate: None,
117-
input_audio_noise_reduction: params.noise_reduction.clone(),
117+
input_audio_noise_reduction: params.noise_reduction.as_ref().map(noise_reduction),
118118
input_audio_echo_cancellation: None,
119119
input_audio_transcription: Some(types::TranscriptionConfig {
120120
language: params.language.clone(),
@@ -321,6 +321,18 @@ fn eou_threshold_level(level: ThresholdLevel) -> EndOfUtteranceThresholdLevel {
321321
}
322322
}
323323

324+
fn noise_reduction(configured: &NoiseReduction) -> types::NoiseReduction {
325+
let reduction_type = match configured.reduction_type {
326+
NoiseReductionType::NearField => types::NoiseReductionType::NearField,
327+
NoiseReductionType::FarField => types::NoiseReductionType::FarField,
328+
NoiseReductionType::AzureDeepNoiseSuppression => {
329+
types::NoiseReductionType::AzureDeepNoiseSuppression
330+
}
331+
};
332+
333+
types::NoiseReduction { reduction_type }
334+
}
335+
324336
enum FlowControl {
325337
Continue,
326338
End,

services/microsoft-voice-live/src/transcribe.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use anyhow::Result;
22
use async_trait::async_trait;
3-
use openai_api_rs::realtime::types::NoiseReduction;
43
use serde::{Deserialize, Serialize};
54

65
use context_switch_core::{Conversation, Service, TurnDetection};
@@ -36,6 +35,26 @@ pub struct Params {
3635
pub turn_detection: Option<TurnDetection>,
3736
}
3837

38+
/// Input-audio noise reduction. Mapped to the provider noise-reduction configuration before
39+
/// being sent in the session update.
40+
#[derive(Debug, Deserialize)]
41+
pub struct NoiseReduction {
42+
#[serde(rename = "type")]
43+
pub reduction_type: NoiseReductionType,
44+
}
45+
46+
/// Noise-reduction profile. Mirrors the provider's variants with a camelCase wire form.
47+
#[derive(Debug, Deserialize)]
48+
#[serde(rename_all = "camelCase")]
49+
pub enum NoiseReductionType {
50+
/// `nearField` is for close-talking microphones such as headphones.
51+
NearField,
52+
/// `farField` is for far-field microphones such as laptop or conference room microphones.
53+
FarField,
54+
/// Azure deep noise suppression, optimized for the speaker closest to the microphone.
55+
AzureDeepNoiseSuppression,
56+
}
57+
3958
/// Unit type naming the Microsoft Voice Live transcription service.
/// NOTE(review): presumably the `Service` trait imported at the top of this file is
/// implemented for this type further down — confirm.
#[derive(Debug)]
4059
pub struct MicrosoftVoiceLiveTranscribe;
4160

0 commit comments

Comments
 (0)