-
Notifications
You must be signed in to change notification settings - Fork 27
Expand file tree
/
Copy pathAudioProcessor.cs
More file actions
384 lines (325 loc) · 11.7 KB
/
AudioProcessor.cs
File metadata and controls
384 lines (325 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using System.Threading.Channels;
using NAudio.Wave;
namespace Azure.AI.VoiceLive.Samples;
/// <summary>
/// Handles real-time audio capture and playback for the voice assistant.
/// </summary>
/// <remarks>
/// Audio is PCM16, 24kHz, mono in both directions.
///
/// Threading Architecture:
/// - Main thread: Event loop and UI
/// - Capture thread: NAudio raises DataAvailable; each chunk is copied and queued on the send channel
/// - Send task: drains the send channel and transmits the audio to VoiceLive
/// - Playback task: drains the playback channel into the NAudio playback buffer
///
/// Capture and playback can each be stopped and started again. Prefer
/// <see cref="DisposeAsync"/> (or <see cref="CleanupAsync"/>) over <see cref="Dispose"/>
/// from asynchronous code, because <see cref="Dispose"/> has to block on the cleanup.
/// </remarks>
public class AudioProcessor : IDisposable, IAsyncDisposable
{
    private readonly VoiceLiveSession _session;
    private readonly ILogger<AudioProcessor> _logger;

    // Audio configuration - PCM16, 24kHz, mono as specified
    private const int SampleRate = 24000;
    private const int Channels = 1;
    private const int BitsPerSample = 16;

    // NAudio components
    private WaveInEvent? _waveIn;
    private WaveOutEvent? _waveOut;
    private BufferedWaveProvider? _playbackBuffer;

    // Audio capture and playback state. Volatile because the flags are written by the
    // caller of Start/Stop and read by the NAudio capture callback and the background tasks.
    private volatile bool _isCapturing;
    private volatile bool _isPlaying;
    private bool _disposed;

    // Audio streaming channels. The send channel is replaced on every capture start because
    // stopping capture completes it, and a completed channel cannot be reopened.
    private Channel<byte[]> _audioSendChannel;
    private readonly Channel<byte[]> _audioPlaybackChannel;

    // Background tasks
    private Task? _audioSendTask;
    private Task? _audioPlaybackTask;

    // Cancelled once, when the whole processor is cleaned up.
    private readonly CancellationTokenSource _cancellationTokenSource;

    // Cancelled on every StopPlaybackAsync; replaced on every StartPlaybackAsync.
    private CancellationTokenSource _playbackCancellationTokenSource;

    /// <summary>
    /// Initializes a new instance of the AudioProcessor class.
    /// </summary>
    /// <param name="session">The VoiceLive session for audio communication.</param>
    /// <param name="logger">Logger for diagnostic information.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="session"/> or <paramref name="logger"/> is null.
    /// </exception>
    public AudioProcessor(VoiceLiveSession session, ILogger<AudioProcessor> logger)
    {
        _session = session ?? throw new ArgumentNullException(nameof(session));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        // Create unbounded channels for audio data
        _audioSendChannel = Channel.CreateUnbounded<byte[]>();
        _audioPlaybackChannel = Channel.CreateUnbounded<byte[]>();

        _cancellationTokenSource = new CancellationTokenSource();
        _playbackCancellationTokenSource = new CancellationTokenSource();

        _logger.LogInformation("AudioProcessor initialized with {SampleRate}Hz PCM16 mono audio", SampleRate);
    }

    /// <summary>
    /// Start capturing audio from microphone.
    /// </summary>
    /// <remarks>
    /// Does nothing when capture is already running. Failures are thrown synchronously,
    /// after the partially initialized capture device has been released.
    /// </remarks>
    /// <returns>A completed task.</returns>
    /// <exception cref="ObjectDisposedException">The processor has been disposed.</exception>
    public Task StartCaptureAsync()
    {
        ThrowIfDisposed();

        if (_isCapturing)
        {
            return Task.CompletedTask;
        }

        _isCapturing = true;

        try
        {
            // StopCaptureAsync completes the previous channel, so every session gets a new one.
            _audioSendChannel = Channel.CreateUnbounded<byte[]>();

            _waveIn = new WaveInEvent
            {
                WaveFormat = CreateWaveFormat(),
                BufferMilliseconds = 50 // 50ms buffer for low latency
            };

            _waveIn.DataAvailable += OnAudioDataAvailable;
            _waveIn.RecordingStopped += OnRecordingStopped;

            // Debugging aid (left disabled): list the available input devices.
            /*
            _logger.LogInformation($"There are {WaveIn.DeviceCount} devices available.");
            for (int i = 0; i < WaveIn.DeviceCount; i++)
            {
                var deviceInfo = WaveIn.GetCapabilities(i);
                _logger.LogInformation($"{i}: {deviceInfo.ProductName}");
            }
            */
            _waveIn.DeviceNumber = 0; // Default to first device

            _waveIn.StartRecording();

            // Start audio send task
            _audioSendTask = ProcessAudioSendAsync(_audioSendChannel.Reader, _cancellationTokenSource.Token);

            _logger.LogInformation("Started audio capture");
            return Task.CompletedTask;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to start audio capture");
            _isCapturing = false;

            // Do not leak the device or its event subscriptions, and do not let a
            // cleanup failure hide the exception that actually caused the start to fail.
            try
            {
                ReleaseCaptureDevice();
            }
            catch (Exception cleanupEx)
            {
                _logger.LogWarning(cleanupEx, "Failed to release the capture device after a failed start");
            }

            throw;
        }
    }

    /// <summary>
    /// Stop capturing audio.
    /// </summary>
    /// <remarks>
    /// Does nothing when capture is not running. Otherwise releases the capture device,
    /// completes the send channel and waits for the send task to finish.
    /// </remarks>
    /// <returns>A task that completes once the send task has finished.</returns>
    public async Task StopCaptureAsync()
    {
        if (!_isCapturing)
        {
            return;
        }

        _isCapturing = false;

        ReleaseCaptureDevice();

        // Complete the send channel and wait for the send task
        _audioSendChannel.Writer.TryComplete();
        if (_audioSendTask != null)
        {
            await _audioSendTask.ConfigureAwait(false);
            _audioSendTask = null;
        }

        _logger.LogInformation("Stopped audio capture");
    }

    /// <summary>
    /// Initialize audio playback system.
    /// </summary>
    /// <remarks>
    /// Does nothing when playback is already running. Failures are thrown synchronously,
    /// after the partially initialized playback device has been released.
    /// </remarks>
    /// <returns>A completed task.</returns>
    /// <exception cref="ObjectDisposedException">The processor has been disposed.</exception>
    public Task StartPlaybackAsync()
    {
        ThrowIfDisposed();

        if (_isPlaying)
        {
            return Task.CompletedTask;
        }

        _isPlaying = true;

        try
        {
            _waveOut = new WaveOutEvent
            {
                DesiredLatency = 100 // 100ms latency
            };

            _playbackBuffer = new BufferedWaveProvider(CreateWaveFormat())
            {
                // 5 minute buffer (roughly 14.4 MB at 24kHz PCM16 mono).
                // NOTE(review): presumably sized this large so that long responses delivered
                // faster than real time are not dropped - confirm before shrinking it.
                BufferDuration = TimeSpan.FromMinutes(5),
                DiscardOnBufferOverflow = true
            };

            _waveOut.Init(_playbackBuffer);
            _waveOut.Play();

            // Each playback session gets its own token source; dispose the one it replaces.
            CancellationTokenSource previousPlaybackCts = _playbackCancellationTokenSource;
            _playbackCancellationTokenSource = new CancellationTokenSource();
            previousPlaybackCts.Dispose();

            // Start audio playback task
            _audioPlaybackTask = ProcessAudioPlaybackAsync();

            _logger.LogInformation("Audio playback system ready");
            return Task.CompletedTask;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize audio playback");
            _isPlaying = false;

            // Do not leak the output device, and do not let a cleanup failure hide
            // the exception that actually caused the start to fail.
            try
            {
                ReleasePlaybackDevice();
            }
            catch (Exception cleanupEx)
            {
                _logger.LogWarning(cleanupEx, "Failed to release the playback device after a failed start");
            }

            throw;
        }
    }

    /// <summary>
    /// Stop audio playback and clear buffer.
    /// </summary>
    /// <remarks>
    /// Does nothing when playback is not running. Audio that is queued but not yet played is discarded.
    /// </remarks>
    /// <returns>A task that completes once the playback task has finished.</returns>
    public async Task StopPlaybackAsync()
    {
        if (!_isPlaying)
        {
            return;
        }

        _isPlaying = false;

        // Signal the playback task before tearing anything down, so it stops feeding the buffer.
        _playbackCancellationTokenSource.Cancel();

        try
        {
            ReleasePlaybackDevice();
        }
        finally
        {
            if (_audioPlaybackTask != null)
            {
                await _audioPlaybackTask.ConfigureAwait(false);
                _audioPlaybackTask = null;
            }

            // Drain only after the playback task has exited, so that nothing queued
            // for this session is left behind for the next one.
            while (_audioPlaybackChannel.Reader.TryRead(out _))
            {
            }
        }

        _logger.LogInformation("Stopped audio playback");
    }

    /// <summary>
    /// Queue audio data for playback.
    /// </summary>
    /// <param name="audioData">The audio data to queue. Null or empty data is ignored.</param>
    /// <returns>A task that completes once the data has been queued, or immediately when it is ignored.</returns>
    /// <remarks>Data is also ignored while playback is not running.</remarks>
    public async Task QueueAudioAsync(byte[] audioData)
    {
        if (!_isPlaying || audioData is not { Length: > 0 })
        {
            return;
        }

        await _audioPlaybackChannel.Writer.WriteAsync(audioData).ConfigureAwait(false);
    }

    /// <summary>
    /// Event handler for audio data available from microphone.
    /// </summary>
    private void OnAudioDataAvailable(object? sender, WaveInEventArgs e)
    {
        if (!_isCapturing || e.BytesRecorded <= 0)
        {
            return;
        }

        // Copy the recorded bytes: only the first BytesRecorded bytes of e.Buffer are valid,
        // and the queued chunk must not share storage with the event's buffer.
        byte[] audioData = e.Buffer.AsSpan(0, e.BytesRecorded).ToArray();

        // Queue audio data for sending (non-blocking). The channel is unbounded, so this
        // only fails once the channel has been completed by StopCaptureAsync.
        if (!_audioSendChannel.Writer.TryWrite(audioData))
        {
            _logger.LogWarning("Failed to queue audio data for sending - send channel is closed");
        }
    }

    /// <summary>
    /// Event handler for recording stopped.
    /// </summary>
    private void OnRecordingStopped(object? sender, StoppedEventArgs e)
    {
        if (e.Exception != null)
        {
            _logger.LogError(e.Exception, "Audio recording stopped due to error");
        }
    }

    /// <summary>
    /// Background task to process audio data and send to VoiceLive service.
    /// </summary>
    /// <param name="reader">Reader of the send channel that belongs to the current capture session.</param>
    /// <param name="cancellationToken">Token that ends the loop when the processor is cleaned up.</param>
    /// <remarks>Never throws: failures are logged, and a failed chunk does not stop later chunks.</remarks>
    private async Task ProcessAudioSendAsync(ChannelReader<byte[]> reader, CancellationToken cancellationToken)
    {
        try
        {
            await foreach (byte[] audioData in reader.ReadAllAsync(cancellationToken).ConfigureAwait(false))
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                try
                {
                    // Send audio data directly to the session
                    await _session.SendInputAudioAsync(audioData, cancellationToken).ConfigureAwait(false);
                }
                catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
                {
                    // Shutdown in progress: this is not a send failure, so do not log it as one.
                    break;
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Error sending audio data to VoiceLive");
                    // Continue processing other audio data
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Expected when cancellation is requested
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error in audio send processing");
        }
    }

    /// <summary>
    /// Background task to process audio playback.
    /// </summary>
    /// <remarks>
    /// Runs until either the current playback session or the whole processor is cancelled.
    /// Never throws: failures are logged.
    /// </remarks>
    private async Task ProcessAudioPlaybackAsync()
    {
        try
        {
            // The linked source registers callbacks on both parent tokens; dispose it so the
            // registrations on the long-lived processor token do not pile up across sessions.
            using CancellationTokenSource linkedTokenSource = CancellationTokenSource.CreateLinkedTokenSource(
                _playbackCancellationTokenSource.Token,
                _cancellationTokenSource.Token);
            CancellationToken cancellationToken = linkedTokenSource.Token;

            await foreach (byte[] audioData in _audioPlaybackChannel.Reader.ReadAllAsync(cancellationToken).ConfigureAwait(false))
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                try
                {
                    // Snapshot the field: StopPlaybackAsync may null it out on another thread.
                    BufferedWaveProvider? buffer = _playbackBuffer;
                    if (buffer != null && _isPlaying)
                    {
                        buffer.AddSamples(audioData, 0, audioData.Length);
                    }
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Error in audio playback");
                    // Continue processing other audio data
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Expected when cancellation is requested
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error in audio playback processing");
        }
    }

    /// <summary>
    /// Clean up audio resources.
    /// </summary>
    /// <remarks>
    /// Stops capture and playback, cancels the processor-wide token and waits for any
    /// background task that is still running.
    /// </remarks>
    /// <returns>A task that completes once capture, playback and the background tasks have stopped.</returns>
    public async Task CleanupAsync()
    {
        await StopCaptureAsync().ConfigureAwait(false);
        await StopPlaybackAsync().ConfigureAwait(false);

        try
        {
            _cancellationTokenSource.Cancel();
        }
        catch (ObjectDisposedException)
        {
            // Already disposed: there is nothing left to cancel.
        }

        // Wait for background tasks to complete
        var pendingTasks = new List<Task>(2);
        if (_audioSendTask is { } sendTask)
        {
            pendingTasks.Add(sendTask);
        }

        if (_audioPlaybackTask is { } playbackTask)
        {
            pendingTasks.Add(playbackTask);
        }

        if (pendingTasks.Count > 0)
        {
            await Task.WhenAll(pendingTasks).ConfigureAwait(false);
        }

        _logger.LogInformation("Audio processor cleaned up");
    }

    /// <summary>
    /// Dispose of resources.
    /// </summary>
    /// <remarks>
    /// Blocks until <see cref="CleanupAsync"/> has finished. Safe to call more than once.
    /// Cleanup failures are logged rather than thrown.
    /// </remarks>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        try
        {
            CleanupAsync().GetAwaiter().GetResult();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error while cleaning up audio resources during dispose");
        }
        finally
        {
            DisposeCancellationSources();
        }

        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Asynchronously dispose of resources without blocking the calling thread.
    /// </summary>
    /// <remarks>
    /// Safe to call more than once. Cleanup failures are logged rather than thrown.
    /// </remarks>
    /// <returns>A task that completes once cleanup has finished.</returns>
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        try
        {
            await CleanupAsync().ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error while cleaning up audio resources during dispose");
        }
        finally
        {
            DisposeCancellationSources();
        }

        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Creates the wave format shared by capture and playback.
    /// </summary>
    private static WaveFormat CreateWaveFormat() => new WaveFormat(SampleRate, BitsPerSample, Channels);

    /// <summary>
    /// Stops and disposes the capture device, if any, and detaches its event handlers.
    /// </summary>
    private void ReleaseCaptureDevice()
    {
        WaveInEvent? waveIn = _waveIn;
        if (waveIn == null)
        {
            return;
        }

        _waveIn = null;
        waveIn.StopRecording();
        waveIn.DataAvailable -= OnAudioDataAvailable;
        waveIn.RecordingStopped -= OnRecordingStopped;
        waveIn.Dispose();
    }

    /// <summary>
    /// Clears the playback buffer and stops and disposes the output device, if any.
    /// </summary>
    private void ReleasePlaybackDevice()
    {
        _playbackBuffer?.ClearBuffer();

        WaveOutEvent? waveOut = _waveOut;
        if (waveOut != null)
        {
            _waveOut = null;
            waveOut.Stop();
            waveOut.Dispose();
        }

        _playbackBuffer = null;
    }

    /// <summary>
    /// Disposes both cancellation token sources owned by this instance.
    /// </summary>
    private void DisposeCancellationSources()
    {
        _cancellationTokenSource.Dispose();
        _playbackCancellationTokenSource.Dispose();
    }

    /// <summary>
    /// Throws when the processor has already been disposed.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The processor has been disposed.</exception>
    private void ThrowIfDisposed()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(AudioProcessor));
        }
    }
}