-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathaudio.py
More file actions
262 lines (219 loc) · 8.84 KB
/
audio.py
File metadata and controls
262 lines (219 loc) · 8.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import asyncio
from fractions import Fraction
import sounddevice as sd
import numpy as np
from aiortc import RTCPeerConnection, RTCIceServer, RTCConfiguration, RTCSessionDescription, MediaStreamTrack, \
MediaStreamError
from aiortc.contrib.media import MediaRelay, MediaPlayer
from av.audio import AudioFrame
from find_device import get_output_device_by_name, get_input_device_by_name
class AudioPlaybackTrack:
def __init__(self, track, device):
self.track = track
self.queue = asyncio.Queue(maxsize=20)
self.device = device
self.buffer = np.empty((0, 2), dtype=np.int16)
print(f"[AudioPlaybackTrack] Starting output stream on device {self.device}")
try:
self.stream = sd.OutputStream(
samplerate=48000,
channels=2,
dtype='int16',
blocksize=256,
device=self.device,
callback=self._callback,
)
self.stream.start()
except sd.PortAudioError as e:
pass
print(f"PortAudio error opening output stream: {e}")
self._task_receive = asyncio.create_task(self._receive_audio())
async def _receive_audio(self):
print("[AudioPlaybackTrack] Audio receive task started")
while True:
try:
frame = await self.track.recv()
# print("[AudioPlaybackTrack] Received audio frame")
except MediaStreamError:
# print("[AudioPlaybackTrack] Track ended or closed")
break
try:
self.queue.put_nowait(frame)
# print(f"[AudioPlaybackTrack] Frame put in queue (size={self.queue.qsize()})")
except asyncio.QueueFull:
pass
# print("[AudioPlaybackTrack] Queue full, dropping frame")
def _callback(self, outdata, frames, time, status):
if status:
pass
# print(f"[AudioPlaybackTrack] Output stream status: {status}")
try:
while self.buffer.shape[0] < frames:
frame = self.queue.get_nowait()
raw = frame.to_ndarray()
data = raw.reshape(-1, 2) # reshape to (samples, 2 channels)
if data.dtype != np.int16:
data = (data * 32767).astype(np.int16)
self.buffer = np.vstack([self.buffer, data])
# print(f"[AudioPlaybackTrack] Added {data.shape[0]} samples to buffer (buffer size={self.buffer.shape[0]})")
except asyncio.QueueEmpty:
pass
# print("[AudioPlaybackTrack] Queue empty, no new data to add")
if self.buffer.shape[0] >= frames:
outdata[:] = self.buffer[:frames]
self.buffer = self.buffer[frames:]
# print(f"[AudioPlaybackTrack] Outputting {frames} frames, buffer left {self.buffer.shape[0]}")
else:
outdata.fill(0) # not enough data, output silence
self.buffer = np.empty((0, 2), dtype=np.int16)
# print(f"[AudioPlaybackTrack] Not enough data, outputting silence")
async def close(self):
if self.stream:
print("[AudioPlaybackTrack] Stopping output stream")
self.stream.stop()
self.stream.close()
self.stream = None
if self._task_receive:
self._task_receive.cancel()
try:
await self._task_receive
except asyncio.CancelledError:
pass
self._task_receive = None
class AudioRecordTrack(MediaStreamTrack):
kind = "audio"
def __init__(self, mic_url, samplerate=48000, channels=1, blocksize=256):
super().__init__()
self.samplerate = samplerate
self.channels = channels
self.blocksize = blocksize
self.queue = asyncio.Queue(maxsize=10)
self.frame_count = 0
self.stream = sd.InputStream(
samplerate=self.samplerate,
channels=self.channels,
blocksize=self.blocksize,
dtype='int16',
device=mic_url,
)
self.stream.start()
self._produce_task = asyncio.create_task(self._produce_audio())
async def _produce_audio(self):
try:
while True:
if self.stream is None:
print("AudioRecordTrack stream is None, stopping _produce_audio task")
break
data, overflow = await asyncio.to_thread(self.stream.read, self.blocksize)
if overflow:
continue
try:
await self.queue.put(data.copy())
except asyncio.QueueFull:
try:
_ = self.queue.get_nowait()
await self.queue.put(data.copy())
except asyncio.QueueEmpty:
pass
except asyncio.CancelledError:
print("AudioRecordTrack _produce_audio task cancelled")
# Cleanup if needed
pass
async def recv(self):
data = await self.queue.get()
frame = AudioFrame(format="s16", layout="mono", samples=data.shape[0])
frame.planes[0].update(data.tobytes())
frame.sample_rate = self.samplerate
frame.time_base = Fraction(1, self.samplerate)
frame.pts = self.frame_count
self.frame_count += frame.samples
return frame
async def close(self):
# Cancel the _produce_audio task and wait for it to finish
if self._produce_task:
self._produce_task.cancel()
try:
await self._produce_task
except asyncio.CancelledError:
pass
self._produce_task = None
if self.stream:
self.stream.stop()
self.stream.close()
self.stream = None
class MediaCapture:
def __init__(self, cam_url, mic_name, playback_name):
self.cam_url = cam_url
self.fps = 30
self.width = 1280
self.height = 720
self.mic_index = get_input_device_by_name(mic_name)
self.playback_index = get_output_device_by_name(playback_name)
self.pcs = set()
self.playback_track = None
self.cam_relay = None
self.cam = None
self.mic_relay = None
self.mic = None
self.is_running = False
def _create_tracks(self):
if self.is_running:
return self.cam_relay.subscribe(self.cam.video), self.mic_relay.subscribe(self.mic)
if self.cam_relay is None:
self.cam = MediaPlayer(
self.cam_url,
format="v4l2",
options={"framerate": str(self.fps), "video_size": f"{self.width}x{self.height}"}
)
self.cam_relay = MediaRelay()
self.is_running = True
if self.mic_relay is None:
self.mic = AudioRecordTrack(self.mic_index)
self.mic_relay = MediaRelay()
audio_track = self.mic_relay.subscribe(self.mic)
video_track = self.cam_relay.subscribe(self.cam.video)
return video_track, audio_track
async def handle_offer(self, params):
offer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])
config = RTCConfiguration(iceServers=[
RTCIceServer(urls=["turn:10.21.40.25:3478"], username="turnuser", credential="turnpassword")
])
pc = RTCPeerConnection(configuration=config)
self.pcs.add(pc)
@pc.on("connectionstatechange")
async def on_connectionstatechange():
print("Connection state is", pc.connectionState)
if pc.connectionState == "failed":
await pc.close()
self.pcs.discard(pc)
@pc.on("track")
def on_track(track):
if track.kind == "audio":
print("Client audio track received")
if not self.playback_track:
self.playback_track = AudioPlaybackTrack(track, self.playback_index)
video, audio = self._create_tracks()
if video:
pc.addTrack(video)
if audio:
pc.addTrack(audio)
await pc.setRemoteDescription(offer)
answer = await pc.createAnswer()
await pc.setLocalDescription(answer)
return pc.localDescription.sdp, pc.localDescription.type
async def shutdown(self):
coros = [pc.close() for pc in self.pcs]
await asyncio.gather(*coros)
self.pcs.clear()
if self.playback_track:
await self.playback_track.close()
self.playback_track = None
if self.cam:
self.cam.video.stop()
self.cam = None
self.cam_relay = None
if self.mic:
await self.mic.close()
self.mic = None
self.mic_relay = None
self.is_running = False