Skip to content

Commit 6813877

Browse files
authored
Merge pull request #64 from ghost-in-moss/feat/audio-rate-transform
feat: update speaker and listener with rate conversion
2 parents 09808d2 + 98e4a5d commit 6813877

File tree

10 files changed

+86
-26
lines changed

10 files changed

+86
-26
lines changed

RELEASES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
first release version.
66

7+
## v0.1.8
8+
9+
* update speaker and listener with audio sample rate conversion
10+
711
## v0.1.7
812

913
* update speaker and listener with pyaudio device_index argument

ghostos/framework/audio/pyaudio_io/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
def get_pyaudio_pcm16_listener(
66
rate: int = 24000,
7+
output_rate: int = 24000,
78
interval: float = 0.5,
89
channels: int = 1,
910
chunk_size: int = 1024,
@@ -15,7 +16,8 @@ def get_pyaudio_pcm16_listener(
1516
raise ImportError(f"pyaudio package is required. run `pip install ghostos[audio]`")
1617
from ghostos.framework.audio.pyaudio_io.listener import PyAudioPCM16Listener
1718
return PyAudioPCM16Listener(
18-
rate=rate,
19+
sample_rate=rate,
20+
output_rate=output_rate,
1921
interval=interval,
2022
channels=channels,
2123
chunk_size=chunk_size,
@@ -24,7 +26,8 @@ def get_pyaudio_pcm16_listener(
2426

2527

2628
def get_pyaudio_pcm16_speaker(
27-
rate: int = 24000,
29+
input_rate: int = 24000,
30+
output_rate: int = 24000,
2831
buffer_size: int = 1024 * 5,
2932
channels: int = 1,
3033
output_device_index: Union[int, None] = None,
@@ -35,7 +38,8 @@ def get_pyaudio_pcm16_speaker(
3538
raise ImportError(f"pyaudio package is required. run `pip install ghostos[audio]`")
3639
from ghostos.framework.audio.pyaudio_io.speaker import PyAudioPCM16Speaker
3740
return PyAudioPCM16Speaker(
38-
rate=rate,
41+
input_rate=input_rate,
42+
output_rate=output_rate,
3943
buffer_size=buffer_size,
4044
channels=channels,
4145
output_device_index=output_device_index,

ghostos/framework/audio/pyaudio_io/example.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99

1010
if __name__ == '__main__':
1111

12-
listener = PyAudioPCM16Listener()
12+
listener = PyAudioPCM16Listener(
13+
sample_rate=44100,
14+
output_rate=24000,
15+
)
1316
ticker = Timeleft(0)
1417

1518
heard = BytesIO()
@@ -46,7 +49,7 @@ def read() -> Union[bytes, None]:
4649
return heard.read(1024)
4750

4851

49-
speaker = PyAudioPCM16Speaker()
52+
speaker = PyAudioPCM16Speaker(input_rate=24000, output_rate=44100)
5053
print("start speaking, %f" % ticker.passed())
5154
with speaker.speak(read) as speaking:
5255
speaking.wait()

ghostos/framework/audio/pyaudio_io/listener.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
try:
22
from pyaudio import PyAudio, paInt16
3+
from scipy.signal import resample
34
except ImportError:
45
raise ImportError(f"Pyaudio is required, please install pyaudio or ghostos[audio] first")
56

7+
import numpy as np
68
from typing import Callable, Optional
79
from ghostos.abcd.realtime import Listener, Listening
810
from threading import Thread, Event
@@ -18,25 +20,28 @@ class PyAudioPCM16Listener(Listener):
1820

1921
def __init__(
2022
self,
21-
rate: int = 24000,
23+
sample_rate: int = 24000,
24+
output_rate: int = 24000,
2225
chunk_size: int = CHUNK,
2326
interval: float = 0.5,
2427
channels: int = CHANNELS,
2528
input_device_index: Optional[int] = None,
2629
):
27-
self.rate = rate
30+
self.sample_rate = sample_rate
31+
self.output_rate = output_rate
2832
self.chunk_size = chunk_size
2933
self.stream = PyAudio().open(
3034
format=paInt16,
3135
channels=channels,
32-
rate=self.rate,
36+
rate=self.sample_rate,
3337
input=True,
3438
input_device_index=input_device_index,
3539
)
3640
self.interval = interval
3741

3842
def listen(self, sender: Callable[[bytes], None]) -> Listening:
39-
return PyAudioPCM16Listening(self.stream, sender, self.rate, self.chunk_size, self.interval)
43+
return PyAudioPCM16Listening(self.stream, sender, self.sample_rate, self.output_rate, self.chunk_size,
44+
self.interval)
4045

4146
def __del__(self):
4247
self.stream.close()
@@ -48,14 +53,16 @@ def __init__(
4853
self,
4954
stream,
5055
sender: Callable[[bytes], None],
51-
rate: int = 24000,
56+
sample_rate: int = 24000,
57+
output_rate: int = 24000,
5258
chunk: int = CHUNK,
5359
interval: float = 0.5,
5460
):
5561
self.sender = sender
5662
self.stream = stream
5763
self.interval = interval
58-
self.rate = rate
64+
self.sample_rate = sample_rate
65+
self.output_rate = output_rate
5966
self.chunk = chunk
6067
self.stopped = Event()
6168
self.thread = Thread(target=self._listening)
@@ -64,12 +71,25 @@ def _listening(self):
6471
self.stream.start_stream()
6572
while not self.stopped.is_set():
6673
buffer = BytesIO()
67-
for i in range(int((self.rate / self.chunk) * self.interval)):
74+
for i in range(int((self.sample_rate / self.chunk) * self.interval)):
6875
data = self.stream.read(self.chunk, exception_on_overflow=False)
6976
buffer.write(data)
70-
self.sender(buffer.getvalue())
77+
parsed = self._parse_output_data(buffer.getvalue())
78+
self.sender(parsed)
7179
self.stream.stop_stream()
7280

81+
def _parse_output_data(self, data: bytes) -> bytes:
82+
if self.sample_rate == self.output_rate:
83+
return data
84+
audio_data = np.frombuffer(data, dtype=np.int16)
85+
num_samples = int(len(audio_data) * self.output_rate / self.sample_rate)
86+
87+
# 使用 resample 进行重新采样
88+
resampled_audio = resample(audio_data, num_samples)
89+
90+
# 导出为二进制数据
91+
return resampled_audio.astype(np.int16)
92+
7393
def __enter__(self):
7494
self.thread.start()
7595

ghostos/framework/audio/pyaudio_io/speaker.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
try:
22
from pyaudio import PyAudio, paInt16
3+
from scipy.signal import resample
34
except ImportError:
45
raise ImportError(f"Pyaudio is required, please install pyaudio or ghostos[audio] first")
56

7+
import numpy as np
68
from typing import Callable, Union
79
from ghostos.abcd.realtime import Speaker, Speaking
810
from threading import Thread, Event
@@ -12,33 +14,43 @@ class PyAudioPCM16Speaker(Speaker):
1214

1315
def __init__(
1416
self,
15-
rate: int = 24000,
17+
input_rate: int = 24000,
18+
output_rate: int = 24000,
1619
buffer_size: int = 4096,
1720
channels: int = 1,
1821
output_device_index: Union[int, None] = None,
1922
):
20-
self.rate = rate
23+
self.input_rate = input_rate
24+
self.output_rate = output_rate
2125
self.buffer_size = buffer_size
2226
self.stream = PyAudio().open(
2327
format=paInt16,
2428
channels=channels,
25-
rate=self.rate,
29+
rate=self.output_rate,
2630
output=True,
2731
output_device_index=output_device_index,
2832
)
2933

3034
def speak(self, queue: Callable[[], Union[bytes, None]]) -> Speaking:
31-
return PyAudioPCM16Speaking(self.stream, queue, self.rate, self.buffer_size)
35+
return PyAudioPCM16Speaking(self.stream, queue, self.input_rate, self.output_rate, self.buffer_size)
3236

3337
def __del__(self):
3438
self.stream.close()
3539

3640

3741
class PyAudioPCM16Speaking(Speaking):
3842

39-
def __init__(self, stream, queue: Callable[[], Union[bytes, None]], rate: int = 24000, buffer_size: int = 0):
43+
def __init__(
44+
self,
45+
stream,
46+
queue: Callable[[], Union[bytes, None]],
47+
input_rate: int = 24000,
48+
output_rate: int = 24000,
49+
buffer_size: int = 0,
50+
):
4051
self.stream = stream
41-
self.rate = rate
52+
self.input_rate = input_rate
53+
self.output_rate = output_rate
4254
self.buffer_size = buffer_size
4355
self.queue = queue
4456
self.stop = Event()
@@ -52,9 +64,22 @@ def _speaking(self):
5264
data = self.queue()
5365
if not data:
5466
break
55-
self.stream.write(data)
67+
parsed = self._parse_output_data(data)
68+
self.stream.write(parsed)
5669
self._done = True
5770

71+
def _parse_output_data(self, data: bytes) -> bytes:
72+
if self.input_rate == self.output_rate:
73+
return data
74+
audio_data = np.frombuffer(data, dtype=np.int16)
75+
num_samples = int(len(audio_data) * self.output_rate / self.input_rate)
76+
77+
# 使用 resample 进行重新采样
78+
resampled_audio = resample(audio_data, num_samples)
79+
80+
# 导出为二进制数据
81+
return resampled_audio.astype(np.int16)
82+
5883
def __enter__(self):
5984
self.thread.start()
6085
return self

ghostos/framework/openai_realtime/client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def save_audio_data(self, item_id: str, audio_data: bytes) -> None:
161161
with wave.open(buffer, 'wb') as f:
162162
f.setnchannels(1)
163163
f.setsampwidth(2)
164+
# todo: save rate by configs
164165
f.setframerate(24000)
165166
f.writeframes(audio_data)
166167

ghostos/prototypes/streamlitapp/pages/chat_with_ghost.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,13 +259,15 @@ def get_realtime_app(conversation: Conversation) -> Optional[RealtimeApp]:
259259
audio_input = app_conf.audio_input
260260
audio_output = app_conf.audio_output
261261
speaker = get_pyaudio_pcm16_speaker(
262-
rate=audio_output.sample_rate,
262+
input_rate=audio_output.input_rate,
263+
output_rate=audio_output.output_rate,
263264
buffer_size=audio_output.buffer_size,
264265
channels=audio_output.channels,
265266
output_device_index=audio_output.output_device_index,
266267
)
267268
listener = get_pyaudio_pcm16_listener(
268269
rate=audio_input.sample_rate,
270+
output_rate=audio_input.output_rate,
269271
interval=audio_input.interval,
270272
channels=audio_input.channels,
271273
chunk_size=audio_input.chunk_size,

ghostos/prototypes/streamlitapp/resources.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,16 @@ def get_container() -> Container:
2020

2121
class AudioInputConf(BaseModel):
2222
sample_rate: int = Field(24000)
23+
output_rate: int = Field(24000)
2324
interval: float = Field(0.5)
2425
channels: int = Field(1)
2526
chunk_size: int = Field(1024)
2627
input_device_index: Union[int, None] = Field(None)
2728

2829

2930
class AudioOutputConf(BaseModel):
30-
sample_rate: int = Field(24000)
31+
input_rate: int = Field(24000)
32+
output_rate: int = Field(24000)
3133
channels: int = Field(1)
3234
buffer_size: int = Field(1024 * 5)
3335
output_device_index: Union[int, None] = Field(None)

ghostos/prototypes/streamlitapp/widgets/messages.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,6 @@ def render_message_item(msg: Message, debug: bool):
188188
render_user_message(msg, debug)
189189
elif msg.role == Role.SYSTEM.value:
190190
render_sys_message(msg, debug)
191-
elif msg.role == Role.FUNCTION.value:
192-
render_func_message(msg, debug)
193191
else:
194192
render_other_message(msg, debug)
195193

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,14 @@ spherov2 = { version = "^0.12.1", optional = true }
4444
bleak = [
4545
{ version = "^0.22.3", python = ">=3.10,<3.14", optional = true }
4646
]
47+
scipy = { version = "^1.15.1", optional = true }
4748

4849
[tool.poetry.scripts]
4950
ghostos = "ghostos.scripts.cli:main"
5051

5152
[tool.poetry.extras]
52-
realtime = ['pyaudio']
53-
sphero = ["spherov2", "bleak", "pyaudio"]
53+
realtime = ['pyaudio', "scipy"]
54+
sphero = ["spherov2", "bleak"]
5455

5556

5657
[tool.poetry.group.dev.dependencies]

0 commit comments

Comments
 (0)