Skip to content

Commit 5e54235

Browse files
committed
feat: update pyaudio listener and speaker with device_index argument
1 parent 4272139 commit 5e54235

File tree

8 files changed

+93
-16
lines changed

8 files changed

+93
-16
lines changed

RELEASES.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44

55
first release version.
66

7+
## v0.1.7
8+
9+
* update speaker and listener with pyaudio device_index argument
10+
* add audio_input and audio_output options to streamlit_app.yml
11+
712
## v0.1.6
813

914
* upgrade openai package to 1.59, support developer message.
Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,42 @@
1+
from typing import Union
12
from ghostos.abcd.realtime import Speaker, Listener
23

34

4-
def get_pyaudio_pcm16_listener(rate: int = 24000, interval: float = 0.5) -> Listener:
5+
def get_pyaudio_pcm16_listener(
6+
rate: int = 24000,
7+
interval: float = 0.5,
8+
channels: int = 1,
9+
chunk_size: int = 1024,
10+
input_device_index: Union[int, None] = None,
11+
) -> Listener:
512
try:
613
import pyaudio
714
except ImportError:
815
raise ImportError(f"pyaudio package is required. run `pip install ghostos[audio]`")
916
from ghostos.framework.audio.pyaudio_io.listener import PyAudioPCM16Listener
10-
return PyAudioPCM16Listener(rate, interval=interval)
17+
return PyAudioPCM16Listener(
18+
rate=rate,
19+
interval=interval,
20+
channels=channels,
21+
chunk_size=chunk_size,
22+
input_device_index=input_device_index,
23+
)
1124

1225

13-
def get_pyaudio_pcm16_speaker(rate: int = 24000, buffer_size: int = 1024 * 5) -> Speaker:
26+
def get_pyaudio_pcm16_speaker(
27+
rate: int = 24000,
28+
buffer_size: int = 1024 * 5,
29+
channels: int = 1,
30+
output_device_index: Union[int, None] = None,
31+
) -> Speaker:
1432
try:
1533
import pyaudio
1634
except ImportError:
1735
raise ImportError(f"pyaudio package is required. run `pip install ghostos[audio]`")
1836
from ghostos.framework.audio.pyaudio_io.speaker import PyAudioPCM16Speaker
19-
return PyAudioPCM16Speaker(rate, buffer_size)
37+
return PyAudioPCM16Speaker(
38+
rate=rate,
39+
buffer_size=buffer_size,
40+
channels=channels,
41+
output_device_index=output_device_index,
42+
)

ghostos/framework/audio/pyaudio_io/listener.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
except ImportError:
44
raise ImportError(f"Pyaudio is required, please install pyaudio or ghostos[audio] first")
55

6-
from typing import Callable, Union
6+
from typing import Callable, Optional
77
from ghostos.abcd.realtime import Listener, Listening
88
from threading import Thread, Event
99
from io import BytesIO
@@ -16,14 +16,22 @@
1616

1717
class PyAudioPCM16Listener(Listener):
1818

19-
def __init__(self, rate: int = 24000, chunk_size: int = CHUNK, interval: float = 0.5):
19+
def __init__(
20+
self,
21+
rate: int = 24000,
22+
chunk_size: int = CHUNK,
23+
interval: float = 0.5,
24+
channels: int = CHANNELS,
25+
input_device_index: Optional[int] = None,
26+
):
2027
self.rate = rate
2128
self.chunk_size = chunk_size
2229
self.stream = PyAudio().open(
2330
format=paInt16,
24-
channels=1,
31+
channels=channels,
2532
rate=self.rate,
2633
input=True,
34+
input_device_index=input_device_index,
2735
)
2836
self.interval = interval
2937

ghostos/framework/audio/pyaudio_io/speaker.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,21 @@
1010

1111
class PyAudioPCM16Speaker(Speaker):
1212

13-
def __init__(self, rate: int = 24000, buffer_size: int = 4096):
13+
def __init__(
14+
self,
15+
rate: int = 24000,
16+
buffer_size: int = 4096,
17+
channels: int = 1,
18+
output_device_index: Union[int, None] = None,
19+
):
1420
self.rate = rate
1521
self.buffer_size = buffer_size
1622
self.stream = PyAudio().open(
1723
format=paInt16,
18-
channels=1,
24+
channels=channels,
1925
rate=self.rate,
2026
output=True,
27+
output_device_index=output_device_index,
2128
)
2229

2330
def speak(self, queue: Callable[[], Union[bytes, None]]) -> Speaking:

ghostos/ghosts/moss_agent/agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,12 @@ def _get_instruction_prompter(self, session: Session, runtime: MossRuntime) -> P
207207
title="Meta Instruction",
208208
content=AGENT_META_INTRODUCTION,
209209
).with_children(
210+
# ghostos meta instruction.
210211
TextPrmt(title="GhostOS", content=GHOSTOS_INTRODUCTION),
212+
# the information about moss
211213
TextPrmt(title="MOSS", content=MOSS_INTRODUCTION),
212-
# code context
214+
215+
# the moss providing context prompter.
213216
get_moss_context_prompter("Code Context", runtime),
214217
),
215218
# agent prompt

ghostos/prototypes/streamlitapp/pages/chat_with_ghost.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,22 @@ def get_realtime_app(conversation: Conversation) -> Optional[RealtimeApp]:
255255

256256
from ghostos.framework.audio import get_pyaudio_pcm16_speaker, get_pyaudio_pcm16_listener
257257
from ghostos.framework.openai_realtime import get_openai_realtime_app
258-
speaker = get_pyaudio_pcm16_speaker()
259-
listener = get_pyaudio_pcm16_listener()
258+
app_conf = get_app_conf()
259+
audio_input = app_conf.audio_input
260+
audio_output = app_conf.audio_output
261+
speaker = get_pyaudio_pcm16_speaker(
262+
rate=audio_output.sample_rate,
263+
buffer_size=audio_output.buffer_size,
264+
channels=audio_output.channels,
265+
output_device_index=audio_output.output_device_index,
266+
)
267+
listener = get_pyaudio_pcm16_listener(
268+
rate=audio_input.sample_rate,
269+
interval=audio_input.interval,
270+
channels=audio_input.channels,
271+
chunk_size=audio_input.chunk_size,
272+
input_device_index=audio_input.input_device_index,
273+
)
260274
vad_mode = True
261275
return get_openai_realtime_app(conversation, vad_mode=vad_mode, listener=listener, speaker=speaker)
262276

ghostos/prototypes/streamlitapp/resources.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
from typing import Optional, Dict, Tuple, List
1+
from typing import Optional, Dict, Tuple, List, Union
22

33
from enum import Enum
4-
from pydantic import Field
4+
from pydantic import BaseModel, Field
55
import streamlit as st
66
from ghostos.container import Container
77
from ghostos.prototypes.streamlitapp.utils.session import Singleton
88
from ghostos.contracts.configs import YamlConfig, Configs
99
from ghostos.contracts.assets import ImageAssets, FileInfo, AudioAssets
1010
from ghostos.contracts.documents import DocumentRegistry, Documents
1111
from PIL.Image import Image as ImageType
12-
from ghostos.core.messages.message_classes import ImageAssetMessage
1312
from ghostos.helpers import GHOSTOS_DOMAIN, uuid
1413
from streamlit.runtime.uploaded_file_manager import DeletedFile, UploadedFile
1514

@@ -19,6 +18,21 @@ def get_container() -> Container:
1918
return Singleton.get(Container, st.session_state)
2019

2120

21+
class AudioInputConf(BaseModel):
22+
sample_rate: int = Field(24000)
23+
interval: float = Field(0.5)
24+
channels: int = Field(1)
25+
chunk_size: int = Field(1024)
26+
input_device_index: Union[int, None] = Field(None)
27+
28+
29+
class AudioOutputConf(BaseModel):
30+
sample_rate: int = Field(24000)
31+
channels: int = Field(1)
32+
buffer_size: int = Field(1024 * 5)
33+
output_device_index: Union[int, None] = Field(None)
34+
35+
2236
class AppConf(YamlConfig):
2337
relative_path = "streamlit_app.yml"
2438

@@ -29,6 +43,9 @@ class AppConf(YamlConfig):
2943
default_factory=dict,
3044
)
3145

46+
audio_input: AudioInputConf = Field(default_factory=AudioInputConf)
47+
audio_output: AudioOutputConf = Field(default_factory=AudioOutputConf)
48+
3249
class BoolOpts(str, Enum):
3350
HELP_MODE = "ghostos.streamlit.app.help_mode"
3451
"""global help mode"""

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "ghostos"
3-
version = "0.1.6"
3+
version = "0.1.7"
44
description = "A framework offers an operating system simulator with a Python Code Interface for AI Agents"
55
authors = ["zhuming <thirdgerb@gmail.com>", "Nile Zhou <nilezhou123@gmail.com>"]
66
license = "MIT"

0 commit comments

Comments
 (0)