Skip to content

Commit 68dc828

Browse files
committed
add speech-to-speech plugin
1 parent 34ccf52 commit 68dc828

12 files changed

Lines changed: 2548 additions & 2027 deletions

File tree

examples/openai_realtime_speech_to_speech/llm_audio_conversation/README.md renamed to examples/openai_realtime_speech_to_speech/README.md

File renamed without changes.

examples/openai_realtime_speech_to_speech/llm_audio_conversation/__init__.py renamed to examples/openai_realtime_speech_to_speech/__init__.py

File renamed without changes.

examples/openai_realtime_speech_to_speech/llm_audio_conversation/main.py

Lines changed: 0 additions & 110 deletions
This file was deleted.
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import asyncio
2+
import logging
3+
import os
4+
from uuid import uuid4
5+
from dotenv import load_dotenv
6+
from examples.utils import create_user, open_browser
7+
from getstream import Stream
8+
from getstream.plugins.sts.openai_realtime import OpenAIRealtime
9+
10+
11+
logging.basicConfig(
12+
level=logging.INFO,
13+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
14+
force=True, # Override any previous basicConfig calls
15+
)
16+
17+
# Enable verbose logging for the OpenAI Realtime plugin
18+
logging.getLogger("getstream.plugins.sts.openai_realtime.sts").setLevel(logging.INFO)
19+
20+
21+
async def main():
22+
"""Run a demo call with an OpenAI Speech-to-Speech agent attached."""
23+
24+
load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
25+
26+
# Initialize Stream client from env vars (STREAM_API_KEY / SECRET / BASE_URL)
27+
client = Stream.from_env()
28+
29+
user_id = f"user-{uuid4()}"
30+
create_user(client, user_id, "My User")
31+
logging.info("👤 Created user: %s", user_id)
32+
33+
user_token = client.create_token(user_id, expiration=3600)
34+
logging.info("🔑 Created token for user: %s", user_id)
35+
36+
bot_user_id = f"openai-realtime-speech-to-speech-bot-{uuid4()}"
37+
create_user(client, bot_user_id, "OpenAI Realtime Speech to Speech Bot")
38+
logging.info("🤖 Created bot user: %s", bot_user_id)
39+
40+
call_id = str(uuid4())
41+
logging.info("📞 Call ID: %s", call_id)
42+
43+
call = client.video.call("default", call_id)
44+
call.get_or_create(data={"created_by_id": bot_user_id})
45+
logging.info("📞 Call created: %s", call_id)
46+
47+
# Open demo browser so you can join from the UI
48+
open_browser(client.api_key, user_token, call_id)
49+
50+
sts_bot = OpenAIRealtime(
51+
api_key=os.getenv("OPENAI_API_KEY"),
52+
model="gpt-4o-realtime-preview",
53+
instructions="You are a friendly assistant; reply verbally in a short sentence.",
54+
voice="alloy",
55+
)
56+
57+
@sts_bot.on("connected")
58+
async def _on_connected():
59+
print("✅ CONNECTED EVENT RECEIVED")
60+
logging.info("✅ Bot connected successfully")
61+
62+
@sts_bot.on("disconnected")
63+
async def _on_disconnected():
64+
print("❌ DISCONNECTED EVENT RECEIVED")
65+
logging.info("❌ Bot disconnected")
66+
67+
@sts_bot.on("error")
68+
async def _on_error(error):
69+
print(f"💥 ERROR EVENT RECEIVED: {error}")
70+
logging.error("💥 Bot error: %s", error)
71+
72+
@sts_bot.on("session.created")
73+
@sts_bot.on("session.updated")
74+
@sts_bot.on("conversation.item.created")
75+
@sts_bot.on("response.created")
76+
@sts_bot.on("response.done")
77+
@sts_bot.on("call.session_participant_joined")
78+
@sts_bot.on("call.session_participant_left")
79+
async def _on_openai_event(event):
80+
print(f"🔔 Event received: {event.type}")
81+
print(f" Event data: {event}")
82+
logging.info("🔔 Event: %s", event.type)
83+
84+
try:
85+
logging.info("Connecting to OpenAI Realtime...")
86+
87+
# Check if API key is set
88+
if not os.getenv("OPENAI_API_KEY"):
89+
logging.error("❌ OPENAI_API_KEY not found in environment")
90+
return
91+
92+
await sts_bot.connect(call, agent_user_id=bot_user_id)
93+
logging.info("🎧 Listening for responses... (Press Ctrl+C to stop)")
94+
logging.info("💡 Try speaking in the browser to generate audio events!")
95+
96+
while sts_bot.is_connected:
97+
await asyncio.sleep(1)
98+
99+
except KeyboardInterrupt: # noqa: WPS420
100+
logging.info("\n⏹️ Stopping OpenAI Realtime Speech to Speech bot…")
101+
except Exception as e: # noqa: BLE001
102+
logging.exception("❌ Error: %s", e)
103+
finally:
104+
logging.info("Cleaning up...")
105+
await sts_bot.close()
106+
client.delete_users([user_id, bot_user_id])
107+
logging.info("Cleanup complete")
108+
109+
110+
if __name__ == "__main__":
111+
asyncio.run(main())

examples/openai_realtime_speech_to_speech/llm_audio_conversation/pyproject.toml renamed to examples/openai_realtime_speech_to_speech/pyproject.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@ requires-python = ">=3.9"
77
license = {text = "MIT"}
88

99
dependencies = [
10-
"getstream[webrtc]",
11-
"getstream-plugins-stt-deepgram",
12-
"getstream-plugins-tts-elevenlabs",
13-
"getstream-plugins-vad-silero",
1410
"python-dotenv>=1.0.0",
1511
"aiortc>=1.10.1",
1612
"numpy>=2.0.0",

getstream/plugins/sts/__init__.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import abc
2+
import logging
3+
4+
from pyee.asyncio import AsyncIOEventEmitter
5+
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
class STS(AsyncIOEventEmitter, abc.ABC):
11+
"""Speech-to-Speech (full duplex) base class.
12+
13+
Implementations are expected to:
14+
• establish an audio session (usually via Stream Video `Call.connect_openai`)
15+
• emit high-level events coming from the AI agent (for example
16+
``conversation.updated`` or ``error``)
17+
• optionally expose helper methods like ``update_session`` or
18+
``send_user_message``.
19+
20+
Events emitted by *all* STS implementations:
21+
- *connected*: fired once the underlying websocket is ready
22+
- *disconnected*: fired when the websocket is closed (graceful or error)
23+
- *error*: emitted for any exception that bubbles up
24+
- *<any other event type coming from the provider>*: forwarded verbatim
25+
"""
26+
27+
def __init__(self):
28+
super().__init__()
29+
self._is_connected = False
30+
31+
# ---------------------------------------------------------------------
32+
# Lifecycle helpers
33+
# ---------------------------------------------------------------------
34+
@abc.abstractmethod
35+
async def connect(self, *args, **kwargs): # pragma: no cover
36+
"""Establish the realtime connection (provider-specific)."""
37+
38+
@abc.abstractmethod
39+
async def close(self): # pragma: no cover
40+
"""Close the connection and release all resources."""
41+
42+
# Derived classes should set ``self._is_connected`` accordingly so that
43+
# embedders can introspect the state.
44+
# ---------------------------------------------------------------------
45+
46+
@property
47+
def is_connected(self) -> bool:
48+
"""Return True if the realtime session is currently active."""
49+
return self._is_connected
50+
51+
# Public re-export
52+
__all__ = ["STS"]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .sts import OpenAIRealtime
2+
3+
__all__ = ["OpenAIRealtime"]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[build-system]
2+
requires = ["setuptools>=42", "wheel"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "getstream-plugins-sts-openai-realtime"
7+
version = "0.1.0"
8+
description = "OpenAI Realtime STS plugin for GetStream"
9+
readme = "README.md"
10+
requires-python = ">=3.9"
11+
license = {text = "MIT"}
12+
dependencies = [
13+
"getstream[webrtc]",
14+
15+
]
16+
17+
[project.optional-dependencies]
18+
test = [
19+
"pytest>=7.0.0",
20+
"pytest-asyncio>=0.18.0",
21+
]
22+
23+
[tool.uv.sources]
24+
getstream = { workspace = true }
25+
getstream-plugins-sts-openai-realtime = { workspace = true }

0 commit comments

Comments
 (0)