Skip to content

Commit 24d5b94

Browse files
committed
fix example with function calling and add tests
1 parent 21405b1 commit 24d5b94

12 files changed

Lines changed: 309 additions & 335 deletions

File tree

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,15 @@ call.get_or_create(
9595
)
9696
```
9797

98+
### Getting Response Data
99+
100+
Many calls return a `StreamResponse` object that wraps the response dataclass specific to the method called. You can access the wrapped model through the `data` attribute:
101+
102+
```python
103+
response: StreamResponse[StartClosedCaptionsResponse] = call.start_closed_captions()
104+
response.data # Gives the StartClosedCaptionsResponse model
105+
```
106+
98107
### App configuration
99108

100109
```python

examples/env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Stream API credentials
22
STREAM_API_KEY=your_stream_api_key_here
33
STREAM_API_SECRET=your_stream_api_secret_here
4-
STREAM_BASE_URL=stream_base_url_here
4+
EXAMPLE_BASE_URL=example_base_url_here
55

66
# Deepgram API credentials
77
DEEPGRAM_API_KEY=your_deepgram_api_key_here

examples/llm_audio_conversation/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ This example demonstrates how to build a real-time AI conversation bot using Str
5353
```env
5454
STREAM_API_KEY=your_actual_stream_api_key
5555
STREAM_API_SECRET=your_actual_stream_api_secret
56-
STREAM_BASE_URL=https://chat.stream-io-api.com/
56+
STREAM_BASE_URL=the_stream_base_url
5757
DEEPGRAM_API_KEY=your_actual_deepgram_api_key
5858
ELEVENLABS_API_KEY=your_actual_elevenlabs_api_key
5959
OPENAI_API_KEY=your_actual_openai_api_key

examples/openai_realtime_speech_to_speech/main.py

Lines changed: 56 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
from dotenv import load_dotenv
66
from examples.utils import create_user, open_browser
77
from getstream import Stream
8+
from getstream.models import StartClosedCaptionsResponse
89
from getstream.plugins.sts.openai_realtime import OpenAIRealtime
10+
from dataclasses import asdict
11+
import json
912

1013

1114
logging.basicConfig(
@@ -54,33 +57,6 @@ async def main():
5457
voice="alloy",
5558
)
5659

57-
@sts_bot.on("connected")
58-
async def _on_connected():
59-
print("✅ CONNECTED EVENT RECEIVED")
60-
logging.info("✅ Bot connected successfully")
61-
62-
@sts_bot.on("disconnected")
63-
async def _on_disconnected():
64-
print("❌ DISCONNECTED EVENT RECEIVED")
65-
logging.info("❌ Bot disconnected")
66-
67-
@sts_bot.on("error")
68-
async def _on_error(error):
69-
print(f"💥 ERROR EVENT RECEIVED: {error}")
70-
logging.error("💥 Bot error: %s", error)
71-
72-
@sts_bot.on("session.created")
73-
@sts_bot.on("session.updated")
74-
@sts_bot.on("conversation.item.created")
75-
@sts_bot.on("response.created")
76-
@sts_bot.on("response.done")
77-
@sts_bot.on("call.session_participant_joined")
78-
@sts_bot.on("call.session_participant_left")
79-
async def _on_openai_event(event):
80-
print(f"🔔 Event received: {event.type}")
81-
print(f" Event data: {event}")
82-
logging.info("🔔 Event: %s", event.type)
83-
8460
try:
8561
logging.info("Connecting to OpenAI Realtime...")
8662

@@ -89,20 +65,66 @@ async def _on_openai_event(event):
8965
logging.error("❌ OPENAI_API_KEY not found in environment")
9066
return
9167

92-
await sts_bot.connect(call, agent_user_id=bot_user_id)
93-
logging.info("🎧 Listening for responses... (Press Ctrl+C to stop)")
94-
logging.info("💡 Try speaking in the browser to generate audio events!")
95-
96-
while sts_bot.is_connected:
97-
await asyncio.sleep(1)
68+
async with await sts_bot.connect(call, agent_user_id=bot_user_id) as connection:
69+
tools = [
70+
{
71+
"type": "function",
72+
"name": "start_closed_captions",
73+
"description": "start closed captions for the call",
74+
"parameters": {
75+
"type": "object",
76+
"properties": {},
77+
"required": [],
78+
},
79+
}
80+
]
81+
82+
await sts_bot.update_session(
83+
turn_detection={
84+
"type": "semantic_vad",
85+
"eagerness": "low",
86+
"create_response": True,
87+
"interrupt_response": True,
88+
},
89+
tools=tools,
90+
)
91+
92+
logging.info("🎧 Listening for responses... (Press Ctrl+C to stop)")
93+
logging.info("💡 Try speaking in the browser – ask it something like 'start closed captions' to trigger the function call.")
94+
95+
async def start_closed_captions() -> StartClosedCaptionsResponse:
96+
"""Helper that starts closed captions for the call."""
97+
return call.start_closed_captions().data
98+
99+
async for event in connection:
100+
logging.info("🔔 Event received: %s", event.type)
101+
102+
if (
103+
event.type == "response.done"
104+
and event.response.output is not None
105+
and len(event.response.output) > 0
106+
and event.response.output[0].type == "function_call"
107+
):
108+
tool_call_id = event.response.output[0].call_id
109+
110+
if event.response.output[0].name == "start_closed_captions":
111+
logging.info("🛠 Assistant requested start_closed_captions()")
112+
113+
result = await start_closed_captions()
114+
115+
# Send the tool result back to the assistant
116+
await sts_bot.send_function_call_output(tool_call_id, result.to_json())
117+
118+
logging.info("🛠 Replied to tool call with result: %s", result)
119+
120+
98121

99122
except KeyboardInterrupt: # noqa: WPS420
100123
logging.info("\n⏹️ Stopping OpenAI Realtime Speech to Speech bot…")
101124
except Exception as e: # noqa: BLE001
102125
logging.exception("❌ Error: %s", e)
103126
finally:
104127
logging.info("Cleaning up...")
105-
await sts_bot.close()
106128
client.delete_users([user_id, bot_user_id])
107129
logging.info("Cleanup complete")
108130

examples/utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55
This module contains common functions that can be reused across different examples.
66
"""
77

8+
import os
89
import webbrowser
910
from urllib.parse import urlencode
1011
from getstream.models import UserRequest
1112
from getstream.stream import Stream
13+
from dotenv import load_dotenv
1214

15+
load_dotenv()
1316

1417
def create_user(client: Stream, id: str, name: str) -> None:
1518
"""
@@ -36,7 +39,7 @@ def open_browser(api_key: str, token: str, call_id: str) -> str:
3639
Returns:
3740
The URL that was opened
3841
"""
39-
base_url = "https://pronto.getstream.io/bare/join/"
42+
base_url = f"{os.getenv('EXAMPLE_BASE_URL')}/join/"
4043
params = {"api_key": api_key, "token": token, "skip_lobby": "true"}
4144

4245
url = f"{base_url}{call_id}?{urlencode(params)}"

getstream/plugins/sts/__init__.py

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,41 +8,28 @@
88

99

1010
class STS(AsyncIOEventEmitter, abc.ABC):
11-
"""Speech-to-Speech (full duplex) base class.
12-
13-
Implementations are expected to:
14-
• establish an audio session (usually via Stream Video `Call.connect_openai`)
15-
• emit high-level events coming from the AI agent (for example
16-
``conversation.updated`` or ``error``)
17-
• optionally expose helper methods like ``update_session`` or
18-
``send_user_message``.
19-
20-
Events emitted by *all* STS implementations:
21-
- *connected*: fired once the underlying websocket is ready
22-
- *disconnected*: fired when the websocket is closed (graceful or error)
23-
- *error*: emitted for any exception that bubbles up
24-
- *<any other event type coming from the provider>*: forwarded verbatim
11+
"""Base class for Speech-to-Speech (STS) implementations.
12+
13+
This abstract base class provides the foundation for implementing real-time
14+
speech-to-speech communication with AI agents. It handles event emission
15+
and connection state management.
16+
17+
Key Features:
18+
- Event-driven architecture using AsyncIOEventEmitter
19+
- Connection state tracking
20+
- Standardized event interface
21+
22+
Implementations should:
23+
1. Establish and manage the audio session
24+
2. Handle provider-specific authentication and setup
25+
3. Emit appropriate events for state changes and interactions
26+
4. Implement any provider-specific helper methods
2527
"""
2628

2729
def __init__(self):
2830
super().__init__()
2931
self._is_connected = False
3032

31-
# ---------------------------------------------------------------------
32-
# Lifecycle helpers
33-
# ---------------------------------------------------------------------
34-
@abc.abstractmethod
35-
async def connect(self, *args, **kwargs): # pragma: no cover
36-
"""Establish the realtime connection (provider-specific)."""
37-
38-
@abc.abstractmethod
39-
async def close(self): # pragma: no cover
40-
"""Close the connection and release all resources."""
41-
42-
# Derived classes should set ``self._is_connected`` accordingly so that
43-
# embedders can introspect the state.
44-
# ---------------------------------------------------------------------
45-
4633
@property
4734
def is_connected(self) -> bool:
4835
"""Return True if the realtime session is currently active."""

0 commit comments

Comments
 (0)