Checks
Strands Version
1.20.0
Python Version
3.12
Operating System
macOS
Installation Method
pip
Steps to Reproduce
import asyncio
import logging
import os
from strands.experimental.bidi import BidiAgent
from strands.experimental.bidi.io import BidiAudioIO, BidiTextIO
from strands.experimental.bidi.models.openai_realtime import BidiOpenAIRealtimeModel
from strands.experimental.bidi.tools import stop_conversation
from strands_tools import calculator
# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv()
# Root logging: INFO and above, timestamped records, one explicit stream handler.
_console_handler = logging.StreamHandler()
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[_console_handler],
)

# Module-level logger used by the rest of this script.
logger = logging.getLogger(__name__)

# The "strands" logger is pinned to INFO (not WARNING), so the library's
# informational records are emitted alongside its warnings. Change this to
# logging.WARNING to reduce noise.
logging.getLogger("strands").setLevel(logging.INFO)
async def main() -> None:
    """Run one voice conversation with a bidirectional OpenAI realtime agent.

    Reads the API key from the ``OPENAI_API_KEY`` environment variable, builds
    a ``BidiOpenAIRealtimeModel`` and a ``BidiAgent`` equipped with the
    ``calculator`` and ``stop_conversation`` tools, then runs the agent with
    audio input and both audio and text outputs.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        Exception: Any error raised while the agent runs is logged with its
            traceback and re-raised. ``asyncio.CancelledError`` is logged and
            suppressed instead.
    """
    logger.info("🎤 Starting OpenAI Voice Agent...")

    # Get OpenAI API key; fail fast when it is missing or empty.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        logger.error("OPENAI_API_KEY environment variable not set")
        raise ValueError("OPENAI_API_KEY environment variable is required")
    logger.info("✅ API key loaded from environment")

    logger.info("🔧 Creating BidiOpenAIRealtimeModel...")
    audio_settings = {
        "input_rate": 24000,
        "output_rate": 24000,
        "channels": 1,
        "format": "pcm",
        "voice": "coral",
    }
    model = BidiOpenAIRealtimeModel(
        model_id="gpt-realtime",
        provider_config={"audio": audio_settings},
        client_config={"api_key": api_key},
    )
    logger.info("✅ Model created successfully")

    # stop_conversation tool allows user to verbally stop agent execution.
    logger.info("🤖 Creating BidiAgent with tools: calculator, stop_conversation")
    agent_tools = [calculator, stop_conversation]
    agent = BidiAgent(model=model, tools=agent_tools)
    logger.info("✅ Agent created successfully")

    logger.info("🎧 Setting up audio I/O...")
    audio_io = BidiAudioIO()
    text_io = BidiTextIO()
    logger.info("✅ Audio I/O ready")

    logger.info("🚀 Starting voice conversation...")
    logger.info("💡 Say 'stop conversation' to end the session")
    logger.info("⌨️ Press Ctrl+C for emergency stop")

    try:
        # Audio is the only input; output goes to both the audio and text sinks.
        await agent.run(
            inputs=[audio_io.input()],
            outputs=[audio_io.output(), text_io.output()],
        )
    except asyncio.CancelledError:
        # Cancellation is reported and deliberately not propagated.
        logger.info("🛑 Session cancelled by user")
    except Exception as err:
        # Log with traceback, then let the caller decide how to handle it.
        logger.exception(f"❌ Error during conversation: {err}")
        raise
    finally:
        logger.info("👋 Voice session ended")
if __name__ == "__main__":
    # Script entry point: run the session and log, rather than propagate,
    # a keyboard interrupt or any other failure.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("🛑 Interrupted by user (Ctrl+C)")
    except Exception as fatal:
        logger.exception(f"❌ Fatal error: {fatal}")
Expected Behavior
Audio and transcription both work.
Actual Behavior
interrupted
2025-12-20 13:28:39,725 - strands.experimental.bidi.models.openai_realtime - WARNING - error=<Input transcription failed for item 'item_CoqMYmtK90wuTOQVid1yI'.> | openai transcription failed
interrupted
2025-12-20 13:28:44,726 - strands.experimental.bidi.models.openai_realtime - WARNING - error=<Input transcription failed for item 'item_CoqMcUAxpSAmrVn4KALN6'.> | openai transcription failed
Additional Context
The API key is correct.
Possible Solution
No response
Related Issues
No response
Checks
Strands Version
1.20.0
Python Version
3.12
Operating System
macOS
Installation Method
pip
Steps to Reproduce
Expected Behavior
Audio and transcription both work.
Actual Behavior
interrupted
2025-12-20 13:28:39,725 - strands.experimental.bidi.models.openai_realtime - WARNING - error=<Input transcription failed for item 'item_CoqMYmtK90wuTOQVid1yI'.> | openai transcription failed
interrupted
2025-12-20 13:28:44,726 - strands.experimental.bidi.models.openai_realtime - WARNING - error=<Input transcription failed for item 'item_CoqMcUAxpSAmrVn4KALN6'.> | openai transcription failed
Additional Context
The API key is correct.
Possible Solution
No response
Related Issues
No response