Skip to content

Commit f8abbaa

Browse files
authored
Merge pull request #14 from mkmeral/bidi-gemini
feat(gemini): Add bidirectional gemini model
2 parents ce97c1d + 4648327 commit f8abbaa

7 files changed

Lines changed: 933 additions & 19 deletions

File tree

src/strands/experimental/bidirectional_streaming/agent/agent.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from ....types.traces import AttributeValue
3232
from ..event_loop.bidirectional_event_loop import start_bidirectional_connection, stop_bidirectional_connection
3333
from ..models.bidirectional_model import BidirectionalModel
34-
from ..types.bidirectional_streaming import AudioInputEvent, BidirectionalStreamEvent
34+
from ..types.bidirectional_streaming import AudioInputEvent, BidirectionalStreamEvent, ImageInputEvent
3535

3636
logger = logging.getLogger(__name__)
3737

@@ -359,18 +359,16 @@ async def start(self) -> None:
359359

360360
logger.debug("Conversation start - initializing session")
361361
self._session = await start_bidirectional_connection(self)
362-
logger.debug("Conversation ready")
363-
364-
async def send(self, input_data: str | AudioInputEvent) -> None:
365-
"""Send input to the model (text or audio).
366-
367-
Unified method for sending both text and audio input to the model during
368-
an active conversation session. User input is automatically added to
369-
conversation history for complete message tracking.
370-
362+
363+
async def send(self, input_data: str | AudioInputEvent | ImageInputEvent) -> None:
364+
"""Send input to the model (text, audio, or image).
365+
366+
Unified method for sending text, audio, and image input to the model during
367+
an active conversation session.
368+
371369
Args:
372-
input_data: Either a string for text input or AudioInputEvent for audio input.
373-
370+
input_data: String for text, AudioInputEvent for audio, or ImageInputEvent for images.
371+
374372
Raises:
375373
ValueError: If no active session or invalid input type.
376374
"""
@@ -385,10 +383,14 @@ async def send(self, input_data: str | AudioInputEvent) -> None:
385383
elif isinstance(input_data, dict) and "audioData" in input_data:
386384
# Handle audio input
387385
await self._session.model_session.send_audio_content(input_data)
386+
elif isinstance(input_data, dict) and "imageData" in input_data:
387+
# Handle image input (ImageInputEvent)
388+
await self._session.model_session.send_image_content(input_data)
388389
else:
389390
raise ValueError(
390-
"Input must be either a string (text) or AudioInputEvent "
391-
"(dict with audioData, format, sampleRate, channels)"
391+
"Input must be either a string (text), AudioInputEvent "
392+
"(dict with audioData, format, sampleRate, channels), or ImageInputEvent "
393+
"(dict with imageData, mimeType, encoding)"
392394
)
393395

394396
async def receive(self) -> AsyncIterable[BidirectionalStreamEvent]:
src/strands/experimental/bidirectional_streaming/models/__init__.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
"""Bidirectional model interfaces and implementations."""
22

33
from .bidirectional_model import BidirectionalModel, BidirectionalModelSession
4+
from .gemini_live import GeminiLiveBidirectionalModel, GeminiLiveSession
45
from .novasonic import NovaSonicBidirectionalModel, NovaSonicSession
56
from .openai import OpenAIRealtimeBidirectionalModel, OpenAIRealtimeSession
67

78
__all__ = [
8-
"BidirectionalModel",
9-
"BidirectionalModelSession",
10-
"NovaSonicBidirectionalModel",
9+
"BidirectionalModel",
10+
"BidirectionalModelSession",
11+
"GeminiLiveBidirectionalModel",
12+
"GeminiLiveSession",
13+
"NovaSonicBidirectionalModel",
1114
"NovaSonicSession",
1215
"OpenAIRealtimeBidirectionalModel",
13-
"OpenAIRealtimeSession"
16+
"OpenAIRealtimeSession",
1417
]

src/strands/experimental/bidirectional_streaming/models/bidirectional_model.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from ....types.content import Messages
1919
from ....types.tools import ToolSpec
20-
from ..types.bidirectional_streaming import AudioInputEvent, BidirectionalStreamEvent
20+
from ..types.bidirectional_streaming import AudioInputEvent, BidirectionalStreamEvent, ImageInputEvent
2121

2222
logger = logging.getLogger(__name__)
2323

@@ -48,6 +48,15 @@ async def send_audio_content(self, audio_input: AudioInputEvent) -> None:
4848
"""
4949
raise NotImplementedError
5050

51+
# TODO: remove with interface unification
52+
async def send_image_content(self, image_input: ImageInputEvent) -> None:
53+
"""Send image content to the model during an active connection.
54+
55+
Handles image encoding and provider-specific formatting while presenting
56+
a simple ImageInputEvent interface.
57+
"""
58+
raise NotImplementedError
59+
5160
@abc.abstractmethod
5261
async def send_text_content(self, text: str, **kwargs) -> None:
5362
"""Send text content to the model during ongoing generation.

0 commit comments

Comments
 (0)