Skip to content

Commit b7b1c2e

Browse files
yinghsienwu authored and copybara-github committed
feat: voice activity support
PiperOrigin-RevId: 856356548
1 parent 142276e commit b7b1c2e

2 files changed

Lines changed: 124 additions & 2 deletions

File tree

google/genai/_live_converters.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,59 @@ def _LiveSendRealtimeInputParameters_to_vertex(
11511151
return to_object
11521152

11531153

1154+
def _LiveServerMessage_from_mldev(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
  """Builds a snake_case live server message dict from an mldev message.

  Every camelCase field that is present (not None) on `from_object` is written
  to the result under its snake_case key. All fields are copied unchanged
  except `voiceActivity`, which is first converted by
  `_VoiceActivity_from_mldev`.

  Args:
    from_object: The source message; its fields are read through `getv`.
    parent_object: Not read by this function.

  Returns:
    A new dict containing only the fields that were present on `from_object`.
  """
  to_object: dict[str, Any] = {}

  # (source key, destination key) pairs that are copied without conversion,
  # listed in the order they are written to the result.
  copied_fields = (
      ('setupComplete', 'setup_complete'),
      ('serverContent', 'server_content'),
      ('toolCall', 'tool_call'),
      ('toolCallCancellation', 'tool_call_cancellation'),
      ('usageMetadata', 'usage_metadata'),
      ('goAway', 'go_away'),
      ('sessionResumptionUpdate', 'session_resumption_update'),
      ('voiceActivityDetectionSignal', 'voice_activity_detection_signal'),
  )
  for source_key, target_key in copied_fields:
    field_value = getv(from_object, [source_key])
    if field_value is not None:
      setv(to_object, [target_key], field_value)

  # The one field with its own converter; the partially built result is
  # handed to it as the parent object.
  voice_activity = getv(from_object, ['voiceActivity'])
  if voice_activity is not None:
    setv(
        to_object,
        ['voice_activity'],
        _VoiceActivity_from_mldev(voice_activity, to_object),
    )

  return to_object
1205+
1206+
11541207
def _LiveServerMessage_from_vertex(
11551208
from_object: Union[dict[str, Any], object],
11561209
parent_object: Optional[dict[str, Any]] = None,
@@ -1198,6 +1251,15 @@ def _LiveServerMessage_from_vertex(
11981251
getv(from_object, ['voiceActivityDetectionSignal']),
11991252
)
12001253

1254+
if getv(from_object, ['voiceActivity']) is not None:
1255+
setv(
1256+
to_object,
1257+
['voice_activity'],
1258+
_VoiceActivity_from_vertex(
1259+
getv(from_object, ['voiceActivity']), to_object
1260+
),
1261+
)
1262+
12011263
return to_object
12021264

12031265

@@ -1468,3 +1530,25 @@ def _UsageMetadata_from_vertex(
14681530
setv(to_object, ['traffic_type'], getv(from_object, ['trafficType']))
14691531

14701532
return to_object
1533+
1534+
1535+
def _VoiceActivity_from_mldev(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
  """Converts an mldev voice activity payload to its snake_case dict form.

  Args:
    from_object: The source payload; its `type` field is read through `getv`.
    parent_object: Not read by this function.

  Returns:
    A new dict with `voice_activity_type` set to the source `type` value, or
    an empty dict when `type` is absent (None).
  """
  to_object: dict[str, Any] = {}

  activity_type = getv(from_object, ['type'])
  if activity_type is not None:
    # The source field `type` is exposed as `voice_activity_type`.
    setv(to_object, ['voice_activity_type'], activity_type)

  return to_object
1544+
1545+
1546+
def _VoiceActivity_from_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
  """Converts a vertex voice activity payload to its snake_case dict form.

  Args:
    from_object: The source payload; its `type` field is read through `getv`.
    parent_object: Not read by this function.

  Returns:
    A new dict with `voice_activity_type` set to the source `type` value, or
    an empty dict when `type` is absent (None).
  """
  to_object: dict[str, Any] = {}

  activity_type = getv(from_object, ['type'])
  if activity_type is not None:
    # The source field `type` is exposed as `voice_activity_type`.
    setv(to_object, ['voice_activity_type'], activity_type)

  return to_object

google/genai/types.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,17 @@ class VadSignalType(_common.CaseInSensitiveEnum):
869869
"""End of sentence signal."""
870870

871871

872+
class VoiceActivityType(_common.CaseInSensitiveEnum):
  """The type of the voice activity signal."""

  TYPE_UNSPECIFIED = 'TYPE_UNSPECIFIED'
  """The default value (`TYPE_UNSPECIFIED`); no activity type is specified."""
  # NOTE(review): the "sentence" wording on the two members below is the same
  # wording used for the VAD signal enum's members; confirm it is the intended
  # description for voice *activity* start/end.
  ACTIVITY_START = 'ACTIVITY_START'
  """Start of sentence signal."""
  ACTIVITY_END = 'ACTIVITY_END'
  """End of sentence signal."""
881+
882+
872883
class StartSensitivity(_common.CaseInSensitiveEnum):
873884
"""Start of speech sensitivity."""
874885

@@ -16347,6 +16358,24 @@ class VoiceActivityDetectionSignalDict(TypedDict, total=False):
1634716358
]
1634816359

1634916360

16361+
class VoiceActivity(_common.BaseModel):
  """A voice activity signal, described by its activity type."""

  voice_activity_type: Optional[VoiceActivityType] = Field(
      default=None,
      description='The type of the voice activity signal.',
  )
16367+
16368+
16369+
class VoiceActivityDict(TypedDict, total=False):
  """Voice activity signal, in dict form.

  Declared with `total=False`, so every key may be omitted.
  """

  voice_activity_type: Optional[VoiceActivityType]
  """The type of the voice activity signal."""
16374+
16375+
16376+
# Accepts a voice activity signal as either the model class or its dict form.
VoiceActivityOrDict = Union[VoiceActivity, VoiceActivityDict]
16377+
16378+
1635016379
class LiveServerMessage(_common.BaseModel):
1635116380
"""Response message for API call."""
1635216381

@@ -16379,7 +16408,13 @@ class LiveServerMessage(_common.BaseModel):
1637916408
)
1638016409
)
1638116410
voice_activity_detection_signal: Optional[VoiceActivityDetectionSignal] = (
16382-
Field(default=None, description="""Voice activity detection signal.""")
16411+
Field(
16412+
default=None,
16413+
description="""Voice activity detection signal. Allowlisted only.""",
16414+
)
16415+
)
16416+
voice_activity: Optional[VoiceActivity] = Field(
16417+
default=None, description="""Voice activity signal."""
1638316418
)
1638416419

1638516420
@property
@@ -16478,7 +16513,10 @@ class LiveServerMessageDict(TypedDict, total=False):
1647816513
"""Update of the session resumption state."""
1647916514

1648016515
voice_activity_detection_signal: Optional[VoiceActivityDetectionSignalDict]
16481-
"""Voice activity detection signal."""
16516+
"""Voice activity detection signal. Allowlisted only."""
16517+
16518+
voice_activity: Optional[VoiceActivityDict]
16519+
"""Voice activity signal."""
1648216520

1648316521

1648416522
LiveServerMessageOrDict = Union[LiveServerMessage, LiveServerMessageDict]

0 commit comments

Comments
 (0)