Skip to content

Commit e99a745

Browse files
authored
Add include_timestamps and quota_exceeded (#682)
1 parent 15d236d commit e99a745

2 files changed

Lines changed: 15 additions & 2 deletions

File tree

src/elevenlabs/realtime/connection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ class RealtimeEvents(str, Enum):
1414
COMMITTED_TRANSCRIPT = "committed_transcript"
1515
COMMITTED_TRANSCRIPT_WITH_TIMESTAMPS = "committed_transcript_with_timestamps"
1616
ERROR = "error"
17+
AUTH_ERROR = "auth_error"
18+
QUOTA_EXCEEDED = "quota_exceeded"
1719

1820

1921
class RealtimeConnection:

src/elevenlabs/realtime/scribe.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class RealtimeAudioOptions(typing.TypedDict, total=False):
4848
min_speech_duration_ms: Minimum speech duration in milliseconds (must be between 50 and 2000)
4949
min_silence_duration_ms: Minimum silence duration in milliseconds (must be between 50 and 2000)
5050
language_code: An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand.
51+
include_timestamps: Whether to receive the committed_transcript_with_timestamps event after committing the segment (optional, defaults to False)
5152
"""
5253
model_id: str
5354
audio_format: AudioFormat
@@ -58,6 +59,7 @@ class RealtimeAudioOptions(typing.TypedDict, total=False):
5859
min_speech_duration_ms: int
5960
min_silence_duration_ms: int
6061
language_code: str
62+
include_timestamps: bool
6163

6264

6365
class RealtimeUrlOptions(typing.TypedDict, total=False):
@@ -72,7 +74,8 @@ class RealtimeUrlOptions(typing.TypedDict, total=False):
7274
vad_threshold: Threshold for voice activity detection (must be between 0.1 and 0.9)
7375
min_speech_duration_ms: Minimum speech duration in milliseconds (must be between 50 and 2000)
7476
min_silence_duration_ms: Minimum silence duration in milliseconds (must be between 50 and 2000)
75-
An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand.
77+
language_code: An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand.
78+
include_timestamps: Whether to receive the committed_transcript_with_timestamps event after committing the segment (optional, defaults to False)
7679
"""
7780
model_id: str
7881
url: str
@@ -82,6 +85,7 @@ class RealtimeUrlOptions(typing.TypedDict, total=False):
8285
min_speech_duration_ms: int
8386
min_silence_duration_ms: int
8487
language_code: str
88+
include_timestamps: bool
8589

8690

8791
class ScribeRealtime:
@@ -173,6 +177,7 @@ async def _connect_audio(self, options: RealtimeAudioOptions) -> RealtimeConnect
173177
min_speech_duration_ms = options.get("min_speech_duration_ms")
174178
min_silence_duration_ms = options.get("min_silence_duration_ms")
175179
language_code = options.get("language_code")
180+
include_timestamps = options.get("include_timestamps", False)
176181

177182
if not audio_format or not sample_rate:
178183
raise ValueError("audio_format and sample_rate are required for manual audio mode")
@@ -188,6 +193,7 @@ async def _connect_audio(self, options: RealtimeAudioOptions) -> RealtimeConnect
188193
min_speech_duration_ms=min_speech_duration_ms,
189194
min_silence_duration_ms=min_silence_duration_ms,
190195
language_code=language_code,
196+
include_timestamps=include_timestamps,
191197
)
192198

193199
# Connect to WebSocket
@@ -219,6 +225,7 @@ async def _connect_url(self, options: RealtimeUrlOptions) -> RealtimeConnection:
219225
min_speech_duration_ms = options.get("min_speech_duration_ms")
220226
min_silence_duration_ms = options.get("min_silence_duration_ms")
221227
language_code = options.get("language_code")
228+
include_timestamps = options.get("include_timestamps", False)
222229

223230
if not url:
224231
raise ValueError("url is required for URL mode")
@@ -238,6 +245,7 @@ async def _connect_url(self, options: RealtimeUrlOptions) -> RealtimeConnection:
238245
min_speech_duration_ms=min_speech_duration_ms,
239246
min_silence_duration_ms=min_silence_duration_ms,
240247
language_code=language_code,
248+
include_timestamps=include_timestamps,
241249
)
242250

243251
# Connect to WebSocket
@@ -340,7 +348,8 @@ def _build_websocket_url(
340348
vad_threshold: typing.Optional[float] = None,
341349
min_speech_duration_ms: typing.Optional[int] = None,
342350
min_silence_duration_ms: typing.Optional[int] = None,
343-
language_code: typing.Optional[str] = None
351+
language_code: typing.Optional[str] = None,
352+
include_timestamps: typing.Optional[bool] = None
344353
) -> str:
345354
"""Build the WebSocket URL with query parameters"""
346355
# Extract base domain
@@ -365,6 +374,8 @@ def _build_websocket_url(
365374
params.append(f"min_silence_duration_ms={min_silence_duration_ms}")
366375
if language_code is not None:
367376
params.append(f"language_code={language_code}")
377+
if include_timestamps is not None:
378+
params.append(f"include_timestamps={include_timestamps}")
368379

369380
query_string = "&".join(params)
370381
return f"{base}/v1/speech-to-text/realtime?{query_string}"

0 commit comments

Comments
 (0)