Skip to content

Commit 1a10515

Browse files
authored
[Scribe] Add audio format correctly and better typing (#705)
* Add audio format correctly and better typing
* Fix build errors
* Use enum
1 parent cb8f9d3 commit 1a10515

2 files changed

Lines changed: 35 additions & 18 deletions

File tree

src/elevenlabs/realtime/connection.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
import typing
55
from enum import Enum
66

7+
if typing.TYPE_CHECKING:
8+
from websockets.asyncio.client import ClientConnection
9+
710

811
class RealtimeEvents(str, Enum):
912
"""Events emitted by the RealtimeConnection"""
@@ -55,7 +58,7 @@ class RealtimeConnection:
5558
```
5659
"""
5760

58-
def __init__(self, websocket, current_sample_rate: int, ffmpeg_process: typing.Optional[subprocess.Popen] = None):
61+
def __init__(self, websocket: "ClientConnection", current_sample_rate: int, ffmpeg_process: typing.Optional[subprocess.Popen] = None):
5962
self.websocket = websocket
6063
self.current_sample_rate = current_sample_rate
6164
self.ffmpeg_process = ffmpeg_process

src/elevenlabs/realtime/scribe.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
import subprocess
44
import typing
55
from enum import Enum
6+
from typing import overload
7+
8+
from typing_extensions import Required
69

710
try:
8-
import websockets
11+
from websockets.asyncio.client import connect as websocket_connect
912
except ImportError:
1013
raise ImportError(
1114
"The websockets package is required for realtime speech-to-text. "
@@ -17,10 +20,13 @@
1720

1821
class AudioFormat(str, Enum):
1922
"""Audio format options for realtime transcription"""
23+
PCM_8000 = "pcm_8000"
2024
PCM_16000 = "pcm_16000"
2125
PCM_22050 = "pcm_22050"
2226
PCM_24000 = "pcm_24000"
2327
PCM_44100 = "pcm_44100"
28+
PCM_48000 = "pcm_48000"
29+
ULAW_8000 = "ulaw_8000"
2430

2531

2632
class CommitStrategy(str, Enum):
@@ -50,9 +56,9 @@ class RealtimeAudioOptions(typing.TypedDict, total=False):
5056
language_code: An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand.
5157
include_timestamps: Whether to receive the committed_transcript_with_timestamps event after committing the segment (optional, defaults to False)
5258
"""
53-
model_id: str
54-
audio_format: AudioFormat
55-
sample_rate: int
59+
model_id: Required[str]
60+
audio_format: Required[AudioFormat]
61+
sample_rate: Required[int]
5662
commit_strategy: CommitStrategy
5763
vad_silence_threshold_secs: float
5864
vad_threshold: float
@@ -77,8 +83,8 @@ class RealtimeUrlOptions(typing.TypedDict, total=False):
7783
language_code: An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand.
7884
include_timestamps: Whether to receive the committed_transcript_with_timestamps event after committing the segment (optional, defaults to False)
7985
"""
80-
model_id: str
81-
url: str
86+
model_id: Required[str]
87+
url: Required[str]
8288
commit_strategy: CommitStrategy
8389
vad_silence_threshold_secs: float
8490
vad_threshold: float
@@ -121,6 +127,18 @@ def __init__(self, api_key: str, base_url: str = "wss://api.elevenlabs.io"):
121127
self.api_key = api_key
122128
self.base_url = base_url
123129

130+
@overload
131+
async def connect(
132+
self,
133+
options: RealtimeAudioOptions
134+
) -> RealtimeConnection: ...
135+
136+
@overload
137+
async def connect(
138+
self,
139+
options: RealtimeUrlOptions
140+
) -> RealtimeConnection: ...
141+
124142
async def connect(
125143
self,
126144
options: typing.Union[RealtimeAudioOptions, RealtimeUrlOptions]
@@ -185,8 +203,7 @@ async def _connect_audio(self, options: RealtimeAudioOptions) -> RealtimeConnect
185203
# Build WebSocket URL with query parameters
186204
ws_url = self._build_websocket_url(
187205
model_id=model_id,
188-
encoding=audio_format.value,
189-
sample_rate=sample_rate,
206+
audio_format=audio_format.value,
190207
commit_strategy=commit_strategy.value,
191208
vad_silence_threshold_secs=vad_silence_threshold_secs,
192209
vad_threshold=vad_threshold,
@@ -197,7 +214,7 @@ async def _connect_audio(self, options: RealtimeAudioOptions) -> RealtimeConnect
197214
)
198215

199216
# Connect to WebSocket
200-
websocket = await websockets.connect(
217+
websocket = await websocket_connect(
201218
ws_url,
202219
additional_headers={"xi-api-key": self.api_key}
203220
)
@@ -232,13 +249,12 @@ async def _connect_url(self, options: RealtimeUrlOptions) -> RealtimeConnection:
232249

233250
# Default to 16kHz for URL streaming
234251
sample_rate = 16000
235-
encoding = "pcm_16000"
252+
audio_format = AudioFormat.PCM_16000
236253

237254
# Build WebSocket URL
238255
ws_url = self._build_websocket_url(
239256
model_id=model_id,
240-
encoding=encoding,
241-
sample_rate=sample_rate,
257+
audio_format=audio_format,
242258
commit_strategy=commit_strategy.value,
243259
vad_silence_threshold_secs=vad_silence_threshold_secs,
244260
vad_threshold=vad_threshold,
@@ -249,7 +265,7 @@ async def _connect_url(self, options: RealtimeUrlOptions) -> RealtimeConnection:
249265
)
250266

251267
# Connect to WebSocket
252-
websocket = await websockets.connect(
268+
websocket = await websocket_connect(
253269
ws_url,
254270
additional_headers={"xi-api-key": self.api_key}
255271
)
@@ -341,8 +357,7 @@ async def _stream_ffmpeg_to_websocket(self, connection: RealtimeConnection) -> N
341357
def _build_websocket_url(
342358
self,
343359
model_id: str,
344-
encoding: str,
345-
sample_rate: int,
360+
audio_format: str,
346361
commit_strategy: str,
347362
vad_silence_threshold_secs: typing.Optional[float] = None,
348363
vad_threshold: typing.Optional[float] = None,
@@ -358,8 +373,7 @@ def _build_websocket_url(
358373
# Build query parameters
359374
params = [
360375
f"model_id={model_id}",
361-
f"encoding={encoding}",
362-
f"sample_rate={sample_rate}",
376+
f"audio_format={audio_format}",
363377
f"commit_strategy={commit_strategy}"
364378
]
365379

0 commit comments

Comments
 (0)