From f30e5387056f7399b4a4c294afdbfc5d0e4f12e4 Mon Sep 17 00:00:00 2001
From: Paul Asjes <paul.asjes@elevenlabs.io>
Date: Wed, 14 May 2025 17:41:25 +0200
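Subject: [PATCH 1/2] Remove three files from .fernignore

Stop listing the following paths in .fernignore:
  src/elevenlabs/realtime_tts.py
  src/elevenlabs/types/get_agent_response_model.py
  src/elevenlabs/types/prompt_agent.py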

---
 .fernignore | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.fernignore b/.fernignore
index 424df5de..5c36ac9f 100644
--- a/.fernignore
+++ b/.fernignore
@@ -5,9 +5,6 @@ src/elevenlabs/client.py
 src/elevenlabs/conversational_ai/conversation.py
 src/elevenlabs/conversational_ai/default_audio_interface.py
 src/elevenlabs/play.py
-src/elevenlabs/realtime_tts.py
-src/elevenlabs/types/get_agent_response_model.py
-src/elevenlabs/types/prompt_agent.py
 
 # Ignore CI files
 .github/

From bcb24fb691f17cf762082536b5dfad5e4c830e12 Mon Sep 17 00:00:00 2001
From: Paul Asjes <paul.asjes@elevenlabs.io>
Date: Mon, 19 May 2025 14:46:18 +0200
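Subject: [PATCH 2/2] Remove generate and clone methods

Remove the manually maintained, deprecated `clone` and `generate`
helpers from both `ElevenLabs` and `AsyncElevenLabs` in
src/elevenlabs/client.py. They were thin wrappers around
`voices.add`/`voices.get` and `text_to_speech.convert`/
`convert_as_stream`/`convert_realtime`.

Also remove the module-level definitions these helpers relied on:
`DEFAULT_VOICE`, the `VoiceId`/`VoiceName`/`ModelId` aliases,
`is_voice_id`, the `deprecated`/`deprecated_async` decorators, and the
imports that are no longer needed.

Remove the three `client.generate` tests from tests/test_tts.py.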

---
 src/elevenlabs/client.py | 371 +--------------------------------------
 tests/test_tts.py        |  41 -----
 2 files changed, 2 insertions(+), 410 deletions(-)

diff --git a/src/elevenlabs/client.py b/src/elevenlabs/client.py
index d204c52a..15616204 100644
--- a/src/elevenlabs/client.py
+++ b/src/elevenlabs/client.py
@@ -1,82 +1,21 @@
 import typing
-import json
-import re
 import os
 import httpx
-import warnings
 from functools import wraps
 
-from typing import Iterator, Optional, Union, \
-  Optional, AsyncIterator
-
 from .base_client import \
   BaseElevenLabs, AsyncBaseElevenLabs
-from .core import RequestOptions, ApiError
-from .types import Voice, VoiceSettings, \
-  PronunciationDictionaryVersionLocator, Model
 from .environment import ElevenLabsEnvironment
 from .realtime_tts import RealtimeTextToSpeechClient
-from .types import OutputFormat
 from .webhooks import WebhooksClient
 
 
-DEFAULT_VOICE = Voice(
-    voice_id="EXAVITQu4vr4xnSDxMaL",
-    name="Sarah",
-    settings=VoiceSettings(
-        stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True
-    ),
-)
-
-VoiceId = str
-
-VoiceName = str
-
-ModelId = str
-
-
-def is_voice_id(val: str) -> bool:
-    return bool(re.match(r"^[a-zA-Z0-9]{20}$", val))
-
-
-def get_base_url_host(base_url: str) -> str:
-    return httpx.URL(base_url).host
-
-
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
 
 
-def deprecated(func):
-    """
-    This is a decorator which can be used to mark functions as deprecated.
-    It will result in a warning being emitted when the function is used.
-    """
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        warnings.warn(
-            f"The method {func.__name__} is deprecated and will be removed in a future version.",
-            category=DeprecationWarning,
-            stacklevel=2
-        )
-        return func(*args, **kwargs)
-    return wrapper
-
-
-def deprecated_async(func):
-    """
-    This is a decorator which can be used to mark async functions as deprecated.
-    It will result in a warning being emitted when the function is used.
-    """
-    @wraps(func)
-    async def wrapper(*args, **kwargs):
-        warnings.warn(
-            f"The method {func.__name__} is deprecated and will be removed in a future version.",
-            category=DeprecationWarning,
-            stacklevel=2
-        )
-        return await func(*args, **kwargs)
-    return wrapper
+def get_base_url_host(base_url: str) -> str:
+    return httpx.URL(base_url).host
 
 
 class ElevenLabs(BaseElevenLabs):
@@ -123,161 +62,6 @@ def __init__(
         self.text_to_speech = RealtimeTextToSpeechClient(client_wrapper=self._client_wrapper)
         self.webhooks = WebhooksClient()
 
-    @deprecated
-    def clone(
-      self,
-      name: str,
-      files: typing.List[str],
-      description: str,
-      labels: typing.Optional[str] = None,
-      request_options: typing.Optional[RequestOptions] = None
-    ) -> Voice:
-        """
-          This is a manually maintained helper function that clones a voice from a set of audio files.
-          **NOTE**: This function is a helper function and is simply making
-          calls to the `add` and `get` functions of the `voices` endpoint.
-
-          Parameters:
-              - name: str. The name that identifies this voice. This will be displayed in the dropdown of the website.
-
-              - files: typing.List[str]. The filepaths of the audio files to be used to create the voice.
-
-              - description: str. How would you describe the voice?
-
-              - labels: str. Serialized labels dictionary for the voice.
-
-              - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
-        """
-        add_voice_response = self.voices.add(
-          name=name,
-          description=description,
-          files=[open(file, 'rb') for file in files],
-          labels=str(json.dumps(labels or {}))
-        )
-        return self.voices.get(
-          add_voice_response.voice_id,
-          request_options=request_options
-        )
-
-
-    @deprecated
-    def generate(
-      self,
-      *,
-      text: Union[str, Iterator[str]],
-      voice: Union[VoiceId, VoiceName, Voice] = DEFAULT_VOICE,
-      voice_settings: typing.Optional[VoiceSettings] = DEFAULT_VOICE.settings,
-      model: Union[ModelId, Model] = "eleven_multilingual_v2",
-      optimize_streaming_latency: typing.Optional[int] = 0,
-      stream: bool = False,
-      output_format: Optional[OutputFormat] = "mp3_44100_128",
-      pronunciation_dictionary_locators: typing.Optional[
-            typing.Sequence[PronunciationDictionaryVersionLocator]
-        ] = OMIT,
-      request_options: typing.Optional[RequestOptions] = None
-    ) -> Iterator[bytes]:
-        """
-            - text: Union[str, Iterator[str]]. The string or stream of strings that will get converted into speech.
-
-            - voice: str. A voice id, name, or voice response. Defaults to the Sarah voice.
-
-            - model: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models.
-                                           The model needs to have support for text to speech, you can check this using the
-                                           can_do_text_to_speech property.
-
-            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
-                                                                0 - default mode (no latency optimizations)
-                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
-                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
-                                                                3 - max latency optimizations
-                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
-
-                                                                Defaults to 0.
-
-            - stream: bool. If true, the function will return a generator that will yield the audio in chunks.
-
-                            Defaults to False.
-
-            - output_format: typing.Optional[OutputFormat]. Output format of the generated audio. Must be one of:
-                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
-                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
-                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
-                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
-                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
-                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
-                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
-                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
-                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
-                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
-                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
-
-                                                    Defaults to mp3_44100_128.
-
-            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.
-
-            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request
-
-            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
-        """
-        if isinstance(voice, str) and is_voice_id(voice):
-            voice_id = voice
-        elif isinstance(voice, str):
-            voices_response = self.voices.get_all(request_options=request_options, show_legacy=True)
-            maybe_voice_id = next((v.voice_id for v in voices_response.voices if v.name == voice), None)
-            if maybe_voice_id is None:
-                raise ApiError(body=f"Voice {voice} not found.")
-            voice_id = maybe_voice_id
-        elif isinstance(voice, Voice):
-            voice_id = voice.voice_id
-            if voice_settings == DEFAULT_VOICE.settings \
-                    and voice.settings is not None:
-                voice_settings = voice.settings
-        else:
-            voice_id = DEFAULT_VOICE.voice_id
-
-        if isinstance(model, str):
-            model_id = model
-        elif isinstance(model, Model):
-            model_id = model.model_id
-
-
-        if stream:
-            if isinstance(text, str):
-                return self.text_to_speech.convert_as_stream(
-                    voice_id=voice_id,
-                    voice_settings=voice_settings,
-                    optimize_streaming_latency=optimize_streaming_latency,
-                    output_format=output_format,
-                    text=text,
-                    request_options=request_options,
-                    pronunciation_dictionary_locators=pronunciation_dictionary_locators,
-                    model_id=model_id
-                )
-            elif isinstance(text, Iterator):
-                return self.text_to_speech.convert_realtime(  # type: ignore
-                    voice_id=voice_id,
-                    voice_settings=voice_settings,
-                    output_format=output_format,
-                    text=text,
-                    request_options=request_options,
-                    model_id=model_id
-                )
-            else:
-                raise ApiError(body="Text is neither a string nor an iterator.")
-        else:
-            if not isinstance(text, str):
-                raise ApiError(body="Text must be a string when stream is False.")
-            return self.text_to_speech.convert(
-                voice_id=voice_id,
-                model_id=model_id,
-                voice_settings=voice_settings,
-                optimize_streaming_latency=optimize_streaming_latency,
-                output_format=output_format,
-                text=text,
-                request_options=request_options,
-                pronunciation_dictionary_locators=pronunciation_dictionary_locators
-            )
-
 
 class AsyncElevenLabs(AsyncBaseElevenLabs):
     """
@@ -324,154 +108,3 @@ def __init__(
             httpx_client=httpx_client
         )
         self.webhooks = WebhooksClient()
-
-    @deprecated_async
-    async def clone(
-      self,
-      name: str,
-      files: typing.List[str],
-      description: str,
-      labels: str,
-      request_options: typing.Optional[RequestOptions] = None
-    ) -> Voice:
-        """
-          This is a manually mnaintained helper function that generates a
-          voice from provided text.
-
-          **NOTE**: This function is a helper function and is simply making
-          calls to the `text_to_speech.convert` and`text_to_speech.convert_as_stream`
-          functions.
-
-          Parameters:
-              - name: str. The name that identifies this voice. This will be displayed in the dropdown of the website.
-
-              - files: typing.List[str]. The filepaths of the audio files to be used to create the voice.
-
-              - description: str. How would you describe the voice?
-
-              - labels: str. Serialized labels dictionary for the voice.
-
-              - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
-        """
-        add_voice_response = await self.voices.add(
-          name=name,
-          description=description,
-          files=[open(file, 'rb') for file in files],
-          labels=str(json.dumps(labels or {}))
-        )
-        return await self.voices.get(
-          add_voice_response.voice_id,
-          request_options=request_options
-        )
-
-    @deprecated_async
-    async def generate(
-      self,
-      *,
-      text: str,
-      voice: Union[VoiceId, VoiceName, Voice] = DEFAULT_VOICE,
-      voice_settings: typing.Optional[VoiceSettings] = DEFAULT_VOICE.settings,
-      model: Union[ModelId, Model] = "eleven_multilingual_v2",
-      optimize_streaming_latency: typing.Optional[int] = 0,
-      stream: bool = False,
-      output_format: Optional[OutputFormat] = "mp3_44100_128",
-      pronunciation_dictionary_locators: typing.Optional[
-            typing.Sequence[PronunciationDictionaryVersionLocator]
-        ] = OMIT,
-      request_options: typing.Optional[RequestOptions] = None
-    ) -> AsyncIterator[bytes]:
-        """
-          This is a manually mnaintained helper function that generates a
-          voice from provided text.
-
-          **NOTE**: This function is a helper function and is simply making
-          calls to the `text_to_speech.convert` and`text_to_speech.convert_as_stream`
-          functions.
-
-            - text: str. The string that will get converted into speech. The Async client does not support streaming.
-
-            - voice: str. A voice id, name, or voice response. Defaults to the Rachel voice.
-
-            - model: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models.
-                                           The model needs to have support for text to speech, you can check this using the
-                                           can_do_text_to_speech property.
-
-            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
-                                                                0 - default mode (no latency optimizations)
-                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
-                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
-                                                                3 - max latency optimizations
-                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
-
-                                                                Defaults to 0.
-
-            - stream: bool. If true, the function will return a generator that will yield the audio in chunks.
-
-                            Defaults to False.
-
-            - output_format: typing.Optional[OutputFormat]. Output format of the generated audio. Must be one of:
-                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
-                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
-                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
-                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
-                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
-                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
-                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
-                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
-                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
-                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
-                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
-
-                                                    Defaults to mp3_44100_128.
-
-            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.
-
-            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request
-
-            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
-        """
-        if isinstance(voice, str) and is_voice_id(voice):
-            voice_id = voice
-        elif isinstance(voice, str):
-            voices_response = await self.voices.get_all(request_options=request_options, show_legacy=True)
-            maybe_voice_id = next((v.voice_id for v in voices_response.voices if v.name == voice), None)
-            if not maybe_voice_id:
-                raise ApiError(body=f"Voice {voice} not found.")
-            voice_id = maybe_voice_id
-        elif isinstance(voice, Voice):
-            voice_id = voice.voice_id
-            if voice_settings == DEFAULT_VOICE.settings \
-                    and voice.settings is not None:
-                voice_settings = voice.settings
-        else:
-            voice_id = DEFAULT_VOICE.voice_id
-
-        if isinstance(model, str):
-            model_id = model
-        elif isinstance(model, Model):
-            model_id = model.model_id
-
-        if stream:
-            return self.text_to_speech.convert_as_stream(
-                voice_id=voice_id,
-                model_id=model_id,
-                voice_settings=voice_settings,
-                optimize_streaming_latency=optimize_streaming_latency,
-                output_format=output_format,
-                text=text,
-                request_options=request_options,
-                pronunciation_dictionary_locators=pronunciation_dictionary_locators
-            )
-        else:
-            if not isinstance(text, str):
-                raise ApiError(body="Text must be a string when stream is False.")
-            return self.text_to_speech.convert(
-                voice_id=voice_id,
-                model_id=model_id,
-                voice_settings=voice_settings,
-                optimize_streaming_latency=optimize_streaming_latency,
-                output_format=output_format,
-                text=text,
-                request_options=request_options,
-                pronunciation_dictionary_locators=pronunciation_dictionary_locators
-            )
diff --git a/tests/test_tts.py b/tests/test_tts.py
index 42828d90..1603f728 100644
--- a/tests/test_tts.py
+++ b/tests/test_tts.py
@@ -7,47 +7,6 @@
 import base64
 
 
-def test_tts_generate() -> None:
-    """Test basic text-to-speech generation w/ custom generate."""
-    client = ElevenLabs()
-    audio_generator = client.generate(text=DEFAULT_TEXT, voice="Brian", model=DEFAULT_MODEL)
-    audio = b"".join(audio_generator)
-    assert isinstance(audio, bytes), "TTS should return bytes"
-    if not IN_GITHUB:
-        play(audio)
-
-
-def test_tts_generate_with_voice_settings() -> None:
-    """Test basic text-to-speech generation."""
-    client = ElevenLabs()
-    audio_generator = client.generate(
-        text=DEFAULT_TEXT,
-        model=DEFAULT_MODEL,
-        voice=Voice(
-            voice_id="nPczCjzI2devNBz1zQrb",
-            settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True),
-        ),
-    )
-    audio = b"".join(audio_generator)
-    assert isinstance(audio, bytes), "TTS should return bytes"
-    if not IN_GITHUB:
-        play(audio)
-
-
-def test_tts_generate_stream() -> None:
-    """Test streaming text-to-speech generation."""
-    client = ElevenLabs()
-    audio_generator = client.generate(
-        stream=True,
-        text=DEFAULT_TEXT,
-        model=DEFAULT_MODEL,
-    )
-    audio = b"".join(audio_generator)
-    assert isinstance(audio, bytes), "TTS should return bytes"
-    if not IN_GITHUB:
-        play(audio)
-
-
 def test_tts_convert() -> None:
     """Test basic text-to-speech generation."""
     client = ElevenLabs()