Skip to content

Commit 21375cc

Browse files
committed
compose_detailed parsing
1 parent 42a49ad commit 21375cc

2 files changed

Lines changed: 289 additions & 0 deletions

File tree

src/elevenlabs/client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .environment import ElevenLabsEnvironment
99
from .realtime_tts import RealtimeTextToSpeechClient
1010
from .webhooks_custom import WebhooksClient, AsyncWebhooksClient
11+
from .music_custom import MusicClient, AsyncMusicClient
1112

1213

1314
# this is used as the default value for optional parameters
@@ -59,6 +60,7 @@ def __init__(
5960
)
6061
self.text_to_speech = RealtimeTextToSpeechClient(client_wrapper=self._client_wrapper)
6162
self.webhooks = WebhooksClient(client_wrapper=self._client_wrapper)
63+
self.music = MusicClient(client_wrapper=self._client_wrapper)
6264

6365

6466
class AsyncElevenLabs(AsyncBaseElevenLabs):
@@ -102,3 +104,4 @@ def __init__(
102104
httpx_client=httpx_client
103105
)
104106
self.webhooks = AsyncWebhooksClient(client_wrapper=self._client_wrapper)
107+
self.music = AsyncMusicClient(client_wrapper=self._client_wrapper)

src/elevenlabs/music_custom.py

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
import typing
2+
import json
3+
import re
4+
from dataclasses import dataclass
5+
6+
from elevenlabs.music.client import MusicClient as AutogeneratedMusicClient, AsyncMusicClient as AutogeneratedAsyncMusicClient
7+
from elevenlabs.types.music_prompt import MusicPrompt
8+
from elevenlabs.music.types.music_compose_detailed_request_output_format import MusicComposeDetailedRequestOutputFormat
9+
from elevenlabs.core.request_options import RequestOptions
10+
11+
# this is used as the default value for optional parameters
12+
OMIT = typing.cast(typing.Any, ...)
13+
14+
15+
@dataclass
class SongMetadata:
    """
    Plain record of song-level metadata.

    NOTE(review): nothing in the visible part of this module constructs this
    class; presumably it mirrors the ``songMetadata`` object carried in the
    JSON part of a ``compose_detailed`` response -- confirm against the API.
    """

    # Song title.
    title: str
    # Free-text description of the song.
    description: str
    # Genre labels (exact vocabulary not visible here).
    genres: typing.List[str]
    # Language labels (format not visible here -- TODO confirm).
    languages: typing.List[str]
    # Explicit-content flag.
    is_explicit: bool
22+
23+
24+
@dataclass
class MultipartResponse:
    """
    Parsed result of a multipart ``compose_detailed`` response.

    Holds the decoded JSON part, the raw audio payload and the filename
    advertised for the audio.
    """

    # Decoded JSON part of the response (contains compositionPlan and songMetadata).
    json: typing.Dict[str, typing.Any]
    # Raw bytes of the audio part.
    audio: bytes
    # Filename taken from the response headers, or the parser's default.
    filename: str
29+
30+
31+
# Matches the quoted filename parameter of a Content-Disposition header.
_FILENAME_PATTERN = re.compile(rb'filename="([^"]+)"')


def _find_header_end(
    data: bytes, start: int, end: int
) -> typing.Optional[typing.Tuple[int, int, bytes]]:
    """
    Locate the blank line that terminates a multipart part's headers.

    Both CRLF (``\\r\\n\\r\\n``) and bare LF (``\\n\\n``) framing are accepted;
    whichever blank line occurs first inside ``data[start:end]`` wins.

    Returns:
        ``(headers_end, body_start, newline)`` where ``newline`` is the line
        terminator that was detected, or ``None`` if no blank line exists.
    """
    best: typing.Optional[typing.Tuple[int, int, bytes]] = None
    for newline in (b"\r\n", b"\n"):
        idx = data.find(newline + newline, start, end)
        if idx != -1 and (best is None or idx < best[0]):
            best = (idx, idx + 2 * len(newline), newline)
    return best


def _parse_multipart_response(response_bytes: bytes) -> MultipartResponse:
    """
    Parse a fully buffered two-part multipart body into JSON and audio.

    The first line of the body is taken to be the boundary delimiter, the
    first part is expected to be ``application/json`` and the part after the
    second boundary is treated as the audio. All work is done on bytes so the
    binary audio is never text-decoded.

    Args:
        response_bytes: Complete response body.

    Returns:
        MultipartResponse with the decoded JSON object, the audio bytes
        (without the closing delimiter) and the audio filename
        (``generated_music.mp3`` when the headers do not name one).

    Raises:
        ValueError: If the body is empty, the second boundary or the audio
            header terminator is missing, or the JSON part cannot be decoded
            into an object.
    """
    boundary = response_bytes.partition(b"\n")[0].strip()
    if not boundary:
        raise ValueError("Empty response from music API")

    # bytes.find runs at C speed; no need for a Python-level byte loop.
    first_boundary = response_bytes.find(boundary)
    json_part_start = first_boundary + len(boundary)
    second_boundary = response_bytes.find(boundary, json_part_start)
    if second_boundary == -1:
        raise ValueError('Could not find audio part boundary')

    # --- JSON part: everything between the first and second boundary ---
    json_data: typing.Any = None
    json_split = _find_header_end(response_bytes, json_part_start, second_boundary)
    if json_split is not None:
        headers_end, body_start, _ = json_split
        json_headers = response_bytes[json_part_start:headers_end]
        # Header names/values are case-insensitive, so compare lowercased.
        if b"application/json" in json_headers.lower():
            json_body = response_bytes[body_start:second_boundary]
            try:
                json_data = json.loads(json_body.decode("utf-8"))
            except ValueError as err:
                # Covers json.JSONDecodeError and UnicodeDecodeError.
                raise ValueError('Could not parse JSON data') from err
    if not isinstance(json_data, dict):
        raise ValueError('Could not parse JSON data')

    # --- Audio part: headers, blank line, payload, optional closing delimiter ---
    audio_part_start = second_boundary + len(boundary)
    audio_split = _find_header_end(response_bytes, audio_part_start, len(response_bytes))
    if audio_split is None:
        raise ValueError('Could not find start of audio data')
    audio_headers_end, audio_start, newline = audio_split

    # Only the audio part's own headers are searched, so the match can never
    # come from binary payload or from the JSON part.
    filename = 'generated_music.mp3'
    match = _FILENAME_PATTERN.search(response_bytes, audio_part_start, audio_headers_end)
    if match:
        filename = match.group(1).decode('utf-8', errors='ignore')

    # The line break preceding a delimiter belongs to the delimiter, not to
    # the payload, so cut at "<newline><boundary>" when one follows the audio.
    audio_end = response_bytes.find(newline + boundary, audio_start)
    if audio_end == -1:
        audio_end = len(response_bytes)

    return MultipartResponse(
        json=json_data,
        audio=response_bytes[audio_start:audio_end],
        filename=filename,
    )


class MusicClient(AutogeneratedMusicClient):
    """
    A client to handle ElevenLabs music-related functionality
    Extends the autogenerated client to include custom music methods
    """

    def compose_detailed(
        self,
        *,
        output_format: typing.Optional[MusicComposeDetailedRequestOutputFormat] = None,
        prompt: typing.Optional[str] = OMIT,
        music_prompt: typing.Optional[MusicPrompt] = OMIT,
        composition_plan: typing.Optional[MusicPrompt] = OMIT,
        music_length_ms: typing.Optional[int] = OMIT,
        model_id: typing.Optional[typing.Literal["music_v1"]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> MultipartResponse:
        """
        Compose a song from a prompt or a composition plan with detailed response parsing.
        This method calls the original compose_detailed and then parses the stream response.

        All arguments are forwarded unchanged to the autogenerated
        ``compose_detailed``.

        Returns a MultipartResponse containing parsed JSON metadata, audio bytes, and filename.

        Raises:
            ValueError: If the response body cannot be parsed as the expected
                multipart layout.
        """
        # Call the parent method to get the stream
        stream = super().compose_detailed(
            output_format=output_format,
            prompt=prompt,
            music_prompt=music_prompt,
            composition_plan=composition_plan,
            music_length_ms=music_length_ms,
            model_id=model_id,
            request_options=request_options,
        )

        # Parse the stream using the parsing method
        return self._parse_multipart(stream)

    def _parse_multipart(self, stream: typing.Iterator[bytes]) -> MultipartResponse:
        """
        Reads a byte stream containing multipart data and parses it into JSON and audio parts.

        Args:
            stream: Iterator of bytes from ElevenLabs music API response

        Returns:
            MultipartResponse containing parsed JSON metadata, audio bytes, and filename

        Raises:
            ValueError: If the buffered body is not the expected multipart layout.
        """
        # The whole body is buffered because the parser needs random access.
        return _parse_multipart_response(b''.join(stream))


class AsyncMusicClient(AutogeneratedAsyncMusicClient):
    """
    An async client to handle ElevenLabs music-related functionality
    Extends the autogenerated async client to include custom music methods
    """

    async def compose_detailed(
        self,
        *,
        output_format: typing.Optional[MusicComposeDetailedRequestOutputFormat] = None,
        prompt: typing.Optional[str] = OMIT,
        music_prompt: typing.Optional[MusicPrompt] = OMIT,
        composition_plan: typing.Optional[MusicPrompt] = OMIT,
        music_length_ms: typing.Optional[int] = OMIT,
        model_id: typing.Optional[typing.Literal["music_v1"]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> MultipartResponse:
        """
        Compose a song from a prompt or a composition plan with detailed response parsing.
        This method calls the original compose_detailed and then parses the stream response.

        All arguments are forwarded unchanged to the autogenerated
        ``compose_detailed``.

        Returns a MultipartResponse containing parsed JSON metadata, audio bytes, and filename.

        Raises:
            ValueError: If the response body cannot be parsed as the expected
                multipart layout.
        """
        # Call the parent method to get the stream. The call itself is not
        # awaited; its result is consumed with ``async for`` in the parser.
        stream = super().compose_detailed(
            output_format=output_format,
            prompt=prompt,
            music_prompt=music_prompt,
            composition_plan=composition_plan,
            music_length_ms=music_length_ms,
            model_id=model_id,
            request_options=request_options,
        )

        # Parse the stream using the parsing method
        return await self._parse_multipart_async(stream)

    async def _parse_multipart_async(self, stream: typing.AsyncIterator[bytes]) -> MultipartResponse:
        """
        Reads an async byte stream containing multipart data and parses it into JSON and audio parts.

        Args:
            stream: AsyncIterator of bytes from ElevenLabs music API response

        Returns:
            MultipartResponse containing parsed JSON metadata, audio bytes, and filename

        Raises:
            ValueError: If the buffered body is not the expected multipart layout.
        """
        # The whole body is buffered because the parser needs random access.
        chunks = [chunk async for chunk in stream]
        return _parse_multipart_response(b''.join(chunks))

0 commit comments

Comments
 (0)