Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions mlx_audio/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
File,
Form,
HTTPException,
Request,
Response,
UploadFile,
WebSocket,
Expand Down Expand Up @@ -260,7 +261,7 @@ async def remove_model(model_name: str):
raise HTTPException(status_code=404, detail=f"Model '{model_name}' not found")


async def generate_audio(model, payload: SpeechRequest):
async def generate_audio(model, payload: SpeechRequest, request: Request):
# Load reference audio if provided
ref_audio = payload.ref_audio
audio_chunks = []
Expand Down Expand Up @@ -300,6 +301,10 @@ async def generate_audio(model, payload: SpeechRequest):
verbose=payload.verbose,
):

if await request.is_disconnected():
mx.clear_cache()
return

if payload.stream:
buffer = io.BytesIO()
audio_write(
Expand All @@ -311,6 +316,8 @@ async def generate_audio(model, payload: SpeechRequest):
if sample_rate is None:
sample_rate = result.sample_rate

await asyncio.sleep(0) # register any disconnects
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can cause the server performance to drop significantly.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Blaizzy I don't know how else to allow Python to temporarily pass control back to fastapi to register client disconnects. If I remove that line, this PR will do nothing.

For what it's worth, I streamed generated audio for over an hour with a stream interval of 0.5 on a MacBook Air. No hiccups at all. If the server performance had dropped significantly, then normal performance might be overkill.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, let's merge for now and revisit it later.

Could you open a GitHub issue describing the problem, preferably with a video, so we can reproduce it and fix it later without using asyncio.sleep(0)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


if payload.stream:
return

Expand All @@ -324,11 +331,11 @@ async def generate_audio(model, payload: SpeechRequest):


@app.post("/v1/audio/speech")
async def tts_speech(payload: SpeechRequest):
async def tts_speech(payload: SpeechRequest, request: Request):
"""Generate speech audio following the OpenAI text-to-speech API."""
model = model_provider.load_model(payload.model)
return StreamingResponse(
generate_audio(model, payload),
generate_audio(model, payload, request),
media_type=f"audio/{payload.response_format}",
headers={
"Content-Disposition": f"attachment; filename=speech.{payload.response_format}"
Expand Down
Loading