diff --git a/fastchat/serve/base_model_worker.py b/fastchat/serve/base_model_worker.py
--- a/fastchat/serve/base_model_worker.py
+++ b/fastchat/serve/base_model_worker.py
@@ -215,7 +215,12 @@ async def api_generate(request: Request):
 async def api_get_embeddings(request: Request):
     params = await request.json()
     await acquire_worker_semaphore()
-    embedding = worker.get_embeddings(params)
-    release_worker_semaphore()
+    try:
+        # Run the synchronous embedding call in a separate thread so it
+        # does not block the event loop.
+        embedding = await asyncio.to_thread(worker.get_embeddings, params)
+    finally:
+        # Release the semaphore whether or not the call succeeded.
+        release_worker_semaphore()
     return JSONResponse(content=embedding)
 
diff --git a/fastchat/serve/huggingface_api_worker.py b/fastchat/serve/huggingface_api_worker.py
--- a/fastchat/serve/huggingface_api_worker.py
+++ b/fastchat/serve/huggingface_api_worker.py
@@ -233,7 +233,12 @@ async def api_generate(request: Request):
     params = await request.json()
     worker = worker_map[params["model"]]
     await acquire_worker_semaphore(worker)
-    output = worker.generate_gate(params)
-    release_worker_semaphore(worker)
+    try:
+        # Run the synchronous generation call in a separate thread so it
+        # does not block the event loop.
+        output = await asyncio.to_thread(worker.generate_gate, params)
+    finally:
+        # Release the semaphore whether or not the call succeeded.
+        release_worker_semaphore(worker)
     return JSONResponse(output)
 
diff --git a/fastchat/serve/multi_model_worker.py b/fastchat/serve/multi_model_worker.py
--- a/fastchat/serve/multi_model_worker.py
+++ b/fastchat/serve/multi_model_worker.py
@@ -109,7 +109,14 @@ async def api_generate(request: Request):
     params = await request.json()
     await acquire_worker_semaphore()
-    worker = worker_map[params["model"]]
-    output = worker.generate_gate(params)
-    release_worker_semaphore()
+    try:
+        # Resolve the worker inside the guarded region: the semaphore is
+        # already held here, so a failed lookup must still release it.
+        worker = worker_map[params["model"]]
+        # Run the synchronous generation call in a separate thread so it
+        # does not block the event loop.
+        output = await asyncio.to_thread(worker.generate_gate, params)
+    finally:
+        # Release the semaphore whether or not the call succeeded.
+        release_worker_semaphore()
     return JSONResponse(output)
 