|
31 | 31 | ToolChoice, |
32 | 32 | ToolDefinition, |
33 | 33 | ) |
| 34 | +from .llm_throttle import get_llm_semaphore |
34 | 35 |
|
35 | 36 | # Common constants |
36 | 37 | API_VERSION = "2024-10-21" # Standard API version for OpenAI-compatible endpoints |
@@ -189,13 +190,14 @@ async def embeddings( |
189 | 190 | ) |
190 | 191 | endpoint = Endpoint("/" + endpoint) |
191 | 192 |
|
192 | | - response = await self.request_async( |
193 | | - "POST", |
194 | | - endpoint, |
195 | | - json={"input": input}, |
196 | | - params={"api-version": API_VERSION}, |
197 | | - headers=DEFAULT_LLM_HEADERS, |
198 | | - ) |
| 193 | + async with get_llm_semaphore(): |
| 194 | + response = await self.request_async( |
| 195 | + "POST", |
| 196 | + endpoint, |
| 197 | + json={"input": input}, |
| 198 | + params={"api-version": API_VERSION}, |
| 199 | + headers=DEFAULT_LLM_HEADERS, |
| 200 | + ) |
199 | 201 |
|
200 | 202 | return TextEmbedding.model_validate(response.json()) |
201 | 203 |
|
@@ -315,13 +317,14 @@ class Country(BaseModel): |
315 | 317 | # Use provided dictionary format directly |
316 | 318 | request_body["response_format"] = response_format |
317 | 319 |
|
318 | | - response = await self.request_async( |
319 | | - "POST", |
320 | | - endpoint, |
321 | | - json=request_body, |
322 | | - params={"api-version": API_VERSION}, |
323 | | - headers=DEFAULT_LLM_HEADERS, |
324 | | - ) |
| 320 | + async with get_llm_semaphore(): |
| 321 | + response = await self.request_async( |
| 322 | + "POST", |
| 323 | + endpoint, |
| 324 | + json=request_body, |
| 325 | + params={"api-version": API_VERSION}, |
| 326 | + headers=DEFAULT_LLM_HEADERS, |
| 327 | + ) |
325 | 328 |
|
326 | 329 | return ChatCompletion.model_validate(response.json()) |
327 | 330 |
|
@@ -546,13 +549,14 @@ class Country(BaseModel): |
546 | 549 | "X-UiPath-LlmGateway-NormalizedApi-ModelName": model, |
547 | 550 | } |
548 | 551 |
|
549 | | - response = await self.request_async( |
550 | | - "POST", |
551 | | - endpoint, |
552 | | - json=request_body, |
553 | | - params={"api-version": NORMALIZED_API_VERSION}, |
554 | | - headers=headers, |
555 | | - ) |
| 552 | + async with get_llm_semaphore(): |
| 553 | + response = await self.request_async( |
| 554 | + "POST", |
| 555 | + endpoint, |
| 556 | + json=request_body, |
| 557 | + params={"api-version": NORMALIZED_API_VERSION}, |
| 558 | + headers=headers, |
| 559 | + ) |
556 | 560 |
|
557 | 561 | return ChatCompletion.model_validate(response.json()) |
558 | 562 |
|
|
0 commit comments