|
16 | 16 | build_reasoning_param, |
17 | 17 | extract_reasoning_from_model_name, |
18 | 18 | ) |
| 19 | +from .service_tier import resolve_service_tier, service_tier_error_message |
19 | 20 | from .upstream import normalize_model_name, start_upstream_request |
20 | 21 | from .utils import ( |
21 | 22 | convert_chat_messages_to_responses_input, |
@@ -93,6 +94,16 @@ def chat_completions() -> Response: |
93 | 94 | _log_json("OUT POST /v1/chat/completions", err) |
94 | 95 | return jsonify(err), 400 |
95 | 96 |
|
| 97 | + service_tier, invalid_service_tier = resolve_service_tier( |
| 98 | + payload.get("service_tier"), |
| 99 | + current_app.config.get("SERVICE_TIER"), |
| 100 | + ) |
| 101 | + if invalid_service_tier: |
| 102 | + err = {"error": {"message": service_tier_error_message()}} |
| 103 | + if verbose: |
| 104 | + _log_json("OUT POST /v1/chat/completions", err) |
| 105 | + return jsonify(err), 400 |
| 106 | + |
96 | 107 | requested_model = payload.get("model") |
97 | 108 | model = normalize_model_name(requested_model, debug_model) |
98 | 109 | messages = payload.get("messages") |
@@ -187,6 +198,7 @@ def chat_completions() -> Response: |
187 | 198 | tool_choice=tool_choice, |
188 | 199 | parallel_tool_calls=parallel_tool_calls, |
189 | 200 | reasoning_param=reasoning_param, |
| 201 | + service_tier=service_tier, |
190 | 202 | ) |
191 | 203 | if error_resp is not None: |
192 | 204 | if verbose: |
@@ -224,6 +236,7 @@ def chat_completions() -> Response: |
224 | 236 | tool_choice=safe_choice, |
225 | 237 | parallel_tool_calls=parallel_tool_calls, |
226 | 238 | reasoning_param=reasoning_param, |
| 239 | + service_tier=service_tier, |
227 | 240 | ) |
228 | 241 | record_rate_limits_from_response(upstream2) |
229 | 242 | if err2 is None and upstream2 is not None and upstream2.status_code < 400: |
@@ -391,6 +404,16 @@ def completions() -> Response: |
391 | 404 | _log_json("OUT POST /v1/completions", err) |
392 | 405 | return jsonify(err), 400 |
393 | 406 |
|
| 407 | + service_tier, invalid_service_tier = resolve_service_tier( |
| 408 | + payload.get("service_tier"), |
| 409 | + current_app.config.get("SERVICE_TIER"), |
| 410 | + ) |
| 411 | + if invalid_service_tier: |
| 412 | + err = {"error": {"message": service_tier_error_message()}} |
| 413 | + if verbose: |
| 414 | + _log_json("OUT POST /v1/completions", err) |
| 415 | + return jsonify(err), 400 |
| 416 | + |
394 | 417 | requested_model = payload.get("model") |
395 | 418 | model = normalize_model_name(requested_model, debug_model) |
396 | 419 | prompt = payload.get("prompt") |
@@ -418,6 +441,7 @@ def completions() -> Response: |
418 | 441 | input_items, |
419 | 442 | instructions=_instructions_for_model(model), |
420 | 443 | reasoning_param=reasoning_param, |
| 444 | + service_tier=service_tier, |
421 | 445 | ) |
422 | 446 | if error_resp is not None: |
423 | 447 | if verbose: |
|
0 commit comments