|
| 1 | +"""LiteLLM DeepSeek 兜底请求清洗回调。""" |
| 2 | + |
| 3 | +from typing import Any |
| 4 | + |
| 5 | +from litellm.integrations.custom_logger import CustomLogger |
| 6 | + |
# Model-name prefixes matched (via ``str.startswith``) to recognise DeepSeek requests.
DEEPSEEK_MODEL_PREFIXES = (
    "deepseek-v4-pro",
    "deepseek-v4-flash",
)
# Content-block ``type`` values that are stripped from message history.
THINKING_BLOCK_TYPES = {
    "thinking",
    "redacted_thinking",
}
| 9 | + |
| 10 | + |
| 11 | +def _is_deepseek_anthropic_request(kwargs: dict[str, Any]) -> bool: |
| 12 | + """判断当前请求是否即将发往 DeepSeek Anthropic 兼容端点。 |
| 13 | +
|
| 14 | + Args: |
| 15 | + kwargs: LiteLLM 传入的模型调用上下文。 |
| 16 | +
|
| 17 | + Returns: |
| 18 | + 命中 DeepSeek Anthropic 请求时返回 True,否则返回 False。 |
| 19 | + """ |
| 20 | + additional_args = kwargs.get("additional_args") or {} |
| 21 | + request_body = additional_args.get("complete_input_dict") |
| 22 | + request_model = "" |
| 23 | + if isinstance(request_body, dict): |
| 24 | + request_model = str(request_body.get("model") or "") |
| 25 | + |
| 26 | + api_base = str(additional_args.get("api_base") or "") |
| 27 | + return request_model.startswith(DEEPSEEK_MODEL_PREFIXES) or ("deepseek" in api_base and "/anthropic/" in api_base) |
| 28 | + |
| 29 | + |
| 30 | +def _without_thinking_blocks(content: Any) -> Any: |
| 31 | + """移除 Anthropic content 列表里的 thinking 内容块。 |
| 32 | +
|
| 33 | + Args: |
| 34 | + content: 单条消息的 content 字段。 |
| 35 | +
|
| 36 | + Returns: |
| 37 | + 清理后的 content;非列表内容会原样返回。 |
| 38 | + """ |
| 39 | + if not isinstance(content, list): |
| 40 | + return content |
| 41 | + |
| 42 | + return [block for block in content if not (isinstance(block, dict) and block.get("type") in THINKING_BLOCK_TYPES)] |
| 43 | + |
| 44 | + |
| 45 | +def _sanitize_messages(messages: Any) -> Any: |
| 46 | + """清理历史消息中 DeepSeek 兜底无法校验的 thinking 块。 |
| 47 | +
|
| 48 | + Args: |
| 49 | + messages: Anthropic messages 请求体中的 messages 字段。 |
| 50 | +
|
| 51 | + Returns: |
| 52 | + 清理后的 messages;非列表内容会原样返回。 |
| 53 | + """ |
| 54 | + if not isinstance(messages, list): |
| 55 | + return messages |
| 56 | + |
| 57 | + sanitized_messages: list[Any] = [] |
| 58 | + for message in messages: |
| 59 | + if not isinstance(message, dict): |
| 60 | + sanitized_messages.append(message) |
| 61 | + continue |
| 62 | + |
| 63 | + sanitized_message = dict(message) |
| 64 | + sanitized_message.pop("thinking_blocks", None) |
| 65 | + sanitized_content = _without_thinking_blocks(sanitized_message.get("content")) |
| 66 | + if isinstance(sanitized_content, list) and len(sanitized_content) == 0: |
| 67 | + # thinking-only 历史消息对 DeepSeek 兜底没有可见上下文价值, |
| 68 | + # 保留空内容反而可能触发上游校验失败。 |
| 69 | + if sanitized_message.get("role") == "assistant": |
| 70 | + continue |
| 71 | + sanitized_message["content"] = "" |
| 72 | + else: |
| 73 | + sanitized_message["content"] = sanitized_content |
| 74 | + |
| 75 | + sanitized_messages.append(sanitized_message) |
| 76 | + |
| 77 | + return sanitized_messages |
| 78 | + |
| 79 | + |
class DeepSeekThinkingSanitizer(CustomLogger):
    """Strip incompatible thinking history before a DeepSeek Anthropic fallback request is sent."""

    def log_pre_api_call(self, model: str, messages: list, kwargs: dict) -> None:
        """Sanitize the DeepSeek fallback request body in place.

        When the call is identified as a DeepSeek Anthropic request, the
        ``thinking`` and ``reasoning_effort`` keys are removed from both the
        request body and ``kwargs``. If the body has a ``messages`` entry, it
        is replaced by its sanitized form and mirrored into
        ``kwargs["messages"]``.

        NOTE(review): this only affects the outgoing HTTP request if LiteLLM
        serializes the same ``complete_input_dict`` object after this hook
        runs -- confirm against the LiteLLM version in use.

        Args:
            model: Current model name as recorded by LiteLLM (unused here).
            messages: Original message list as recorded by LiteLLM (unused here).
            kwargs: LiteLLM model-call context; the request body is read from
                ``kwargs["additional_args"]["complete_input_dict"]``.

        Returns:
            None. The request body and ``kwargs`` are modified in place.
        """
        if not _is_deepseek_anthropic_request(kwargs):
            return

        additional_args = kwargs.get("additional_args") or {}
        request_body = additional_args.get("complete_input_dict")
        if not isinstance(request_body, dict):
            return

        request_body.pop("thinking", None)
        request_body.pop("reasoning_effort", None)

        # Keep the LiteLLM logging context consistent with the cleaned body so
        # top-level fields in the logs do not disagree with what was sent.
        kwargs.pop("thinking", None)
        kwargs.pop("reasoning_effort", None)

        # Only touch ``messages`` when the body actually has one; otherwise a
        # spurious ``"messages": None`` would be injected into the request and
        # the logged messages would be overwritten with None.
        if "messages" in request_body:
            sanitized = _sanitize_messages(request_body["messages"])
            request_body["messages"] = sanitized
            kwargs["messages"] = sanitized
| 110 | + |
| 111 | + |
# Module-level callback instance.
# NOTE(review): presumably referenced by name from the LiteLLM proxy config -- verify.
proxy_handler_instance = DeepSeekThinkingSanitizer()
0 commit comments