@@ -82,6 +82,28 @@ def get_customizable_model_schema(
8282 en_US = credentials ["display_name" ], zh_Hans = credentials ["display_name" ]
8383 )
8484
85+ entity .parameter_rules .append (
86+ ParameterRule (
87+ name = "strict_compatibility" ,
88+ label = I18nObject (en_US = "Strict compatibility mode" , zh_Hans = "严格兼容模式" ),
89+ help = I18nObject (
90+ en_US = (
91+ "Whether to prioritize strict OpenAI compatibility. "
92+ "When True, OpenAI compatibility is prioritized and extended parameters "
93+ "(e.g., thinking, chat_template_kwargs) are not added. "
94+ "Set to False to enable these extensions."
95+ ),
96+ zh_Hans = (
97+ "是否优先严格的 OpenAI 兼容性。"
98+ "为 True 时,将优先 OpenAI 兼容性,并且不会添加扩展参数(例如 thinking、chat_template_kwargs)。"
99+ "设为 False 以启用这些扩展。"
100+ )
101+ ),
102+ type = ParameterType .BOOLEAN ,
103+ required = False ,
104+ )
105+ )
106+
85107 # Configure thinking mode parameter based on model support
86108 agent_thought_support = credentials .get ("agent_thought_support" , "not_supported" )
87109
@@ -200,9 +222,18 @@ def _invoke(
200222 user_enable_thinking = model_parameters .pop ("enable_thinking" , None )
201223 if user_enable_thinking is not None :
202224 enable_thinking_value = bool (user_enable_thinking )
203-
204- chat_template_kwargs = model_parameters .setdefault ("chat_template_kwargs" , {})
205- if enable_thinking_value is not None :
225+
226+ user_strict_compatibility = model_parameters .pop ("strict_compatibility" , None )
227+ # Default `strict_compatibility_value` is False.
228+ strict_compatibility_value = False
229+ if user_strict_compatibility is not None :
230+ strict_compatibility_value = bool (user_strict_compatibility )
231+
232+ if enable_thinking_value is not None and strict_compatibility_value is False :
233+ # Only apply when `strict_compatibility_value` is False since
234+ # `chat_template_kwargs` and `thinking` are non-standard parameters.
235+
236+ chat_template_kwargs = model_parameters .setdefault ("chat_template_kwargs" , {})
206237 # Support vLLM/SGLang format (chat_template_kwargs)
207238 chat_template_kwargs ["enable_thinking" ] = enable_thinking_value
208239 chat_template_kwargs ["thinking" ] = enable_thinking_value
@@ -220,7 +251,11 @@ def _invoke(
220251 # - chat_template_kwargs for runtimes that read template kwargs (e.g., llama.cpp).
221252 # Only apply when thinking mode is explicitly enabled.
222253 model_parameters ["reasoning_effort" ] = reasoning_effort_value
223- chat_template_kwargs ["reasoning_effort" ] = reasoning_effort_value
254+ if strict_compatibility_value is False :
255+ # Only apply when `strict_compatibility_value` is False since
256+ # `chat_template_kwargs` is a non-standard parameter.
257+ chat_template_kwargs = model_parameters .setdefault ("chat_template_kwargs" , {})
258+ chat_template_kwargs ["reasoning_effort" ] = reasoning_effort_value
224259
225260 # Remove thinking content from assistant messages for better performance.
226261 with suppress (Exception ):
0 commit comments