From b5dd077673d92bfef82577f954f78cee4691e836 Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sat, 4 Apr 2026 01:36:02 +0800 Subject: [PATCH 1/8] feat(openai/azure): enable temperature when reasoning_effort=none for gpt-5.1/5.2/5.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from GPT-5.2 (and confirmed also for GPT-5.1), when reasoning_effort is set to "none", temperature and top_p become valid parameters. With any other reasoning effort value, passing these parameters causes a 400 API error. Changes: - OpenAI provider: add temperature parameter to gpt-5.1/5.2/5.4 YAML definitions - OpenAI llm.py (_build_responses_api_params): strip temperature/top_p/logprobs when reasoning_effort is active (non-"none") - OpenAI llm.py (_chat_generate): same stripping for standard Chat Completions path - Azure OpenAI constants.py: add temperature ParameterRule to gpt-5.1/5.2/5.4 entries - Azure OpenAI llm.py (_chat_generate_with_responses): restructure reasoning vs temperature/top_p as mutually exclusive — reasoning active → set reasoning param only; reasoning=none or absent → pass temperature/top_p, skip reasoning param Tested against Azure OpenAI (dscgpt.openai.azure.com) with all three deployments: - reasoning=none + temperature=0.7 → success - reasoning=low + temperature=0.7 → expected 400 error Co-Authored-By: Claude Sonnet 4.6 --- models/azure_openai/manifest.yaml | 2 +- models/azure_openai/models/constants.py | 12 ++++++++++++ models/azure_openai/models/llm/llm.py | 18 +++++++++++------- models/openai/manifest.yaml | 2 +- models/openai/models/llm/gpt-5.1.yaml | 2 ++ models/openai/models/llm/gpt-5.2.yaml | 2 ++ models/openai/models/llm/gpt-5.4.yaml | 2 ++ models/openai/models/llm/llm.py | 13 ++++++++++++- 8 files changed, 43 insertions(+), 10 deletions(-) diff --git a/models/azure_openai/manifest.yaml b/models/azure_openai/manifest.yaml index 91c69184f..febf8e600 100644 --- a/models/azure_openai/manifest.yaml +++ b/models/azure_openai/manifest.yaml @@ -24,4 +24,4 @@ resource: model: enabled: false type: plugin -version: 0.0.49 +version: 0.0.50 diff --git a/models/azure_openai/models/constants.py b/models/azure_openai/models/constants.py index e8001d911..0017d26e0 100644 --- a/models/azure_openai/models/constants.py +++ b/models/azure_openai/models/constants.py @@ -2586,6 +2586,10 @@ class AzureBaseModel(BaseModel): ModelPropertyKey.CONTEXT_SIZE: 272000, }, parameter_rules=[ + ParameterRule( + name="temperature", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), ParameterRule( name="response_format", label=I18nObject(zh_Hans="回复格式", en_US="response_format"), @@ -3050,6 +3054,10 @@ class AzureBaseModel(BaseModel): ModelPropertyKey.CONTEXT_SIZE: 400000, }, parameter_rules=[ + ParameterRule( + name="temperature", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), ParameterRule( name="top_p", **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], @@ -3500,6 +3508,10 @@ class AzureBaseModel(BaseModel): ModelPropertyKey.CONTEXT_SIZE: 400000, }, parameter_rules=[ + ParameterRule( + name="temperature", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), ParameterRule( name="top_p", **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], diff --git a/models/azure_openai/models/llm/llm.py b/models/azure_openai/models/llm/llm.py index d69f53046..08b9e4762 100644 --- a/models/azure_openai/models/llm/llm.py +++ b/models/azure_openai/models/llm/llm.py @@ -429,10 +429,17 @@ def _chat_generate_with_responses( } # Map model parameters to the Responses API - if "temperature" in model_parameters: - responses_params["temperature"] = model_parameters["temperature"] - if "top_p" in model_parameters: - responses_params["top_p"] = model_parameters["top_p"] + # reasoning_effort controls whether temperature/top_p are supported: + # - "none" (or not set): reasoning is disabled, temperature/top_p are valid + # - any other value: reasoning is active, temperature/top_p are NOT supported + reasoning_effort = model_parameters.get("reasoning_effort") + if reasoning_effort and reasoning_effort != "none": + responses_params["reasoning"] = {"effort": reasoning_effort} + else: + if "temperature" in model_parameters: + responses_params["temperature"] = model_parameters["temperature"] + if "top_p" in model_parameters: + responses_params["top_p"] = model_parameters["top_p"] if "max_tokens" in model_parameters: responses_params["max_output_tokens"] = model_parameters["max_tokens"] elif "max_completion_tokens" in model_parameters: @@ -495,9 +502,6 @@ def _chat_generate_with_responses( "format": {"type": response_format} } - if "reasoning_effort" in model_parameters: - responses_params["reasoning"] = {"effort": model_parameters["reasoning_effort"]} - logger.info( f"llm request with responses api: model={model}, stream={stream}, " f"parameters={responses_params}" diff --git a/models/openai/manifest.yaml b/models/openai/manifest.yaml index a6b81306f..388b1cdf3 100644 --- a/models/openai/manifest.yaml +++ b/models/openai/manifest.yaml @@ -1,4 +1,4 @@ -version: 0.3.4 +version: 0.3.5 type: plugin author: "langgenius" name: "openai" diff --git a/models/openai/models/llm/gpt-5.1.yaml b/models/openai/models/llm/gpt-5.1.yaml index 054ed6f58..4bb225d55 100644 --- a/models/openai/models/llm/gpt-5.1.yaml +++ b/models/openai/models/llm/gpt-5.1.yaml @@ -17,6 +17,8 @@ parameter_rules: default: 8192 min: 1 max: 128000 + - name: temperature + use_template: temperature - name: response_format label: zh_Hans: 回复格式 diff --git a/models/openai/models/llm/gpt-5.2.yaml b/models/openai/models/llm/gpt-5.2.yaml index eddb20b1c..7b5a13895 100644 --- a/models/openai/models/llm/gpt-5.2.yaml +++ b/models/openai/models/llm/gpt-5.2.yaml @@ -18,6 +18,8 @@ parameter_rules: default: 8192 min: 1 max: 128000 + - name: temperature + use_template: temperature - name: response_format label: zh_Hans: 回复格式 diff --git a/models/openai/models/llm/gpt-5.4.yaml b/models/openai/models/llm/gpt-5.4.yaml index 77ac38123..25ea02fa4 100644 --- a/models/openai/models/llm/gpt-5.4.yaml +++ b/models/openai/models/llm/gpt-5.4.yaml @@ -18,6 +18,8 @@ parameter_rules: default: 8192 min: 1 max: 128000 + - name: temperature + use_template: temperature - name: response_format label: zh_Hans: 回复格式 diff --git a/models/openai/models/llm/llm.py b/models/openai/models/llm/llm.py index 13a1616bb..75cc497d3 100644 --- a/models/openai/models/llm/llm.py +++ b/models/openai/models/llm/llm.py @@ -776,7 +776,14 @@ def _chat_generate( else: # chat model messages: Any = [self._convert_prompt_message_to_dict(m) for m in prompt_messages] - + + # For models where temperature is only valid when reasoning_effort="none" + # (e.g. gpt-5.1/5.2/5.4): strip temperature/top_p when reasoning is active + _re = model_parameters.get("reasoning_effort") + if _re and _re != "none": + model_parameters.pop("temperature", None) + model_parameters.pop("top_p", None) + try: response = client.chat.completions.create( messages=messages, @@ -821,6 +828,10 @@ def _build_responses_api_params( reasoning_effort = params.pop("reasoning_effort", None) if reasoning_effort and reasoning_effort != "none": params["reasoning"] = {"effort": reasoning_effort} + # temperature/top_p/logprobs not supported when reasoning is active + params.pop("temperature", None) + params.pop("top_p", None) + params.pop("logprobs", None) # response_format -> text.format (Responses API uses different format) # response_format is incompatible with Responses API, convert to text.format From dce04f663603d2e7beadbebb7befd48580f97e94 Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sun, 10 May 2026 13:31:53 +0800 Subject: [PATCH 2/8] feat(embedding): add dimensions parameter for OpenAI and Azure OpenAI text-embedding-3-* models Co-Authored-By: Claude Sonnet 4.6 --- .../models/text_embedding/text_embedding.py | 21 +++++++++++++++++++ .../azure_openai/provider/azure_openai.yaml | 15 +++++++++++++ .../models/text_embedding/text_embedding.py | 19 +++++++++++++++++ models/openai/provider/openai.yaml | 15 +++++++++++++ 4 files changed, 70 insertions(+) diff --git a/models/azure_openai/models/text_embedding/text_embedding.py b/models/azure_openai/models/text_embedding/text_embedding.py index 6813ccc4b..b31f00629 100644 --- a/models/azure_openai/models/text_embedding/text_embedding.py +++ b/models/azure_openai/models/text_embedding/text_embedding.py @@ -41,6 +41,11 @@ def _invoke( if user: extra_model_kwargs["user"] = user extra_model_kwargs["encoding_format"] = "base64" + + dimensions = credentials.get("embedding_dimensions") + if dimensions and base_model_name != "text-embedding-ada-002": + extra_model_kwargs["dimensions"] = int(dimensions) + context_size = self._get_context_size(model, credentials) max_chunks = self._get_max_chunks(model, credentials) embeddings: list[list[float]] = [[] for _ in range(len(texts))] @@ -130,6 +135,22 @@ def validate_credentials(self, model: str, credentials: dict) -> None: raise CredentialsValidateFailedError( f"Base Model Name {credentials['base_model_name']} is invalid" ) + + _MAX_DIMS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536} + base_model = credentials.get("base_model_name", "") + dimensions = credentials.get("embedding_dimensions") + if dimensions: + try: + dim_int = int(dimensions) + except (ValueError, TypeError): + raise CredentialsValidateFailedError("embedding_dimensions must be an integer") + if base_model == "text-embedding-ada-002": + raise CredentialsValidateFailedError("text-embedding-ada-002 does not support dimensions") + if base_model in _MAX_DIMS and not (1 <= dim_int <= _MAX_DIMS[base_model]): + raise CredentialsValidateFailedError( + f"embedding_dimensions for {base_model} must be between 1 and {_MAX_DIMS[base_model]}" + ) + try: client = self._create_client(credentials) self._embedding_invoke( diff --git a/models/azure_openai/provider/azure_openai.yaml b/models/azure_openai/provider/azure_openai.yaml index 29589f737..5e3a93491 100644 --- a/models/azure_openai/provider/azure_openai.yaml +++ b/models/azure_openai/provider/azure_openai.yaml @@ -521,6 +521,21 @@ model_credential_schema: required: true type: select variable: base_model_name + - variable: embedding_dimensions + label: + en_US: Embedding Dimensions + zh_Hans: 向量维度 + type: text-input + required: false + placeholder: + en_US: "e.g. 1536 (optional, text-embedding-3-* only)" + zh_Hans: "如 1536(可选,仅 text-embedding-3-* 支持)" + help: + en_US: "Reduce output dimensions via MRL. text-embedding-3-small: 1-1536, text-embedding-3-large: 1-3072. Leave empty for model default." + zh_Hans: "通过 MRL 缩减输出维度。text-embedding-3-small: 1-1536,text-embedding-3-large: 1-3072。留空使用模型默认值。" + show_on: + - variable: __model_type + value: text-embedding model: label: en_US: Deployment Name diff --git a/models/openai/models/text_embedding/text_embedding.py b/models/openai/models/text_embedding/text_embedding.py index 0b91c55e3..c7166212d 100644 --- a/models/openai/models/text_embedding/text_embedding.py +++ b/models/openai/models/text_embedding/text_embedding.py @@ -49,6 +49,10 @@ def _invoke( extra_model_kwargs["encoding_format"] = "base64" + dimensions = credentials.get("embedding_dimensions") + if dimensions and model != "text-embedding-ada-002": + extra_model_kwargs["dimensions"] = int(dimensions) + # get model properties context_size = self._get_context_size(model, credentials) max_chunks = self._get_max_chunks(model, credentials) @@ -156,6 +160,21 @@ def validate_credentials(self, model: str, credentials: dict) -> None: credentials_kwargs = self._to_credential_kwargs(credentials) client = OpenAI(**credentials_kwargs) + # validate dimensions if provided + _MAX_DIMS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536} + dimensions = credentials.get("embedding_dimensions") + if dimensions: + try: + dim_int = int(dimensions) + except (ValueError, TypeError): + raise CredentialsValidateFailedError("embedding_dimensions must be an integer") + if model == "text-embedding-ada-002": + raise CredentialsValidateFailedError("text-embedding-ada-002 does not support dimensions") + if model in _MAX_DIMS and not (1 <= dim_int <= _MAX_DIMS[model]): + raise CredentialsValidateFailedError( + f"embedding_dimensions for {model} must be between 1 and {_MAX_DIMS[model]}" + ) + # call embedding model self._embedding_invoke( model=model, client=client, texts=["ping"], extra_model_kwargs={} diff --git a/models/openai/provider/openai.yaml b/models/openai/provider/openai.yaml index c95afc99c..e0ee17141 100644 --- a/models/openai/provider/openai.yaml +++ b/models/openai/provider/openai.yaml @@ -78,6 +78,21 @@ model_credential_schema: help: zh_Hans: 选择该模型使用的 API 协议。Chat Completions 适用于大多数模型,Responses API 适用于 o3-pro、gpt-5.4 等模型。 en_US: Select the API protocol for this model. Use Chat Completions for most models, Responses API for models like o3-pro and gpt-5.4. + - variable: embedding_dimensions + label: + en_US: Embedding Dimensions + zh_Hans: 向量维度 + type: text-input + required: false + placeholder: + en_US: "e.g. 1536 (optional, text-embedding-3-* only)" + zh_Hans: "如 1536(可选,仅 text-embedding-3-* 支持)" + help: + en_US: "Reduce output dimensions via MRL. text-embedding-3-small: 1-1536, text-embedding-3-large: 1-3072. Leave empty for model default." + zh_Hans: "通过 MRL 缩减输出维度。text-embedding-3-small: 1-1536,text-embedding-3-large: 1-3072。留空使用模型默认值。" + show_on: + - variable: __model_type + value: text-embedding provider_credential_schema: credential_form_schemas: - variable: openai_api_key From 80cc23b6b3539254b5db53abee6a4b22b4fae7ab Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sun, 10 May 2026 13:35:34 +0800 Subject: [PATCH 3/8] fix: apply review suggestions for gpt-5.1 top_p and logprobs stripping - Add top_p to gpt-5.1 parameter rules (openai yaml and azure constants) - Strip logprobs when reasoning_effort is active in _chat_generate path Co-Authored-By: Claude Sonnet 4.6 --- models/azure_openai/models/constants.py | 22 +++------------------- models/openai/models/llm/gpt-5.1.yaml | 2 ++ models/openai/models/llm/llm.py | 1 + 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/models/azure_openai/models/constants.py b/models/azure_openai/models/constants.py index 6f74e80e1..213cc03c3 100644 --- a/models/azure_openai/models/constants.py +++ b/models/azure_openai/models/constants.py @@ -2591,27 +2591,11 @@ class AzureBaseModel(BaseModel): **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], ), ParameterRule( - name="response_format", - label=I18nObject(zh_Hans="回复格式", en_US="response_format"), - type="string", - help=I18nObject( - zh_Hans="指定模型必须输出的格式", - en_US="specifying the format that the model must output", - ), - required=False, - options=["text", "json_object", "json_schema"], - ), - ParameterRule( - name="json_schema", - label=I18nObject(en_US="JSON Schema"), - type="text", - help=I18nObject( - zh_Hans="设置返回的json schema,llm将按照它返回", - en_US="Set a response json schema will ensure LLM to adhere it.", - ), - required=False, + name="top_p", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], ), ParameterRule( + name="response_format", name="reasoning_effort", label=I18nObject(zh_Hans="推理工作", en_US="reasoning_effort"), type="string", diff --git a/models/openai/models/llm/gpt-5.1.yaml b/models/openai/models/llm/gpt-5.1.yaml index 4bb225d55..61cdcb71f 100644 --- a/models/openai/models/llm/gpt-5.1.yaml +++ b/models/openai/models/llm/gpt-5.1.yaml @@ -19,6 +19,8 @@ parameter_rules: max: 128000 - name: temperature use_template: temperature + - name: top_p + use_template: top_p - name: response_format label: zh_Hans: 回复格式 diff --git a/models/openai/models/llm/llm.py b/models/openai/models/llm/llm.py index 2e2edbe3a..e79c1f339 100644 --- a/models/openai/models/llm/llm.py +++ b/models/openai/models/llm/llm.py @@ -792,6 +792,7 @@ def _chat_generate( if _re and _re != "none": model_parameters.pop("temperature", None) model_parameters.pop("top_p", None) + model_parameters.pop("logprobs", None) # thinking models require max_completion_tokens instead of max_tokens chat_params = model_parameters.copy() From 6ec75e197157e4cb18a88a8cf83d4a58d89fbabd Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sun, 10 May 2026 13:38:13 +0800 Subject: [PATCH 4/8] chore(azure_openai): bump version to 0.0.56 Co-Authored-By: Claude Sonnet 4.6 --- models/azure_openai/manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/azure_openai/manifest.yaml b/models/azure_openai/manifest.yaml index 8a3773380..7f811e067 100644 --- a/models/azure_openai/manifest.yaml +++ b/models/azure_openai/manifest.yaml @@ -24,4 +24,4 @@ resource: model: enabled: false type: plugin -version: 0.0.55 +version: 0.0.56 From 6acdfa98d803388c26670b5441254ce42174c607 Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sun, 10 May 2026 13:38:51 +0800 Subject: [PATCH 5/8] chore(openai): bump version to 0.3.9 Co-Authored-By: Claude Sonnet 4.6 --- models/openai/manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/openai/manifest.yaml b/models/openai/manifest.yaml index d705e6bf7..c148ee253 100644 --- a/models/openai/manifest.yaml +++ b/models/openai/manifest.yaml @@ -1,4 +1,4 @@ -version: 0.3.8 +version: 0.3.9 type: plugin author: "langgenius" name: "openai" From cca228e38ef3e01ed8a9668f2fc808568b81040a Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sun, 10 May 2026 13:41:42 +0800 Subject: [PATCH 6/8] fix(azure_openai): fix duplicate name keyword in gpt-5.1 ParameterRule Co-Authored-By: Claude Sonnet 4.6 --- models/azure_openai/models/constants.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/models/azure_openai/models/constants.py b/models/azure_openai/models/constants.py index 213cc03c3..d373061e2 100644 --- a/models/azure_openai/models/constants.py +++ b/models/azure_openai/models/constants.py @@ -2596,6 +2596,26 @@ class AzureBaseModel(BaseModel): ), ParameterRule( name="response_format", + label=I18nObject(zh_Hans="回复格式", en_US="response_format"), + type="string", + help=I18nObject( + zh_Hans="指定模型必须输出的格式", + en_US="specifying the format that the model must output", + ), + required=False, + options=["text", "json_object", "json_schema"], + ), + ParameterRule( + name="json_schema", + label=I18nObject(en_US="JSON Schema"), + type="text", + help=I18nObject( + zh_Hans="设置返回的json schema,llm将按照它返回", + en_US="Set a response json schema will ensure LLM to adhere it.", + ), + required=False, + ), + ParameterRule( name="reasoning_effort", label=I18nObject(zh_Hans="推理工作", en_US="reasoning_effort"), type="string", From 2d543ad7515be72b35d4bce85f6ee551e4cb6c71 Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sun, 10 May 2026 13:49:46 +0800 Subject: [PATCH 7/8] fix(azure_openai): fix duplicate name keyword in gpt-5.4 ParameterRule Co-Authored-By: Claude Sonnet 4.6 --- models/azure_openai/models/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/azure_openai/models/constants.py b/models/azure_openai/models/constants.py index d373061e2..c93613d0e 100644 --- a/models/azure_openai/models/constants.py +++ b/models/azure_openai/models/constants.py @@ -3601,6 +3601,8 @@ class AzureBaseModel(BaseModel): ParameterRule( name="top_p", **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), + ParameterRule( name="response_format", label=I18nObject(zh_Hans="回复格式", en_US="response_format"), type="string", From 58ec6dd7f44dcc568ee0c44cc8a820c54ad01564 Mon Sep 17 00:00:00 2001 From: henry-fung Date: Sat, 16 May 2026 17:41:31 +0800 Subject: [PATCH 8/8] chore(openai): bump version to 0.4.1 --- models/openai/manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/openai/manifest.yaml b/models/openai/manifest.yaml index a1edf348b..6ebbd38c8 100644 --- a/models/openai/manifest.yaml +++ b/models/openai/manifest.yaml @@ -1,4 +1,4 @@ -version: 0.4.0 +version: 0.4.1 type: plugin author: "langgenius" name: "openai"