langgenius · henry-fung · Apr 3, 2026 · May 10, 2026 · May 10, 2026 · May 10, 2026
diff --git a/models/azure_openai/manifest.yaml b/models/azure_openai/manifest.yaml
@@ -24,4 +24,4 @@ resource:
     model:
       enabled: false
 type: plugin
-version: 0.0.55
+version: 0.0.56
diff --git a/models/azure_openai/models/constants.py b/models/azure_openai/models/constants.py
@@ -2586,6 +2586,14 @@ class AzureBaseModel(BaseModel):
                 ModelPropertyKey.CONTEXT_SIZE: 272000,
             },
             parameter_rules=[
+                ParameterRule(
+                    name="temperature",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
+                ),
+                ParameterRule(
+                    name="top_p",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
+                ),
                 ParameterRule(
                     name="response_format",
                     label=I18nObject(zh_Hans="回复格式", en_US="response_format"),
@@ -3033,6 +3041,33 @@ class AzureBaseModel(BaseModel):
                 ModelPropertyKey.CONTEXT_SIZE: 400000,
             },
             parameter_rules=[
+                ParameterRule(
+                    name="temperature",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
+                ),
+                ParameterRule(
+                    name="top_p",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
+                ),
+                ParameterRule(
+                    name="presence_penalty",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
+                ),
+                ParameterRule(
+                    name="frequency_penalty",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
+                ),
+                _get_o1_max_tokens(default=4096, min_val=1, max_val=128000),
+                ParameterRule(
+                    name="seed",
+                    label=I18nObject(zh_Hans="种子", en_US="Seed"),
+                    type="int",
+                    help=AZURE_DEFAULT_PARAM_SEED_HELP,
+                    required=False,
+                    precision=0,
+                    min=0,
+                    max=2147483647,
+                ),
                 ParameterRule(
                     name="response_format",
                     label=I18nObject(zh_Hans="回复格式", en_US="response_format"),
@@ -3559,6 +3594,14 @@ class AzureBaseModel(BaseModel):
                 ModelPropertyKey.CONTEXT_SIZE: 1050000,
             },
             parameter_rules=[
+                ParameterRule(
+                    name="temperature",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
+                ),
+                ParameterRule(
+                    name="top_p",
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
+                ),
                 ParameterRule(
                     name="response_format",
                     label=I18nObject(zh_Hans="回复格式", en_US="response_format"),

diff --git a/models/azure_openai/models/llm/llm.py b/models/azure_openai/models/llm/llm.py
@@ -440,9 +440,10 @@ def _chat_generate_with_responses(
             "input": input_messages,
         }
 
-        # Map model parameters to the Responses API.
-        # temperature and top_p are not supported by gpt-5 reasoning models.
-        if not is_reasoning_model:
+        # Forward temperature/top_p only when reasoning is off (non-reasoning model or effort "none").
+        reasoning_effort = model_parameters.get("reasoning_effort")
+        reasoning_active = is_reasoning_model and reasoning_effort != "none"
+        if not reasoning_active:
             if "temperature" in model_parameters:
                 responses_params["temperature"] = model_parameters["temperature"]
             if "top_p" in model_parameters:
@@ -523,7 +524,6 @@ def _chat_generate_with_responses(
             reasoning["summary"] = model_parameters["reasoning_summary"]
         if reasoning:
             responses_params["reasoning"] = reasoning
-
         logger.info(
             f"llm request with responses api: model={model}, stream={stream}, "
             f"parameters={responses_params}"

diff --git a/models/azure_openai/models/text_embedding/text_embedding.py b/models/azure_openai/models/text_embedding/text_embedding.py
@@ -41,6 +41,11 @@ def _invoke(
         if user:
             extra_model_kwargs["user"] = user
         extra_model_kwargs["encoding_format"] = "base64"
+
+        dimensions = credentials.get("embedding_dimensions")
+        if dimensions and base_model_name != "text-embedding-ada-002":
+            extra_model_kwargs["dimensions"] = int(dimensions)
+
         context_size = self._get_context_size(model, credentials)
         max_chunks = self._get_max_chunks(model, credentials)
         embeddings: list[list[float]] = [[] for _ in range(len(texts))]
@@ -130,6 +135,22 @@ def validate_credentials(self, model: str, credentials: dict) -> None:
             raise CredentialsValidateFailedError(
                 f"Base Model Name {credentials['base_model_name']} is invalid"
             )
+
+        _MAX_DIMS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536}
+        base_model = credentials.get("base_model_name", "")
+        dimensions = credentials.get("embedding_dimensions")
+        if dimensions:
+            try:
+                dim_int = int(dimensions)
+            except (ValueError, TypeError):
+                raise CredentialsValidateFailedError("embedding_dimensions must be an integer")
+            if base_model == "text-embedding-ada-002":
+                raise CredentialsValidateFailedError("text-embedding-ada-002 does not support dimensions")
+            if base_model in _MAX_DIMS and not (1 <= dim_int <= _MAX_DIMS[base_model]):
+                raise CredentialsValidateFailedError(
+                    f"embedding_dimensions for {base_model} must be between 1 and {_MAX_DIMS[base_model]}"
+                )
+
         try:
             client = self._create_client(credentials)
             self._embedding_invoke(

diff --git a/models/azure_openai/provider/azure_openai.yaml b/models/azure_openai/provider/azure_openai.yaml
@@ -521,6 +521,21 @@ model_credential_schema:
       required: true
       type: select
       variable: base_model_name
+    - variable: embedding_dimensions
+      label:
+        en_US: Embedding Dimensions
+        zh_Hans: 向量维度
+      type: text-input
+      required: false
+      placeholder:
+        en_US: "e.g. 1536 (optional, text-embedding-3-* only)"
+        zh_Hans: "如 1536（可选，仅 text-embedding-3-* 支持）"
+      help:
+        en_US: "Reduce output dimensions via MRL (Matryoshka Representation Learning). text-embedding-3-small: 1-1536, text-embedding-3-large: 1-3072. Leave empty for model default."
+        zh_Hans: "通过 MRL 缩减输出维度。text-embedding-3-small: 1-1536，text-embedding-3-large: 1-3072。留空使用模型默认值。"
+      show_on:
+        - variable: __model_type
+          value: text-embedding
   model:
     label:
       en_US: Deployment Name

diff --git a/models/openai/manifest.yaml b/models/openai/manifest.yaml
@@ -1,4 +1,4 @@
-version: 0.4.0
+version: 0.4.1
 type: plugin
 author: "langgenius"
 name: "openai"

diff --git a/models/openai/models/llm/gpt-5.1.yaml b/models/openai/models/llm/gpt-5.1.yaml
@@ -17,6 +17,10 @@ parameter_rules:
     default: 8192
     min: 1
     max: 128000
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
   - name: response_format
     label:
       zh_Hans: 回复格式

diff --git a/models/openai/models/llm/gpt-5.2.yaml b/models/openai/models/llm/gpt-5.2.yaml
@@ -18,6 +18,8 @@ parameter_rules:
     default: 8192
     min: 1
     max: 128000
+  - name: temperature
+    use_template: temperature
   - name: response_format
     label:
       zh_Hans: 回复格式

diff --git a/models/openai/models/llm/gpt-5.4.yaml b/models/openai/models/llm/gpt-5.4.yaml
@@ -18,6 +18,8 @@ parameter_rules:
     default: 8192
     min: 1
     max: 128000
+  - name: temperature
+    use_template: temperature
   - name: response_format
     label:
       zh_Hans: 回复格式

diff --git a/models/openai/models/llm/llm.py b/models/openai/models/llm/llm.py
@@ -800,6 +800,15 @@ def _chat_generate(
         else:
             # chat model
             messages: Any = [self._convert_prompt_message_to_dict(m) for m in prompt_messages]
+
+            # For models where sampling parameters are only valid when reasoning_effort="none"
+            # (e.g. gpt-5.1/5.2/5.4): strip temperature/top_p/logprobs when reasoning is active
+            _re = model_parameters.get("reasoning_effort")
+            if _re and _re != "none":
+                model_parameters.pop("temperature", None)
+                model_parameters.pop("top_p", None)
+                model_parameters.pop("logprobs", None)
+
             # thinking models require max_completion_tokens instead of max_tokens
             chat_params = model_parameters.copy()
             if any(model.startswith(prefix) for prefix in THINKING_SERIES_PREFIXES):
@@ -849,6 +858,10 @@ def _build_responses_api_params(
         reasoning_effort = params.pop("reasoning_effort", None)
         if reasoning_effort and reasoning_effort != "none":
             params["reasoning"] = {"effort": reasoning_effort}
+            # temperature/top_p/logprobs not supported when reasoning is active
+            params.pop("temperature", None)
+            params.pop("top_p", None)
+            params.pop("logprobs", None)
 
         # response_format -> text.format (Responses API uses different format)
         # response_format is incompatible with Responses API, convert to text.format

diff --git a/models/openai/models/text_embedding/text_embedding.py b/models/openai/models/text_embedding/text_embedding.py
@@ -49,6 +49,10 @@ def _invoke(
 
         extra_model_kwargs["encoding_format"] = "base64"
 
+        dimensions = credentials.get("embedding_dimensions")
+        if dimensions and model != "text-embedding-ada-002":
+            extra_model_kwargs["dimensions"] = int(dimensions)
+
         # get model properties
         context_size = self._get_context_size(model, credentials)
         max_chunks = self._get_max_chunks(model, credentials)
@@ -156,6 +160,21 @@ def validate_credentials(self, model: str, credentials: dict) -> None:
             credentials_kwargs = self._to_credential_kwargs(credentials)
             client = OpenAI(**credentials_kwargs)
 
+            # validate dimensions if provided
+            _MAX_DIMS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536}
+            dimensions = credentials.get("embedding_dimensions")
+            if dimensions:
+                try:
+                    dim_int = int(dimensions)
+                except (ValueError, TypeError):
+                    raise CredentialsValidateFailedError("embedding_dimensions must be an integer")
+                if model == "text-embedding-ada-002":
+                    raise CredentialsValidateFailedError("text-embedding-ada-002 does not support dimensions")
+                if model in _MAX_DIMS and not (1 <= dim_int <= _MAX_DIMS[model]):
+                    raise CredentialsValidateFailedError(
+                        f"embedding_dimensions for {model} must be between 1 and {_MAX_DIMS[model]}"
+                    )
+
             # call embedding model
             self._embedding_invoke(
                 model=model, client=client, texts=["ping"], extra_model_kwargs={}

diff --git a/models/openai/provider/openai.yaml b/models/openai/provider/openai.yaml
@@ -78,6 +78,21 @@ model_credential_schema:
       help:
         zh_Hans: 选择该模型使用的 API 协议。Chat Completions 适用于大多数模型，Responses API 适用于 o3-pro、gpt-5.4 等模型。
         en_US: Select the API protocol for this model. Use Chat Completions for most models, Responses API for models like o3-pro and gpt-5.4.
+    - variable: embedding_dimensions
+      label:
+        en_US: Embedding Dimensions
+        zh_Hans: 向量维度
+      type: text-input
+      required: false
+      placeholder:
+        en_US: "e.g. 1536 (optional, text-embedding-3-* only)"
+        zh_Hans: "如 1536（可选，仅 text-embedding-3-* 支持）"
+      help:
+        en_US: "Reduce output dimensions via MRL (Matryoshka Representation Learning). text-embedding-3-small: 1-1536, text-embedding-3-large: 1-3072. Leave empty for model default."
+        zh_Hans: "通过 MRL 缩减输出维度。text-embedding-3-small: 1-1536，text-embedding-3-large: 1-3072。留空使用模型默认值。"
+      show_on:
+        - variable: __model_type
+          value: text-embedding
 provider_credential_schema:
   credential_form_schemas:
     - variable: openai_api_key