diff --git a/models/azure_openai/manifest.yaml b/models/azure_openai/manifest.yaml index 8a3773380..7f811e067 100644 --- a/models/azure_openai/manifest.yaml +++ b/models/azure_openai/manifest.yaml @@ -24,4 +24,4 @@ resource: model: enabled: false type: plugin -version: 0.0.55 +version: 0.0.56 diff --git a/models/azure_openai/models/constants.py b/models/azure_openai/models/constants.py index ecb12d020..c93613d0e 100644 --- a/models/azure_openai/models/constants.py +++ b/models/azure_openai/models/constants.py @@ -2586,6 +2586,14 @@ class AzureBaseModel(BaseModel): ModelPropertyKey.CONTEXT_SIZE: 272000, }, parameter_rules=[ + ParameterRule( + name="temperature", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), + ParameterRule( + name="top_p", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), ParameterRule( name="response_format", label=I18nObject(zh_Hans="回复格式", en_US="response_format"), @@ -3033,6 +3041,33 @@ class AzureBaseModel(BaseModel): ModelPropertyKey.CONTEXT_SIZE: 400000, }, parameter_rules=[ + ParameterRule( + name="temperature", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), + ParameterRule( + name="top_p", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), + ParameterRule( + name="presence_penalty", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY], + ), + ParameterRule( + name="frequency_penalty", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY], + ), + _get_o1_max_tokens(default=4096, min_val=1, max_val=128000), + ParameterRule( + name="seed", + label=I18nObject(zh_Hans="种子", en_US="Seed"), + type="int", + help=AZURE_DEFAULT_PARAM_SEED_HELP, + required=False, + precision=0, + min=0, + max=2147483647, + ), ParameterRule( name="response_format", label=I18nObject(zh_Hans="回复格式", en_US="response_format"), @@ -3559,6 +3594,14 @@ class AzureBaseModel(BaseModel): ModelPropertyKey.CONTEXT_SIZE: 1050000, }, parameter_rules=[ + ParameterRule( + name="temperature", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), + ParameterRule( + name="top_p", + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), ParameterRule( name="response_format", label=I18nObject(zh_Hans="回复格式", en_US="response_format"), diff --git a/models/azure_openai/models/llm/llm.py b/models/azure_openai/models/llm/llm.py index ad5c0ab55..c2e42e89b 100644 --- a/models/azure_openai/models/llm/llm.py +++ b/models/azure_openai/models/llm/llm.py @@ -440,9 +440,10 @@ def _chat_generate_with_responses( "input": input_messages, } - # Map model parameters to the Responses API. - # temperature and top_p are not supported by gpt-5 reasoning models. - if not is_reasoning_model: + # temperature/top_p: not supported when reasoning is active + reasoning_effort = model_parameters.get("reasoning_effort") + reasoning_active = is_reasoning_model and reasoning_effort != "none" + if not reasoning_active: if "temperature" in model_parameters: responses_params["temperature"] = model_parameters["temperature"] if "top_p" in model_parameters: @@ -523,7 +524,6 @@ def _chat_generate_with_responses( reasoning["summary"] = model_parameters["reasoning_summary"] if reasoning: responses_params["reasoning"] = reasoning - logger.info( f"llm request with responses api: model={model}, stream={stream}, " f"parameters={responses_params}" diff --git a/models/azure_openai/models/text_embedding/text_embedding.py b/models/azure_openai/models/text_embedding/text_embedding.py index 6813ccc4b..b31f00629 100644 --- a/models/azure_openai/models/text_embedding/text_embedding.py +++ b/models/azure_openai/models/text_embedding/text_embedding.py @@ -41,6 +41,11 @@ def _invoke( if user: extra_model_kwargs["user"] = user extra_model_kwargs["encoding_format"] = "base64" + + dimensions = credentials.get("embedding_dimensions") + if dimensions and base_model_name != "text-embedding-ada-002": + extra_model_kwargs["dimensions"] = int(dimensions) + context_size = self._get_context_size(model, credentials) max_chunks = self._get_max_chunks(model, credentials) embeddings: list[list[float]] = [[] for _ in range(len(texts))] @@ -130,6 +135,22 @@ def validate_credentials(self, model: str, credentials: dict) -> None: raise CredentialsValidateFailedError( f"Base Model Name {credentials['base_model_name']} is invalid" ) + + _MAX_DIMS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536} + base_model = credentials.get("base_model_name", "") + dimensions = credentials.get("embedding_dimensions") + if dimensions: + try: + dim_int = int(dimensions) + except (ValueError, TypeError): + raise CredentialsValidateFailedError("embedding_dimensions must be an integer") + if base_model == "text-embedding-ada-002": + raise CredentialsValidateFailedError("text-embedding-ada-002 does not support dimensions") + if base_model in _MAX_DIMS and not (1 <= dim_int <= _MAX_DIMS[base_model]): + raise CredentialsValidateFailedError( + f"embedding_dimensions for {base_model} must be between 1 and {_MAX_DIMS[base_model]}" + ) + try: client = self._create_client(credentials) self._embedding_invoke( diff --git a/models/azure_openai/provider/azure_openai.yaml b/models/azure_openai/provider/azure_openai.yaml index 29589f737..5e3a93491 100644 --- a/models/azure_openai/provider/azure_openai.yaml +++ b/models/azure_openai/provider/azure_openai.yaml @@ -521,6 +521,21 @@ model_credential_schema: required: true type: select variable: base_model_name + - variable: embedding_dimensions + label: + en_US: Embedding Dimensions + zh_Hans: 向量维度 + type: text-input + required: false + placeholder: + en_US: "e.g. 1536 (optional, text-embedding-3-* only)" + zh_Hans: "如 1536(可选,仅 text-embedding-3-* 支持)" + help: + en_US: "Reduce output dimensions via MRL. text-embedding-3-small: 1-1536, text-embedding-3-large: 1-3072. Leave empty for model default." + zh_Hans: "通过 MRL 缩减输出维度。text-embedding-3-small: 1-1536,text-embedding-3-large: 1-3072。留空使用模型默认值。" + show_on: + - variable: __model_type + value: text-embedding model: label: en_US: Deployment Name diff --git a/models/openai/manifest.yaml b/models/openai/manifest.yaml index a1edf348b..6ebbd38c8 100644 --- a/models/openai/manifest.yaml +++ b/models/openai/manifest.yaml @@ -1,4 +1,4 @@ -version: 0.4.0 +version: 0.4.1 type: plugin author: "langgenius" name: "openai" diff --git a/models/openai/models/llm/gpt-5.1.yaml b/models/openai/models/llm/gpt-5.1.yaml index 72f6edabc..13ea7bafe 100644 --- a/models/openai/models/llm/gpt-5.1.yaml +++ b/models/openai/models/llm/gpt-5.1.yaml @@ -17,6 +17,10 @@ parameter_rules: default: 8192 min: 1 max: 128000 + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p - name: response_format label: zh_Hans: 回复格式 diff --git a/models/openai/models/llm/gpt-5.2.yaml b/models/openai/models/llm/gpt-5.2.yaml index f761e291d..59ab62ce6 100644 --- a/models/openai/models/llm/gpt-5.2.yaml +++ b/models/openai/models/llm/gpt-5.2.yaml @@ -18,6 +18,8 @@ parameter_rules: default: 8192 min: 1 max: 128000 + - name: temperature + use_template: temperature - name: response_format label: zh_Hans: 回复格式 diff --git a/models/openai/models/llm/gpt-5.4.yaml b/models/openai/models/llm/gpt-5.4.yaml index dcdb429a9..c36313047 100644 --- a/models/openai/models/llm/gpt-5.4.yaml +++ b/models/openai/models/llm/gpt-5.4.yaml @@ -18,6 +18,8 @@ parameter_rules: default: 8192 min: 1 max: 128000 + - name: temperature + use_template: temperature - name: response_format label: zh_Hans: 回复格式 diff --git a/models/openai/models/llm/llm.py b/models/openai/models/llm/llm.py index d07f370b7..8814ae89b 100644 --- a/models/openai/models/llm/llm.py +++ b/models/openai/models/llm/llm.py @@ -800,6 +800,15 @@ def _chat_generate( else: # chat model messages: Any = [self._convert_prompt_message_to_dict(m) for m in prompt_messages] + + # For models where temperature is only valid when reasoning_effort="none" + # (e.g. gpt-5.1/5.2/5.4): strip temperature/top_p when reasoning is active + _re = model_parameters.get("reasoning_effort") + if _re and _re != "none": + model_parameters.pop("temperature", None) + model_parameters.pop("top_p", None) + model_parameters.pop("logprobs", None) + # thinking models require max_completion_tokens instead of max_tokens chat_params = model_parameters.copy() if any(model.startswith(prefix) for prefix in THINKING_SERIES_PREFIXES): @@ -849,6 +858,10 @@ def _build_responses_api_params( reasoning_effort = params.pop("reasoning_effort", None) if reasoning_effort and reasoning_effort != "none": params["reasoning"] = {"effort": reasoning_effort} + # temperature/top_p/logprobs not supported when reasoning is active + params.pop("temperature", None) + params.pop("top_p", None) + params.pop("logprobs", None) # response_format -> text.format (Responses API uses different format) # response_format is incompatible with Responses API, convert to text.format diff --git a/models/openai/models/text_embedding/text_embedding.py b/models/openai/models/text_embedding/text_embedding.py index 0b91c55e3..c7166212d 100644 --- a/models/openai/models/text_embedding/text_embedding.py +++ b/models/openai/models/text_embedding/text_embedding.py @@ -49,6 +49,10 @@ def _invoke( extra_model_kwargs["encoding_format"] = "base64" + dimensions = credentials.get("embedding_dimensions") + if dimensions and model != "text-embedding-ada-002": + extra_model_kwargs["dimensions"] = int(dimensions) + # get model properties context_size = self._get_context_size(model, credentials) max_chunks = self._get_max_chunks(model, credentials) @@ -156,6 +160,21 @@ def validate_credentials(self, model: str, credentials: dict) -> None: credentials_kwargs = self._to_credential_kwargs(credentials) client = OpenAI(**credentials_kwargs) + # validate dimensions if provided + _MAX_DIMS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536} + dimensions = credentials.get("embedding_dimensions") + if dimensions: + try: + dim_int = int(dimensions) + except (ValueError, TypeError): + raise CredentialsValidateFailedError("embedding_dimensions must be an integer") + if model == "text-embedding-ada-002": + raise CredentialsValidateFailedError("text-embedding-ada-002 does not support dimensions") + if model in _MAX_DIMS and not (1 <= dim_int <= _MAX_DIMS[model]): + raise CredentialsValidateFailedError( + f"embedding_dimensions for {model} must be between 1 and {_MAX_DIMS[model]}" + ) + # call embedding model self._embedding_invoke( model=model, client=client, texts=["ping"], extra_model_kwargs={} diff --git a/models/openai/provider/openai.yaml b/models/openai/provider/openai.yaml index c95afc99c..e0ee17141 100644 --- a/models/openai/provider/openai.yaml +++ b/models/openai/provider/openai.yaml @@ -78,6 +78,21 @@ model_credential_schema: help: zh_Hans: 选择该模型使用的 API 协议。Chat Completions 适用于大多数模型,Responses API 适用于 o3-pro、gpt-5.4 等模型。 en_US: Select the API protocol for this model. Use Chat Completions for most models, Responses API for models like o3-pro and gpt-5.4. + - variable: embedding_dimensions + label: + en_US: Embedding Dimensions + zh_Hans: 向量维度 + type: text-input + required: false + placeholder: + en_US: "e.g. 1536 (optional, text-embedding-3-* only)" + zh_Hans: "如 1536(可选,仅 text-embedding-3-* 支持)" + help: + en_US: "Reduce output dimensions via MRL. text-embedding-3-small: 1-1536, text-embedding-3-large: 1-3072. Leave empty for model default." + zh_Hans: "通过 MRL 缩减输出维度。text-embedding-3-small: 1-1536,text-embedding-3-large: 1-3072。留空使用模型默认值。" + show_on: + - variable: __model_type + value: text-embedding provider_credential_schema: credential_form_schemas: - variable: openai_api_key