Skip to content

Commit 8f74b2e

Browse files
dpage and claude authored
Fixed an issue where AI Reports fail with OpenAI models that do not support the temperature parameter. #9719
Removed the temperature parameter from all LLM provider clients and pipeline calls, allowing each model to use its default. This fixes compatibility with GPT-5-mini/nano and future models that don't support user-configurable temperature. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c8cb744 commit 8f74b2e

File tree

7 files changed

+6
-27
lines changed

7 files changed

+6
-27
lines changed

docs/en_US/release_notes_9_14.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,6 @@ Bug fixes
3232
| `Issue #9729 <https://github.com/pgadmin-org/pgadmin4/issues/9729>`_ - Fixed an issue where some LLM models would not use database tools in the AI assistant, instead returning text descriptions of tool calls.
3333
| `Issue #9279 <https://github.com/pgadmin-org/pgadmin4/issues/9279>`_ - Fixed an issue where OAuth2 authentication fails with 'object has no attribute' if OAUTH2_AUTO_CREATE_USER is False.
3434
| `Issue #9392 <https://github.com/pgadmin-org/pgadmin4/issues/9392>`_ - Ensure that the Geometry Viewer refreshes when re-running queries or switching geometry columns, preventing stale data from being displayed.
35+
| `Issue #9719 <https://github.com/pgadmin-org/pgadmin4/issues/9719>`_ - Fixed an issue where AI Reports fail with OpenAI models that do not support the temperature parameter.
3536
| `Issue #9721 <https://github.com/pgadmin-org/pgadmin4/issues/9721>`_ - Fixed an issue where permissions page is not completely accessible on full scroll.
3637
| `Issue #9740 <https://github.com/pgadmin-org/pgadmin4/issues/9740>`_ - Fixed an issue where the AI Assistant input textbox sometimes swallows the first character of input.

web/pgadmin/llm/client.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ def chat(
5454
tools: Optional[list[Tool]] = None,
5555
system_prompt: Optional[str] = None,
5656
max_tokens: int = 4096,
57-
temperature: float = 0.0,
5857
**kwargs
5958
) -> LLMResponse:
6059
"""
@@ -65,7 +64,6 @@ def chat(
6564
tools: Optional list of tools the LLM can use.
6665
system_prompt: Optional system prompt to set context.
6766
max_tokens: Maximum tokens in the response.
68-
temperature: Sampling temperature (0.0 = deterministic).
6967
**kwargs: Additional provider-specific parameters.
7068
7169
Returns:

web/pgadmin/llm/providers/anthropic.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ def chat(
9090
tools: Optional[list[Tool]] = None,
9191
system_prompt: Optional[str] = None,
9292
max_tokens: int = 4096,
93-
temperature: float = 0.0,
9493
**kwargs
9594
) -> LLMResponse:
9695
"""
@@ -101,7 +100,6 @@ def chat(
101100
tools: Optional list of tools Claude can use.
102101
system_prompt: Optional system prompt.
103102
max_tokens: Maximum tokens in response.
104-
temperature: Sampling temperature.
105103
**kwargs: Additional parameters.
106104
107105
Returns:
@@ -120,9 +118,6 @@ def chat(
120118
if system_prompt:
121119
payload['system'] = system_prompt
122120

123-
if temperature > 0:
124-
payload['temperature'] = temperature
125-
126121
if tools:
127122
payload['tools'] = self._convert_tools(tools)
128123

web/pgadmin/llm/providers/docker.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def chat(
8383
tools: Optional[list[Tool]] = None,
8484
system_prompt: Optional[str] = None,
8585
max_tokens: int = 4096,
86-
temperature: float = 0.0,
8786
**kwargs
8887
) -> LLMResponse:
8988
"""
@@ -94,7 +93,6 @@ def chat(
9493
tools: Optional list of tools the model can use.
9594
system_prompt: Optional system prompt.
9695
max_tokens: Maximum tokens in response.
97-
temperature: Sampling temperature.
9896
**kwargs: Additional parameters.
9997
10098
Returns:
@@ -117,7 +115,6 @@ def chat(
117115
'model': self._model,
118116
'messages': converted_messages,
119117
'max_completion_tokens': max_tokens,
120-
'temperature': temperature
121118
}
122119

123120
if tools:

web/pgadmin/llm/providers/ollama.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ def chat(
8181
tools: Optional[list[Tool]] = None,
8282
system_prompt: Optional[str] = None,
8383
max_tokens: int = 4096,
84-
temperature: float = 0.0,
8584
**kwargs
8685
) -> LLMResponse:
8786
"""
@@ -92,7 +91,6 @@ def chat(
9291
tools: Optional list of tools the model can use.
9392
system_prompt: Optional system prompt.
9493
max_tokens: Maximum tokens in response (num_predict in Ollama).
95-
temperature: Sampling temperature.
9694
**kwargs: Additional parameters.
9795
9896
Returns:
@@ -117,7 +115,6 @@ def chat(
117115
'stream': False,
118116
'options': {
119117
'num_predict': max_tokens,
120-
'temperature': temperature
121118
}
122119
}
123120

web/pgadmin/llm/providers/openai.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ def chat(
9090
tools: Optional[list[Tool]] = None,
9191
system_prompt: Optional[str] = None,
9292
max_tokens: int = 4096,
93-
temperature: float = 0.0,
9493
**kwargs
9594
) -> LLMResponse:
9695
"""
@@ -101,7 +100,6 @@ def chat(
101100
tools: Optional list of tools the model can use.
102101
system_prompt: Optional system prompt.
103102
max_tokens: Maximum tokens in response.
104-
temperature: Sampling temperature.
105103
**kwargs: Additional parameters.
106104
107105
Returns:
@@ -124,7 +122,6 @@ def chat(
124122
'model': self._model,
125123
'messages': converted_messages,
126124
'max_completion_tokens': max_tokens,
127-
'temperature': temperature
128125
}
129126

130127
if tools:

web/pgadmin/llm/reports/pipeline.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,7 @@ def _planning_stage(self, context: dict) -> list[str]:
218218
response = self._call_llm_with_retry(
219219
messages=[Message.user(user_prompt)],
220220
system_prompt=PLANNING_SYSTEM_PROMPT,
221-
max_tokens=500,
222-
temperature=0.0
221+
max_tokens=500
223222
)
224223

225224
# Parse JSON response
@@ -292,8 +291,7 @@ def _analyze_section_with_retry(
292291
response = self.client.chat(
293292
messages=[Message.user(user_prompt)],
294293
system_prompt=SECTION_ANALYSIS_SYSTEM_PROMPT,
295-
max_tokens=1500,
296-
temperature=0.3
294+
max_tokens=1500
297295
)
298296

299297
# Determine severity from content
@@ -374,8 +372,7 @@ def _synthesize_with_retry(
374372
response = self.client.chat(
375373
messages=[Message.user(user_prompt)],
376374
system_prompt=SYNTHESIS_SYSTEM_PROMPT,
377-
max_tokens=4096,
378-
temperature=0.3
375+
max_tokens=4096
379376
)
380377

381378
yield {'type': 'result', 'result': response.content}
@@ -408,16 +405,14 @@ def _call_llm_with_retry(
408405
self,
409406
messages: list[Message],
410407
system_prompt: str,
411-
max_tokens: int = 4096,
412-
temperature: float = 0.3
408+
max_tokens: int = 4096
413409
):
414410
"""Call LLM with exponential backoff retry.
415411
416412
Args:
417413
messages: Messages to send.
418414
system_prompt: System prompt.
419415
max_tokens: Maximum response tokens.
420-
temperature: Sampling temperature.
421416
422417
Returns:
423418
LLMResponse from the client.
@@ -430,8 +425,7 @@ def _call_llm_with_retry(
430425
return self.client.chat(
431426
messages=messages,
432427
system_prompt=system_prompt,
433-
max_tokens=max_tokens,
434-
temperature=temperature
428+
max_tokens=max_tokens
435429
)
436430
except LLMClientError as e:
437431
if e.error.retryable and attempt < self.max_retries - 1:

0 commit comments

Comments (0)