Skip to content

Commit 92a2599

Browse files
committed
Fixes an issue where LLM responses are not streamed or rendered properly in the AI Assistant. Fixes #9734
1 parent 01c2d12 commit 92a2599

File tree

14 files changed

+1763
-177
lines changed

14 files changed

+1763
-177
lines changed

docs/en_US/release_notes_9_14.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,5 @@ Bug fixes
3939
| `Issue #9721 <https://github.com/pgadmin-org/pgadmin4/issues/9721>`_ - Fixed an issue where permissions page is not completely accessible on full scroll.
4040
| `Issue #9729 <https://github.com/pgadmin-org/pgadmin4/issues/9729>`_ - Fixed an issue where some LLM models would not use database tools in the AI assistant, instead returning text descriptions of tool calls.
4141
| `Issue #9732 <https://github.com/pgadmin-org/pgadmin4/issues/9732>`_ - Improve the AI Assistant user prompt to be more descriptive of the actual functionality.
42+
| `Issue #9734 <https://github.com/pgadmin-org/pgadmin4/issues/9734>`_ - Fixed an issue where LLM responses are not streamed or rendered properly in the AI Assistant.
4243
| `Issue #9740 <https://github.com/pgadmin-org/pgadmin4/issues/9740>`_ - Fixed an issue where the AI Assistant input textbox sometimes swallows the first character of input.

web/pgadmin/llm/chat.py

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
"""
1515

1616
import json
17-
from typing import Optional
17+
from collections.abc import Generator
18+
from typing import Optional, Union
1819

1920
from pgadmin.llm.client import get_llm_client, is_llm_available
20-
from pgadmin.llm.models import Message, StopReason
21+
from pgadmin.llm.models import Message, LLMResponse, StopReason
2122
from pgadmin.llm.tools import DATABASE_TOOLS, execute_tool, DatabaseToolError
2223
from pgadmin.llm.utils import get_max_tool_iterations
2324

@@ -153,6 +154,117 @@ def chat_with_database(
153154
)
154155

155156

157+
def chat_with_database_stream(
    user_message: str,
    sid: int,
    did: int,
    conversation_history: Optional[list[Message]] = None,
    system_prompt: Optional[str] = None,
    max_tool_iterations: Optional[int] = None,
    provider: Optional[str] = None,
    model: Optional[str] = None
) -> Generator[
    Union[str, tuple[str, list[Message]], tuple[str, list[str]]],
    None, None
]:
    """
    Stream an LLM chat conversation with database tool access.

    Like chat_with_database, but yields text chunks as the final
    response streams in. During tool-use iterations, no text is
    yielded (tools are executed silently).

    Args:
        user_message: The new message from the user.
        sid: Server ID the database tools operate against.
        did: Database ID the database tools operate against.
        conversation_history: Prior Message objects; copied, never
            mutated in place.
        system_prompt: Overrides DEFAULT_SYSTEM_PROMPT when given.
        max_tool_iterations: Cap on LLM round-trips; defaults to the
            value from get_max_tool_iterations().
        provider: Optional LLM provider name passed to get_llm_client.
        model: Optional model name passed to get_llm_client.

    Yields:
        str: Text content chunks from the final LLM response.

        tuple: ('tool_use', [tool_name, ...]) is yielded before each
        round of tool execution so the caller can reset its streaming
        state and show a thinking indicator.

        The last item yielded is a tuple of
        (final_response_text, updated_conversation_history).

    Raises:
        LLMClientError: If the LLM request fails.
        RuntimeError: If LLM is not available or max iterations exceeded.
    """
    if not is_llm_available():
        raise RuntimeError("LLM is not configured. Please configure an LLM "
                           "provider in Preferences > AI.")

    client = get_llm_client(provider=provider, model=model)
    if not client:
        raise RuntimeError("Failed to create LLM client")

    # Copy the history so the caller's list is not mutated.
    messages = list(conversation_history) if conversation_history else []
    messages.append(Message.user(user_message))

    if system_prompt is None:
        system_prompt = DEFAULT_SYSTEM_PROMPT

    if max_tool_iterations is None:
        max_tool_iterations = get_max_tool_iterations()

    iteration = 0
    while iteration < max_tool_iterations:
        iteration += 1

        # Stream the LLM response, yielding text chunks as they arrive.
        # chat_stream yields str chunks followed by a final LLMResponse
        # carrying the complete metadata.
        response = None
        for item in client.chat_stream(
            messages=messages,
            tools=DATABASE_TOOLS,
            system_prompt=system_prompt
        ):
            if isinstance(item, LLMResponse):
                response = item
            elif isinstance(item, str):
                yield item

        if response is None:
            raise RuntimeError("No response received from LLM")

        messages.append(response.to_message())

        if response.stop_reason != StopReason.TOOL_USE:
            # Final response - yield the completion tuple
            yield (response.content, messages)
            return

        # Signal that tools are being executed so the caller can
        # reset streaming state and show a thinking indicator
        yield ('tool_use', [tc.name for tc in response.tool_calls])

        # Execute tool calls, mapping each outcome (success, expected
        # tool error, unexpected error) to a tool-result Message so
        # the LLM sees one result per call on the next iteration.
        tool_results = []
        for tool_call in response.tool_calls:
            try:
                result = execute_tool(
                    tool_name=tool_call.name,
                    arguments=tool_call.arguments,
                    sid=sid,
                    did=did
                )
                # default=str stringifies non-JSON-serializable values
                # (e.g. dates) rather than failing the whole call.
                tool_results.append(Message.tool_result(
                    tool_call_id=tool_call.id,
                    content=json.dumps(result, default=str),
                    is_error=False
                ))
            except (DatabaseToolError, ValueError) as e:
                tool_results.append(Message.tool_result(
                    tool_call_id=tool_call.id,
                    content=json.dumps({"error": str(e)}),
                    is_error=True
                ))
            except Exception as e:
                # Catch-all: report the failure back to the LLM instead
                # of aborting the conversation mid-stream.
                tool_results.append(Message.tool_result(
                    tool_call_id=tool_call.id,
                    content=json.dumps({
                        "error": f"Unexpected error: {str(e)}"
                    }),
                    is_error=True
                ))

        messages.extend(tool_results)

    raise RuntimeError(
        f"Exceeded maximum tool iterations ({max_tool_iterations})"
    )
266+
267+
156268
def single_query(
157269
question: str,
158270
sid: int,

web/pgadmin/llm/client.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
"""Base LLM client interface and factory."""
1111

1212
from abc import ABC, abstractmethod
13-
from typing import Optional
13+
from collections.abc import Generator
14+
from typing import Optional, Union
1415

1516
from pgadmin.llm.models import (
1617
Message, Tool, LLMResponse, LLMError
@@ -74,6 +75,48 @@ def chat(
7475
"""
7576
pass
7677

78+
def chat_stream(
    self,
    messages: list[Message],
    tools: Optional[list[Tool]] = None,
    system_prompt: Optional[str] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    **kwargs
) -> Generator[Union[str, LLMResponse], None, None]:
    """
    Stream a chat response from the LLM.

    Yields text chunks (str) as they arrive, then yields
    a final LLMResponse with the complete response metadata.

    The default implementation falls back to non-streaming chat().

    Args:
        messages: List of conversation messages.
        tools: Optional list of tools the LLM can use.
        system_prompt: Optional system prompt to set context.
        max_tokens: Maximum tokens in the response.
        temperature: Sampling temperature (0.0 = deterministic).
        **kwargs: Additional provider-specific parameters.

    Yields:
        str: Text content chunks as they arrive.
        LLMResponse: Final response with complete metadata (last item).
    """
    # Fallback for providers without native streaming: run the
    # blocking chat() call, then emit its text as one big chunk
    # followed by the full response object.
    full_response = self.chat(
        messages=messages,
        tools=tools,
        system_prompt=system_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        **kwargs
    )
    text = full_response.content
    if text:
        yield text
    yield full_response
119+
77120
def validate_connection(self) -> tuple[bool, Optional[str]]:
78121
"""
79122
Validate the connection to the LLM provider.

web/pgadmin/llm/prompts/nlq.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,10 @@
3535
- Use explicit column names instead of SELECT *
3636
- For UPDATE/DELETE, always include WHERE clauses
3737
38-
Once you have explored the database structure using the tools above, \
39-
provide your final answer as a JSON object in this exact format:
40-
{"sql": "YOUR SQL QUERY HERE", "explanation": "Brief explanation"}
41-
42-
Rules for the final response:
43-
- Return ONLY the JSON object, no other text
44-
- No markdown code blocks
45-
- If you need clarification, set "sql" to null and put \
46-
your question in "explanation"
38+
Response format:
39+
- Always put SQL in fenced code blocks with the sql language tag
40+
- You may include multiple SQL blocks if the request needs \
41+
multiple statements
42+
- Briefly explain what each query does
43+
- If you need clarification, just ask — no code blocks needed
4744
"""

0 commit comments

Comments
 (0)