Skip to content

Commit ba1bf03

Browse files
authored
fix: avoid gluing reasoning sentences across deltas (#704)
* fix: normalize reasoning delta spacing
* fix: keep reasoning delta normalization minimal
1 parent e08cf1f commit ba1bf03

5 files changed

Lines changed: 127 additions & 16 deletions

File tree

src/fast_agent/llm/provider/openai/llm_openai.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from fast_agent.mcp.helpers.content_helpers import get_text
5656
from fast_agent.mcp.mime_utils import guess_mime_type
5757
from fast_agent.types import LlmStopReason, PromptMessageExtended
58+
from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta
5859

5960
_logger = get_logger(__name__)
6061

@@ -325,17 +326,22 @@ def _handle_reasoning_delta(
325326
if not reasoning_text:
326327
return reasoning_active
327328

329+
last_char = reasoning_segments[-1][-1] if reasoning_segments and reasoning_segments[-1] else None
330+
normalized_text = normalize_reasoning_delta(last_char, reasoning_text)
331+
if not normalized_text:
332+
return reasoning_active
333+
328334
if reasoning_mode == "tags":
329335
if not reasoning_active:
330336
reasoning_active = True
331-
self._notify_stream_listeners(StreamChunk(text=reasoning_text, is_reasoning=True))
332-
reasoning_segments.append(reasoning_text)
337+
self._notify_stream_listeners(StreamChunk(text=normalized_text, is_reasoning=True))
338+
reasoning_segments.append(normalized_text)
333339
return reasoning_active
334340

335341
if reasoning_mode in {"stream", "reasoning_content", "gpt_oss"}:
336342
# Emit reasoning as-is
337-
self._notify_stream_listeners(StreamChunk(text=reasoning_text, is_reasoning=True))
338-
reasoning_segments.append(reasoning_text)
343+
self._notify_stream_listeners(StreamChunk(text=normalized_text, is_reasoning=True))
344+
reasoning_segments.append(normalized_text)
339345
return reasoning_active
340346

341347
return reasoning_active

src/fast_agent/llm/provider/openai/openresponses_streaming.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
OpenAIToolStreamState,
1616
)
1717
from fast_agent.llm.stream_types import StreamChunk
18+
from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta
1819

1920
if TYPE_CHECKING:
2021
from openai.types.responses import (
@@ -200,11 +201,19 @@ def tool_use_id_for_event(*, event: Any, item: Any, index: int | None) -> str:
200201
part_type = getattr(part, "type", None)
201202
part_text = getattr(part, "text", None)
202203
if part_type in {"reasoning", "reasoning_text"} and part_text:
203-
reasoning_segments.append(part_text)
204+
last_char = (
205+
reasoning_segments[-1][-1]
206+
if reasoning_segments and reasoning_segments[-1]
207+
else None
208+
)
209+
normalized_delta = normalize_reasoning_delta(last_char, part_text)
210+
if not normalized_delta:
211+
continue
212+
reasoning_segments.append(normalized_delta)
204213
self._notify_stream_listeners(
205-
StreamChunk(text=part_text, is_reasoning=True)
214+
StreamChunk(text=normalized_delta, is_reasoning=True)
206215
)
207-
reasoning_chars += len(part_text)
216+
reasoning_chars += len(normalized_delta)
208217
await self._emit_streaming_progress(
209218
model=f"{model} (reasoning)",
210219
new_total=reasoning_chars,
@@ -217,11 +226,19 @@ def tool_use_id_for_event(*, event: Any, item: Any, index: int | None) -> str:
217226
"response.reasoning_summary.delta",
218227
}:
219228
if delta:
220-
reasoning_segments.append(delta)
229+
last_char = (
230+
reasoning_segments[-1][-1]
231+
if reasoning_segments and reasoning_segments[-1]
232+
else None
233+
)
234+
normalized_delta = normalize_reasoning_delta(last_char, delta)
235+
if not normalized_delta:
236+
continue
237+
reasoning_segments.append(normalized_delta)
221238
self._notify_stream_listeners(
222-
StreamChunk(text=delta, is_reasoning=True)
239+
StreamChunk(text=normalized_delta, is_reasoning=True)
223240
)
224-
reasoning_chars += len(delta)
241+
reasoning_chars += len(normalized_delta)
225242
await self._emit_streaming_progress(
226243
model=f"{model} (summary)",
227244
new_total=reasoning_chars,
@@ -234,11 +251,19 @@ def tool_use_id_for_event(*, event: Any, item: Any, index: int | None) -> str:
234251
"response.reasoning_text.delta",
235252
}:
236253
if delta:
237-
reasoning_segments.append(delta)
254+
last_char = (
255+
reasoning_segments[-1][-1]
256+
if reasoning_segments and reasoning_segments[-1]
257+
else None
258+
)
259+
normalized_delta = normalize_reasoning_delta(last_char, delta)
260+
if not normalized_delta:
261+
continue
262+
reasoning_segments.append(normalized_delta)
238263
self._notify_stream_listeners(
239-
StreamChunk(text=delta, is_reasoning=True)
264+
StreamChunk(text=normalized_delta, is_reasoning=True)
240265
)
241-
reasoning_chars += len(delta)
266+
reasoning_chars += len(normalized_delta)
242267
await self._emit_streaming_progress(
243268
model=f"{model} (reasoning)",
244269
new_total=reasoning_chars,

src/fast_agent/llm/provider/openai/responses_streaming.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from fast_agent.llm.provider.openai.tool_notifications import OpenAIToolNotificationMixin
1717
from fast_agent.llm.provider.openai.tool_stream_state import OpenAIToolStreamState
1818
from fast_agent.llm.stream_types import StreamChunk
19+
from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta
1920

2021
_logger = get_logger(__name__)
2122

@@ -163,11 +164,19 @@ async def _process_stream(
163164
}:
164165
delta = getattr(event, "delta", None)
165166
if delta:
166-
reasoning_segments.append(delta)
167+
last_char = (
168+
reasoning_segments[-1][-1]
169+
if reasoning_segments and reasoning_segments[-1]
170+
else None
171+
)
172+
normalized_delta = normalize_reasoning_delta(last_char, delta)
173+
if not normalized_delta:
174+
continue
175+
reasoning_segments.append(normalized_delta)
167176
self._notify_stream_listeners(
168-
StreamChunk(text=delta, is_reasoning=True)
177+
StreamChunk(text=normalized_delta, is_reasoning=True)
169178
)
170-
reasoning_chars += len(delta)
179+
reasoning_chars += len(normalized_delta)
171180
await self._emit_streaming_progress(
172181
model=f"{model} (summary)",
173182
new_total=reasoning_chars,
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from __future__ import annotations
2+
3+
_SENTENCE_PUNCTUATION = ".!?;:"
4+
_MARKDOWN_PREFIXES = "\"`*["
5+
6+
7+
def _looks_like_sentence_chunk(incoming: str) -> bool:
8+
if not incoming:
9+
return False
10+
if " " not in incoming:
11+
return False
12+
first = incoming[0]
13+
return first.isupper() or first in _MARKDOWN_PREFIXES
14+
15+
16+
def normalize_reasoning_delta(last_char: str | None, incoming: str) -> str:
17+
"""Normalize one reasoning delta without rebuilding the full accumulated text.
18+
19+
Keep the Codex-style append-only flow, but patch the specific broken case where
20+
providers split natural-language reasoning into sentence chunks without a
21+
separating space, e.g. "approach." + "Specifying session retrieval format".
22+
"""
23+
if not incoming:
24+
return ""
25+
if not last_char or last_char.isspace() or incoming[0].isspace():
26+
return incoming
27+
if last_char in _SENTENCE_PUNCTUATION and _looks_like_sentence_chunk(incoming):
28+
return f" {incoming}"
29+
if last_char.islower() and _looks_like_sentence_chunk(incoming):
30+
return f" {incoming}"
31+
return incoming
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta
2+
3+
4+
def test_normalize_reasoning_delta_inserts_space_after_sentence_break() -> None:
    """Sentence chunks glued without whitespace gain a separating space."""
    chunks = (
        "approach.",
        "Specifying session retrieval format",
        "Selecting session retrieval method",
    )

    accumulated = ""
    for chunk in chunks:
        tail = accumulated[-1] if accumulated else None
        accumulated += normalize_reasoning_delta(tail, chunk)

    expected = (
        "approach. Specifying session retrieval format"
        " Selecting session retrieval method"
    )
    assert accumulated == expected
19+
20+
21+
def test_normalize_reasoning_delta_preserves_contractions() -> None:
    """A mid-word split such as "don" + "'t" must not gain a space."""
    accumulated = ""
    for fragment in ("don", "'t do that"):
        tail = accumulated[-1] if accumulated else None
        accumulated += normalize_reasoning_delta(tail, fragment)

    assert accumulated == "don't do that"
30+
31+
32+
def test_normalize_reasoning_delta_preserves_identifier_fragments() -> None:
    """Identifier fragments split at an underscore stay glued together."""
    accumulated = ""
    for fragment in ("session", "_id is required"):
        tail = accumulated[-1] if accumulated else None
        accumulated += normalize_reasoning_delta(tail, fragment)

    assert accumulated == "session_id is required"

0 commit comments

Comments (0)