Skip to content

Commit d12fdf3

Browse files
authored
Merge branch 'protoLabsAI:main' into main
2 parents 94d2dcc + c5cf8fa commit d12fdf3

215 files changed

Lines changed: 13453 additions & 742 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/issue-gate.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ jobs:
6161
6262
const text = body.replace(/\s+/g, ' ').trim();
6363
const longEnough = text.length >= 80;
64-
const hasProblem = hasSection(/problem|what'?s? wrong|motivation|background|context|summary/i);
65-
const hasRepro = hasSection(/repro|reproduce|steps|evidence|expected|actual|observed/i);
66-
const hasProposal = hasSection(/propos|solution|approach|direction|fix|design/i);
64+
const hasProblem = hasSection(/problem|what'?s? wrong|motivation|background|context|summary|observed|symptom|idea|current behavior|\bwhat\b/i);
65+
const hasRepro = hasSection(/repro|reproduce|steps|evidence|expected|actual|observed|symptom|root cause/i);
66+
const hasProposal = hasSection(/propos|solution|approach|direction|fix|design|\bwork\b|plan/i);
6767
const hasAcceptance = hasSection(/acceptance|done when|success criteria|definition of done/i);
6868
6969
const missing = [];

CHANGELOG.md

Lines changed: 462 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,16 @@ python -m server plugin uninstall your-plugin --purge # removes c
165165

166166
**Browse the directory → [agent.protolabs.studio/plugins](https://agent.protolabs.studio/plugins)**
167167

168-
First-party plugins ship in `plugins/` — `delegates` is a built-in, `notes`, `docs`, and
169-
`artifact` are on by default, and the rest are opt-in (enable via `plugins.enabled`):
168+
First-party plugins ship in `plugins/` — `delegates` is a built-in, `notes`, `docs`,
169+
`artifact`, and `craft` are on by default, and the rest are opt-in (enable via `plugins.enabled`):
170170

171171
| Plugin | Adds | What it does |
172172
| --- | --- | --- |
173173
| [`delegates`](./plugins/delegates/) | tool · settings | **Built-in** — `delegate_to` over a2a / openai / acp, managed in Workspace ▸ Delegates |
174174
| [`notes`](./plugins/notes/) | tools · view | **On by default** — one shared markdown note the agent and operator both read/write |
175175
| [`docs`](./plugins/docs/) | tools · view · skill | **On by default** — offline search over protoAgent's own docs |
176176
| [`artifact`](./plugins/artifact/) | tools · view · skill | **On by default** — generative UI; `show_artifact` renders charts, diagrams, Mermaid, Markdown, or live React into a sandboxed panel ([ADR 0038](./docs/adr/0038-generative-ui-artifacts-two-mode.md)) |
177+
| [`craft`](./plugins/craft/) | skills · subagent | **On by default** — engineering rituals as user-only slash commands (`/grill`, `/standup`, `/code-review`, `/writing-skills`) + the `skill_writer` subagent; prompt-only |
177178
| [`plugin-devkit`](./plugins/plugin-devkit/) | tool · subagent · skill · workflow · view | The authoring kit + reference plugin — the agent can scaffold and build its own plugins |
178179
| [`workflows`](./plugins/workflows/) | tools | Declarative multi-step subagent workflows (DAG recipes) |
179180
| [`telegram`](./plugins/telegram/) | surface | Run the agent as a Telegram bot — the reference [communication plugin](./docs/guides/communication-plugins.md) |

a2a_impl/executor.py

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,25 @@ async def execute(self, context: RequestContext, event_queue: EventQueue) -> Non
288288
_text_buf = ""
289289
_answer_started = False # first chunk creates the artifact (append=False); rest append
290290
_FLUSH_CHARS = 24
291+
# Reasoning ("thinking") deltas arrive one token at a time. Batch them
292+
# like the answer text so the live thinking bubble still fills word by
293+
# word without a WORKING status frame per token — unbatched, one turn
294+
# produced ~700 single-word frames, and each one becomes a durable
295+
# history Message row downstream (#1710; the durable store additionally
296+
# coalesces whole contiguous reasoning runs — a2a_impl.stores).
297+
_reasoning_buf = ""
298+
299+
async def _flush_reasoning() -> None:
300+
nonlocal _reasoning_buf
301+
if not _reasoning_buf:
302+
return
303+
payload_text, _reasoning_buf = _reasoning_buf, ""
304+
await updater.update_status(
305+
TaskState.TASK_STATE_WORKING,
306+
message=updater.new_agent_message(
307+
[_data_part_proto({"text": payload_text}, REASONING_MIME)]
308+
),
309+
)
291310

292311
async def _flush_text() -> None:
293312
nonlocal _text_buf, _answer_started
@@ -303,20 +322,25 @@ async def _flush_text() -> None:
303322
_text_buf = ""
304323

305324
async def _finalize(final_text: str) -> None:
306-
"""Close the answer artifact + emit the cost/context DataParts. If
307-
the text was streamed (delta frames), append ONLY the meta parts so
308-
concat-based consumers don't double the answer; otherwise emit the full
309-
text once (the non-streaming path: workflow/subagent short-circuits)."""
310-
# If text streamed but the canonical final_text DIVERGES from what
311-
# streamed (a goal-outcome note appended, a kicker / multi-iteration retry,
312-
# or extract_output reshaping it), REPLACE the artifact (append=False) with
313-
# the full final_text so the durable task + any tasks/get re-fetch carry the
314-
# real answer, not the raw streamed deltas. When it matches (the common case)
315-
# append meta-only so concat-based consumers don't double the answer.
316-
diverged = _answer_started and (final_text or "").strip() != accumulated.strip()
317-
replace = (not _answer_started) or diverged
318-
# body="" yields a dataparts-only list (the text part is conditional).
319-
body = final_text if replace else ""
325+
"""Close the answer artifact + emit the cost/context DataParts.
326+
327+
AUTHORITATIVE (#1709): the terminal frame always carries the full
328+
canonical ``final_text`` with ``append=False`` — a REPLACE — so the
329+
durable task (and any tasks/get re-fetch) holds the answer exactly
330+
once even when mid-stream append frames were lost downstream of this
331+
process. The old heuristic (append meta-only when the streamed text
332+
matched ``final_text``) compared against the IN-PROCESS accumulation,
333+
so a downstream loss looked like "nothing diverged" and permanently
334+
sealed a truncated artifact into the store. Frame consumers must
335+
honor the A2A ``append`` flag — and beware its WIRE SHAPE: proto3
336+
gives ``append`` no presence, so ``append=False`` serializes as an
337+
ABSENT key. Absent/false ⇒ replace; only an explicit ``true`` is an
338+
append (the SDK task store, evals/client.py and the console's
339+
``artifactAppends`` all read it that way — the console's previous
340+
``append !== false`` mapping would have doubled every streamed
341+
answer). Naive concat consumers would otherwise see the answer
342+
twice."""
343+
body = final_text
320344
# Compaction context (#1372): the live prompt size + the configured trigger /
321345
# token threshold, merged into one context-v1 DataPart. Provider failures
322346
# degrade to "size only" — never break the turn's finalization.
@@ -343,7 +367,7 @@ async def _finalize(final_text: str) -> None:
343367
await updater.add_artifact(
344368
parts,
345369
artifact_id=answer_aid,
346-
append=not replace,
370+
append=False,
347371
last_chunk=True,
348372
)
349373

@@ -374,6 +398,12 @@ def _outcome(state: str, final_text: str) -> TurnOutcome:
374398
request_metadata=_md,
375399
images=images,
376400
):
401+
# A contiguous reasoning run ends at the first non-reasoning event:
402+
# flush the buffered tail first so frames reach the consumer in
403+
# stream order (thinking before the text/tool frame it preceded).
404+
if event_type != "reasoning":
405+
await _flush_reasoning()
406+
377407
if event_type == "text":
378408
accumulated += payload
379409
_text_buf += payload
@@ -411,13 +441,13 @@ def _outcome(state: str, final_text: str) -> TurnOutcome:
411441
elif event_type == "reasoning":
412442
# Live "thinking" — a reasoning DataPart on a WORKING frame,
413443
# separate from the answer artifact (plain consumers ignore it).
444+
# Batched to the same char threshold as the answer text so a
445+
# token-per-event reasoning stream doesn't become a frame per
446+
# word (#1710).
414447
if payload:
415-
await updater.update_status(
416-
TaskState.TASK_STATE_WORKING,
417-
message=updater.new_agent_message(
418-
[_data_part_proto({"text": str(payload)}, REASONING_MIME)]
419-
),
420-
)
448+
_reasoning_buf += str(payload)
449+
if len(_reasoning_buf) >= _FLUSH_CHARS:
450+
await _flush_reasoning()
421451

422452
elif event_type == "delta":
423453
if isinstance(payload, dict):
@@ -463,6 +493,7 @@ def _outcome(state: str, final_text: str) -> TurnOutcome:
463493

464494
# Stream ended without an explicit terminal event — treat the
465495
# accumulated text as the answer.
496+
await _flush_reasoning()
466497
await _flush_text()
467498
await _finalize(accumulated)
468499
await updater.complete()

a2a_impl/stores.py

Lines changed: 117 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
DatabaseTaskStore,
4747
)
4848
from a2a.server.tasks.push_notification_sender import PushNotificationEvent
49-
from a2a.types import TaskPushNotificationConfig
49+
from a2a.types import Task, TaskPushNotificationConfig
5050

5151
log = logging.getLogger(__name__)
5252

@@ -229,6 +229,119 @@ async def _dispatch_notification(
229229
return await super()._dispatch_notification(event, push_info, task_id)
230230

231231

232+
# ── Reasoning coalescing (#1710) ────────────────────────────────────────────────
233+
234+
# Duplicated from a2a_impl.executor (the executor imports graph.* at module
235+
# scope; keeping the constant local avoids pulling the whole agent brain into
236+
# the store's import chain). Locked together by test_a2a_stores.
237+
_REASONING_MIME = "application/vnd.protolabs.reasoning-v1+json"
238+
239+
240+
def coalesce_reasoning_history(task: Task) -> int:
241+
"""Collapse each contiguous run of reasoning-v1 Messages in ``task.history``
242+
into ONE Message, in place. Returns the number of messages removed.
243+
244+
Reasoning ("thinking") deltas are a STREAMING affordance: the executor emits
245+
them as reasoning-v1 DataParts on WORKING status frames so the console fills
246+
the live thinking bubble. But the a2a-sdk ``TaskManager`` moves every status
247+
frame's message into durable ``history`` — so a token-per-frame reasoning
248+
stream persisted one near-single-word Message per delta (~700 rows for one
249+
turn: #1710), bloating the store and making ``GetTask(historyLength=N)``
250+
return word fragments instead of conversation. The executor now batches the
251+
wire frames (~24-char granularity, keeping the live bubble); this collapses
252+
whatever reaches the durable store to one Message per contiguous reasoning
253+
block, mirroring how the answer text finalizes into a single canonical part.
254+
255+
Deliberately mutates the task in place: the SDK ``TaskManager`` re-saves its
256+
ONE in-memory ``Task`` on every event and copies it whole for every
257+
subscriber frame, so keeping history compact also removes the O(events ×
258+
history) serialization/copy pressure the flood created (the prime suspect
259+
for the mid-stream artifact frame loss in #1709). Only agent messages whose
260+
parts are ALL reasoning DataParts are touched; user messages, tool frames,
261+
and HITL prompts are never merged, and non-contiguous runs stay separate.
262+
"""
263+
from google.protobuf import json_format, struct_pb2
264+
265+
from a2a.types import Part, Role
266+
267+
def _reasoning_text(part) -> str | None:
268+
"""The part's reasoning text, or None when it isn't a reasoning DataPart."""
269+
if part.WhichOneof("content") != "data":
270+
return None
271+
mime = part.metadata.fields["mimeType"].string_value if "mimeType" in part.metadata.fields else ""
272+
if mime != _REASONING_MIME:
273+
return None
274+
fields = part.data.struct_value.fields
275+
return fields["text"].string_value if "text" in fields else ""
276+
277+
def _run_texts(msg) -> list[str] | None:
278+
"""All parts' reasoning texts when the whole message is reasoning, else None."""
279+
if msg.role != Role.ROLE_AGENT or not msg.parts:
280+
return None
281+
texts = [_reasoning_text(p) for p in msg.parts]
282+
return texts if all(t is not None for t in texts) else None # type: ignore[return-value]
283+
284+
merged: list = []
285+
run_head = None # first Message of the current contiguous reasoning run
286+
run_texts: list[str] = []
287+
288+
def _close_run() -> None:
289+
nonlocal run_head, run_texts
290+
if run_head is None:
291+
return
292+
# Rewrite the head's parts to a single data part with the run's full text
293+
# (same shape the executor emits, so parsers see one big reasoning part).
294+
del run_head.parts[:]
295+
part = Part()
296+
value = struct_pb2.Value()
297+
json_format.ParseDict({"text": "".join(run_texts)}, value.struct_value)
298+
part.data.CopyFrom(value)
299+
part.metadata.update({"mimeType": _REASONING_MIME})
300+
part.media_type = "application/json"
301+
run_head.parts.append(part)
302+
merged.append(run_head)
303+
run_head, run_texts = None, []
304+
305+
for msg in task.history:
306+
texts = _run_texts(msg)
307+
if texts is None:
308+
_close_run()
309+
merged.append(msg)
310+
continue
311+
if run_head is None:
312+
head = type(msg)()
313+
head.CopyFrom(msg)
314+
run_head = head
315+
run_texts.extend(texts)
316+
_close_run()
317+
318+
removed = len(task.history) - len(merged)
319+
if removed:
320+
# `merged` holds references into task.history — copy before clearing.
321+
kept = []
322+
for m in merged:
323+
c = type(m)()
324+
c.CopyFrom(m)
325+
kept.append(c)
326+
del task.history[:]
327+
task.history.extend(kept)
328+
return removed
329+
330+
331+
class ReasoningCoalescingTaskStore(DatabaseTaskStore):
332+
"""Durable task store that coalesces contiguous reasoning-v1 history runs
333+
into one Message per run on every save (#1710). Streaming frames are
334+
untouched — this is persistence-shape only, so the wire contract and the
335+
live thinking bubble are unchanged."""
336+
337+
async def save(self, task: Task, context: ServerCallContext) -> None:
338+
try:
339+
coalesce_reasoning_history(task)
340+
except Exception: # noqa: BLE001 — coalescing must never lose a save
341+
log.exception("[a2a] reasoning coalescing failed; saving uncoalesced")
342+
await super().save(task, context)
343+
344+
232345
# ── Durable store construction (paths match the bespoke stores) ─────────────────
233346

234347

@@ -263,10 +376,12 @@ def build_a2a_stores() -> tuple[
263376
Each store gets its own engine/file (same split the bespoke stores used:
264377
``a2a-tasks.db`` and ``a2a-push.db``). The SDK stores lazy-init their schema
265378
on first use; ``initialize_a2a_stores`` forces that + a TTL sweep at boot.
379+
The task store coalesces streamed reasoning runs into one durable history
380+
Message per run (#1710) — see ``ReasoningCoalescingTaskStore``.
266381
"""
267382
task_db = _resolve_db_path("a2a-tasks.db")
268383
push_db = _resolve_db_path("a2a-push.db")
269-
task_store = DatabaseTaskStore(make_sqlite_engine(task_db))
384+
task_store = ReasoningCoalescingTaskStore(make_sqlite_engine(task_db))
270385
push_store = ValidatingPushNotificationConfigStore(make_sqlite_engine(push_db))
271386
return task_store, push_store, task_db, push_db
272387

apps/desktop/sidecar/build_sidecar.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@
9494
"ddgs",
9595
"langfuse",
9696
"croniter",
97+
# stdlib zoneinfo's fallback IANA database — Windows has no system tz data,
98+
# and zoneinfo imports tzdata DYNAMICALLY, so the import-scan misses it;
99+
# without the collect every ZoneInfo(...) in the frozen sidecar raised (#1683).
100+
"tzdata",
97101
# A2A 1.0 (ADR 0014): the SDK + the protoLabs conventions layer (git-dep).
98102
# Both pull submodules/metadata that a bare import-scan misses — without a
99103
# full collect, the frozen `protolabs_a2a` is missing `build_agent_card`.

apps/desktop/src-tauri/Cargo.lock

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/desktop/src-tauri/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ tauri-plugin-dialog = "2"
2828
tauri-plugin-global-shortcut = "2.3.2"
2929
tauri-plugin-log = "2"
3030
tauri-plugin-notification = "2"
31+
# External links open in the system browser via the opener plugin; the shell
32+
# plugin stays for the sidecar (its `open` is deprecated in favor of opener).
33+
tauri-plugin-opener = "2"
3134
tauri-plugin-shell = "2"
3235
tauri-plugin-updater = "2"
3336
# Desktop chat streaming: WKWebView can't read a streaming fetch body, so the

0 commit comments

Comments
 (0)