protoLabsAI
diff --git a/.github/workflows/issue-gate.yml b/.github/workflows/issue-gate.yml
Lines changed: 3 additions & 3 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 462 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 3 additions & 2 deletions
diff --git a/a2a_impl/executor.py b/a2a_impl/executor.py
Lines changed: 52 additions & 21 deletions
diff --git a/a2a_impl/stores.py b/a2a_impl/stores.py
Lines changed: 117 additions & 2 deletions
diff --git a/apps/desktop/sidecar/build_sidecar.py b/apps/desktop/sidecar/build_sidecar.py
Lines changed: 4 additions & 0 deletions
diff --git a/apps/desktop/src-tauri/Cargo.lock b/apps/desktop/src-tauri/Cargo.lock
Lines changed: 23 additions & 0 deletions
diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml
Lines changed: 3 additions & 0 deletions
@@ -61,9 +61,9 @@ jobs:
 
             const text = body.replace(/\s+/g, ' ').trim();
             const longEnough = text.length >= 80;
-            const hasProblem = hasSection(/problem|what'?s? wrong|motivation|background|context|summary/i);
-            const hasRepro = hasSection(/repro|reproduce|steps|evidence|expected|actual|observed/i);
-            const hasProposal = hasSection(/propos|solution|approach|direction|fix|design/i);
+            const hasProblem = hasSection(/problem|what'?s? wrong|motivation|background|context|summary|observed|symptom|idea|current behavior|\bwhat\b/i);
+            const hasRepro = hasSection(/repro|reproduce|steps|evidence|expected|actual|observed|symptom|root cause/i);
+            const hasProposal = hasSection(/propos|solution|approach|direction|fix|design|\bwork\b|plan/i);
             const hasAcceptance = hasSection(/acceptance|done when|success criteria|definition of done/i);
 
             const missing = [];
 
@@ -165,15 +165,16 @@ python -m server plugin uninstall your-plugin --purge                # removes c
 
 **Browse the directory → [agent.protolabs.studio/plugins](https://agent.protolabs.studio/plugins)**
 
-First-party plugins ship in `plugins/` — `delegates` is a built-in, `notes`, `docs`, and
-`artifact` are on by default, and the rest are opt-in (enable via `plugins.enabled`):
+First-party plugins ship in `plugins/` — `delegates` is a built-in, `notes`, `docs`,
+`artifact`, and `craft` are on by default, and the rest are opt-in (enable via `plugins.enabled`):
 
 | Plugin | Adds | What it does |
 | --- | --- | --- |
 | [`delegates`](./plugins/delegates/) | tool · settings | **Built-in** — `delegate_to` over a2a / openai / acp, managed in Workspace ▸ Delegates |
 | [`notes`](./plugins/notes/) | tools · view | **On by default** — one shared markdown note the agent and operator both read/write |
 | [`docs`](./plugins/docs/) | tools · view · skill | **On by default** — offline search over protoAgent's own docs |
 | [`artifact`](./plugins/artifact/) | tools · view · skill | **On by default** — generative UI; `show_artifact` renders charts, diagrams, Mermaid, Markdown, or live React into a sandboxed panel ([ADR 0038](./docs/adr/0038-generative-ui-artifacts-two-mode.md)) |
+| [`craft`](./plugins/craft/) | skills · subagent | **On by default** — engineering rituals as user-only slash commands (`/grill`, `/standup`, `/code-review`, `/writing-skills`) + the `skill_writer` subagent; prompt-only |
 | [`plugin-devkit`](./plugins/plugin-devkit/) | tool · subagent · skill · workflow · view | The authoring kit + reference plugin — the agent can scaffold and build its own plugins |
 | [`workflows`](./plugins/workflows/) | tools | Declarative multi-step subagent workflows (DAG recipes) |
 | [`telegram`](./plugins/telegram/) | surface | Run the agent as a Telegram bot — the reference [communication plugin](./docs/guides/communication-plugins.md) |
 
@@ -288,6 +288,25 @@ async def execute(self, context: RequestContext, event_queue: EventQueue) -> Non
         _text_buf = ""
         _answer_started = False  # first chunk creates the artifact (append=False); rest append
         _FLUSH_CHARS = 24
+        # Reasoning ("thinking") deltas arrive one token at a time. Batch them
+        # like the answer text so the live thinking bubble still fills word by
+        # word without a WORKING status frame per token — unbatched, one turn
+        # produced ~700 single-word frames, and each one becomes a durable
+        # history Message row downstream (#1710; the durable store additionally
+        # coalesces whole contiguous reasoning runs — a2a_impl.stores).
+        _reasoning_buf = ""
+
+        async def _flush_reasoning() -> None:
+            nonlocal _reasoning_buf
+            if not _reasoning_buf:
+                return
+            payload_text, _reasoning_buf = _reasoning_buf, ""
+            await updater.update_status(
+                TaskState.TASK_STATE_WORKING,
+                message=updater.new_agent_message(
+                    [_data_part_proto({"text": payload_text}, REASONING_MIME)]
+                ),
+            )
 
         async def _flush_text() -> None:
             nonlocal _text_buf, _answer_started
@@ -303,20 +322,25 @@ async def _flush_text() -> None:
             _text_buf = ""
 
         async def _finalize(final_text: str) -> None:
-            """Close the answer artifact + emit the cost/context DataParts. If
-            the text was streamed (delta frames), append ONLY the meta parts so
-            concat-based consumers don't double the answer; otherwise emit the full
-            text once (the non-streaming path: workflow/subagent short-circuits)."""
-            # If text streamed but the canonical final_text DIVERGES from what
-            # streamed (a goal-outcome note appended, a kicker / multi-iteration retry,
-            # or extract_output reshaping it), REPLACE the artifact (append=False) with
-            # the full final_text so the durable task + any tasks/get re-fetch carry the
-            # real answer, not the raw streamed deltas. When it matches (the common case)
-            # append meta-only so concat-based consumers don't double the answer.
-            diverged = _answer_started and (final_text or "").strip() != accumulated.strip()
-            replace = (not _answer_started) or diverged
-            # body="" yields a dataparts-only list (the text part is conditional).
-            body = final_text if replace else ""
+            """Close the answer artifact + emit the cost/context DataParts.
+
+            AUTHORITATIVE (#1709): the terminal frame always carries the full
+            canonical ``final_text`` with ``append=False`` — a REPLACE — so the
+            durable task (and any tasks/get re-fetch) holds the answer exactly
+            once even when mid-stream append frames were lost downstream of this
+            process. The old heuristic (append meta-only when the streamed text
+            matched ``final_text``) compared against the IN-PROCESS accumulation,
+            so a downstream loss looked like "nothing diverged" and permanently
+            sealed a truncated artifact into the store. Frame consumers must
+            honor the A2A ``append`` flag — and beware its WIRE SHAPE: proto3
+            gives ``append`` no presence, so ``append=False`` serializes as an
+            ABSENT key. Absent/false ⇒ replace; only an explicit ``true`` is an
+            append (the SDK task store, evals/client.py and the console's
+            ``artifactAppends`` all read it that way — the console's previous
+            ``append !== false`` mapping would have doubled every streamed
+            answer). Naive concat consumers would otherwise see the answer
+            twice."""
+            body = final_text
             # Compaction context (#1372): the live prompt size + the configured trigger /
             # token threshold, merged into one context-v1 DataPart. Provider failures
             # degrade to "size only" — never break the turn's finalization.
@@ -343,7 +367,7 @@ async def _finalize(final_text: str) -> None:
                 await updater.add_artifact(
                     parts,
                     artifact_id=answer_aid,
-                    append=not replace,
+                    append=False,
                     last_chunk=True,
                 )
 
@@ -374,6 +398,12 @@ def _outcome(state: str, final_text: str) -> TurnOutcome:
                 request_metadata=_md,
                 images=images,
             ):
+                # A contiguous reasoning run ends at the first non-reasoning event:
+                # flush the buffered tail first so frames reach the consumer in
+                # stream order (thinking before the text/tool frame it preceded).
+                if event_type != "reasoning":
+                    await _flush_reasoning()
+
                 if event_type == "text":
                     accumulated += payload
                     _text_buf += payload
@@ -411,13 +441,13 @@ def _outcome(state: str, final_text: str) -> TurnOutcome:
                 elif event_type == "reasoning":
                     # Live "thinking" — a reasoning DataPart on a WORKING frame,
                     # separate from the answer artifact (plain consumers ignore it).
+                    # Batched to the same char threshold as the answer text so a
+                    # token-per-event reasoning stream doesn't become a frame per
+                    # word (#1710).
                     if payload:
-                        await updater.update_status(
-                            TaskState.TASK_STATE_WORKING,
-                            message=updater.new_agent_message(
-                                [_data_part_proto({"text": str(payload)}, REASONING_MIME)]
-                            ),
-                        )
+                        _reasoning_buf += str(payload)
+                        if len(_reasoning_buf) >= _FLUSH_CHARS:
+                            await _flush_reasoning()
 
                 elif event_type == "delta":
                     if isinstance(payload, dict):
@@ -463,6 +493,7 @@ def _outcome(state: str, final_text: str) -> TurnOutcome:
 
             # Stream ended without an explicit terminal event — treat the
             # accumulated text as the answer.
+            await _flush_reasoning()
             await _flush_text()
             await _finalize(accumulated)
             await updater.complete()
 
@@ -46,7 +46,7 @@
     DatabaseTaskStore,
 )
 from a2a.server.tasks.push_notification_sender import PushNotificationEvent
-from a2a.types import TaskPushNotificationConfig
+from a2a.types import Task, TaskPushNotificationConfig
 
 log = logging.getLogger(__name__)
 
@@ -229,6 +229,119 @@ async def _dispatch_notification(
         return await super()._dispatch_notification(event, push_info, task_id)
 
 
+# ── Reasoning coalescing (#1710) ────────────────────────────────────────────────
+
+# Duplicated from a2a_impl.executor (the executor imports graph.* at module
+# scope; keeping the constant local avoids pulling the whole agent brain into
+# the store's import chain). Locked together by test_a2a_stores.
+_REASONING_MIME = "application/vnd.protolabs.reasoning-v1+json"
+
+
+def coalesce_reasoning_history(task: Task) -> int:
+    """Collapse each contiguous run of reasoning-v1 Messages in ``task.history``
+    into ONE Message, in place. Returns the number of messages removed.
+
+    Reasoning ("thinking") deltas are a STREAMING affordance: the executor emits
+    them as reasoning-v1 DataParts on WORKING status frames so the console fills
+    the live thinking bubble. But the a2a-sdk ``TaskManager`` moves every status
+    frame's message into durable ``history`` — so a token-per-frame reasoning
+    stream persisted one near-single-word Message per delta (~700 rows for one
+    turn: #1710), bloating the store and making ``GetTask(historyLength=N)``
+    return word fragments instead of conversation. The executor now batches the
+    wire frames (~24-char granularity, keeping the live bubble); this collapses
+    whatever reaches the durable store to one Message per contiguous reasoning
+    block, mirroring how the answer text finalizes into a single canonical part.
+
+    Deliberately mutates the task in place: the SDK ``TaskManager`` re-saves its
+    ONE in-memory ``Task`` on every event and copies it whole for every
+    subscriber frame, so keeping history compact also removes the O(events ×
+    history) serialization/copy pressure the flood created (the prime suspect
+    for the mid-stream artifact frame loss in #1709). Only agent messages whose
+    parts are ALL reasoning DataParts are touched; user messages, tool frames,
+    and HITL prompts are never merged, and non-contiguous runs stay separate.
+    """
+    from google.protobuf import json_format, struct_pb2
+
+    from a2a.types import Part, Role
+
+    def _reasoning_text(part) -> str | None:
+        """The part's reasoning text, or None when it isn't a reasoning DataPart."""
+        if part.WhichOneof("content") != "data":
+            return None
+        mime = part.metadata.fields["mimeType"].string_value if "mimeType" in part.metadata.fields else ""
+        if mime != _REASONING_MIME:
+            return None
+        fields = part.data.struct_value.fields
+        return fields["text"].string_value if "text" in fields else ""
+
+    def _run_texts(msg) -> list[str] | None:
+        """All parts' reasoning texts when the whole message is reasoning, else None."""
+        if msg.role != Role.ROLE_AGENT or not msg.parts:
+            return None
+        texts = [_reasoning_text(p) for p in msg.parts]
+        return texts if all(t is not None for t in texts) else None  # type: ignore[return-value]
+
+    merged: list = []
+    run_head = None  # first Message of the current contiguous reasoning run
+    run_texts: list[str] = []
+
+    def _close_run() -> None:
+        nonlocal run_head, run_texts
+        if run_head is None:
+            return
+        # Rewrite the head's parts to a single data part with the run's full text
+        # (same shape the executor emits, so parsers see one big reasoning part).
+        del run_head.parts[:]
+        part = Part()
+        value = struct_pb2.Value()
+        json_format.ParseDict({"text": "".join(run_texts)}, value.struct_value)
+        part.data.CopyFrom(value)
+        part.metadata.update({"mimeType": _REASONING_MIME})
+        part.media_type = "application/json"
+        run_head.parts.append(part)
+        merged.append(run_head)
+        run_head, run_texts = None, []
+
+    for msg in task.history:
+        texts = _run_texts(msg)
+        if texts is None:
+            _close_run()
+            merged.append(msg)
+            continue
+        if run_head is None:
+            head = type(msg)()
+            head.CopyFrom(msg)
+            run_head = head
+        run_texts.extend(texts)
+    _close_run()
+
+    removed = len(task.history) - len(merged)
+    if removed:
+        # `merged` holds references into task.history — copy before clearing.
+        kept = []
+        for m in merged:
+            c = type(m)()
+            c.CopyFrom(m)
+            kept.append(c)
+        del task.history[:]
+        task.history.extend(kept)
+    return removed
+
+
+class ReasoningCoalescingTaskStore(DatabaseTaskStore):
+    """Durable task store that coalesces contiguous reasoning-v1 history runs
+    into one Message per run on every save (#1710). Streaming frames are
+    untouched — this is persistence-shape only, so the wire contract and the
+    live thinking bubble are unchanged."""
+
+    async def save(self, task: Task, context: ServerCallContext) -> None:
+        try:
+            coalesce_reasoning_history(task)
+        except Exception:  # noqa: BLE001 — coalescing must never lose a save
+            log.exception("[a2a] reasoning coalescing failed; saving uncoalesced")
+        await super().save(task, context)
+
+
 # ── Durable store construction (paths match the bespoke stores) ─────────────────
 
 
@@ -263,10 +376,12 @@ def build_a2a_stores() -> tuple[
     Each store gets its own engine/file (same split the bespoke stores used:
     ``a2a-tasks.db`` and ``a2a-push.db``). The SDK stores lazy-init their schema
     on first use; ``initialize_a2a_stores`` forces that + a TTL sweep at boot.
+    The task store coalesces streamed reasoning runs into one durable history
+    Message per run (#1710) — see ``ReasoningCoalescingTaskStore``.
     """
     task_db = _resolve_db_path("a2a-tasks.db")
     push_db = _resolve_db_path("a2a-push.db")
-    task_store = DatabaseTaskStore(make_sqlite_engine(task_db))
+    task_store = ReasoningCoalescingTaskStore(make_sqlite_engine(task_db))
     push_store = ValidatingPushNotificationConfigStore(make_sqlite_engine(push_db))
     return task_store, push_store, task_db, push_db
 
 
@@ -94,6 +94,10 @@
     "ddgs",
     "langfuse",
     "croniter",
+    # stdlib zoneinfo's fallback IANA database — Windows has no system tz data,
+    # and zoneinfo imports tzdata DYNAMICALLY, so the import-scan misses it;
+    # without the collect every ZoneInfo(...) in the frozen sidecar raised (#1683).
+    "tzdata",
     # A2A 1.0 (ADR 0014): the SDK + the protoLabs conventions layer (git-dep).
     # Both pull submodules/metadata that a bare import-scan misses — without a
     # full collect, the frozen `protolabs_a2a` is missing `build_agent_card`.
 
@@ -28,6 +28,9 @@ tauri-plugin-dialog = "2"
 tauri-plugin-global-shortcut = "2.3.2"
 tauri-plugin-log = "2"
 tauri-plugin-notification = "2"
+# External links open in the system browser via the opener plugin; the shell
+# plugin stays for the sidecar (its `open` is deprecated in favor of opener).
+tauri-plugin-opener = "2"
 tauri-plugin-shell = "2"
 tauri-plugin-updater = "2"
 # Desktop chat streaming: WKWebView can't read a streaming fetch body, so the