Skip to content

Commit 8c54f18

Browse files
dougborgclaude
andcommitted
fix(mcp): drop broken live-tick SetState; pin behavior with production-shape harness stub (#645)
Copilot review on PR #634 caught that the live-tick design (SetState("plan_actions", Rx("$result.actions")) in the Confirm button's on_success chain) was a no-op in production. The browser harness was passing only because the stub modify_manufacturing_order returned ToolResult(content="ok", structured_content=response_dict) — putting the raw ModificationResponse into structured_content, which is exactly the shape $result.actions needed. Real modification tools use make_tool_result(response, ui=...) which puts the response JSON in content and a PrefabApp wire envelope (no actions field) in structured_content. $result in the on_success Rx context resolves to that envelope, so $result.actions never resolves. This commit: - Drops the broken SetState("plan_actions", RESULT.actions) from the Confirm-button apply chain. Apply path now morphs the card via the existing applied=True flag (rows stay PLANNED; footer/badge flip to applied state). Same UX the codebase had before #634's live-tick attempt — no functional regression. - Updates the browser harness stub to use make_tool_result exactly like real tools, pinning the production wire shape so future live- tick attempts will verify against an honest stub. - Renames test_apply_button_pushes_result_actions_into_state to test_apply_button_does_not_set_state_plan_actions and asserts the no-op SetState is NOT in the chain — prevents regression. - Updates the browser click-through test to assert the actual applied-state morph (Confirm disabled, "Applied" pill visible) rather than rows transitioning. - In-code comment in build_modification_preview_ui documents why the live-tick was dropped and points at the follow-up issue. Live-tick UX tracked as a separate enhancement in #645 — it requires identifying the right Rx path (or restructuring the apply path) before it can ship. The browser harness is now in place to verify any future attempt. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cc231f9 commit 8c54f18

4 files changed

Lines changed: 70 additions & 58 deletions

File tree

katana_mcp_server/src/katana_mcp/tools/prefab_ui.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,16 +1444,17 @@ def build_modification_preview_ui(
14441444
if legacy is not None:
14451445
actions = [legacy]
14461446

1447-
apply_action = _build_apply_action_direct(
1448-
confirm_tool,
1449-
confirm_request,
1450-
# Live-tick: replace the planned rows with the apply RESULT's actions
1451-
# before the generic ``applied=True`` flag flips. Each row's
1452-
# ``status_label`` updates from PLANNED to its terminal value
1453-
# (APPLIED / APPLIED (verified) / FAILED) and the DataTable
1454-
# re-renders rows in place — no iframe morph.
1455-
extra_on_success=[SetState(_PLAN_ACTIONS_KEY, RESULT.actions)],
1456-
)
1447+
apply_action = _build_apply_action_direct(confirm_tool, confirm_request)
1448+
# NOTE: A live-tick design (rows ticking PLANNED -> APPLIED in place via
1449+
# ``SetState("plan_actions", RESULT.actions)``) was attempted in #634 but
1450+
# turned out to be broken: ``$result`` in the on_success Rx context
1451+
# resolves to the apply tool's ``structured_content`` (a PrefabApp wire
1452+
# envelope from ``make_tool_result``), not to the raw
1453+
# ``ModificationResponse``. ``$result.actions`` therefore doesn't
1454+
# resolve and the SetState was a no-op in production. Caught by Copilot
1455+
# review; verified by the browser harness when its stub was switched to
1456+
# match production shape. Tracked as a follow-up — until then the
1457+
# apply path morphs the card via the existing ``applied=True`` flag.
14571458
entity_type_raw = str(response.get("entity_type") or "entity")
14581459
entity_type_label = _humanize_snake_case(entity_type_raw)
14591460
verb_label = _verb_label(confirm_tool)

katana_mcp_server/tests/browser/render_test_server.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@
1717

1818
from fastmcp import FastMCP
1919
from fastmcp.tools import ToolResult
20-
from katana_mcp.tools._modification import ActionResult, FieldChange
20+
from katana_mcp.tools._modification import (
21+
ActionResult,
22+
FieldChange,
23+
ModificationResponse,
24+
)
2125
from katana_mcp.tools.prefab_ui import (
2226
build_batch_recipe_update_ui,
2327
build_inventory_check_ui,
@@ -26,6 +30,7 @@
2630
build_search_results_ui,
2731
build_verification_ui,
2832
)
33+
from katana_mcp.tools.tool_result_utils import make_tool_result
2934
from prefab_ui.app import PrefabApp
3035
from pydantic import BaseModel
3136

@@ -391,7 +396,14 @@ async def modify_manufacturing_order(
391396
Sub-payload args mirror :class:`ModifyManufacturingOrderRequest` so
392397
FastMCP's signature validator accepts the full Confirm-button payload.
393398
All values are ignored — the stub always returns the same canned
394-
envelope, since we just need ``RESULT.actions`` to land in state.
399+
envelope.
400+
401+
Uses ``make_tool_result`` (same helper real modification tools use) so
402+
the wire shape matches production exactly: ``content`` carries the
403+
response JSON, ``structured_content`` carries the apply result card's
404+
Prefab envelope. This pins the contract that ``RESULT.actions`` in
405+
the on_success Rx expression resolves the same way against the stub
406+
as it does against the real tool.
395407
"""
396408
del (
397409
id,
@@ -406,13 +418,14 @@ async def modify_manufacturing_order(
406418
update_productions,
407419
delete_production_ids,
408420
) # unused — canned response
409-
response = _twelve_action_response(
421+
response_dict = _twelve_action_response(
410422
is_preview=preview, succeeded=None if preview else True
411423
)
412-
return ToolResult(
413-
content="ok",
414-
structured_content=response,
424+
response = ModificationResponse.model_validate(response_dict)
425+
ui = build_modification_result_ui(
426+
response_dict, tool_name="modify_manufacturing_order"
415427
)
428+
return make_tool_result(response, ui=ui)
416429

417430

418431
if __name__ == "__main__":

katana_mcp_server/tests/browser/test_modification_render.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -71,39 +71,40 @@ def test_modify_twelve_actions_applied_renders(self, render_scenario):
7171
# Every action shows APPLIED (verified).
7272
assert frame.locator("td").filter(has_text="APPLIED").count() == 12
7373

74-
def test_apply_button_ticks_rows_live(self, render_scenario):
75-
"""Click-through live-tick: Confirm fires the apply call, the
76-
on_success chain pushes RESULT.actions into state.plan_actions,
77-
and rows transition from PLANNED to APPLIED in place.
74+
def test_apply_button_morphs_card_to_applied_state(self, render_scenario):
75+
"""Click-through: Confirm fires the apply call, the on_success
76+
chain flips ``state.applied=True``, and the footer/badge row
77+
morphs to its applied state.
78+
79+
The pinned-here behavior is the conservative apply UX — the row
80+
cells stay PLANNED because ``$result.actions`` doesn't resolve
81+
in the on_success Rx context (see in-code comment in
82+
``build_modification_preview_ui``). A live-tick UX where rows
83+
transition PLANNED → APPLIED in place is tracked as a follow-up
84+
once the right Rx path is identified.
7885
7986
The stub ``modify_manufacturing_order`` tool in
80-
``render_test_server.py`` returns a canned applied response.
81-
82-
Waits on the observable post-state condition (12 APPLIED cells
83-
present) rather than a fixed timeout — fast on warm CI, robust
84-
on slow CI, deterministic-fail if the live-tick path breaks.
87+
``render_test_server.py`` matches production wire shape via
88+
``make_tool_result``, so this test fails the same way production
89+
would if the apply path regresses.
8590
"""
8691
frame = render_scenario("modify_mo_12_actions_preview")
8792

88-
# Pre-state: 12 PLANNED, 0 APPLIED.
93+
# Pre-state: 12 PLANNED, 0 APPLIED, "Applying..." badge absent,
94+
# "Applied" footer pill absent.
8995
assert frame.locator("td").filter(has_text="PLANNED").count() == 12
90-
assert frame.locator("td").filter(has_text="APPLIED").count() == 0
96+
assert frame.locator("text=Applied").count() == 0
9197

9298
# Fire the Confirm button.
9399
frame.locator("button").filter(has_text="Confirm").first.click()
94100

95-
# Wait for the live-tick to land — first APPLIED cell to become
96-
# visible. ``wait_for`` polls with a 30s default timeout, so this
97-
# completes ~immediately when the apply round-trip succeeds and
101+
# Wait for the applied-state morph: the "Applied" pill in the
102+
# button row appears once on_success fires. ``wait_for`` polls
103+
# with a 30s timeout — completes fast when the apply succeeds,
98104
# fails deterministically if the on_success chain is broken.
99-
frame.locator("td").filter(has_text="APPLIED").first.wait_for(
100-
state="visible", timeout=30000
101-
)
105+
frame.locator("text=Applied").first.wait_for(state="visible", timeout=30000)
102106

103-
# Post-state: 0 PLANNED, 12 APPLIED.
104-
assert frame.locator("td").filter(has_text="PLANNED").count() == 0, (
105-
"Live-tick failed — Confirm click did not push RESULT.actions "
106-
"into state.plan_actions, so rows stayed PLANNED. Check the "
107-
"on_success chain in _build_apply_action_direct."
108-
)
109-
assert frame.locator("td").filter(has_text="APPLIED").count() == 12
107+
# The Confirm button is now disabled (state.applied gates it via
108+
# the ``locked`` Rx in ``_render_apply_button_row``).
109+
confirm = frame.locator("button").filter(has_text="Confirm").first
110+
assert confirm.is_disabled()

katana_mcp_server/tests/test_prefab_ui.py

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2002,10 +2002,17 @@ def test_twelve_action_mixed_plan_renders_single_state_bound_table(self):
20022002
assert r["target_label"].startswith("#")
20032003
assert r["summary"] == "deleted"
20042004

2005-
def test_apply_button_pushes_result_actions_into_state(self):
2006-
"""The Confirm button's CallTool on_success chain must SetState
2007-
``plan_actions`` to ``$result.actions`` so each row's status_label
2008-
ticks live from PLANNED → APPLIED/FAILED on apply.
2005+
def test_apply_button_does_not_set_state_plan_actions(self):
2006+
"""Pin: the live-tick design (``SetState("plan_actions", RESULT.actions)``)
2007+
was attempted in #634 but turned out to be broken — ``$result`` in
2008+
the on_success Rx context resolves to the apply tool's
2009+
``structured_content`` (a PrefabApp wire envelope), not to the raw
2010+
``ModificationResponse``. The SetState was a no-op in production.
2011+
2012+
Until the right Rx path is identified (tracked as a follow-up), the
2013+
on_success chain MUST NOT include a SetState targeting plan_actions
2014+
— a no-op SetState is misleading. The apply path morphs the card via
2015+
the existing ``applied=True`` flag instead.
20092016
"""
20102017
response = _modification_preview_response(
20112018
actions=[
@@ -2027,9 +2034,6 @@ def test_apply_button_pushes_result_actions_into_state(self):
20272034
)
20282035
envelope = app.to_json()
20292036

2030-
# Find the CallTool action and inspect its on_success chain.
2031-
# toolCall actions are nested under Button.on_click (not Component
2032-
# children), so search the full envelope tree.
20332037
def find_action(o: object, action_name: str) -> dict[str, Any] | None:
20342038
if isinstance(o, dict):
20352039
if o.get("action") == action_name:
@@ -2047,9 +2051,7 @@ def find_action(o: object, action_name: str) -> dict[str, Any] | None:
20472051

20482052
call_tool = find_action(envelope, "toolCall")
20492053
assert call_tool is not None
2050-
# The wire field is camelCase via Pydantic alias="onSuccess".
20512054
on_success = call_tool.get("onSuccess") or call_tool.get("on_success") or []
2052-
# Must include a setState targeting plan_actions with $result.actions.
20532055
plan_action_set = next(
20542056
(
20552057
a
@@ -2060,15 +2062,10 @@ def find_action(o: object, action_name: str) -> dict[str, Any] | None:
20602062
),
20612063
None,
20622064
)
2063-
assert plan_action_set is not None, (
2064-
f"on_success must SetState('plan_actions', RESULT.actions) for "
2065-
f"live-tick; got {on_success!r}"
2066-
)
2067-
# The value should be an Rx reference resolving to $result.actions.
2068-
# Rx serializes via str() to "{{ key }}".
2069-
value = plan_action_set.get("value")
2070-
assert isinstance(value, str) and "$result.actions" in value, (
2071-
f"plan_actions value must reference $result.actions; got {value!r}"
2065+
assert plan_action_set is None, (
2066+
f"on_success must NOT SetState('plan_actions', ...) — it would "
2067+
f"be a no-op until the live-tick Rx path is fixed. Found: "
2068+
f"{plan_action_set!r}"
20722069
)
20732070

20742071

0 commit comments

Comments
 (0)