Skip to content

Commit a572513

Browse files
author
Kun
committed
Add explicit plan artifacts
1 parent f849f7d commit a572513

14 files changed

Lines changed: 487 additions & 7 deletions

File tree

.trellis/spec/backend/task-workflow-contracts.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@ def update_task(
3535
def is_task_ready(store: TaskStore, record: TaskRecord) -> bool: ...
3636
def validate_task_graph(store: TaskStore) -> None: ...
3737
def task_graph_needs_verification(store: TaskStore) -> bool: ...
38+
39+
def create_plan(
40+
store: TaskStore,
41+
*,
42+
title: str,
43+
content: str,
44+
verification: str,
45+
task_ids: list[str] | None = None,
46+
metadata: dict[str, str] | None = None,
47+
) -> PlanArtifact: ...
48+
49+
def get_plan(store: TaskStore, plan_id: str) -> PlanArtifact: ...
3850
```
3951

4052
### 3. Contracts
@@ -53,6 +65,11 @@ def task_graph_needs_verification(store: TaskStore) -> bool: ...
5365
- `task_list` must expose ready state in rendered JSON metadata as `"ready": "true"` or `"false"`.
5466
- Completing a 3+ non-cancelled task graph without a verification task must expose a `verification_nudge` in the returned `task_update` JSON metadata.
5567
- Verification nudge is output metadata only; it must not mutate the stored task record.
68+
- `PlanArtifact` is the durable plan boundary for the implementation workflow.
69+
- `PlanArtifact.verification` is required and must be non-empty.
70+
- `PlanArtifact.task_ids` must reference existing durable tasks.
71+
- Plan artifacts use a separate store namespace from task records.
72+
- `plan_save` and `plan_get` are main-surface tools, but they do not enter TodoWrite state.
5673

5774
### 4. Validation & Error Matrix
5875

@@ -67,6 +84,9 @@ def task_graph_needs_verification(store: TaskStore) -> bool: ...
6784
| 3 completed non-verification tasks | `task_graph_needs_verification(...) is True` |
6885
| graph includes verification task | `task_graph_needs_verification(...) is False` |
6986
| `task_update` closes 3rd non-verification task | output metadata includes `verification_nudge=true` |
87+
| save plan with missing verification | Pydantic validation error |
88+
| save plan with unknown task id | `ValueError("Unknown task dependencies...")` |
89+
| get missing plan | `KeyError("Unknown plan...")` |
7090

7191
### 5. Good / Base / Bad Cases
7292

@@ -100,6 +120,24 @@ update_task(store, task_id=task.id, depends_on=[task.id])
100120

101121
Expected: reject self-dependency.
102122

123+
#### Plan Artifact
124+
125+
```python
126+
task = create_task(store, title="Implement feature")
127+
plan = create_plan(
128+
store,
129+
title="Feature plan",
130+
content="Use the existing task store and tests.",
131+
verification="Run pytest tests/test_tasks.py",
132+
task_ids=[task.id],
133+
)
134+
```
135+
136+
Expected:
137+
- plan has stable id
138+
- verification criteria are non-empty
139+
- referenced task IDs exist
140+
103141
### 6. Tests Required
104142

105143
- `tests/test_tasks.py::test_task_store_transitions_dependencies_and_ready_rule`
@@ -108,6 +146,8 @@ Expected: reject self-dependency.
108146
- `tests/test_tasks.py::test_task_graph_needs_verification_after_closing_three_tasks`
109147
- `tests/test_tasks.py::test_task_graph_with_verification_task_does_not_need_nudge`
110148
- `tests/test_tasks.py::test_task_update_tool_marks_verification_nudge_in_output_metadata`
149+
- `tests/test_tasks.py::test_plan_artifact_roundtrip_requires_verification_and_known_tasks`
150+
- `tests/test_tasks.py::test_plan_tools_save_and_get_artifacts`
111151
- `tests/test_tool_system_registry.py::test_main_projection_preserves_current_product_tool_surface`
112152

113153
### 7. Wrong vs Correct

.trellis/tasks/04-14-redefine-coding-deepgent-final-goal/task.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@
5050
"04-14-stage-16b-latest-valid-compact-view-selection",
5151
"04-14-stage-16c-virtual-pruning-view-metadata",
5252
"04-14-stage-17a-task-graph-readiness-and-transition-invariants",
53-
"04-14-stage-17b-plan-verify-workflow-boundary"
53+
"04-14-stage-17b-plan-verify-workflow-boundary",
54+
"04-15-stage-17c-explicit-plan-artifact-boundary"
5455
],
5556
"parent": null,
5657
"relatedFiles": [],
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# Stage 17C: Explicit Plan Artifact Boundary
2+
3+
## Goal
4+
5+
Add a durable explicit plan artifact boundary that can serve as stable input for later verification workflows, without adding plan-mode UI, coordinator runtime, mailbox, or multi-agent communication.
6+
7+
## Upgraded Function
8+
9+
The workflow system is upgraded from task completion nudges to a store-backed implementation plan artifact.
10+
11+
## Expected Benefit
12+
13+
* Recoverability: plans can be saved and retrieved outside chat history.
14+
* Testability: verification criteria become required structured data.
15+
* Maintainability: future verifier subagents can consume a stable artifact instead of parsing arbitrary prose.
16+
17+
## Out of Scope
18+
19+
* EnterPlanMode / ExitPlanMode tools
20+
* approval UI
21+
* coordinator runtime
22+
* mailbox / SendMessage
23+
* verifier subagent execution
24+
25+
## Requirements
26+
27+
* Add `PlanArtifact`.
28+
* Add `plan_save` and `plan_get`.
29+
* Require non-empty verification criteria.
30+
* Validate referenced `task_ids` exist.
31+
* Store plans in a namespace separate from tasks.
32+
* Register plan tools in the main tool surface and capability registry.
33+
34+
## Acceptance Criteria
35+
36+
* [ ] Plan artifacts roundtrip through store.
37+
* [ ] Plan artifacts reject missing verification criteria.
38+
* [ ] Plan artifacts reject unknown task IDs.
39+
* [ ] `plan_save` / `plan_get` are exposed as main tools.
40+
* [ ] Existing task tools still pass.
41+
* [ ] Focused tests, full tests, ruff, and mypy pass.
42+
43+
## cc-haha Alignment
44+
45+
### Expected effect
46+
47+
Aligning this behavior should improve workflow discipline, testability, and future verifier readiness.
48+
49+
The local runtime effect is: implementation plans become explicit artifacts with verification criteria, matching cc-haha's plan-file / ExitPlanMode principle without copying its UI or approval runtime.
50+
51+
### Source-backed alignment matrix
52+
53+
| Area | cc-haha source behavior | Expected local effect | Local target | Status | Decision |
54+
|---|---|---|---|---|---|
55+
| Plan file | `plans.ts`, plan-mode attachments, and `ExitPlanModeV2Tool` use a persisted plan file as the workflow artifact | local workflow has a stable plan artifact | `PlanArtifact` | partial | Implement store-backed artifact now |
56+
| Verification criteria | plan instructions require a verification section | plan artifact must define how to verify | required `verification` field | align | Implement now |
57+
| Approval UI | ExitPlanMode asks/coordinates approval | user approval flow | none | defer | Out of scope |
58+
59+
## LangChain Architecture
60+
61+
Use:
62+
63+
* strict Pydantic schemas
64+
* LangGraph store namespace
65+
* normal LangChain tools
66+
67+
Avoid:
68+
69+
* prompt-only plan parsing
70+
* UI approval
71+
* coordinator/mailbox runtime
72+
73+
## Checkpoint: Stage 17C
74+
75+
State:
76+
- checkpoint
77+
78+
Verdict:
79+
- APPROVE
80+
81+
Implemented:
82+
- Added `PlanArtifact`, `PlanSaveInput`, and `PlanGetInput`.
83+
- Added plan store helpers:
84+
- `PLAN_ROOT_NAMESPACE`
85+
- `plan_namespace()`
86+
- `create_plan()`
87+
- `get_plan()`
88+
- Added model-visible tools:
89+
- `plan_save`
90+
- `plan_get`
91+
- Registered plan tools in `ToolSystemContainer`.
92+
- Added plan capabilities to `tool_system.capabilities`.
93+
- Updated task workflow executable spec.
94+
95+
Verification:
96+
- `pytest -q tests/test_tasks.py tests/test_tool_system_registry.py tests/test_tool_system_middleware.py tests/test_app.py tests/test_subagents.py`
97+
- `pytest -q`
98+
- `ruff check src/coding_deepgent/tasks/schemas.py src/coding_deepgent/tasks/store.py src/coding_deepgent/tasks/tools.py src/coding_deepgent/tasks/__init__.py src/coding_deepgent/containers/tool_system.py src/coding_deepgent/tool_system/capabilities.py tests/test_tasks.py tests/test_tool_system_registry.py tests/test_tool_system_middleware.py tests/test_app.py`
99+
- `mypy src/coding_deepgent/tasks/schemas.py src/coding_deepgent/tasks/store.py src/coding_deepgent/tasks/tools.py src/coding_deepgent/tasks/__init__.py src/coding_deepgent/containers/tool_system.py src/coding_deepgent/tool_system/capabilities.py`
100+
101+
cc-haha alignment:
102+
- Source files inspected:
103+
- `/root/claude-code-haha/src/utils/plans.ts`
104+
- `/root/claude-code-haha/src/tools/ExitPlanModeTool/ExitPlanModeV2Tool.ts`
105+
- `/root/claude-code-haha/src/utils/attachments.ts`
106+
- `/root/claude-code-haha/src/utils/messages.ts`
107+
- Aligned:
108+
- plan artifact is now explicit and requires verification.
109+
- Deferred:
110+
- plan-mode UI
111+
- approval flow
112+
- coordinator/mailbox runtime
113+
114+
LangChain architecture:
115+
- Primitive used:
116+
- LangChain tools + Pydantic schemas
117+
- LangGraph store
118+
- Why no heavier abstraction:
119+
- 17C only establishes the artifact boundary; runtime approval and verifier execution are separate stages.
120+
121+
Boundary findings:
122+
- New issue handled:
123+
- Storing plans under the task namespace caused `list_tasks()` to read plan artifacts as tasks, because LangGraph store search matches by namespace prefix. Plan artifacts now use a separate `coding_deepgent_plans` root namespace.
124+
- Residual risk:
125+
- plan artifacts are saved/retrieved but not yet consumed by verifier execution.
126+
127+
Decision:
128+
- continue
129+
130+
Reason:
131+
- Tests, ruff, and mypy passed.
132+
- Scope stayed non-UI and LangChain-native.
133+
- No coordinator, mailbox, or multi-agent communication was introduced.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"id": "stage-17c-explicit-plan-artifact-boundary",
3+
"name": "stage-17c-explicit-plan-artifact-boundary",
4+
"title": "Stage 17C: Explicit Plan Artifact Boundary",
5+
"description": "",
6+
"status": "planning",
7+
"dev_type": null,
8+
"scope": null,
9+
"priority": "P2",
10+
"creator": "kun",
11+
"assignee": "kun",
12+
"createdAt": "2026-04-15",
13+
"completedAt": null,
14+
"branch": null,
15+
"base_branch": "codex/stage-12-14-context-compact-foundation",
16+
"worktree_path": null,
17+
"current_phase": 0,
18+
"next_action": [
19+
{
20+
"phase": 1,
21+
"action": "implement"
22+
},
23+
{
24+
"phase": 2,
25+
"action": "check"
26+
},
27+
{
28+
"phase": 3,
29+
"action": "finish"
30+
},
31+
{
32+
"phase": 4,
33+
"action": "create-pr"
34+
}
35+
],
36+
"commit": null,
37+
"pr_url": null,
38+
"subtasks": [],
39+
"children": [],
40+
"parent": "04-14-redefine-coding-deepgent-final-goal",
41+
"relatedFiles": [],
42+
"notes": "",
43+
"meta": {}
44+
}

coding-deepgent/src/coding_deepgent/containers/tool_system.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,14 @@
1111
from coding_deepgent.permissions.rules import PermissionRuleSpec, expand_rule_specs
1212
from coding_deepgent.skills import load_skill
1313
from coding_deepgent.subagents import run_subagent
14-
from coding_deepgent.tasks import task_create, task_get, task_list, task_update
14+
from coding_deepgent.tasks import (
15+
plan_get,
16+
plan_save,
17+
task_create,
18+
task_get,
19+
task_list,
20+
task_update,
21+
)
1522
from coding_deepgent.tool_system import (
1623
ToolCapability,
1724
ToolGuardMiddleware,
@@ -54,7 +61,9 @@ class ToolSystemContainer(containers.DeclarativeContainer):
5461
memory_tools: Any = providers.Dependency(default=providers.Object([save_memory]))
5562
skill_tools: Any = providers.Dependency(default=providers.Object([load_skill]))
5663
task_tools: Any = providers.Dependency(
57-
default=providers.Object([task_create, task_get, task_list, task_update])
64+
default=providers.Object(
65+
[task_create, task_get, task_list, task_update, plan_save, plan_get]
66+
)
5867
)
5968
subagent_tools: Any = providers.Dependency(default=providers.Object([run_subagent]))
6069
extension_capabilities: Any = providers.Dependency(default=providers.Object([]))

coding-deepgent/src/coding_deepgent/tasks/__init__.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from .schemas import (
2+
PlanArtifact,
3+
PlanGetInput,
4+
PlanSaveInput,
25
TaskCreateInput,
36
TaskGetInput,
47
TaskListInput,
@@ -8,29 +11,42 @@
811
)
912
from .store import (
1013
TASK_ROOT_NAMESPACE,
14+
PLAN_ROOT_NAMESPACE,
15+
create_plan,
1116
create_task,
17+
get_plan,
1218
get_task,
1319
is_task_ready,
1420
list_tasks,
21+
plan_namespace,
1522
task_namespace,
1623
task_graph_needs_verification,
1724
update_task,
1825
validate_task_graph,
1926
)
20-
from .tools import task_create, task_get, task_list, task_update
27+
from .tools import plan_get, plan_save, task_create, task_get, task_list, task_update
2128

2229
__all__ = [
30+
"PlanArtifact",
31+
"PlanGetInput",
32+
"PlanSaveInput",
33+
"PLAN_ROOT_NAMESPACE",
2334
"TASK_ROOT_NAMESPACE",
2435
"TaskCreateInput",
2536
"TaskGetInput",
2637
"TaskListInput",
2738
"TaskRecord",
2839
"TaskStatus",
2940
"TaskUpdateInput",
41+
"create_plan",
3042
"create_task",
43+
"get_plan",
3144
"get_task",
3245
"is_task_ready",
3346
"list_tasks",
47+
"plan_get",
48+
"plan_namespace",
49+
"plan_save",
3450
"task_create",
3551
"task_get",
3652
"task_list",

coding-deepgent/src/coding_deepgent/tasks/schemas.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,37 @@ def _has_update(self) -> "TaskUpdateInput":
8585
):
8686
raise ValueError("at least one update field is required")
8787
return self
88+
89+
90+
class PlanArtifact(BaseModel):
    # Stored plan record: an id, three text sections (title, content,
    # verification), optional linked task ids, and string-to-string metadata.
    # A `#` comment is used instead of a class docstring so the generated
    # JSON schema (which would pick up a docstring as `description`) is
    # unchanged.

    # Unknown keys are rejected instead of being silently dropped.
    model_config = ConfigDict(extra="forbid")

    id: str
    title: str = Field(..., min_length=1)
    content: str = Field(..., min_length=1)
    verification: str = Field(..., min_length=1)
    task_ids: list[str] = Field(default_factory=list)
    metadata: dict[str, str] = Field(default_factory=dict)

    @field_validator("id", "title", "content", "verification", mode="before")
    @classmethod
    def _strip_plan_text(cls, value: object) -> object:
        """Trim surrounding whitespace from text fields before validation.

        Runs in ``before`` mode, so the ``min_length=1`` constraints on
        ``title``, ``content`` and ``verification`` are checked against the
        stripped text and whitespace-only input is rejected.

        Only genuine strings are stripped. Any other value is returned
        untouched so that the field's own ``str`` validation decides whether
        it is acceptable. Coercing with ``str(value)`` here would turn
        ``None`` into the non-empty string ``"None"`` and let a missing
        verification section slip past the ``min_length`` check.
        """
        if isinstance(value, str):
            return value.strip()
        return value
104+
105+
106+
class PlanSaveInput(BaseModel):
    # Argument schema for saving a plan: the plan's text sections, optional
    # task links and metadata, plus the tool runtime handle.
    # NOTE(review): `arbitrary_types_allowed` is presumably needed for the
    # `runtime: ToolRuntime` field — confirm against ToolRuntime's definition.
    model_config = ConfigDict(
        extra="forbid",
        arbitrary_types_allowed=True,
    )

    # Required, non-empty text sections (no default supplied => required).
    title: str = Field(min_length=1)
    content: str = Field(min_length=1)
    verification: str = Field(min_length=1)

    # Optional collections; each instance gets its own fresh container.
    task_ids: list[str] = Field(default_factory=list)
    metadata: dict[str, str] = Field(default_factory=dict)

    runtime: ToolRuntime
115+
116+
117+
class PlanGetInput(BaseModel):
    # Argument schema for fetching a plan by id, plus the tool runtime handle.
    # NOTE(review): `arbitrary_types_allowed` is presumably needed for the
    # `runtime: ToolRuntime` field — confirm against ToolRuntime's definition.
    model_config = ConfigDict(
        extra="forbid",
        arbitrary_types_allowed=True,
    )

    # Required and non-empty (no default supplied => required).
    plan_id: str = Field(min_length=1)

    runtime: ToolRuntime

0 commit comments

Comments
 (0)