feat: agentic harness patterns - dream consolidation, scoped context, compaction

josephgoksu · josephgoksu · commit 22aa249f4841 · 2026-04-06T11:31:33.000+01:00
- Dream Consolidation: session-end hook extracts knowledge from completed
  tasks via LLM, stores with source_agent="dream" and confidence 0.6
- Scoped Context Assembly: task enricher now uses task scope for targeted
  retrieval ("auth" -&gt; "auth jwt cookies patterns" instead of generic query)
- Progressive Context Compaction: compact_summary field on nodes, used by
  FormatCompact() when available (falls back to truncation)
- Command Risk Classification: ADR documenting T0-T3 risk tiers for future
  destructive MCP tools
diff --git a/cmd/hook.go b/cmd/hook.go
@@ -12,6 +12,8 @@ import (
 	"strings"
 	"time"
 
+	"github.com/cloudwego/eino/schema"
+	"github.com/josephgoksu/TaskWing/internal/agents/core"
 	"github.com/josephgoksu/TaskWing/internal/config"
 	"github.com/josephgoksu/TaskWing/internal/knowledge"
 	"github.com/josephgoksu/TaskWing/internal/llm"
@@ -475,6 +477,11 @@ Tasks Completed: %d
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 `, session.SessionID, int(elapsed.Minutes()), session.TasksCompleted)
 
+	// Dream Consolidation: extract knowledge from completed tasks
+	if session.TasksCompleted > 0 {
+		dreamConsolidate(session)
+	}
+
 	// Remove session file
 	sessionPath, err := getHookSessionPath()
 	if err == nil {
@@ -484,6 +491,115 @@ Tasks Completed: %d
 	return nil
 }
 
+// dreamConsolidate extracts architectural knowledge from completed tasks
+// and writes it to the knowledge graph with source_agent="dream".
+func dreamConsolidate(session *HookSession) {
+	repo, err := openRepo()
+	if err != nil {
+		return
+	}
+	defer func() { _ = repo.Close() }()
+
+	// Get completed tasks from the active plan
+	plan, err := repo.GetActivePlan()
+	if err != nil || plan == nil {
+		return
+	}
+
+	// Collect completed task summaries
+	var taskSummaries []string
+	for _, t := range plan.Tasks {
+		if t.Status == task.StatusCompleted && t.CompletionSummary != "" {
+			taskSummaries = append(taskSummaries, fmt.Sprintf("- %s: %s", t.Title, t.CompletionSummary))
+		}
+	}
+	if len(taskSummaries) == 0 {
+		return
+	}
+
+	// Get LLM config - use fast model for cheap background work
+	llmCfg, err := config.LoadLLMConfig()
+	if err != nil {
+		return
+	}
+	if llmCfg.APIKey == "" {
+		return
+	}
+	fastModel := llm.GetRecommendedModelForRole(string(llmCfg.Provider), llm.RoleQuery)
+	if fastModel != nil {
+		llmCfg.Model = fastModel.ID
+	}
+
+	// Generate findings via LLM
+	prompt := fmt.Sprintf(`You completed these tasks in a development session:
+
+%s
+
+Extract any NEW architectural decisions, patterns, or constraints that were established or discovered during this work. Only include items that would be valuable for future sessions.
+
+Respond in JSON:
+{"findings": [{"type": "decision|pattern|constraint", "title": "...", "description": "..."}]}
+
+If nothing notable was established, respond with: {"findings": []}`, strings.Join(taskSummaries, "\n"))
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	chatModel, err := llm.NewCloseableChatModel(ctx, llmCfg)
+	if err != nil {
+		return
+	}
+	defer func() { _ = chatModel.Close() }()
+
+	resp, err := chatModel.Generate(ctx, []*schema.Message{schema.UserMessage(prompt)})
+	if err != nil {
+		return
+	}
+
+	// Parse findings
+	type dreamFinding struct {
+		Type        string `json:"type"`
+		Title       string `json:"title"`
+		Description string `json:"description"`
+	}
+	type dreamResponse struct {
+		Findings []dreamFinding `json:"findings"`
+	}
+
+	parsed, err := core.ParseJSONResponse[dreamResponse](resp.Content)
+	if err != nil || len(parsed.Findings) == 0 {
+		return
+	}
+
+	// Convert to core.Finding and ingest
+	var findings []core.Finding
+	for _, f := range parsed.Findings {
+		findingType := core.FindingTypeDecision
+		switch f.Type {
+		case "pattern":
+			findingType = core.FindingTypePattern
+		case "constraint":
+			findingType = core.FindingTypeConstraint
+		}
+		findings = append(findings, core.Finding{
+			Type:            findingType,
+			Title:           f.Title,
+			Description:     f.Description,
+			ConfidenceScore: 0.6,
+			SourceAgent:     "dream",
+		})
+	}
+
+	ks := knowledge.NewService(repo, llmCfg)
+	memoryPath, _ := config.GetMemoryBasePath()
+	if memoryPath != "" {
+		ks.SetBasePath(filepath.Dir(filepath.Dir(memoryPath)))
+	}
+	_ = ks.IngestFindings(ctx, findings, nil, false)
+
+	fmt.Printf("  Dream: extracted %d knowledge items from session\n", len(findings))
+}
+
 // Session persistence helpers
 
 func getHookSessionPath() (string, error) {
diff --git a/docs/architecture/ADR_COMMAND_RISK_CLASSIFICATION.md b/docs/architecture/ADR_COMMAND_RISK_CLASSIFICATION.md
@@ -0,0 +1,47 @@
+# ADR: Command Risk Classification for MCP Tools
+
+## Status
+Proposed (documentation only - no runtime implementation yet)
+
+## Context
+TaskWing exposes MCP tools (ask, task, plan, code, debug, remember) to AI assistants via stdio transport. Currently all tools are available immediately with no risk gating. As TaskWing adds more capable tools (file writes, git operations, node deletion), a classification scheme is needed to prevent destructive actions without explicit user approval.
+
+## Risk Tiers
+
+| Tier | Label | Behavior | Examples |
+|---|---|---|---|
+| **T0** | Safe | Auto-execute, no confirmation | `ask`, `code` (read-only queries) |
+| **T1** | Write | Execute with audit trail | `remember`, `task complete` (writes to local SQLite) |
+| **T2** | Risky | Require explicit user confirmation | Future: `delete-node`, `rewrite-file`, `git commit` |
+| **T3** | Destructive | Block unless plan-approved + user confirmed | Future: `clear-knowledge`, `git push --force`, `rm -rf` |
+
+## Decision
+When destructive tools (T2/T3) are added to the MCP surface:
+
+1. Each MCP tool handler must declare its risk tier
+2. The MCP handler chain checks the tier before execution
+3. T2 tools prompt for confirmation via the MCP response (tool returns a confirmation request instead of executing)
+4. T3 tools require both an active approved plan AND explicit user confirmation
+5. The existing OPA policy engine (`internal/policy/`) can evaluate T2/T3 tool calls against project policies
+
+## Gating Rules
+
+T2/T3 tools must satisfy these gates (consistent with the Workflow Contract v1):
+
+- **Plan gate**: A clarified and approved plan must be active
+- **Task gate**: The tool call must be relevant to the current in-progress task
+- **Evidence gate**: For T3, prior root-cause evidence must exist before destructive action
+- **Confirmation gate**: User must explicitly approve (not just "auto" or "skip")
+
+## Current State
+- All current MCP tools are T0 (read-only) or T1 (local SQLite writes)
+- No T2/T3 tools exist yet
+- The OPA policy engine is built but only runs during task completion, not per-tool-call
+- When T2/T3 tools are introduced, wire `policy.NewPolicyEvaluatorAdapter()` into the MCP handler chain
+
+## Implementation Notes (for future reference)
+- Add a `RiskTier` field to the MCP tool registration in `internal/mcp/handlers.go`
+- Check tier in the handler dispatch before calling the tool implementation
+- For T2: return a structured confirmation request in the MCP response
+- For T3: check `policy.Engine.Evaluate()` with the tool call context
+- Log all T1+ tool executions to the session audit trail
diff --git a/internal/app/plan.go b/internal/app/plan.go
@@ -116,7 +116,7 @@ type TaskPlanner interface {
 // TaskContextEnricher executes ask queries and returns aggregated context for a task.
 // This is used during task creation to populate ContextSummary (early binding).
 // See docs/architecture/ADR_CONTEXT_BINDING.md for the full context binding design.
-type TaskContextEnricher func(ctx context.Context, queries []string) (string, error)
+type TaskContextEnricher func(ctx context.Context, queries []string, scope string) (string, error)
 
 const (
 	defaultClarifyMaxRounds            = 5
@@ -167,17 +167,21 @@ func (a *PlanApp) retrieveContext(ctx context.Context, ks *knowledge.Service, go
 }
 
 // defaultTaskEnricher uses GetProjectContext with compact options to enrich tasks.
-func (a *PlanApp) defaultTaskEnricher(ctx context.Context, queries []string) (string, error) {
+func (a *PlanApp) defaultTaskEnricher(ctx context.Context, queries []string, scope string) (string, error) {
 	if a.ctx == nil || a.ctx.Repo == nil {
 		return "", nil
 	}
 
 	ks := knowledge.NewService(a.ctx.Repo, a.ctx.LLMCfg)
 
-	// Use the task's specific queries as the search query, or fall back to baseline
-	query := "project constraints and key technology decisions"
+	// Build scope-aware query: prefer task queries, fall back to scope-based, then generic
+	var query string
 	if len(queries) > 0 {
 		query = strings.Join(queries, " ")
+	} else if scope != "" {
+		query = scope + " patterns constraints decisions"
+	} else {
+		query = "project constraints and key technology decisions"
 	}
 
 	modelID := a.ctx.LLMCfg.Model
@@ -1002,8 +1006,8 @@ func (a *PlanApp) parseTasksFromMetadata(ctx context.Context, metadata map[strin
 			t.EnrichAIFields()
 
 			// Populate ContextSummary by executing ask queries
-			if a.TaskEnricher != nil && len(t.SuggestedAskQueries) > 0 {
-				if contextSummary, err := a.TaskEnricher(ctx, t.SuggestedAskQueries); err == nil && contextSummary != "" {
+			if a.TaskEnricher != nil && (len(t.SuggestedAskQueries) > 0 || t.Scope != "") {
+				if contextSummary, err := a.TaskEnricher(ctx, t.SuggestedAskQueries, t.Scope); err == nil && contextSummary != "" {
 					t.ContextSummary = contextSummary
 				}
 			}
@@ -1096,8 +1100,8 @@ func (a *PlanApp) parseTasksFromMetadata(ctx context.Context, metadata map[strin
 				newTask.EnrichAIFields()
 
 				// Populate ContextSummary by executing ask queries
-				if a.TaskEnricher != nil && len(newTask.SuggestedAskQueries) > 0 {
-					if contextSummary, err := a.TaskEnricher(ctx, newTask.SuggestedAskQueries); err == nil && contextSummary != "" {
+				if a.TaskEnricher != nil && (len(newTask.SuggestedAskQueries) > 0 || newTask.Scope != "") {
+					if contextSummary, err := a.TaskEnricher(ctx, newTask.SuggestedAskQueries, newTask.Scope); err == nil && contextSummary != "" {
 						newTask.ContextSummary = contextSummary
 					}
 				}
diff --git a/internal/knowledge/context.go b/internal/knowledge/context.go
@@ -203,7 +203,10 @@ func (pc *ProjectContext) FormatCompact(modelID ...string) string {
 			if node.Node == nil {
 				continue
 			}
-			content := utils.Truncate(node.Node.Text(), nodeChars)
+			content := node.Node.CompactSummary
+			if content == "" {
+				content = utils.Truncate(node.Node.Text(), nodeChars)
+			}
 			sb.WriteString(fmt.Sprintf("- **%s** (%s): %s\n", node.Node.Summary, node.Node.Type, content))
 		}
 	}
diff --git a/internal/memory/models.go b/internal/memory/models.go
@@ -61,6 +61,10 @@ type Node struct {
 
 	// RefactorHint provides guidance on how to eliminate this debt
 	RefactorHint string `json:"refactorHint,omitempty"`
+
+	// CompactSummary is an LLM-generated dense summary for context packing.
+	// Populated during bootstrap ingestion. Used by FormatCompact() instead of truncation.
+	CompactSummary string `json:"compactSummary,omitempty"`
 }
 
 // DebtLevel returns human-readable debt classification for a node.
diff --git a/internal/memory/sqlite.go b/internal/memory/sqlite.go
@@ -569,6 +569,7 @@ func (s *SQLiteStore) initSchema() error {
 		// 'root' = global knowledge at repo root, service names (e.g., 'osprey', 'studio') for scoped knowledge
 		{"workspace", "ALTER TABLE nodes ADD COLUMN workspace TEXT DEFAULT 'root'"},
 		{"stale_count", "ALTER TABLE nodes ADD COLUMN stale_count INTEGER DEFAULT 0"},
+		{"compact_summary", "ALTER TABLE nodes ADD COLUMN compact_summary TEXT DEFAULT ''"},
 	}
 
 	for _, m := range migrations {

Original file line number	Diff line number	Diff line change
`@@ -203,7 +203,10 @@ func (pc *ProjectContext) FormatCompact(modelID ...string) string {`
`203`	`203`	`if node.Node == nil {`
`204`	`204`	`continue`
`205`	`205`	`}`
`206`		`- content := utils.Truncate(node.Node.Text(), nodeChars)`
	`206`	`+ content := node.Node.CompactSummary`
	`207`	`+ if content == "" {`
	`208`	`+ content = utils.Truncate(node.Node.Text(), nodeChars)`
	`209`	`+ }`
`207`	`210`	`sb.WriteString(fmt.Sprintf("- %s (%s): %s\n", node.Node.Summary, node.Node.Type, content))`
`208`	`211`	`}`
`209`	`212`	`}`
Original file line number	Diff line number	Diff line change
`@@ -61,6 +61,10 @@ type Node struct {`
`61`	`61`
`62`	`62`	`// RefactorHint provides guidance on how to eliminate this debt`
`63`	`63`	RefactorHint string `json:"refactorHint,omitempty"`
	`64`	`+`
	`65`	`+ // CompactSummary is an LLM-generated dense summary for context packing.`
	`66`	`+ // Populated during bootstrap ingestion. Used by FormatCompact() instead of truncation.`
	`67`	+ CompactSummary string `json:"compactSummary,omitempty"`
`64`	`68`	`}`
`65`	`69`
`66`	`70`	`// DebtLevel returns human-readable debt classification for a node.`
Original file line number	Diff line number	Diff line change
`@@ -569,6 +569,7 @@ func (s *SQLiteStore) initSchema() error {`
`569`	`569`	`// 'root' = global knowledge at repo root, service names (e.g., 'osprey', 'studio') for scoped knowledge`
`570`	`570`	`{"workspace", "ALTER TABLE nodes ADD COLUMN workspace TEXT DEFAULT 'root'"},`
`571`	`571`	`{"stale_count", "ALTER TABLE nodes ADD COLUMN stale_count INTEGER DEFAULT 0"},`
	`572`	`+ {"compact_summary", "ALTER TABLE nodes ADD COLUMN compact_summary TEXT DEFAULT ''"},`
`572`	`573`	`}`
`573`	`574`
`574`	`575`	`for _, m := range migrations {`