Add Context Governor for pre-turn compaction and tool spill

Patel230 · Patel230 · commit 5dc77077f772 · 2026-06-03T10:57:17.000+05:30
Wire AutoCompactor on each turn using auto_compact_threshold_pct (default 85%),
always resolve context window (catalog or 128k fallback), and spill large tool
output to .hawk/scratch/. /compact runs the full strategy chain; footer shows
a real window size when the catalog omits context.
diff --git a/cmd/chat_commands_session.go b/cmd/chat_commands_session.go
@@ -1,6 +1,7 @@
 package cmd
 
 import (
+	"context"
 	"fmt"
 	"math/rand"
 	"os"
@@ -65,9 +66,14 @@ func (m *chatModel) handleSessionCommand(cmd string, parts []string, text string
 
 	case "/compact":
 		before := m.session.MessageCount()
-		m.session.SmartCompact()
+		strat, tokBefore, tokAfter, err := m.session.CompactConversation(context.Background())
 		after := m.session.MessageCount()
-		m.messages = append(m.messages, displayMsg{role: "system", content: fmt.Sprintf("Compacted: %d → %d messages (LLM summary)", before, after)})
+		msg := fmt.Sprintf("Compacted (%s): %d → %d messages, ~%dk → ~%dk tokens", strat, before, after, tokBefore/1000, tokAfter/1000)
+		if err != nil {
+			msg = fmt.Sprintf("Compacted with fallback: %d → %d messages", before, after)
+		}
+		m.messages = append(m.messages, displayMsg{role: "system", content: msg})
+		m.invalidateConnStatus()
 		return m, nil
 
 	case "/history":
diff --git a/cmd/chat_status.go b/cmd/chat_status.go
@@ -124,8 +124,15 @@ func (m chatModel) connectionStatusParts() (gateway, model, contextLabel string)
 	}
 
 	model, contextLabel = modelStatusMeta(gw, modelID)
-	if contextLabel == "" || contextLabel == "—" {
-		contextLabel = "0k"
+	if contextLabel == "" || contextLabel == "—" || contextLabel == "0k" {
+		if m.session != nil {
+			if w := m.session.ContextWindowSize(); w > 0 {
+				contextLabel = formatModelTableContext(w)
+			}
+		}
+		if contextLabel == "" || contextLabel == "—" {
+			contextLabel = formatModelTableContext(engine.DefaultContextWindow)
+		}
 	}
 	return gateway, model, contextLabel
 }
diff --git a/cmd/options.go b/cmd/options.go
@@ -285,6 +285,11 @@ func configureSession(sess *engine.Session, settings hawkconfig.Settings, maxTur
 	// Adaptive prompt: learn user preferences from corrections
 	sess.AdaptivePrompt = engine.NewAdaptivePrompt()
 
+	if pct := settings.AutoCompactThresholdPct; pct > 0 {
+		sess.AutoCompactThresholdPct = pct
+	}
+	sess.EnsureAutoCompactor()
+
 	return nil
 }
 
diff --git a/internal/engine/compact.go b/internal/engine/compact.go
@@ -22,8 +22,9 @@ func (s *Session) ShouldAutoCompact() bool {
 	for _, msg := range s.messages {
 		totalTokens += tok.EstimateTokens(msg.Content)
 	}
-	// Compact if approaching 80% of typical context window (128K tokens)
-	return totalTokens > 100000
+	window := s.ContextWindowSize()
+	threshold := window * s.compactThresholdPct() / 100
+	return totalTokens > threshold
 }
 
 // AutoCompactIfNeeded runs compaction when the conversation exceeds the threshold.
diff --git a/internal/engine/compact_auto.go b/internal/engine/compact_auto.go
@@ -25,6 +25,17 @@ func NewAutoCompactor(config CompactConfig) *AutoCompactor {
 	}
 }
 
+// Configure swaps in config and rebuilds the strategy registry from it while holding ac.mu; a nil receiver is a no-op.
+func (ac *AutoCompactor) Configure(config CompactConfig) {
+	if ac == nil {
+		return
+	}
+	ac.mu.Lock()
+	defer ac.mu.Unlock()
+	ac.config = config
+	ac.registry = NewStrategyRegistry(config)
+}
+
 // GetAutoCompactThreshold returns the token count at which auto-compaction triggers.
 func (ac *AutoCompactor) GetAutoCompactThreshold() int {
 	return ac.config.ContextWindowSize - ac.config.AutoCompactBuffer - ac.config.MaxOutputTokens
diff --git a/internal/engine/context_governor.go b/internal/engine/context_governor.go
@@ -0,0 +1,141 @@
+package engine
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/GrayCodeAI/hawk/internal/engine/ctxmgr"
+	modelPkg "github.com/GrayCodeAI/hawk/internal/provider/routing"
+)
+
+const (
+	// DefaultContextWindow is used when the model catalog has no context size.
+	DefaultContextWindow = 128_000
+	// DefaultAutoCompactThresholdPct matches Grok CLI default (85% of window).
+	DefaultAutoCompactThresholdPct = 85
+)
+
+// ResolveModelContextWindow returns override when it is positive, else the catalog ContextSize for model when positive, else DefaultContextWindow.
+func ResolveModelContextWindow(model string, override int) int {
+	if override > 0 {
+		return override
+	}
+	if info, ok := modelPkg.Find(model); ok && info.ContextSize > 0 {
+		return info.ContextSize
+	}
+	return DefaultContextWindow
+}
+
+// ContextWindowSize resolves the window via ResolveModelContextWindow with ContextWindowCached as the override; a nil session yields DefaultContextWindow.
+func (s *Session) ContextWindowSize() int {
+	if s == nil {
+		return DefaultContextWindow
+	}
+	return ResolveModelContextWindow(s.model, s.ContextWindowCached)
+}
+
+// EnsureAutoCompactor attaches an AutoCompactor built from compactConfig, or reconfigures the one already attached.
+func (s *Session) EnsureAutoCompactor() {
+	if s == nil {
+		return // no session, nothing to configure
+	}
+	if cfg := s.compactConfig(); s.AutoCompactor == nil {
+		s.AutoCompactor = NewAutoCompactor(cfg)
+	} else {
+		s.AutoCompactor.Configure(cfg)
+	}
+}
+
+func (s *Session) compactThresholdPct() int {
+	pct := s.AutoCompactThresholdPct
+	if pct <= 0 {
+		pct = DefaultAutoCompactThresholdPct // unset: use the package default
+	}
+	switch {
+	case pct < 50: // floor
+		return 50
+	case pct > 95: // ceiling
+		return 95
+	}
+	return pct
+}
+
+func (s *Session) compactConfig() CompactConfig {
+	size := s.ContextWindowSize()
+	// Tokens held back so the window minus the buffer equals the configured percentage of the window.
+	reserve := size - size*s.compactThresholdPct()/100
+	if reserve < 0 {
+		reserve = 0
+	}
+	out := DefaultCompactConfig()
+	out.AutoEnabled = true
+	out.ContextWindowSize = size
+	out.MaxOutputTokens = 0 // the buffer alone determines the trigger point
+	out.AutoCompactBuffer = reserve
+	return out
+}
+
+// refreshContextWindowCache zeroes ContextWindowCached, refills it from the catalog entry for s.model when that has a positive size, then re-syncs the AutoCompactor.
+func (s *Session) refreshContextWindowCache() {
+	if s == nil {
+		return
+	}
+	s.ContextWindowCached = 0
+	if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
+		s.ContextWindowCached = info.ContextSize
+	}
+	s.EnsureAutoCompactor()
+}
+
+// ManageContextBeforeTurn collapses repeated messages, then tries in order: the AutoCompactor, the maxContextMessages cap, and the ctxmgr context budget (the last two run smartCompact).
+// It returns the label of the step that compacted (the AutoCompactor's strategy, "smart_message_cap", or "smart_budget") and true, or "" and false when none fired; collapsing alone is not reported.
+func (s *Session) ManageContextBeforeTurn(ctx context.Context) (strategy string, compacted bool) {
+	if s == nil {
+		return "", false
+	}
+	s.messages = ctxmgr.CollapseRepeatedMessages(s.messages)
+
+	s.EnsureAutoCompactor()
+	if strat, ok := s.AutoCompactor.AutoCompactIfNeeded(ctx, s); ok {
+		return strat, true
+	}
+
+	if len(s.messages) > maxContextMessages {
+		s.smartCompact()
+		return "smart_message_cap", true
+	}
+
+	convTokens := EstimateTokens(s.messages)
+	window := s.ContextWindowSize()
+	budget := ctxmgr.NewContextBudget(window)
+	if budget.ShouldCompact(convTokens) {
+		s.smartCompact()
+		return "smart_budget", true
+	}
+
+	return "", false
+}
+
+// CompactConversation runs compaction immediately (for /compact) via RunCompaction; on failure it falls back to smartCompact, reports strategy "smart_fallback", and returns the original error with valid token counts.
+func (s *Session) CompactConversation(ctx context.Context) (strategy string, tokensBefore, tokensAfter int, err error) {
+	if s == nil {
+		return "", 0, 0, fmt.Errorf("no session")
+	}
+	s.messages = ctxmgr.CollapseRepeatedMessages(s.messages)
+	s.EnsureAutoCompactor()
+	tokensBefore = EstimateTokens(s.messages)
+	strategy, err = s.AutoCompactor.RunCompaction(ctx, s)
+	if err != nil {
+		s.smartCompact() // best effort: still shrink the conversation
+		strategy = "smart_fallback"
+	}
+	tokensAfter = EstimateTokens(s.messages)
+	return strategy, tokensBefore, tokensAfter, err // non-nil err signals that the fallback path ran
+}
+
+// ShouldCompactByBudget reports whether the estimated conversation tokens have reached (>=) the configured percentage of the context window.
+func (s *Session) ShouldCompactByBudget() bool {
+	limit := s.ContextWindowSize() * s.compactThresholdPct() / 100
+	used := EstimateTokens(s.messages)
+	return used >= limit
+}
diff --git a/internal/engine/context_governor_test.go b/internal/engine/context_governor_test.go
@@ -0,0 +1,60 @@
+package engine
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"github.com/GrayCodeAI/hawk/internal/types"
+)
+
+func TestResolveModelContextWindow_Fallback(t *testing.T) {
+	if got := ResolveModelContextWindow("unknown-model-xyz", 0); got != DefaultContextWindow { // no override; presumably absent from the catalog
+		t.Fatalf("expected default %d, got %d", DefaultContextWindow, got)
+	}
+	if got := ResolveModelContextWindow("any", 64_000); got != 64_000 { // a positive override is returned before any catalog lookup
+		t.Fatalf("expected override 64000, got %d", got)
+	}
+}
+
+func TestSession_compactConfig_ThresholdPct(t *testing.T) {
+	s := NewSession("", "test-model", "sys", nil)
+	s.AutoCompactThresholdPct = 85
+	s.ContextWindowCached = 100_000 // acts as the window override
+	cfg := s.compactConfig()
+	want := 85_000 // trigger point is 85% of the 100k window; the remaining 15k is the buffer
+	got := cfg.ContextWindowSize - cfg.AutoCompactBuffer - cfg.MaxOutputTokens
+	if got != want {
+		t.Fatalf("threshold tokens = %d, want %d", got, want)
+	}
+}
+
+func TestMaybeSpillToolOutput_SmallUnchanged(t *testing.T) {
+	in := "hello" // short payload; the test expects it back untouched
+	if got := maybeSpillToolOutput(in, "Read", "id1"); got != in {
+		t.Fatalf("expected unchanged small output, got %q", got)
+	}
+}
+
+func TestMaybeSpillToolOutput_LargeSpills(t *testing.T) {
+	in := strings.Repeat("x", toolOutputSpillMinChars+100) // just over the spill threshold
+	got := maybeSpillToolOutput(in, "Bash", "call-1")
+	if !strings.Contains(got, ".hawk/scratch/") {
+		t.Fatalf("expected spill path in output, got %.200q", got) // precision truncates safely; got[:200] would panic on short output
+	}
+	if strings.Contains(got, strings.Repeat("x", toolOutputSpillMinChars)) {
+		t.Fatal("expected full payload not inlined after spill")
+	}
+}
+
+func TestManageContextBeforeTurn_CollapseOnly(t *testing.T) {
+	s := NewSession("", "test-model", "sys", nil)
+	s.messages = []types.EyrieMessage{ // two identical error results; presumably candidates for CollapseRepeatedMessages
+		{Role: "user", ToolResults: []types.ToolResult{{Content: "err", IsError: true}}},
+		{Role: "user", ToolResults: []types.ToolResult{{Content: "err", IsError: true}}},
+	}
+	_, compacted := s.ManageContextBeforeTurn(context.Background())
+	if compacted { // collapsing alone must not be reported as compaction
+		t.Fatal("expected no compaction for tiny history")
+	}
+}
diff --git a/internal/engine/session.go b/internal/engine/session.go
@@ -78,6 +78,8 @@ type Session struct {
 
 	PinnedMessages          int  // messages to protect from compaction (from /pin)
 	AutoCompactThresholdPct int  // token % to trigger auto-compact (default 85)
+	ContextWindowCached     int  // catalog context window; 0 → governor default
+	AutoCompactor           *AutoCompactor
 	Verbose                 bool // show tool calls, timing, token counts in output
 
 	// Cost optimization
@@ -154,6 +156,8 @@ func NewSessionWithClient(chat ChatClient, provider, model, systemPrompt string,
 	}
 	s.Cost.Model = model
 	s.Router = modelPkg.NewRouter(modelPkg.StrategyBalanced)
+	s.AutoCompactThresholdPct = DefaultAutoCompactThresholdPct
+	s.refreshContextWindowCache()
 
 	// Initialize agents accumulator for .hawk/agents.md
 	cwd, _ := os.Getwd()
@@ -200,6 +204,7 @@ func (s *Session) SetModel(model string) {
 	s.model = strings.TrimSpace(model)
 	s.Cost.Model = s.model
 	s.syncCascadeDefaultModel()
+	s.refreshContextWindowCache()
 }
 
 // syncCascadeDefaultModel keeps the cascade router aligned after /config model picks.
diff --git a/internal/engine/stream.go b/internal/engine/stream.go
@@ -10,12 +10,10 @@ import (
 	"github.com/GrayCodeAI/hawk/internal/types"
 
 	"github.com/GrayCodeAI/hawk/internal/engine/branching"
-	"github.com/GrayCodeAI/hawk/internal/engine/ctxmgr"
 	"github.com/GrayCodeAI/hawk/internal/engine/lifecycle"
 	"github.com/GrayCodeAI/hawk/internal/hooks"
 	analytics "github.com/GrayCodeAI/hawk/internal/observability"
 	"github.com/GrayCodeAI/hawk/internal/observability/oteltrace"
-	modelPkg "github.com/GrayCodeAI/hawk/internal/provider/routing"
 	"github.com/GrayCodeAI/hawk/internal/resilience/retry"
 )
 
@@ -171,21 +169,12 @@ func (s *Session) agentLoop(ctx context.Context, ch chan<- StreamEvent) {
 		if s.Beliefs != nil && s.Beliefs.Size() > 0 {
 			s.Beliefs.Prune(turnCount)
 		}
-		// Auto-compact if conversation is too long (message count)
-		if len(s.messages) > maxContextMessages {
-			s.messages = ctxmgr.CollapseRepeatedMessages(s.messages)
-			if len(s.messages) > maxContextMessages {
-				s.smartCompact()
-			}
-		}
-
-		// Auto-compact if token usage exceeds context budget allocation
-		convTokens := EstimateTokens(s.messages)
-		if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
-			budget := ctxmgr.NewContextBudget(info.ContextSize)
-			if budget.ShouldCompact(convTokens) {
-				s.smartCompact()
-			}
+		// Context governor: collapse → micro/smart/truncate (settings threshold %).
+		if strat, didCompact := s.ManageContextBeforeTurn(ctx); didCompact {
+			s.log.Info("context compacted", map[string]interface{}{
+				"strategy": strat,
+				"messages": len(s.messages),
+			})
 		}
 
 		// Integration pipeline: pre-query (intent, tools, budget, injection scan, cache)
@@ -234,10 +223,7 @@ func (s *Session) agentLoop(ctx context.Context, ch chan<- StreamEvent) {
 
 		// Dynamic max_tokens based on task type and recent tool patterns
 		taskType := classifyPromptForBudget(s.messages)
-		contextSize := 200000
-		if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
-			contextSize = info.ContextSize
-		}
+		contextSize := s.ContextWindowSize()
 		maxTok := DynamicMaxTokens(s.messages, contextSize, taskType)
 
 		// Model cascade: select optimal model for this request
diff --git a/internal/engine/stream_tool_exec.go b/internal/engine/stream_tool_exec.go
@@ -15,7 +15,6 @@ import (
 	hooks "github.com/GrayCodeAI/hawk/internal/hooks"
 	"github.com/GrayCodeAI/hawk/internal/observability/oteltrace"
 	"github.com/GrayCodeAI/hawk/internal/prompts"
-	modelPkg "github.com/GrayCodeAI/hawk/internal/provider/routing"
 )
 
 // toolExecResult holds the output of a single tool execution.
@@ -326,8 +325,8 @@ func (s *Session) executeSingleTool(ctx context.Context, tc types.ToolCall, ch c
 	}
 
 	maxChars := 50000
-	if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
-		dynamic := info.ContextSize * 20 / 100 * 4
+	if window := s.ContextWindowSize(); window > 0 {
+		dynamic := window * 20 / 100 * 4
 		if dynamic < 5000 {
 			dynamic = 5000
 		}
@@ -345,6 +344,7 @@ func (s *Session) executeSingleTool(ctx context.Context, tc types.ToolCall, ch c
 	if len(output) > maxChars {
 		output = output[:maxChars] + "\n... (truncated)"
 	}
+	output = maybeSpillToolOutput(output, canonical, tc.ID)
 
 	if s.Pipeline != nil {
 		var execErr error
diff --git a/internal/engine/tool_output_spill.go b/internal/engine/tool_output_spill.go

Original file line number	Diff line number	Diff line change
`@@ -124,8 +124,15 @@ func (m chatModel) connectionStatusParts() (gateway, model, contextLabel string)`
`124`	`124`	`}`
`125`	`125`
`126`	`126`	`model, contextLabel = modelStatusMeta(gw, modelID)`
`127`		`- if contextLabel == "" \|\| contextLabel == "—" {`
`128`		`- contextLabel = "0k"`
	`127`	`+ if contextLabel == "" \|\| contextLabel == "—" \|\| contextLabel == "0k" {`
	`128`	`+ if m.session != nil {`
	`129`	`+ if w := m.session.ContextWindowSize(); w > 0 {`
	`130`	`+ contextLabel = formatModelTableContext(w)`
	`131`	`+ }`
	`132`	`+ }`
	`133`	`+ if contextLabel == "" \|\| contextLabel == "—" {`
	`134`	`+ contextLabel = formatModelTableContext(engine.DefaultContextWindow)`
	`135`	`+ }`
`129`	`136`	`}`
`130`	`137`	`return gateway, model, contextLabel`
`131`	`138`	`}`
Original file line number	Diff line number	Diff line change
`@@ -22,8 +22,9 @@ func (s *Session) ShouldAutoCompact() bool {`
`22`	`22`	`for _, msg := range s.messages {`
`23`	`23`	`totalTokens += tok.EstimateTokens(msg.Content)`
`24`	`24`	`}`
`25`		`- // Compact if approaching 80% of typical context window (128K tokens)`
`26`		`- return totalTokens > 100000`
	`25`	`+ window := s.ContextWindowSize()`
	`26`	`+ threshold := window * s.compactThresholdPct() / 100`
	`27`	`+ return totalTokens > threshold`
`27`	`28`	`}`
`28`	`29`
`29`	`30`	`// AutoCompactIfNeeded runs compaction when the conversation exceeds the threshold.`
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,6 @@ import (`
`15`	`15`	`hooks "github.com/GrayCodeAI/hawk/internal/hooks"`
`16`	`16`	`"github.com/GrayCodeAI/hawk/internal/observability/oteltrace"`
`17`	`17`	`"github.com/GrayCodeAI/hawk/internal/prompts"`
`18`		`- modelPkg "github.com/GrayCodeAI/hawk/internal/provider/routing"`
`19`	`18`	`)`
`20`	`19`
`21`	`20`	`// toolExecResult holds the output of a single tool execution.`
`@@ -326,8 +325,8 @@ func (s *Session) executeSingleTool(ctx context.Context, tc types.ToolCall, ch c`
`326`	`325`	`}`
`327`	`326`
`328`	`327`	`maxChars := 50000`
`329`		`- if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {`
`330`		`- dynamic := info.ContextSize * 20 / 100 * 4`
	`328`	`+ if window := s.ContextWindowSize(); window > 0 {`
	`329`	`+ dynamic := window * 20 / 100 * 4`
`331`	`330`	`if dynamic < 5000 {`
`332`	`331`	`dynamic = 5000`
`333`	`332`	`}`
`@@ -345,6 +344,7 @@ func (s *Session) executeSingleTool(ctx context.Context, tc types.ToolCall, ch c`
`345`	`344`	`if len(output) > maxChars {`
`346`	`345`	`output = output[:maxChars] + "\n... (truncated)"`
`347`	`346`	`}`
	`347`	`+ output = maybeSpillToolOutput(output, canonical, tc.ID)`
`348`	`348`
`349`	`349`	`if s.Pipeline != nil {`
`350`	`350`	`var execErr error`