Skip to content

Commit 5dc7707

Browse files
committed
Add Context Governor for pre-turn compaction and tool spill
Wire AutoCompactor into each turn using auto_compact_threshold_pct (default 85%), always resolve a context window (from the catalog, or a 128k fallback), and spill large tool output to .hawk/scratch/. /compact now runs the full strategy chain, and the footer shows a real window size when the catalog omits the context size.
1 parent 102b225 commit 5dc7707

11 files changed

Lines changed: 317 additions & 30 deletions

cmd/chat_commands_session.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cmd
22

33
import (
4+
"context"
45
"fmt"
56
"math/rand"
67
"os"
@@ -65,9 +66,14 @@ func (m *chatModel) handleSessionCommand(cmd string, parts []string, text string
6566

6667
case "/compact":
6768
before := m.session.MessageCount()
68-
m.session.SmartCompact()
69+
strat, tokBefore, tokAfter, err := m.session.CompactConversation(context.Background())
6970
after := m.session.MessageCount()
70-
m.messages = append(m.messages, displayMsg{role: "system", content: fmt.Sprintf("Compacted: %d → %d messages (LLM summary)", before, after)})
71+
msg := fmt.Sprintf("Compacted (%s): %d → %d messages, ~%dk → ~%dk tokens", strat, before, after, tokBefore/1000, tokAfter/1000)
72+
if err != nil {
73+
msg = fmt.Sprintf("Compacted with fallback: %d → %d messages", before, after)
74+
}
75+
m.messages = append(m.messages, displayMsg{role: "system", content: msg})
76+
m.invalidateConnStatus()
7177
return m, nil
7278

7379
case "/history":

cmd/chat_status.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,15 @@ func (m chatModel) connectionStatusParts() (gateway, model, contextLabel string)
124124
}
125125

126126
model, contextLabel = modelStatusMeta(gw, modelID)
127-
if contextLabel == "" || contextLabel == "—" {
128-
contextLabel = "0k"
127+
if contextLabel == "" || contextLabel == "—" || contextLabel == "0k" {
128+
if m.session != nil {
129+
if w := m.session.ContextWindowSize(); w > 0 {
130+
contextLabel = formatModelTableContext(w)
131+
}
132+
}
133+
if contextLabel == "" || contextLabel == "—" {
134+
contextLabel = formatModelTableContext(engine.DefaultContextWindow)
135+
}
129136
}
130137
return gateway, model, contextLabel
131138
}

cmd/options.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,11 @@ func configureSession(sess *engine.Session, settings hawkconfig.Settings, maxTur
285285
// Adaptive prompt: learn user preferences from corrections
286286
sess.AdaptivePrompt = engine.NewAdaptivePrompt()
287287

288+
if pct := settings.AutoCompactThresholdPct; pct > 0 {
289+
sess.AutoCompactThresholdPct = pct
290+
}
291+
sess.EnsureAutoCompactor()
292+
288293
return nil
289294
}
290295

internal/engine/compact.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ func (s *Session) ShouldAutoCompact() bool {
2222
for _, msg := range s.messages {
2323
totalTokens += tok.EstimateTokens(msg.Content)
2424
}
25-
// Compact if approaching 80% of typical context window (128K tokens)
26-
return totalTokens > 100000
25+
window := s.ContextWindowSize()
26+
threshold := window * s.compactThresholdPct() / 100
27+
return totalTokens > threshold
2728
}
2829

2930
// AutoCompactIfNeeded runs compaction when the conversation exceeds the threshold.

internal/engine/compact_auto.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@ func NewAutoCompactor(config CompactConfig) *AutoCompactor {
2525
}
2626
}
2727

28+
// Configure updates compaction settings and rebuilds the strategy registry.
29+
func (ac *AutoCompactor) Configure(config CompactConfig) {
30+
if ac == nil {
31+
return
32+
}
33+
ac.mu.Lock()
34+
defer ac.mu.Unlock()
35+
ac.config = config
36+
ac.registry = NewStrategyRegistry(config)
37+
}
38+
2839
// GetAutoCompactThreshold returns the token count at which auto-compaction triggers.
2940
func (ac *AutoCompactor) GetAutoCompactThreshold() int {
3041
return ac.config.ContextWindowSize - ac.config.AutoCompactBuffer - ac.config.MaxOutputTokens
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
package engine
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"github.com/GrayCodeAI/hawk/internal/engine/ctxmgr"
8+
modelPkg "github.com/GrayCodeAI/hawk/internal/provider/routing"
9+
)
10+
11+
// DefaultContextWindow is the context window size used when the model
// catalog has no context size for a model.
const DefaultContextWindow = 128_000

// DefaultAutoCompactThresholdPct is the default percentage of the context
// window at which auto-compaction triggers. It matches the Grok CLI default
// (85% of window).
const DefaultAutoCompactThresholdPct = 85
17+
18+
// ResolveModelContextWindow returns the effective context window for a model.
19+
func ResolveModelContextWindow(model string, override int) int {
20+
if override > 0 {
21+
return override
22+
}
23+
if info, ok := modelPkg.Find(model); ok && info.ContextSize > 0 {
24+
return info.ContextSize
25+
}
26+
return DefaultContextWindow
27+
}
28+
29+
// ContextWindowSize returns this session's context window (catalog or default).
30+
func (s *Session) ContextWindowSize() int {
31+
if s == nil {
32+
return DefaultContextWindow
33+
}
34+
return ResolveModelContextWindow(s.model, s.ContextWindowCached)
35+
}
36+
37+
// EnsureAutoCompactor initializes the compaction orchestrator from session settings.
38+
func (s *Session) EnsureAutoCompactor() {
39+
if s == nil {
40+
return
41+
}
42+
if s.AutoCompactor != nil {
43+
s.AutoCompactor.Configure(s.compactConfig())
44+
return
45+
}
46+
s.AutoCompactor = NewAutoCompactor(s.compactConfig())
47+
}
48+
49+
func (s *Session) compactThresholdPct() int {
50+
pct := s.AutoCompactThresholdPct
51+
if pct <= 0 {
52+
pct = DefaultAutoCompactThresholdPct
53+
}
54+
if pct < 50 {
55+
pct = 50
56+
}
57+
if pct > 95 {
58+
pct = 95
59+
}
60+
return pct
61+
}
62+
63+
func (s *Session) compactConfig() CompactConfig {
64+
window := s.ContextWindowSize()
65+
pct := s.compactThresholdPct()
66+
target := window * pct / 100
67+
cfg := DefaultCompactConfig()
68+
cfg.AutoEnabled = true
69+
cfg.ContextWindowSize = window
70+
cfg.MaxOutputTokens = 0
71+
cfg.AutoCompactBuffer = window - target
72+
if cfg.AutoCompactBuffer < 0 {
73+
cfg.AutoCompactBuffer = 0
74+
}
75+
return cfg
76+
}
77+
78+
// refreshContextWindowCache updates cached window from the catalog when the model changes.
79+
func (s *Session) refreshContextWindowCache() {
80+
if s == nil {
81+
return
82+
}
83+
s.ContextWindowCached = 0
84+
if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
85+
s.ContextWindowCached = info.ContextSize
86+
}
87+
s.EnsureAutoCompactor()
88+
}
89+
90+
// ManageContextBeforeTurn collapses noise, then compacts via the strategy registry when needed.
91+
// Returns the compaction strategy name (if any) and whether messages were reduced.
92+
func (s *Session) ManageContextBeforeTurn(ctx context.Context) (strategy string, compacted bool) {
93+
if s == nil {
94+
return "", false
95+
}
96+
s.messages = ctxmgr.CollapseRepeatedMessages(s.messages)
97+
98+
s.EnsureAutoCompactor()
99+
if strat, ok := s.AutoCompactor.AutoCompactIfNeeded(ctx, s); ok {
100+
return strat, true
101+
}
102+
103+
if len(s.messages) > maxContextMessages {
104+
s.smartCompact()
105+
return "smart_message_cap", true
106+
}
107+
108+
convTokens := EstimateTokens(s.messages)
109+
window := s.ContextWindowSize()
110+
budget := ctxmgr.NewContextBudget(window)
111+
if budget.ShouldCompact(convTokens) {
112+
s.smartCompact()
113+
return "smart_budget", true
114+
}
115+
116+
return "", false
117+
}
118+
119+
// CompactConversation runs compaction immediately (for /compact). Uses the full strategy chain.
120+
func (s *Session) CompactConversation(ctx context.Context) (strategy string, tokensBefore, tokensAfter int, err error) {
121+
if s == nil {
122+
return "", 0, 0, fmt.Errorf("no session")
123+
}
124+
s.messages = ctxmgr.CollapseRepeatedMessages(s.messages)
125+
s.EnsureAutoCompactor()
126+
tokensBefore = EstimateTokens(s.messages)
127+
strategy, err = s.AutoCompactor.RunCompaction(ctx, s)
128+
if err != nil {
129+
s.smartCompact()
130+
strategy = "smart_fallback"
131+
}
132+
tokensAfter = EstimateTokens(s.messages)
133+
return strategy, tokensBefore, tokensAfter, nil
134+
}
135+
136+
// ShouldCompactByBudget reports whether conversation tokens exceed the configured % of window.
137+
func (s *Session) ShouldCompactByBudget() bool {
138+
window := s.ContextWindowSize()
139+
conv := EstimateTokens(s.messages)
140+
return conv >= window*s.compactThresholdPct()/100
141+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package engine
2+
3+
import (
4+
"context"
5+
"strings"
6+
"testing"
7+
8+
"github.com/GrayCodeAI/hawk/internal/types"
9+
)
10+
11+
func TestResolveModelContextWindow_Fallback(t *testing.T) {
12+
if got := ResolveModelContextWindow("unknown-model-xyz", 0); got != DefaultContextWindow {
13+
t.Fatalf("expected default %d, got %d", DefaultContextWindow, got)
14+
}
15+
if got := ResolveModelContextWindow("any", 64_000); got != 64_000 {
16+
t.Fatalf("expected override 64000, got %d", got)
17+
}
18+
}
19+
20+
func TestSession_compactConfig_ThresholdPct(t *testing.T) {
21+
s := NewSession("", "test-model", "sys", nil)
22+
s.AutoCompactThresholdPct = 85
23+
s.ContextWindowCached = 100_000
24+
cfg := s.compactConfig()
25+
want := 100_000 - 85_000
26+
got := cfg.ContextWindowSize - cfg.AutoCompactBuffer - cfg.MaxOutputTokens
27+
if got != want {
28+
t.Fatalf("threshold tokens = %d, want %d", got, want)
29+
}
30+
}
31+
32+
func TestMaybeSpillToolOutput_SmallUnchanged(t *testing.T) {
33+
in := "hello"
34+
if got := maybeSpillToolOutput(in, "Read", "id1"); got != in {
35+
t.Fatalf("expected unchanged small output, got %q", got)
36+
}
37+
}
38+
39+
func TestMaybeSpillToolOutput_LargeSpills(t *testing.T) {
40+
in := strings.Repeat("x", toolOutputSpillMinChars+100)
41+
got := maybeSpillToolOutput(in, "Bash", "call-1")
42+
if !strings.Contains(got, ".hawk/scratch/") {
43+
t.Fatalf("expected spill path in output, got %q", got[:200])
44+
}
45+
if strings.Contains(got, strings.Repeat("x", toolOutputSpillMinChars)) {
46+
t.Fatal("expected full payload not inlined after spill")
47+
}
48+
}
49+
50+
func TestManageContextBeforeTurn_CollapseOnly(t *testing.T) {
51+
s := NewSession("", "test-model", "sys", nil)
52+
s.messages = []types.EyrieMessage{
53+
{Role: "user", ToolResults: []types.ToolResult{{Content: "err", IsError: true}}},
54+
{Role: "user", ToolResults: []types.ToolResult{{Content: "err", IsError: true}}},
55+
}
56+
_, compacted := s.ManageContextBeforeTurn(context.Background())
57+
if compacted {
58+
t.Fatal("expected no compaction for tiny history")
59+
}
60+
}

internal/engine/session.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ type Session struct {
7878

7979
PinnedMessages int // messages to protect from compaction (from /pin)
8080
AutoCompactThresholdPct int // token % to trigger auto-compact (default 85)
81+
ContextWindowCached int // catalog context window; 0 → governor default
82+
AutoCompactor *AutoCompactor
8183
Verbose bool // show tool calls, timing, token counts in output
8284

8385
// Cost optimization
@@ -154,6 +156,8 @@ func NewSessionWithClient(chat ChatClient, provider, model, systemPrompt string,
154156
}
155157
s.Cost.Model = model
156158
s.Router = modelPkg.NewRouter(modelPkg.StrategyBalanced)
159+
s.AutoCompactThresholdPct = DefaultAutoCompactThresholdPct
160+
s.refreshContextWindowCache()
157161

158162
// Initialize agents accumulator for .hawk/agents.md
159163
cwd, _ := os.Getwd()
@@ -200,6 +204,7 @@ func (s *Session) SetModel(model string) {
200204
s.model = strings.TrimSpace(model)
201205
s.Cost.Model = s.model
202206
s.syncCascadeDefaultModel()
207+
s.refreshContextWindowCache()
203208
}
204209

205210
// syncCascadeDefaultModel keeps the cascade router aligned after /config model picks.

internal/engine/stream.go

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@ import (
1010
"github.com/GrayCodeAI/hawk/internal/types"
1111

1212
"github.com/GrayCodeAI/hawk/internal/engine/branching"
13-
"github.com/GrayCodeAI/hawk/internal/engine/ctxmgr"
1413
"github.com/GrayCodeAI/hawk/internal/engine/lifecycle"
1514
"github.com/GrayCodeAI/hawk/internal/hooks"
1615
analytics "github.com/GrayCodeAI/hawk/internal/observability"
1716
"github.com/GrayCodeAI/hawk/internal/observability/oteltrace"
18-
modelPkg "github.com/GrayCodeAI/hawk/internal/provider/routing"
1917
"github.com/GrayCodeAI/hawk/internal/resilience/retry"
2018
)
2119

@@ -171,21 +169,12 @@ func (s *Session) agentLoop(ctx context.Context, ch chan<- StreamEvent) {
171169
if s.Beliefs != nil && s.Beliefs.Size() > 0 {
172170
s.Beliefs.Prune(turnCount)
173171
}
174-
// Auto-compact if conversation is too long (message count)
175-
if len(s.messages) > maxContextMessages {
176-
s.messages = ctxmgr.CollapseRepeatedMessages(s.messages)
177-
if len(s.messages) > maxContextMessages {
178-
s.smartCompact()
179-
}
180-
}
181-
182-
// Auto-compact if token usage exceeds context budget allocation
183-
convTokens := EstimateTokens(s.messages)
184-
if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
185-
budget := ctxmgr.NewContextBudget(info.ContextSize)
186-
if budget.ShouldCompact(convTokens) {
187-
s.smartCompact()
188-
}
172+
// Context governor: collapse → micro/smart/truncate (settings threshold %).
173+
if strat, didCompact := s.ManageContextBeforeTurn(ctx); didCompact {
174+
s.log.Info("context compacted", map[string]interface{}{
175+
"strategy": strat,
176+
"messages": len(s.messages),
177+
})
189178
}
190179

191180
// Integration pipeline: pre-query (intent, tools, budget, injection scan, cache)
@@ -234,10 +223,7 @@ func (s *Session) agentLoop(ctx context.Context, ch chan<- StreamEvent) {
234223

235224
// Dynamic max_tokens based on task type and recent tool patterns
236225
taskType := classifyPromptForBudget(s.messages)
237-
contextSize := 200000
238-
if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
239-
contextSize = info.ContextSize
240-
}
226+
contextSize := s.ContextWindowSize()
241227
maxTok := DynamicMaxTokens(s.messages, contextSize, taskType)
242228

243229
// Model cascade: select optimal model for this request

internal/engine/stream_tool_exec.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
hooks "github.com/GrayCodeAI/hawk/internal/hooks"
1616
"github.com/GrayCodeAI/hawk/internal/observability/oteltrace"
1717
"github.com/GrayCodeAI/hawk/internal/prompts"
18-
modelPkg "github.com/GrayCodeAI/hawk/internal/provider/routing"
1918
)
2019

2120
// toolExecResult holds the output of a single tool execution.
@@ -326,8 +325,8 @@ func (s *Session) executeSingleTool(ctx context.Context, tc types.ToolCall, ch c
326325
}
327326

328327
maxChars := 50000
329-
if info, ok := modelPkg.Find(s.model); ok && info.ContextSize > 0 {
330-
dynamic := info.ContextSize * 20 / 100 * 4
328+
if window := s.ContextWindowSize(); window > 0 {
329+
dynamic := window * 20 / 100 * 4
331330
if dynamic < 5000 {
332331
dynamic = 5000
333332
}
@@ -345,6 +344,7 @@ func (s *Session) executeSingleTool(ctx context.Context, tc types.ToolCall, ch c
345344
if len(output) > maxChars {
346345
output = output[:maxChars] + "\n... (truncated)"
347346
}
347+
output = maybeSpillToolOutput(output, canonical, tc.ID)
348348

349349
if s.Pipeline != nil {
350350
var execErr error

0 commit comments

Comments
 (0)