Skip to content

Commit cac004d

Browse files
authored
refactor: make retry fallbacks and sqlite pooling configurable (#46)
* refactor: make codeagent fallbacks explicit
  Remove baked-in fallback model IDs from retry defaults and cover the new option-based fallback and strategy configuration with focused tests.
* refactor: add configurable sqlite connection pooling
  Keep serialized access as the default while allowing callers to opt into a larger pool, and lock that behavior down with targeted storage tests.
* docs: remove stale cmd/eyrie entry from architecture tree
  Keep the repository layout example aligned with the current top-level structure after the CLI path was removed from the tree.
1 parent 3ff69b5 commit cac004d

9 files changed

Lines changed: 179 additions & 16 deletions

File tree

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,6 @@ config.SaveProviderConfig(cfg, "") // save changes
231231

232232
```
233233
eyrie/
234-
├── cmd/eyrie/ # CLI binary
235234
├── client/ # Provider client & streaming interface
236235
├── config/ # Provider configuration & routing
237236
│ └── credential/ # Credential file management

codeagent/retry.go

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,43 @@ type RetryRecord struct {
4545
}
4646

4747
// NewCodeAgentRetry creates a retry system with code-agent-specific strategies.
48-
func NewCodeAgentRetry() *CodeAgentRetry {
48+
// Default strategies do not pin fallback model IDs; configure fallbacks
49+
// explicitly via WithFallback (or override a whole strategy via WithStrategy)
50+
// so the catalog remains the single source of truth for model names.
51+
func NewCodeAgentRetry(opts ...Option) *CodeAgentRetry {
4952
cr := &CodeAgentRetry{
5053
strategies: make(map[string]*RetryStrategy),
5154
history: make([]RetryRecord, 0, 1000),
5255
}
5356
cr.registerDefaults()
57+
for _, opt := range opts {
58+
opt(cr)
59+
}
5460
return cr
5561
}
5662

63+
// Option configures a CodeAgentRetry.
64+
type Option func(*CodeAgentRetry)
65+
66+
// WithFallback configures a fallback model+provider for a given error type
67+
// (e.g. "context_length", "budget_exceeded"). Model and provider names should
68+
// be resolved from the catalog by the caller — no defaults are baked in.
69+
func WithFallback(errorType, model, provider string) Option {
70+
return func(cr *CodeAgentRetry) {
71+
if s, ok := cr.strategies[errorType]; ok {
72+
s.FallbackModel = model
73+
s.FallbackProvider = provider
74+
}
75+
}
76+
}
77+
78+
// WithStrategy overrides the retry strategy for a given error type.
79+
func WithStrategy(errorType string, strategy RetryStrategy) Option {
80+
return func(cr *CodeAgentRetry) {
81+
cr.strategies[errorType] = &strategy
82+
}
83+
}
84+
5785
// registerDefaults sets up default retry strategies for common code agent failures.
5886
func (cr *CodeAgentRetry) registerDefaults() {
5987
// Rate limiting - wait and retry
@@ -65,13 +93,13 @@ func (cr *CodeAgentRetry) registerDefaults() {
6593
Backoff: 2.0,
6694
}
6795

68-
// Context length exceeded - switch to model with larger context
96+
// Context length exceeded - switch to model with larger context.
97+
// Fallback model is intentionally unset; configure it via WithFallback
98+
// using a catalog-resolved model name so it never drifts from the catalog.
6999
cr.strategies["context_length"] = &RetryStrategy{
70-
Name: "Context Length",
71-
MaxRetries: 2,
72-
BaseDelay: 1 * time.Second,
73-
FallbackModel: "claude-3-5-sonnet", // larger context
74-
FallbackProvider: "anthropic",
100+
Name: "Context Length",
101+
MaxRetries: 2,
102+
BaseDelay: 1 * time.Second,
75103
}
76104

77105
// Tool execution failure - retry with different approach
@@ -82,12 +110,12 @@ func (cr *CodeAgentRetry) registerDefaults() {
82110
Backoff: 1.5,
83111
}
84112

85-
// Token budget exceeded - switch to cheaper model
113+
// Token budget exceeded - switch to cheaper model.
114+
// Fallback model is intentionally unset; configure it via WithFallback
115+
// using a catalog-resolved model name so it never drifts from the catalog.
86116
cr.strategies["budget_exceeded"] = &RetryStrategy{
87-
Name: "Budget Exceeded",
88-
MaxRetries: 1,
89-
FallbackModel: "gpt-4o-mini", // cheaper
90-
FallbackProvider: "openai",
117+
Name: "Budget Exceeded",
118+
MaxRetries: 1,
91119
}
92120

93121
// Server error - retry with backoff

codeagent/retry_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package codeagent
2+
3+
import (
4+
"context"
5+
"errors"
6+
"testing"
7+
"time"
8+
)
9+
10+
func TestNewCodeAgentRetryDefaultsLeaveFallbacksUnset(t *testing.T) {
11+
cr := NewCodeAgentRetry()
12+
13+
for _, errorType := range []string{"context_length", "budget_exceeded"} {
14+
strategy, ok := cr.strategies[errorType]
15+
if !ok {
16+
t.Fatalf("missing default strategy for %q", errorType)
17+
}
18+
if strategy.FallbackModel != "" || strategy.FallbackProvider != "" {
19+
t.Fatalf("%s fallback should be unset by default, got model=%q provider=%q", errorType, strategy.FallbackModel, strategy.FallbackProvider)
20+
}
21+
}
22+
}
23+
24+
func TestWithFallbackConfiguresFallbackDecision(t *testing.T) {
25+
cr := NewCodeAgentRetry(
26+
WithFallback("context_length", "claude-sonnet", "anthropic"),
27+
)
28+
ctx := context.Background()
29+
err := errors.New("context length exceeded")
30+
31+
first := cr.DecideRetry(ctx, err, "openai", "gpt-4o")
32+
if first == nil || !first.ShouldRetry || first.FallbackModel != "" {
33+
t.Fatalf("first retry = %+v, want normal retry without fallback", first)
34+
}
35+
36+
second := cr.DecideRetry(ctx, err, "openai", "gpt-4o")
37+
if second == nil || !second.ShouldRetry || second.FallbackModel != "" {
38+
t.Fatalf("second retry = %+v, want normal retry without fallback", second)
39+
}
40+
41+
third := cr.DecideRetry(ctx, err, "openai", "gpt-4o")
42+
if third == nil {
43+
t.Fatal("third retry decision is nil")
44+
}
45+
if !third.ShouldRetry {
46+
t.Fatalf("third retry should switch to fallback, got %+v", third)
47+
}
48+
if third.FallbackModel != "claude-sonnet" || third.FallbackProvider != "anthropic" {
49+
t.Fatalf("third retry fallback = %q/%q, want claude-sonnet/anthropic", third.FallbackModel, third.FallbackProvider)
50+
}
51+
}
52+
53+
func TestWithStrategyOverridesDefaultStrategy(t *testing.T) {
54+
override := RetryStrategy{
55+
Name: "Custom Timeout",
56+
MaxRetries: 1,
57+
BaseDelay: 25 * time.Millisecond,
58+
MaxDelay: 25 * time.Millisecond,
59+
Backoff: 1,
60+
}
61+
cr := NewCodeAgentRetry(WithStrategy("timeout", override))
62+
63+
got, ok := cr.strategies["timeout"]
64+
if !ok {
65+
t.Fatal("timeout strategy missing after override")
66+
}
67+
if got.Name != override.Name || got.MaxRetries != override.MaxRetries || got.BaseDelay != override.BaseDelay || got.MaxDelay != override.MaxDelay || got.Backoff != override.Backoff {
68+
t.Fatalf("timeout strategy = %+v, want %+v", *got, override)
69+
}
70+
}

storage/dag.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ func NewDAG(dbPath string, sessionID string) (*DAG, error) {
3636
return &DAG{store: store, sessionID: sessionID}, nil
3737
}
3838

39-
// NewDAGFromStore creates a DAG using an existing store.
39+
// NewDAGFromStore creates a DAG using an existing store, which is useful in
40+
// tests and callers that manage store lifetime outside the DAG wrapper.
4041
func NewDAGFromStore(store Store, sessionID string) *DAG {
4142
return &DAG{store: store, sessionID: sessionID}
4243
}

storage/sqlite.go

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,40 @@ type SQLiteStore struct {
1515
db *sql.DB
1616
}
1717

18-
func Open(path string) (*SQLiteStore, error) {
18+
// Option configures a SQLiteStore at Open time.
type Option func(*openConfig)

// openConfig collects the settings that Options may adjust before the
// database handle is configured.
type openConfig struct {
	// maxOpenConns is the value handed to the connection pool's
	// SetMaxOpenConns.
	maxOpenConns int
}

// WithMaxOpenConns overrides the database connection pool size. Open defaults
// to a pool of 1, which serializes all access and is safe for single-agent
// use; raise it for concurrent (e.g. HTTP server) workloads.
//
// Values of n that are zero or negative are ignored and leave the current
// setting untouched.
//
// A larger pool does not make writes parallel: SQLite admits one writer at a
// time, and a writer that finds the database locked waits up to the
// busy_timeout configured by Open. In WAL mode readers can proceed alongside
// that single writer.
func WithMaxOpenConns(n int) Option {
	if n <= 0 {
		// Invalid size: return an option that changes nothing.
		return func(*openConfig) {}
	}
	return func(c *openConfig) { c.maxOpenConns = n }
}
37+
38+
// Open opens the SQLite conversation DAG store at path. The store is opened in
39+
// WAL mode with foreign keys enabled. Without options the connection pool is
40+
// limited to a single connection (serialized access); pass WithMaxOpenConns to
41+
// raise the pool size for concurrent workloads.
42+
func Open(path string, opts ...Option) (*SQLiteStore, error) {
43+
cfg := openConfig{maxOpenConns: 1}
44+
for _, opt := range opts {
45+
opt(&cfg)
46+
}
1947
db, err := sql.Open("sqlite", path+"?_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)&_pragma=foreign_keys(on)")
2048
if err != nil {
2149
return nil, fmt.Errorf("storage: open %s: %w", path, err)
2250
}
23-
db.SetMaxOpenConns(1)
51+
db.SetMaxOpenConns(cfg.maxOpenConns)
2452
s := &SQLiteStore{db: db}
2553
if err := s.migrate(); err != nil {
2654
_ = db.Close()

storage/sqlite_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"testing"
88
)
99

10+
// testStore opens an isolated on-disk SQLite store for tests that need the
11+
// real schema and pragmas rather than an in-memory stub.
1012
func testStore(t *testing.T) *SQLiteStore {
1113
t.Helper()
1214
path := filepath.Join(t.TempDir(), "test.db")

storage/store.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package storage
22

33
import "context"
44

5+
// Store captures the persistence operations used by the DAG wrapper and API
6+
// layers so tests can swap implementations without changing call sites.
57
type Store interface {
68
CreateNode(ctx context.Context, node *Node) error
79
GetNode(ctx context.Context, id string) (*Node, error)

storage/store_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,36 @@ func TestOpenReusesExisting(t *testing.T) {
7373
}
7474
}
7575

76+
func TestOpenWithMaxOpenConns(t *testing.T) {
77+
dir := t.TempDir()
78+
path := filepath.Join(dir, "pool.db")
79+
s, err := Open(path, WithMaxOpenConns(4))
80+
if err != nil {
81+
t.Fatal(err)
82+
}
83+
defer s.Close()
84+
85+
stats := s.db.Stats()
86+
if stats.MaxOpenConnections != 4 {
87+
t.Fatalf("MaxOpenConnections = %d, want 4", stats.MaxOpenConnections)
88+
}
89+
}
90+
91+
func TestOpenWithInvalidMaxOpenConnsKeepsDefault(t *testing.T) {
92+
dir := t.TempDir()
93+
path := filepath.Join(dir, "default-pool.db")
94+
s, err := Open(path, WithMaxOpenConns(0))
95+
if err != nil {
96+
t.Fatal(err)
97+
}
98+
defer s.Close()
99+
100+
stats := s.db.Stats()
101+
if stats.MaxOpenConnections != 1 {
102+
t.Fatalf("MaxOpenConnections = %d, want default 1", stats.MaxOpenConnections)
103+
}
104+
}
105+
76106
func TestClose(t *testing.T) {
77107
s := testStore(t)
78108
if err := s.Close(); err != nil {

storage/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"time"
66
)
77

8+
// NodeType identifies the role of a persisted conversation node.
89
type NodeType string
910

1011
const (
@@ -15,6 +16,7 @@ const (
1516
NodeTypeToolResult NodeType = "tool_result"
1617
)
1718

19+
// Node is a single persisted entry in the conversation DAG.
1820
type Node struct {
1921
ID string `json:"id"`
2022
ParentID string `json:"parent_id,omitempty"`
@@ -39,6 +41,7 @@ type Node struct {
3941
Metadata json.RawMessage `json:"metadata,omitempty"`
4042
}
4143

44+
// Alias maps a stable name to a node ID.
4245
type Alias struct {
4346
Alias string `json:"alias"`
4447
NodeID string `json:"node_id"`

0 commit comments

Comments
 (0)