Skip to content

Commit 3ff69b5

Browse files
authored
fix: security and correctness hardening across credentials, API, storage, and tooling (#45)
* fix: harden security and correctness across credentials, API, and storage Critical: - audit.go: tighten JSONLFileSink file perms 0o644 -> 0o600 (metadata) - api/server.go: store and call bgCancel in Shutdown (context leak); add validateAuthConfig loopback auth gate on ListenAndServe - guardrails.go: add AddRuleSafe/NewGuardrailsSafe (error-returning variants) for untrusted rule sources; fix ApplyRedactions to use FindAllStringIndex positions so the correct match instance is redacted instead of the first occurrence via strings.Index - cache/backend.go: bound RESP bulk-string allocation at 64 MB to prevent memory-exhaustion DoS from a malicious Redis - budgets.go: chmod budget DB to 0o600 (stores plaintext API keys) High: - conversation/engine.go: defer-close the final StreamResult on all exit paths; add nil-provider guard in streamAndSave - client/stream.go: sort OpenAI tool calls by Index before emitting (was non-deterministic map iteration); nil args -> {_raw} fallback - credentials/keyring_platform.go: only map not-found errors to ErrNotFound; real backend errors now pass through so LookupSecret logs them at Warn instead of silently classifying as 'no secret' - openai_proxy.go: surface streaming errors as SSE data event before [DONE]; raise body limit to 10 MiB for multi-turn conversations Medium: - provider_registry.go: bound resolveEnvSecret with 10s timeout ctx - retry.go: time.After -> time.NewTimer + Stop (avoid timer leak) - sqlite.go: escape backslash in GetNodeByPrefix LIKE query - config/provider_env.go: ValidateBaseURL uses url.Parse instead of os.Stat; config dir perms 0o755 -> 0o700 * fix: eliminate timer leaks in ratelimit, add security headers to API server - ratelimit.go: replace time.After with time.NewTimer+Stop in both token-bucket wait paths (minInterval enforcement and token-deficit wait) to prevent runtime timer leaks on ctx cancellation - adaptive_ratelimit.go: same fix in RPM and TPM throttle paths - internal/api/server.go: wrap handler chain with securityHeaders middleware (X-Content-Type-Options, X-Frame-Options, Cache-Control) matching the hawk daemon's security posture * fix: eliminate timer leak in router retry backoff - router/retry.go: replace afterFunc = time.After with newTimer = time.NewTimer to avoid leaking the timer in the runtime when ctx is cancelled before the delay elapses - router/router.go: update call site to use newTimer + timer.Stop() * refactor: centralize HTTP utilities, bound ctx in keyring lookups - internal/httputil: new package with canonical ConstantTimeEqual, DecodeJSONBody, WriteJSON, SecurityHeaders, IsLoopbackHost, ValidateAuthConfig, ExtractBearerToken — eliminates duplication across internal/api/server.go (previously had local copies) - internal/api/server.go: delegate to httputil; remove duplicated constantTimeEqual, decodeJSONBody, writeJSON, securityHeaders, validateAuthConfig, isLoopbackHost - internal/api/auth_test.go: update to use httputil.ConstantTimeEqual - config/runtime.go: bound envValue keyring lookup with 5s timeout (was unbounded context.Background()) - runtime/runtime.go: bound CredentialTargets keyring probes with 5s timeout each (was unbounded context.Background()) * fix: address review findings in security hardening PR - budgets.go: chmod WAL/SHM sidecar files to 0o600 in addition to the main DB file (WAL holds uncheckpointed pages including plaintext keys) - httputil.go: make ExtractBearerToken case-insensitive per RFC 7235 and adopt it in Server.auth (was dead code with duplicated logic) - server.go: use httputil.ExtractBearerToken instead of inline duplicate
1 parent 9da8423 commit 3ff69b5

23 files changed

Lines changed: 488 additions & 73 deletions

client/adaptive_ratelimit.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,10 +387,12 @@ func (a *AdaptiveRateLimitProvider) checkAndWait(ctx context.Context) error {
387387
if delay > 0 && delay <= a.config.MaxDelay {
388388
a.throttleCount++
389389
a.mu.Unlock()
390+
timer := time.NewTimer(delay)
390391
select {
391392
case <-ctx.Done():
393+
timer.Stop()
392394
return ctx.Err()
393-
case <-time.After(delay):
395+
case <-timer.C:
394396
}
395397
a.mu.Lock()
396398
now = time.Now()
@@ -421,10 +423,12 @@ func (a *AdaptiveRateLimitProvider) checkAndWait(ctx context.Context) error {
421423
if delay > 0 && delay <= a.config.MaxDelay {
422424
a.throttleCount++
423425
a.mu.Unlock()
426+
timer := time.NewTimer(delay)
424427
select {
425428
case <-ctx.Done():
429+
timer.Stop()
426430
return ctx.Err()
427-
case <-time.After(delay):
431+
case <-timer.C:
428432
}
429433
a.mu.Lock()
430434
now = time.Now()

client/guardrails.go

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ type GuardrailViolation struct {
6363
Rule GuardrailRule `json:"rule"`
6464
MatchedText string `json:"matched_text"`
6565
RedactedResult string `json:"redacted_result,omitempty"`
66+
matchStart int // byte offset of the match in the original response
67+
matchEnd int // byte offset one past the last matched byte
6668
}
6769

6870
// GuardrailError is returned when a guardrail blocks a response.
@@ -91,6 +93,10 @@ func NewGuardrails(rules ...GuardrailRule) *Guardrails {
9193
}
9294

9395
// AddRule registers a guardrail rule. It panics if the pattern is invalid.
96+
// This follows the regexp.MustCompile convention for programmatic rules
97+
// where an invalid pattern indicates a programmer error. For rules that
98+
// may originate from untrusted sources (config files, user input), use
99+
// AddRuleSafe instead.
94100
func (g *Guardrails) AddRule(r GuardrailRule) {
95101
if r.Pattern != "" {
96102
compiled, err := regexp.Compile(r.Pattern)
@@ -104,6 +110,37 @@ func (g *Guardrails) AddRule(r GuardrailRule) {
104110
g.rules = append(g.rules, r)
105111
}
106112

113+
// AddRuleSafe registers a guardrail rule and returns an error if the pattern
114+
// is invalid, instead of panicking. Use this when rules may come from
115+
// untrusted sources (config files, user input).
116+
func (g *Guardrails) AddRuleSafe(r GuardrailRule) error {
117+
if r.Pattern != "" {
118+
compiled, err := regexp.Compile(r.Pattern)
119+
if err != nil {
120+
return fmt.Errorf("eyrie: guardrails: invalid regex %q in rule %q: %w", r.Pattern, r.Name, err)
121+
}
122+
r.compiled = compiled
123+
}
124+
g.mu.Lock()
125+
defer g.mu.Unlock()
126+
g.rules = append(g.rules, r)
127+
return nil
128+
}
129+
130+
// NewGuardrailsSafe creates a Guardrails instance and returns an error if any
131+
// rule has an invalid pattern. Use this when rules may come from untrusted
132+
// sources; use NewGuardrails for programmatic rules where invalid patterns
133+
// indicate a programmer error (matching regexp.MustCompile convention).
134+
func NewGuardrailsSafe(rules ...GuardrailRule) (*Guardrails, error) {
135+
g := &Guardrails{}
136+
for _, r := range rules {
137+
if err := g.AddRuleSafe(r); err != nil {
138+
return nil, err
139+
}
140+
}
141+
return g, nil
142+
}
143+
107144
// Rules returns a snapshot of the currently registered rules.
108145
func (g *Guardrails) Rules() []GuardrailRule {
109146
g.mu.RLock()
@@ -134,17 +171,19 @@ func (g *Guardrails) Check(ctx context.Context, response string) ([]GuardrailVio
134171
if rule.compiled == nil {
135172
continue
136173
}
137-
matches := rule.compiled.FindAllString(response, -1)
174+
matches := rule.compiled.FindAllStringIndex(response, -1)
138175
if len(matches) == 0 {
139176
continue
140177
}
141178
for _, match := range matches {
142179
v := GuardrailViolation{
143180
Rule: rule,
144-
MatchedText: match,
181+
MatchedText: response[match[0]:match[1]],
182+
matchStart: match[0],
183+
matchEnd: match[1],
145184
}
146185
if rule.Action == GuardrailRedact {
147-
v.RedactedResult = strings.Repeat("*", len(match))
186+
v.RedactedResult = strings.Repeat("*", len(v.MatchedText))
148187
}
149188
violations = append(violations, v)
150189
if rule.Action == GuardrailBlock {
@@ -165,7 +204,9 @@ func (g *Guardrails) Check(ctx context.Context, response string) ([]GuardrailVio
165204

166205
// ApplyRedactions takes the response text and violations, replacing redacted
167206
// matches with their redaction markers. Non-redact violations are left intact.
168-
// Matches are applied positionally to handle overlapping patterns correctly.
207+
// Match positions are used directly from the violations (captured during Check)
208+
// so the correct instance of each match is redacted even when the matched text
209+
// appears multiple times in the response.
169210
func ApplyRedactions(response string, violations []GuardrailViolation) string {
170211
type replacement struct {
171212
start int
@@ -177,11 +218,17 @@ func ApplyRedactions(response string, violations []GuardrailViolation) string {
177218
if v.Rule.Action != GuardrailRedact {
178219
continue
179220
}
180-
idx := strings.Index(response, v.MatchedText)
181-
if idx < 0 {
221+
if v.matchEnd == 0 && v.matchStart == 0 && v.MatchedText != "" {
222+
// Fallback: violation came from outside Check (e.g. constructed
223+
// manually). Search for the first occurrence.
224+
idx := strings.Index(response, v.MatchedText)
225+
if idx < 0 {
226+
continue
227+
}
228+
reps = append(reps, replacement{start: idx, end: idx + len(v.MatchedText), text: v.RedactedResult})
182229
continue
183230
}
184-
reps = append(reps, replacement{start: idx, end: idx + len(v.MatchedText), text: v.RedactedResult})
231+
reps = append(reps, replacement{start: v.matchStart, end: v.matchEnd, text: v.RedactedResult})
185232
}
186233
if len(reps) == 0 {
187234
return response

client/guardrails_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,42 @@ func TestGuardrails_InvalidPatternPanics(t *testing.T) {
164164
})
165165
}
166166

167+
func TestGuardrails_InvalidPatternSafeReturnsError(t *testing.T) {
168+
_, err := NewGuardrailsSafe(GuardrailRule{
169+
Type: GuardrailCustom,
170+
Name: "bad_regex",
171+
Pattern: `[invalid`,
172+
Action: GuardrailBlock,
173+
})
174+
if err == nil {
175+
t.Fatal("expected error for invalid regex in NewGuardrailsSafe")
176+
}
177+
}
178+
179+
func TestGuardrails_AddRuleSafe(t *testing.T) {
180+
g := NewGuardrails()
181+
if err := g.AddRuleSafe(GuardrailRule{
182+
Type: GuardrailCustom,
183+
Name: "dynamic_rule",
184+
Pattern: `dynamic_pattern`,
185+
Action: GuardrailWarn,
186+
}); err != nil {
187+
t.Fatalf("expected no error, got: %v", err)
188+
}
189+
if len(g.Rules()) != 1 {
190+
t.Fatalf("expected 1 rule after AddRuleSafe, got %d", len(g.Rules()))
191+
}
192+
193+
if err := g.AddRuleSafe(GuardrailRule{
194+
Type: GuardrailCustom,
195+
Name: "bad_regex",
196+
Pattern: `[invalid`,
197+
Action: GuardrailBlock,
198+
}); err == nil {
199+
t.Fatal("expected error for invalid regex in AddRuleSafe")
200+
}
201+
}
202+
167203
func TestGuardrails_AddRule(t *testing.T) {
168204
g := NewGuardrails()
169205
g.AddRule(GuardrailRule{

client/provider_registry.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"log/slog"
7+
"time"
78

89
"github.com/GrayCodeAI/eyrie/config"
910
"github.com/GrayCodeAI/eyrie/credentials"
@@ -280,5 +281,13 @@ func ResolveProviderModelEnvOverride(provider string) string {
280281
}
281282

282283
func resolveEnvSecret(envKey string) string {
283-
return credentials.LookupSecret(context.Background(), envKey)
284+
// Bound the lookup to prevent indefinite stalls when the OS keyring
285+
// is unresponsive (e.g., locked keychain on macOS, D-Bus failure on
286+
// Linux). The keyring itself has a 30s timeout, but resolveEnvSecret
287+
// is called multiple times in sequence during provider construction
288+
// (up to 6 calls for AWS Bedrock), so a per-call cap keeps the total
289+
// stall bounded.
290+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
291+
defer cancel()
292+
return credentials.LookupSecret(ctx, envKey)
284293
}

client/ratelimit.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,12 @@ func (b *tokenBucket) wait(ctx context.Context) error {
8282
if since < b.minInterval {
8383
wait := b.minInterval - since
8484
b.mu.Unlock()
85+
timer := time.NewTimer(wait)
8586
select {
8687
case <-ctx.Done():
88+
timer.Stop()
8789
return fmt.Errorf("eyrie: rate limiter: %w", ctx.Err())
88-
case <-time.After(wait):
90+
case <-timer.C:
8991
}
9092
b.mu.Lock()
9193
}
@@ -100,10 +102,12 @@ func (b *tokenBucket) wait(ctx context.Context) error {
100102
waitDur := time.Duration(needed / b.refillRate)
101103
b.mu.Unlock()
102104

105+
timer := time.NewTimer(waitDur)
103106
select {
104107
case <-ctx.Done():
108+
timer.Stop()
105109
return fmt.Errorf("eyrie: rate limiter: %w", ctx.Err())
106-
case <-time.After(waitDur):
110+
case <-timer.C:
107111
}
108112
}
109113
}

client/retry.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,16 @@ func doWithRetry(ctx context.Context, httpClient *http.Client, req *http.Request
119119
"attempt", attempt, "max", rc.MaxRetries,
120120
"delay", delay, "url", req.URL.String(),
121121
)
122+
// Use time.NewTimer + Stop instead of time.After to avoid leaking
123+
// the timer in the runtime when ctx is cancelled before the delay
124+
// elapses. time.After allocates a timer that lives until it fires,
125+
// even if the caller has already moved on.
126+
timer := time.NewTimer(delay)
122127
select {
123128
case <-ctx.Done():
129+
timer.Stop()
124130
return nil, ctx.Err()
125-
case <-time.After(delay):
131+
case <-timer.C:
126132
}
127133
}
128134

client/stream.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"fmt"
99
"io"
1010
"log/slog"
11+
"sort"
1112
"strings"
1213
"time"
1314
)
@@ -428,9 +429,21 @@ func processOpenAIStreamWithOpts(ctx context.Context, sseEvents <-chan SSEEvent,
428429
return
429430
}
430431
toolsEmitted = true
431-
for _, t := range tools {
432-
var args map[string]interface{}
433-
_ = json.Unmarshal([]byte(t.argsBuf.String()), &args)
432+
// Sort by index so tool calls are emitted in the order the
433+
// model produced them, not in random map-iteration order.
434+
indices := make([]int, 0, len(tools))
435+
for idx := range tools {
436+
indices = append(indices, idx)
437+
}
438+
sort.Ints(indices)
439+
for _, idx := range indices {
440+
t := tools[idx]
441+
args := map[string]interface{}{}
442+
if err := json.Unmarshal([]byte(t.argsBuf.String()), &args); err != nil {
443+
// On parse failure, pass the raw string as _raw so
444+
// the caller sees something rather than a nil map.
445+
args = map[string]interface{}{"_raw": t.argsBuf.String()}
446+
}
434447
emit(ctx, ch, EyrieStreamEvent{
435448
Type: "tool_call",
436449
ToolCall: &ToolCall{ID: t.id, Name: t.name, Arguments: args},

config/provider_env.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package config
33
import (
44
"encoding/json"
55
"fmt"
6+
"net/url"
67
"os"
78
"path/filepath"
89
"strings"
@@ -286,13 +287,21 @@ func ValidateAPIKey(apiKey, providerName string) string {
286287
return ""
287288
}
288289

289-
// ValidateBaseURL validates a base URL.
290+
// ValidateBaseURL validates a base URL. Returns an error message if the URL
291+
// is syntactically invalid (unparseable or missing a scheme), or empty if valid.
290292
func ValidateBaseURL(baseURL string) string {
291293
if baseURL == "" {
292294
return ""
293295
}
294-
if _, err := os.Stat(baseURL); err == nil {
295-
return "Invalid base URL: " + baseURL
296+
u, err := url.Parse(baseURL)
297+
if err != nil {
298+
return "Invalid base URL: " + baseURL + " (" + err.Error() + ")"
299+
}
300+
if u.Scheme != "http" && u.Scheme != "https" {
301+
return "Invalid base URL: " + baseURL + " (must be http or https)"
302+
}
303+
if u.Host == "" {
304+
return "Invalid base URL: " + baseURL + " (missing host)"
296305
}
297306
return ""
298307
}
@@ -360,7 +369,7 @@ func SaveProviderConfig(config *ProviderConfig, path string) error {
360369
if path == "" {
361370
path = GetProviderConfigPath()
362371
}
363-
_ = os.MkdirAll(filepath.Dir(path), 0o755)
372+
_ = os.MkdirAll(filepath.Dir(path), 0o700)
364373
data, err := json.MarshalIndent(config, "", " ")
365374
if err != nil {
366375
return err

config/runtime.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"os"
66
"strings"
7+
"time"
78

89
"github.com/GrayCodeAI/eyrie/credentials"
910
)
@@ -43,8 +44,13 @@ func envValue(key string) string {
4344
if key == "" {
4445
return ""
4546
}
47+
// Bound the keyring lookup to prevent indefinite stalls when the OS
48+
// keychain is unresponsive. The keyring itself has a 30s timeout,
49+
// but envValue is called many times during provider construction.
50+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
51+
defer cancel()
4652
// Always check the credential store first for ALL keys.
47-
if v := credentials.LookupSecret(context.Background(), key); v != "" {
53+
if v := credentials.LookupSecret(ctx, key); v != "" {
4854
return v
4955
}
5056
// Fall back to process environment only when the credential store has

conversation/engine.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,9 @@ func (e *Engine) DeleteNode(ctx context.Context, id string) error {
202202
const defaultGroupBudgetMultiplier = 4
203203

204204
func (e *Engine) streamAndSave(ctx context.Context, parentNode *storage.Node, messages []client.EyrieMessage, opts PromptOpts) (<-chan Event, error) {
205+
if e.provider == nil {
206+
return nil, fmt.Errorf("conversation: engine has no provider")
207+
}
205208
_, span := tracer.Start(
206209
ctx, "conversation.streamAndSave",
207210
trace.WithAttributes(
@@ -248,6 +251,18 @@ func (e *Engine) streamAndSave(ctx context.Context, parentNode *storage.Node, me
248251
currentStream = sr.Events
249252
)
250253

254+
// Ensure the last StreamResult is always closed on exit. The
255+
// closure captures currentSR by reference, so it closes whatever
256+
// stream is active when the goroutine returns — including the
257+
// initial stream on early exits and the final continuation stream
258+
// on normal completion. Previous streams are closed explicitly
259+
// during the continuation loop (currentSR.Close() before reassign).
260+
defer func() {
261+
if currentSR != nil {
262+
currentSR.Close()
263+
}
264+
}()
265+
251266
for {
252267
var fullTextBuilder strings.Builder
253268
var usage *client.EyrieUsage

0 commit comments

Comments
 (0)