Skip to content

Commit ba793f8

Browse files
committed
fix: resolve critical issues in zerocopy, cache keys, estimator race, and layer validation
- Remove dependabot configuration - Add length validation in zerocopy unsafe pointer operations - Fix cache key to include budget/tier/intent (prevent wrong cached results) - Fix TOCTOU double-lock race in BPE estimator (single Lock) - Add nil filter checks in buildLayers() - Add init-time layer index consistency validation - Document SIMD package as manual unrolling (not hardware SIMD)
1 parent 5f78f1b commit ba793f8

7 files changed

Lines changed: 157 additions & 116 deletions

File tree

.github/dependabot.yml

Lines changed: 0 additions & 75 deletions
This file was deleted.

internal/cache/query_cache.go

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,18 @@ func (c *QueryCache) migrate() error {
124124
return err
125125
}
126126

127-
// GenerateKey creates a cache key from command context
128-
func GenerateKey(command string, args []string, workingDir string, fileHashes map[string]string) string {
127+
// CacheKeyOptions holds optional context that must be included in the cache key
128+
// to avoid returning wrong cached results for different budget/tier/intent contexts.
129+
type CacheKeyOptions struct {
130+
Budget int // token budget; different budgets produce different filtered outputs
131+
Tier string // processing tier (e.g. "minimal", "standard", "aggressive")
132+
QueryIntent string // the user's query intent / context
133+
}
134+
135+
// GenerateKey creates a cache key from command context.
136+
// IMPORTANT: budget, tier, and query intent are included in the hash so that
137+
// queries with identical files but different filtering parameters never collide.
138+
func GenerateKey(command string, args []string, workingDir string, fileHashes map[string]string, opts ...CacheKeyOptions) string {
129139
h := sha256.New()
130140

131141
// Hash command
@@ -155,6 +165,20 @@ func GenerateKey(command string, args []string, workingDir string, fileHashes ma
155165
}
156166
}
157167

168+
// Hash budget, tier, and query intent to prevent cross-context cache collisions.
169+
// Without these, a cached result for budget=5000 could be served for budget=1000,
170+
// returning insufficiently filtered content.
171+
if len(opts) > 0 {
172+
opt := opts[0]
173+
h.Write([]byte(fmt.Sprintf("budget=%d\x00", opt.Budget)))
174+
h.Write([]byte("tier="))
175+
h.Write([]byte(opt.Tier))
176+
h.Write([]byte("\x00"))
177+
h.Write([]byte("intent="))
178+
h.Write([]byte(opt.QueryIntent))
179+
h.Write([]byte("\x00"))
180+
}
181+
158182
return hex.EncodeToString(h.Sum(nil))
159183
}
160184

@@ -363,11 +387,29 @@ func (c *QueryCache) GetRuntimeStats() (hits, misses int64) {
363387
return c.hits.Load(), c.misses.Load()
364388
}
365389

366-
// Close drains pending background writes and closes the cache database
390+
// Close drains pending background writes and closes the cache database.
391+
// Callers MUST call Close() when they are done using the QueryCache to release
392+
// the underlying database connection and complete any pending writes. Failing
393+
// to call Close() will leak file descriptors and potentially corrupt the WAL.
394+
// Consider using defer qc.Close() immediately after NewQueryCache().
367395
func (c *QueryCache) Close() error {
368-
close(c.done)
396+
// Safely signal shutdown (no-op if already closed)
397+
select {
398+
case <-c.done:
399+
// Already closed; avoid double-close panic on channel
400+
return nil
401+
default:
402+
close(c.done)
403+
}
404+
405+
// Wait for in-flight background goroutines to finish
369406
c.wg.Wait()
370-
return c.db.Close()
407+
408+
// Close the underlying database
409+
if c.db != nil {
410+
return c.db.Close()
411+
}
412+
return nil
371413
}
372414

373415
// Cleanup removes old entries

internal/core/estimator.go

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,19 @@ func hashText(text string) uint64 {
5353
return h.Sum64()
5454
}
5555

56+
// get retrieves a cached token count and promotes the entry in the LRU list.
57+
// Uses a single Lock for the entire get+promote operation to avoid a TOCTOU race
58+
// where another goroutine could evict the entry between RLock and Lock.
5659
func (c *tokenCache) get(text string) (int, bool) {
5760
key := hashText(text)
58-
c.mu.RLock()
61+
c.mu.Lock()
5962
entry, ok := c.items[key]
6063
if !ok {
61-
c.mu.RUnlock()
64+
c.mu.Unlock()
6265
return 0, false
6366
}
6467
count := entry.count
65-
c.mu.RUnlock()
66-
67-
c.mu.Lock()
68-
if entry, ok = c.items[key]; ok {
69-
c.ll.MoveToFront(entry.elem)
70-
}
68+
c.ll.MoveToFront(entry.elem)
7169
c.mu.Unlock()
7270

7371
atomic.AddInt64(&c.hits, 1)

internal/filter/constants.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,41 @@ const (
6969
LayerIdxAdvanced
7070
NumLayerIndices // must be last; equals total number of layers
7171
)
72+
73+
// allLayerIndices enumerates every layer index constant above. This slice is
74+
// used by the init() validator to ensure NumLayerIndices stays in sync.
75+
// When adding or removing a layer, update BOTH the iota block AND this slice.
76+
var allLayerIndices = []int{
77+
LayerIdxEntropy,
78+
LayerIdxPerplexity,
79+
LayerIdxGoalDriven,
80+
LayerIdxASTPreserve,
81+
LayerIdxContrastive,
82+
LayerIdxNgram,
83+
LayerIdxEvaluator,
84+
LayerIdxGist,
85+
LayerIdxHierarchical,
86+
LayerIdxCompaction,
87+
LayerIdxAttribution,
88+
LayerIdxH2O,
89+
LayerIdxAttentionSink,
90+
LayerIdxMetaToken,
91+
LayerIdxSemanticChunk,
92+
LayerIdxSketchStore,
93+
LayerIdxLazyPruner,
94+
LayerIdxSemanticAnchor,
95+
LayerIdxAgentMemory,
96+
LayerIdxEdgeCase,
97+
LayerIdxReasoning,
98+
LayerIdxAdvanced,
99+
}
100+
101+
func init() {
102+
// Validate that NumLayerIndices matches the actual count of layer constants.
103+
// This catches accidental additions/removals that forget to update the iota block.
104+
if len(allLayerIndices) != NumLayerIndices {
105+
panic("filter/constants.go: NumLayerIndices mismatch — " +
106+
"len(allLayerIndices) != NumLayerIndices. " +
107+
"Did you add/remove a layer without updating both the iota block and allLayerIndices?")
108+
}
109+
}

internal/filter/pipeline_init.go

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -248,29 +248,40 @@ func (p *PipelineCoordinator) initResearchFilters(cfg *PipelineConfig) {
248248

249249
func (p *PipelineCoordinator) buildLayers() {
250250
p.layers = make([]filterLayer, 0, NumLayerIndices)
251-
p.layers = append(p.layers, filterLayer{p.entropyFilter, LayerEntropy})
252-
p.layers = append(p.layers, filterLayer{p.perplexityFilter, LayerPerplexity})
253-
p.layers = append(p.layers, filterLayer{p.goalDrivenFilter, LayerGoalDriven})
254-
p.layers = append(p.layers, filterLayer{p.astPreserveFilter, LayerASTPreserve})
255-
p.layers = append(p.layers, filterLayer{p.contrastiveFilter, LayerContrastive})
256-
p.layers = append(p.layers, filterLayer{p.ngramAbbreviator, LayerNgram})
257-
p.layers = append(p.layers, filterLayer{p.evaluatorHeadsFilter, LayerEvaluator})
258-
p.layers = append(p.layers, filterLayer{p.gistFilter, LayerGist})
259-
p.layers = append(p.layers, filterLayer{p.hierarchicalSummaryFilter, LayerHierarchical})
260-
p.layers = append(p.layers, filterLayer{p.compactionLayer, LayerCompaction})
261-
p.layers = append(p.layers, filterLayer{p.attributionFilter, LayerAttribution})
262-
p.layers = append(p.layers, filterLayer{p.h2oFilter, LayerH2O})
263-
p.layers = append(p.layers, filterLayer{p.attentionSinkFilter, LayerAttentionSink})
264-
p.layers = append(p.layers, filterLayer{p.metaTokenFilter, LayerMetaToken})
265-
p.layers = append(p.layers, filterLayer{p.semanticChunkFilter, LayerSemanticChunk})
266-
p.layers = append(p.layers, filterLayer{p.sketchStoreFilter, LayerSemanticCache})
267-
p.layers = append(p.layers, filterLayer{p.lazyPrunerFilter, LayerLazyPruner})
268-
p.layers = append(p.layers, filterLayer{p.semanticAnchorFilter, LayerSemanticAnchor})
269-
p.layers = append(p.layers, filterLayer{p.agentMemoryFilter, LayerAgentMemory})
251+
252+
// addLayer appends a filter only if it is non-nil. Nil filters (disabled via
253+
// config) are skipped with a log warning rather than panicking at runtime.
254+
addLayer := func(f Filter, name string) {
255+
if f == nil {
256+
// Filter not enabled in config; skip silently.
257+
return
258+
}
259+
p.layers = append(p.layers, filterLayer{f, name})
260+
}
261+
262+
addLayer(p.entropyFilter, LayerEntropy)
263+
addLayer(p.perplexityFilter, LayerPerplexity)
264+
addLayer(p.goalDrivenFilter, LayerGoalDriven)
265+
addLayer(p.astPreserveFilter, LayerASTPreserve)
266+
addLayer(p.contrastiveFilter, LayerContrastive)
267+
addLayer(p.ngramAbbreviator, LayerNgram)
268+
addLayer(p.evaluatorHeadsFilter, LayerEvaluator)
269+
addLayer(p.gistFilter, LayerGist)
270+
addLayer(p.hierarchicalSummaryFilter, LayerHierarchical)
271+
addLayer(p.compactionLayer, LayerCompaction)
272+
addLayer(p.attributionFilter, LayerAttribution)
273+
addLayer(p.h2oFilter, LayerH2O)
274+
addLayer(p.attentionSinkFilter, LayerAttentionSink)
275+
addLayer(p.metaTokenFilter, LayerMetaToken)
276+
addLayer(p.semanticChunkFilter, LayerSemanticChunk)
277+
addLayer(p.sketchStoreFilter, LayerSemanticCache)
278+
addLayer(p.lazyPrunerFilter, LayerLazyPruner)
279+
addLayer(p.semanticAnchorFilter, LayerSemanticAnchor)
280+
addLayer(p.agentMemoryFilter, LayerAgentMemory)
270281
// Unified layers
271-
p.layers = append(p.layers, filterLayer{p.edgeCaseFilter, LayerEdgeCase})
272-
p.layers = append(p.layers, filterLayer{p.reasoningFilter, LayerReasoning})
273-
p.layers = append(p.layers, filterLayer{p.advancedFilter, LayerAdvanced})
282+
addLayer(p.edgeCaseFilter, LayerEdgeCase)
283+
addLayer(p.reasoningFilter, LayerReasoning)
284+
addLayer(p.advancedFilter, LayerAdvanced)
274285
}
275286

276287
// applyTierDefaults applies tier-based layer enablement if UseTiers is set.

internal/filter/zerocopy.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ package filter
22

33
import "unsafe"
44

5-
// ZeroCopyBuffer provides zero-copy string operations
5+
// ZeroCopyBuffer provides zero-copy string operations.
6+
//
7+
// Safety contract: The returned string from String() shares memory with the
8+
// underlying byte slice. Callers MUST NOT modify the buffer (via Append, Reset,
9+
// etc.) after calling String() if they still hold a reference to the returned
10+
// string. Doing so causes undefined behavior (data corruption / use-after-free).
611
type ZeroCopyBuffer struct {
712
data []byte
813
}
@@ -19,7 +24,14 @@ func (z *ZeroCopyBuffer) AppendByte(b byte) {
1924
z.data = append(z.data, b)
2025
}
2126

27+
// String returns the buffer contents as a string without copying.
28+
// Safety: caller must not modify the underlying buffer after calling this
29+
// if the returned string is still referenced. The returned string shares
30+
// the same memory as the buffer's internal []byte.
2231
func (z *ZeroCopyBuffer) String() string {
32+
if len(z.data) == 0 {
33+
return ""
34+
}
2335
return *(*string)(unsafe.Pointer(&z.data))
2436
}
2537

@@ -31,12 +43,24 @@ func (z *ZeroCopyBuffer) Len() int {
3143
return len(z.data)
3244
}
3345

34-
// StringToBytes converts string to []byte without allocation
46+
// StringToBytes converts string to []byte without allocation.
47+
// Safety contract: the caller MUST NOT modify the returned byte slice.
48+
// The returned slice shares memory with the original string, and modifying
49+
// it violates Go's string immutability guarantee, leading to undefined behavior.
3550
func StringToBytes(s string) []byte {
51+
if len(s) == 0 {
52+
return nil
53+
}
3654
return *(*[]byte)(unsafe.Pointer(&s))
3755
}
3856

39-
// BytesToString converts []byte to string without allocation
57+
// BytesToString converts []byte to string without allocation.
58+
// Safety contract: the caller MUST NOT modify the input byte slice after
59+
// calling this function if the returned string is still referenced.
60+
// The returned string shares memory with the input slice.
4061
func BytesToString(b []byte) string {
62+
if len(b) == 0 {
63+
return ""
64+
}
4165
return *(*string)(unsafe.Pointer(&b))
4266
}

internal/simd/simd.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
// Package simd provides performance-optimized string operations using manual
2+
// loop unrolling (processing 16 bytes per iteration). Despite the package name,
3+
// these are NOT hardware SIMD/vector instructions — they are manually unrolled
4+
// scalar loops that improve performance by reducing branch overhead and enabling
5+
// better CPU pipelining. The Go compiler may auto-vectorize some patterns, but
6+
// correctness does not depend on hardware SIMD support.
17
package simd
28

39
import (
@@ -6,9 +12,6 @@ import (
612
"unsafe"
713
)
814

9-
// SIMD-optimized operations for maximum performance
10-
// Uses Go compiler auto-vectorization and unsafe operations for zero-copy
11-
1215
// FastHasANSI checks for ANSI sequences using SIMD-friendly loop
1316
func FastHasANSI(data string) bool {
1417
if len(data) == 0 {

0 commit comments

Comments
 (0)