GrayCodeAI
diff --git a/embeddings/memo.go b/embeddings/memo.go
Lines changed: 63 additions & 17 deletions
diff --git a/embeddings/memo_test.go b/embeddings/memo_test.go
Lines changed: 76 additions & 0 deletions
diff --git a/engine/engine_test.go b/engine/engine_test.go
Lines changed: 3 additions & 3 deletions
diff --git a/engine/fused_recall.go b/engine/fused_recall.go
Lines changed: 4 additions & 1 deletion
diff --git a/engine/search.go b/engine/search.go
Lines changed: 12 additions & 2 deletions
diff --git a/go.sum b/go.sum
Lines changed: 2 additions & 2 deletions
@@ -4,32 +4,55 @@ import (
 	"context"
 	"crypto/sha256"
 	"encoding/hex"
+	"strconv"
 	"sync"
 )
 
-// EmbeddingMemo caches embeddings by content hash to skip re-embedding unchanged content.
+// EmbeddingMemo caches embeddings so unchanged content is not re-embedded.
+//
+// Entries are keyed by namespace + mode + sha256(content), not by content
+// alone. The namespace identifies the embedding model (provider Name()), so
+// vectors produced by one model or model version are never served for another.
+// The mode component keeps document- and query-mode vectors separate for
+// asymmetric retrieval models (e.g. Cohere v3 search_document vs search_query).
 type EmbeddingMemo struct {
-	mu    sync.RWMutex
-	cache map[string][]float32 // sha256(content) -> embedding
-	order []string             // insertion order for LRU eviction
-	max   int
+	mu        sync.RWMutex
+	namespace string               // model identity, prefixed into every key
+	cache     map[string][]float32 // namespace|mode|sha256(content) -> embedding
+	order     []string             // insertion order for LRU eviction
+	max       int
 }
 
-// NewEmbeddingMemo creates a memo cache with the given max entry count.
+// NewEmbeddingMemo creates a memo cache with the given max entry count and an
+// empty namespace. Prefer NewEmbeddingMemoNS so keys carry the model identity;
+// an empty namespace only suits tests or callers that never switch models.
 func NewEmbeddingMemo(maxEntries int) *EmbeddingMemo {
+	return NewEmbeddingMemoNS("", maxEntries)
+}
+
+// NewEmbeddingMemoNS creates a memo cache whose keys are prefixed with namespace
+// (the model identity), so entries cached for one model are never returned for
+// another. A maxEntries <= 0 falls back to a capacity of 1024.
+func NewEmbeddingMemoNS(namespace string, maxEntries int) *EmbeddingMemo {
 	if maxEntries <= 0 {
 		maxEntries = 1024
 	}
 	return &EmbeddingMemo{
-		cache: make(map[string][]float32, maxEntries),
-		order: make([]string, 0, maxEntries),
-		max:   maxEntries,
+		namespace: namespace,
+		cache:     make(map[string][]float32, maxEntries),
+		order:     make([]string, 0, maxEntries),
+		max:       maxEntries,
 	}
 }
 
-// Get returns a cached embedding for the content, if present.
+// Get returns the embedding cached for content under ModeDocument and reports whether one was found.
 func (m *EmbeddingMemo) Get(content string) ([]float32, bool) {
-	key := contentHash(content)
+	return m.GetMode(content, ModeDocument)
+}
+
+// GetMode returns a cached embedding for the content under the given mode.
+func (m *EmbeddingMemo) GetMode(content string, mode EmbedMode) ([]float32, bool) {
+	key := m.key(content, mode)
 	m.mu.Lock()
 	vec, ok := m.cache[key]
 	if ok {
@@ -39,9 +62,15 @@ func (m *EmbeddingMemo) Get(content string) ([]float32, bool) {
 	return vec, ok
 }
 
-// Put stores an embedding for the given content, evicting the oldest entry if at capacity.
+// Put caches embedding for content under ModeDocument; it is shorthand for PutMode with that mode.
 func (m *EmbeddingMemo) Put(content string, embedding []float32) {
-	key := contentHash(content)
+	m.PutMode(content, ModeDocument, embedding)
+}
+
+// PutMode stores an embedding for the given content under the given mode,
+// evicting the oldest entry if at capacity.
+func (m *EmbeddingMemo) PutMode(content string, mode EmbedMode, embedding []float32) {
+	key := m.key(content, mode)
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	if _, exists := m.cache[key]; exists {
@@ -75,6 +104,12 @@ func (m *EmbeddingMemo) promote(key string) {
 	}
 }
 
+// key derives the cache key "<namespace>|<mode as int>|<contentHash(content)>".
+func (m *EmbeddingMemo) key(content string, mode EmbedMode) string {
+	prefix := m.namespace + "|" + strconv.Itoa(int(mode))
+	return prefix + "|" + contentHash(content)
+}
+
 func contentHash(s string) string {
 	h := sha256.Sum256([]byte(s))
 	return hex.EncodeToString(h[:])
@@ -86,11 +121,13 @@ type MemoizedProvider struct {
 	memo  *EmbeddingMemo
 }
 
-// NewMemoizedProvider wraps an existing Provider with a memo cache.
+// NewMemoizedProvider wraps an existing Provider with a memo cache of at most
+// maxEntries entries. The cache namespace is inner.Name(), read once here at
+// construction, so providers reporting different names use different key spaces.
 func NewMemoizedProvider(inner Provider, maxEntries int) *MemoizedProvider {
 	return &MemoizedProvider{
 		inner: inner,
-		memo:  NewEmbeddingMemo(maxEntries),
+		memo:  NewEmbeddingMemoNS(inner.Name(), maxEntries),
 	}
 }
 
@@ -137,8 +174,17 @@ func (p *MemoizedProvider) EmbedBatch(ctx context.Context, texts []string) ([][]
 }
 
 func (p *MemoizedProvider) EmbedWithMode(ctx context.Context, text string, mode EmbedMode) ([]float32, error) {
-	// Mode-aware calls bypass memo since same content may produce different vectors per mode.
-	return p.inner.EmbedWithMode(ctx, text, mode)
+	// Memoized per mode: the cache key includes mode, so document- and query-mode
+	// vectors for the same text never collide. Errors from inner are not cached.
+	if vec, ok := p.memo.GetMode(text, mode); ok {
+		return vec, nil // NOTE(review): this is the cached slice itself, not a copy — confirm callers never mutate it
+	}
+	vec, err := p.inner.EmbedWithMode(ctx, text, mode)
+	if err != nil {
+		return nil, err
+	}
+	p.memo.PutMode(text, mode, vec)
+	return vec, nil
 }
 
 // Memo returns the underlying cache for inspection/testing.
 
@@ -40,6 +40,7 @@ type countingProvider struct {
 	localStub
 	embedCalls int64
 	batchCalls int64
+	modeCalls  int64
 }
 
 func (c *countingProvider) Embed(ctx context.Context, text string) ([]float32, error) {
@@ -52,6 +53,11 @@ func (c *countingProvider) EmbedBatch(ctx context.Context, texts []string) ([][]
 	return c.localStub.EmbedBatch(ctx, texts)
 }
 
+func (c *countingProvider) EmbedWithMode(ctx context.Context, text string, mode EmbedMode) ([]float32, error) {
+	atomic.AddInt64(&c.modeCalls, 1) // count each mode-aware call before delegating to the stub
+	return c.localStub.EmbedWithMode(ctx, text, mode)
+}
+
 func TestMemoizedProvider_Embed(t *testing.T) {
 	inner := &countingProvider{}
 	p := NewMemoizedProvider(inner, 100)
@@ -97,3 +103,73 @@ func TestMemoizedProvider_EmbedBatch(t *testing.T) {
 		t.Fatalf("expected still 1 batch call, got %d", inner.batchCalls)
 	}
 }
+
+// namedProvider is a localStub whose Name() returns a test-chosen model name.
+type namedProvider struct {
+	localStub
+	name string // value returned by Name()
+}
+
+func (p *namedProvider) Name() string { return p.name }
+
+// TestMemoizedProvider_ModelSwapInvalidates pins the core #1 fix: the memo key
+// space is derived from the provider's Name(), so vectors cached for one model
+// can never be looked up on behalf of a different model.
+func TestMemoizedProvider_ModelSwapInvalidates(t *testing.T) {
+	ctx := context.Background()
+	old := NewMemoizedProvider(&namedProvider{name: "modelA"}, 100)
+	if _, err := old.Embed(ctx, "foo"); err != nil {
+		t.Fatalf("modelA embed failed: %v", err)
+	}
+	newer := NewMemoizedProvider(&namedProvider{name: "modelB"}, 100)
+	// newer owns a fresh cache, so a bare miss proves nothing: compare the keys.
+	if a, b := old.memo.key("foo", ModeDocument), newer.memo.key("foo", ModeDocument); a == b {
+		t.Fatalf("modelA and modelB share cache key %q", a)
+	}
+	// Within the same model, content still hits.
+	if _, ok := old.Memo().Get("foo"); !ok {
+		t.Fatal("modelA memo should still contain its own content")
+	}
+}
+
+// TestEmbeddingMemo_ModeIsolation pins the #2-supporting behavior: identical
+// text stored under ModeDocument and under ModeQuery occupies two distinct
+// entries, and each lookup returns the vector written for its own mode.
+func TestEmbeddingMemo_ModeIsolation(t *testing.T) {
+	memo := NewEmbeddingMemoNS("model", 100)
+	memo.PutMode("q", ModeDocument, []float32{1, 0})
+	memo.PutMode("q", ModeQuery, []float32{0, 1})
+
+	// Each mode must hand back the vector that was stored under that mode.
+	if doc, ok := memo.GetMode("q", ModeDocument); !ok || doc[0] != 1 {
+		t.Fatalf("document-mode vector wrong: %v ok=%v", doc, ok)
+	}
+	if qry, ok := memo.GetMode("q", ModeQuery); !ok || qry[1] != 1 {
+		t.Fatalf("query-mode vector wrong: %v ok=%v", qry, ok)
+	}
+}
+
+// TestMemoizedProvider_EmbedWithModeMemoizes pins that mode-aware calls go
+// through the memo: a repeat (text, mode) pair is served without calling inner.
+func TestMemoizedProvider_EmbedWithModeMemoizes(t *testing.T) {
+	ctx := context.Background()
+	counter := &countingProvider{}
+	cached := NewMemoizedProvider(counter, 100)
+
+	if _, err := cached.EmbedWithMode(ctx, "x", ModeQuery); err != nil {
+		t.Fatalf("first mode embed failed: %v", err)
+	}
+	if _, err := cached.EmbedWithMode(ctx, "x", ModeQuery); err != nil {
+		t.Fatalf("second mode embed failed: %v", err)
+	}
+	if got := atomic.LoadInt64(&counter.modeCalls); got != 1 {
+		t.Fatalf("expected 1 inner mode call, got %d", got)
+	}
+	// Same text under ModeDocument is a separate entry, so inner runs once more.
+	if _, err := cached.EmbedWithMode(ctx, "x", ModeDocument); err != nil {
+		t.Fatalf("doc-mode embed failed: %v", err)
+	}
+	if got := atomic.LoadInt64(&counter.modeCalls); got != 2 {
+		t.Fatalf("expected 2 inner mode calls after mode switch, got %d", got)
+	}
+}
@@ -555,7 +555,7 @@ func (m *mockStorage) DeleteEmbedding(ctx context.Context, nodeID string) error
 	return nil
 }
 
-func (m *mockStorage) AllEmbeddings(ctx context.Context) (map[string][]float32, error) {
+func (m *mockStorage) AllEmbeddings(ctx context.Context, _ string) (map[string][]float32, error) {
 	if err := ctx.Err(); err != nil {
 		return nil, err
 	}
@@ -581,8 +581,8 @@ func (m *mockStorage) GetEmbedding(ctx context.Context, nodeID string) ([]float3
 	return nil, "", nil
 }
 
-func (m *mockStorage) GetEmbeddingsBatch(ctx context.Context, offset, limit int) (map[string][]float32, error) {
-	return m.AllEmbeddings(ctx)
+func (m *mockStorage) GetEmbeddingsBatch(ctx context.Context, model string, offset, limit int) (map[string][]float32, error) {
+	return m.AllEmbeddings(ctx, model) // offset and limit are unused: the mock forwards straight to AllEmbeddings
 }
 
 func (m *mockStorage) AddFileWatch(ctx context.Context, filePath, nodeID, gitHash string) error {
 
@@ -8,6 +8,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/GrayCodeAI/yaad/embeddings"
 	"github.com/GrayCodeAI/yaad/intent"
 	"github.com/GrayCodeAI/yaad/internal/telemetry"
 	"github.com/GrayCodeAI/yaad/storage"
@@ -471,7 +472,9 @@ func fusedMergeKeys(maps ...map[string]int) []string {
 // checks the result before searching.
 func (e *Engine) queryVector(ctx context.Context, query, proxySeedID string) []float32 {
 	if e.embedder != nil {
-		if vec, err := e.embedder.Embed(ctx, query); err == nil && len(vec) > 0 {
+		// Query mode: asymmetric models embed queries (search_query) differently
+		// from stored documents (search_document).
+		if vec, err := e.embedder.EmbedWithMode(ctx, query, embeddings.ModeQuery); err == nil && len(vec) > 0 {
 			return vec
 		} else if err != nil {
 			atomic.AddInt64(&e.metrics.Errors, 1)
 
@@ -61,7 +61,9 @@ func (h *HybridSearch) Search(ctx context.Context, query string, opts RecallOpts
 	// Path 2: Vector seed nodes (if provider available)
 	vectorRanks := map[string]int{}
 	if h.provider != nil {
-		vec, err := h.provider.Embed(ctx, query)
+		// Embed in query mode: asymmetric models (e.g. Cohere v3) expect
+		// search_query for queries vs search_document for stored content.
+		vec, err := h.provider.EmbedWithMode(ctx, query, embeddings.ModeQuery)
 		if err == nil {
 			vectorRanks = h.vectorSearch(ctx, vec, opts.Limit*2)
 		}
@@ -155,10 +157,18 @@ func (h *HybridSearch) vectorSearch(ctx context.Context, queryVec []float32, lim
 	}
 	var pairs []pair
 
+	// Scope the scan to the active provider's model: stored vectors from a
+	// different model occupy an incompatible space and would produce meaningless
+	// cosine scores. The ingest path records model = provider.Name() (rest.go).
+	var model string
+	if h.provider != nil {
+		model = h.provider.Name()
+	}
+
 	const batchSize = 500
 	offset := 0
 	for {
-		batch, err := h.store.GetEmbeddingsBatch(ctx, offset, batchSize)
+		batch, err := h.store.GetEmbeddingsBatch(ctx, model, offset, batchSize)
 		if err != nil || len(batch) == 0 {
 			break
 		}
Original file line number	Diff line number	Diff line change
`@@ -555,7 +555,7 @@ func (m *mockStorage) DeleteEmbedding(ctx context.Context, nodeID string) error`
`555`	`555`	`return nil`
`556`	`556`	`}`
`557`	`557`
`558`		`-func (m *mockStorage) AllEmbeddings(ctx context.Context) (map[string][]float32, error) {`
	`558`	`+func (m *mockStorage) AllEmbeddings(ctx context.Context, _ string) (map[string][]float32, error) {`
`559`	`559`	`if err := ctx.Err(); err != nil {`
`560`	`560`	`return nil, err`
`561`	`561`	`}`
`@@ -581,8 +581,8 @@ func (m *mockStorage) GetEmbedding(ctx context.Context, nodeID string) ([]float3`
`581`	`581`	`return nil, "", nil`
`582`	`582`	`}`
`583`	`583`
`584`		`-func (m *mockStorage) GetEmbeddingsBatch(ctx context.Context, offset, limit int) (map[string][]float32, error) {`
`585`		`- return m.AllEmbeddings(ctx)`
	`584`	`+func (m *mockStorage) GetEmbeddingsBatch(ctx context.Context, model string, offset, limit int) (map[string][]float32, error) {`
	`585`	`+ return m.AllEmbeddings(ctx, model)`
`586`	`586`	`}`
`587`	`587`
`588`	`588`	`func (m *mockStorage) AddFileWatch(ctx context.Context, filePath, nodeID, gitHash string) error {`