Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion internal/query/degradation.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,23 @@ func (e *Engine) GetDegradationWarnings() []DegradationWarning {
}
}

return GenerateDegradationWarnings(scipAvailable, gitAvailable, scipStale, commitsBehind)
warnings := GenerateDegradationWarnings(scipAvailable, gitAvailable, scipStale, commitsBehind)

// LIP mixed-models warning: cosine similarity across different vector spaces
// is meaningless, so semantic rerank/search is gated off when this is set.
// Only emitted once lipSemanticAvailable has probed the daemon — otherwise
// we would falsely claim a mixed-model state before any query has run.
e.lipHealthMu.RLock()
lipMixed := e.cachedLipMixed
lipChecked := !e.lipHealthCheckedAt.IsZero()
e.lipHealthMu.RUnlock()
if lipChecked && lipMixed {
warnings = append(warnings, DegradationWarning{
Code: "lip_mixed_models",
Message: "LIP index contains vectors from multiple embedding models — semantic ranking disabled until re-index.",
CapabilityPercent: 70,
})
}

return warnings
}
6 changes: 6 additions & 0 deletions internal/query/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ type Engine struct {
cachedState *RepoState
stateComputedAt time.Time

// LIP health (cached; refreshed on a short TTL to avoid per-query RPCs).
lipHealthMu sync.RWMutex
cachedLipMixed bool
cachedLipAvailable bool
lipHealthCheckedAt time.Time

// Cache stats
cacheStatsMu sync.RWMutex
cacheHits int64
Expand Down
38 changes: 38 additions & 0 deletions internal/query/lip_health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package query

import (
"time"

"github.com/SimplyLiz/CodeMCP/internal/lip"
)

// lipHealthTTL is the lifetime of a cached LIP index-status probe: within this
// window lipSemanticAvailable answers from the cache instead of contacting the
// daemon again. IndexStatus is a 200 ms RPC, so we do not want it per-query.
const lipHealthTTL = 60 * time.Second

// lipSemanticAvailable tells callers whether results from LIP semantic
// operations (rerank, semantic search) are safe to use.
//
// It returns true only when the last probe reached the daemon AND the index
// holds vectors from a single embedding model. Cosine similarity between
// vectors that live in different embedding spaces carries no meaning, so a
// mixed-model index is treated the same as an unavailable daemon.
//
// The probe result is cached on the Engine for lipHealthTTL; a call made while
// the cache is fresh never touches the daemon.
func (e *Engine) lipSemanticAvailable() bool {
	// Fast path: snapshot the cache under the read lock.
	e.lipHealthMu.RLock()
	checkedAt := e.lipHealthCheckedAt
	available, mixedModels := e.cachedLipAvailable, e.cachedLipMixed
	e.lipHealthMu.RUnlock()

	if !checkedAt.IsZero() && time.Since(checkedAt) < lipHealthTTL {
		return available && !mixedModels
	}

	// Slow path: the cache is empty or expired, so probe the daemon. The RPC
	// runs outside the lock so concurrent readers are not blocked behind it.
	// A nil status is recorded as "daemon unavailable"; the error value itself
	// is not inspected.
	status, _ := lip.IndexStatus()
	available = status != nil
	mixedModels = available && status.MixedModels

	e.lipHealthMu.Lock()
	e.lipHealthCheckedAt = time.Now()
	e.cachedLipAvailable, e.cachedLipMixed = available, mixedModels
	e.lipHealthMu.Unlock()

	return available && !mixedModels
}
154 changes: 154 additions & 0 deletions internal/query/lip_health_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package query

import (
"encoding/binary"
"encoding/json"
"io"
"net"
"os"
"path/filepath"
"sync/atomic"
"testing"
"time"
)

// startLipHealthDaemon starts a throwaway LIP daemon on a Unix socket for the
// duration of the test and points LIP_SOCKET at it.
//
// Each accepted connection is served a single exchange: the handler reads a
// 4-byte big-endian length prefix, discards that many request bytes, and then
// replies with a length-prefixed JSON index-status payload whose
// "mixed_models" field equals mixedModels. The request content is never
// inspected, so every request receives the same reply.
//
// The returned counter is incremented once per fully-read request; read it
// with atomic.LoadInt64 because handler goroutines update it concurrently.
//
// The listener, the socket directory, and the LIP_SOCKET override are all torn
// down automatically when the test ends. The override is installed with
// t.Setenv so that a previously *unset* variable is unset again afterwards
// (rather than left as an empty string); as a consequence this helper must not
// be used from a parallel test.
func startLipHealthDaemon(t *testing.T, mixedModels bool) *int64 {
	t.Helper()

	payload, err := json.Marshal(map[string]any{
		"indexed_files":           1,
		"pending_embedding_files": 0,
		"last_updated_ms":         nil,
		"mixed_models":            mixedModels,
		"models_in_index":         []string{"model-a"},
	})
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	// NOTE(review): the socket is created under /tmp rather than t.TempDir(),
	// presumably to keep the path short enough for Unix socket address limits —
	// confirm before changing.
	dir, err := os.MkdirTemp("/tmp", "lip")
	if err != nil {
		t.Fatalf("mkdirtemp: %v", err)
	}
	// Registered immediately so the directory is removed even if a later step
	// aborts the test.
	t.Cleanup(func() { _ = os.RemoveAll(dir) })

	sockPath := filepath.Join(dir, "s.sock")
	ln, err := net.Listen("unix", sockPath)
	if err != nil {
		t.Fatalf("listen: %v", err)
	}
	// Closing the listener also makes the accept loop below return.
	t.Cleanup(func() { _ = ln.Close() })

	t.Setenv("LIP_SOCKET", sockPath)

	var reqs int64
	go func() {
		for {
			conn, err := ln.Accept()
			if err != nil {
				// Listener closed by the cleanup above: stop accepting.
				return
			}
			go func(c net.Conn) {
				defer c.Close()
				// Bound the whole exchange so a stuck client cannot leak this
				// goroutine past the test.
				_ = c.SetDeadline(time.Now().Add(2 * time.Second))

				var lenBuf [4]byte
				if _, err := io.ReadFull(c, lenBuf[:]); err != nil {
					return
				}
				reqLen := binary.BigEndian.Uint32(lenBuf[:])
				if _, err := io.CopyN(io.Discard, c, int64(reqLen)); err != nil {
					return
				}
				// Count only requests that were read in full.
				atomic.AddInt64(&reqs, 1)

				var out [4]byte
				binary.BigEndian.PutUint32(out[:], uint32(len(payload)))
				// Best-effort reply: a write failure simply surfaces as a
				// failed probe on the client side.
				_, _ = c.Write(out[:])
				_, _ = c.Write(payload)
			}(conn)
		}
	}()

	return &reqs
}

// TestLipSemanticAvailable_HealthyIndex: a reachable daemon that reports a
// non-mixed index must leave semantic operations enabled.
func TestLipSemanticAvailable_HealthyIndex(t *testing.T) {
	startLipHealthDaemon(t, false)

	engine := new(Engine)
	if ok := engine.lipSemanticAvailable(); !ok {
		t.Fatal("lipSemanticAvailable = false for healthy single-model index, want true")
	}
}

// TestLipSemanticAvailable_MixedModels: a reachable daemon that reports a
// mixed-model index must switch semantic operations off.
func TestLipSemanticAvailable_MixedModels(t *testing.T) {
	startLipHealthDaemon(t, true)

	engine := new(Engine)
	if ok := engine.lipSemanticAvailable(); ok {
		t.Fatal("lipSemanticAvailable = true while MixedModels is set, want false")
	}
}

func TestLipSemanticAvailable_DaemonDown(t *testing.T) {
// Point at a socket that doesn't exist.
prev := os.Getenv("LIP_SOCKET")
os.Setenv("LIP_SOCKET", "/tmp/ckb-lip-nonexistent.sock")
t.Cleanup(func() { os.Setenv("LIP_SOCKET", prev) })

e := &Engine{}
if e.lipSemanticAvailable() {
t.Fatal("lipSemanticAvailable = true with no daemon, want false")
}
}

// TestLipSemanticAvailable_CacheWithinTTL: five back-to-back calls on the same
// Engine must reach the daemon exactly once; the remaining four are expected
// to be answered from the cached probe result.
func TestLipSemanticAvailable_CacheWithinTTL(t *testing.T) {
	rpcCount := startLipHealthDaemon(t, false)
	engine := new(Engine)

	const calls = 5
	for call := 0; call < calls; call++ {
		if engine.lipSemanticAvailable() {
			continue
		}
		t.Fatalf("call %d: lipSemanticAvailable = false, want true", call)
	}

	got := atomic.LoadInt64(rpcCount)
	if got != 1 {
		t.Fatalf("daemon RPC count = %d, want 1 (TTL cache should suppress subsequent probes)", got)
	}
}

// TestGetDegradationWarnings_LipMixedModels: once a probe has recorded a
// mixed-model index, GetDegradationWarnings must include a warning whose Code
// is "lip_mixed_models".
func TestGetDegradationWarnings_LipMixedModels(t *testing.T) {
	startLipHealthDaemon(t, true)
	engine := new(Engine)

	// Run one probe first so the cached LIP health state is populated before
	// warnings are requested.
	engine.lipSemanticAvailable()

	all := engine.GetDegradationWarnings()
	for i := range all {
		if all[i].Code == "lip_mixed_models" {
			return // found it
		}
	}
	t.Fatalf("expected lip_mixed_models warning, got %+v", all)
}

// TestGetDegradationWarnings_NoWarningBeforeFirstProbe: a mixed-model daemon is
// running, but lipSemanticAvailable has never been called on this Engine, so
// its LIP health cache is still empty. No "lip_mixed_models" warning may be
// emitted in that state.
func TestGetDegradationWarnings_NoWarningBeforeFirstProbe(t *testing.T) {
	startLipHealthDaemon(t, true)
	engine := new(Engine)

	all := engine.GetDegradationWarnings()
	for i := range all {
		if w := all[i]; w.Code == "lip_mixed_models" {
			t.Fatalf("lip_mixed_models warning surfaced before first probe: %+v", w)
		}
	}
}
4 changes: 2 additions & 2 deletions internal/query/symbols.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
package query

import (
Expand Down Expand Up @@ -514,7 +514,7 @@
// content table. The threshold of 3 mirrors the PPR/LIP re-ranking gate — below
// that the lexical results aren't trustworthy enough to stand alone.
const lipFallbackThreshold = 3
if len(results) < lipFallbackThreshold {
if len(results) < lipFallbackThreshold && e.lipSemanticAvailable() {
lipSymLimit := opts.Limit * 3
lipResults := SemanticSearchWithLIP(opts.Query, 20, "", 0, func(fileURIs []string) map[string][]SearchResultItem {
// Convert file:// URIs back to repo-relative paths for the batch query.
Expand Down Expand Up @@ -639,7 +639,7 @@
results = reranked
}
}
} else if len(results) > 3 && !lipRanked {
} else if len(results) > 3 && !lipRanked && e.lipSemanticAvailable() {
// Fast tier: use LIP file embeddings as a semantic re-ranking signal.
// Skip when results already came from LIP semantic search (lipRanked=true) —
// they're already ordered by similarity, a second pass would be redundant.
Expand Down
Loading