Skip to content

Commit 02496a2

Browse files
Add SQLite FTS5-backed ToolStore for optimizer search (#3786)
* Add SQLite FTS5-backed ToolStore for optimizer search

  Add an SQLite FTS5 ToolStore implementation as an alternative to the existing InMemoryToolStore, and wire it into the vMCP server when optimizer config is present.

  This adds:
  - pkg/vmcp/optimizer/fts5store: SQLiteToolStore implementation using modernc.org/sqlite (pure Go, no CGO) with FTS5 virtual tables
  - BM25 ranking for search results with LIKE-based fallback
  - FTS5 query sanitization for safe handling of special characters
  - Thread-safe concurrent access with sync.RWMutex
  - Close() method on the ToolStore interface for resource cleanup
  - OptimizerStoreCloser on server.Config for store lifecycle management
  - OptimizerConfig.FTSDBPath for configurable database location (defaults to in-memory, use emptyDir in Kubernetes)
  - Wire FTS5 store into vMCP serve command when optimizer is configured
  - Comprehensive tests for search, upsert, and concurrency scenarios

  Part 2 of the optimizer FTS5 migration (issue #3731).

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* review comments

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8f61745 commit 02496a2

13 files changed

Lines changed: 983 additions & 78 deletions

File tree

cmd/vmcp/app/commands.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"github.com/stacklok/toolhive/pkg/vmcp/discovery"
2929
"github.com/stacklok/toolhive/pkg/vmcp/health"
3030
"github.com/stacklok/toolhive/pkg/vmcp/k8s"
31-
"github.com/stacklok/toolhive/pkg/vmcp/optimizer"
3231
vmcprouter "github.com/stacklok/toolhive/pkg/vmcp/router"
3332
vmcpserver "github.com/stacklok/toolhive/pkg/vmcp/server"
3433
vmcpstatus "github.com/stacklok/toolhive/pkg/vmcp/status"
@@ -483,8 +482,7 @@ func runServe(cmd *cobra.Command, _ []string) error {
483482
}
484483

485484
if cfg.Optimizer != nil {
486-
// TODO: update this with the real optimizer.
487-
serverCfg.OptimizerFactory = optimizer.NewDummyOptimizerFactory()
485+
serverCfg.OptimizerEnabled = true
488486
}
489487

490488
// Convert composite tool configurations to workflow definitions

pkg/vmcp/optimizer/dummy_optimizer.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ func (d *DummyOptimizer) toolNames() []string {
106106
// returned factory share the same underlying storage, enabling cross-session search.
107107
func NewDummyOptimizerFactory() func(context.Context, []server.ServerTool) (Optimizer, error) {
108108
store := NewInMemoryToolStore()
109+
return NewDummyOptimizerFactoryWithStore(store)
110+
}
111+
112+
// NewDummyOptimizerFactoryWithStore returns an OptimizerFactory that creates
113+
// DummyOptimizer instances backed by the given ToolStore. All optimizers created
114+
// by the returned factory share the same store, enabling cross-session search.
115+
//
116+
// Use this when you need to provide a specific store implementation (e.g.,
117+
// SQLiteToolStore for FTS5-based search) instead of the default InMemoryToolStore.
118+
func NewDummyOptimizerFactoryWithStore(store ToolStore) func(context.Context, []server.ServerTool) (Optimizer, error) {
109119
return func(ctx context.Context, tools []server.ServerTool) (Optimizer, error) {
110120
return NewDummyOptimizer(ctx, store, tools)
111121
}

pkg/vmcp/optimizer/dummy_optimizer_test.go

Lines changed: 198 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,136 @@ package optimizer
55

66
import (
77
"context"
8+
"fmt"
89
"testing"
910

1011
"github.com/mark3labs/mcp-go/mcp"
1112
"github.com/mark3labs/mcp-go/server"
1213
"github.com/stretchr/testify/require"
1314
)
1415

16+
// mockToolStore implements ToolStore for testing optimizer logic against a
17+
// controllable store without any database dependency.
18+
type mockToolStore struct {
19+
upsertFunc func(ctx context.Context, tools []server.ServerTool) error
20+
searchFunc func(ctx context.Context, query string, allowedTools []string) ([]ToolMatch, error)
21+
}
22+
23+
func (m *mockToolStore) UpsertTools(ctx context.Context, tools []server.ServerTool) error {
24+
if m.upsertFunc != nil {
25+
return m.upsertFunc(ctx, tools)
26+
}
27+
panic("mockToolStore.UpsertTools called but not configured")
28+
}
29+
30+
func (m *mockToolStore) Search(ctx context.Context, query string, allowedTools []string) ([]ToolMatch, error) {
31+
if m.searchFunc != nil {
32+
return m.searchFunc(ctx, query, allowedTools)
33+
}
34+
panic("mockToolStore.Search called but not configured")
35+
}
36+
37+
func (*mockToolStore) Close() error {
38+
return nil
39+
}
40+
41+
// TestDummyOptimizer_MockStore tests the optimizer against a mock ToolStore,
42+
// verifying search delegation, scoping, and error handling without any database.
43+
func TestDummyOptimizer_MockStore(t *testing.T) {
44+
t.Parallel()
45+
46+
tests := []struct {
47+
name string
48+
tools []server.ServerTool
49+
searchFunc func(ctx context.Context, query string, allowedTools []string) ([]ToolMatch, error)
50+
upsertFunc func(ctx context.Context, tools []server.ServerTool) error
51+
input FindToolInput
52+
expectedNames []string
53+
expectErr bool
54+
errContains string
55+
expectCreate bool // if false, expect NewDummyOptimizer to fail
56+
createErr string
57+
}{
58+
{
59+
name: "delegates search to store with allowedTools",
60+
tools: []server.ServerTool{
61+
{Tool: mcp.Tool{Name: "tool_a", Description: "Tool A"}},
62+
{Tool: mcp.Tool{Name: "tool_b", Description: "Tool B"}},
63+
},
64+
upsertFunc: func(_ context.Context, _ []server.ServerTool) error { return nil },
65+
searchFunc: func(_ context.Context, query string, allowedTools []string) ([]ToolMatch, error) {
66+
require.Equal(t, "query", query)
67+
require.ElementsMatch(t, []string{"tool_a", "tool_b"}, allowedTools)
68+
return []ToolMatch{
69+
{Name: "tool_a", Description: "Tool A", Score: 0.9},
70+
}, nil
71+
},
72+
input: FindToolInput{ToolDescription: "query"},
73+
expectedNames: []string{"tool_a"},
74+
expectCreate: true,
75+
},
76+
{
77+
name: "propagates store search errors",
78+
tools: []server.ServerTool{
79+
{Tool: mcp.Tool{Name: "tool_a", Description: "Tool A"}},
80+
},
81+
upsertFunc: func(_ context.Context, _ []server.ServerTool) error { return nil },
82+
searchFunc: func(context.Context, string, []string) ([]ToolMatch, error) {
83+
return nil, fmt.Errorf("store unavailable")
84+
},
85+
input: FindToolInput{ToolDescription: "query"},
86+
expectErr: true,
87+
errContains: "tool search failed",
88+
expectCreate: true,
89+
},
90+
{
91+
name: "propagates store upsert errors at creation",
92+
tools: []server.ServerTool{
93+
{Tool: mcp.Tool{Name: "tool_a", Description: "Tool A"}},
94+
},
95+
upsertFunc: func(context.Context, []server.ServerTool) error {
96+
return fmt.Errorf("upsert failed")
97+
},
98+
input: FindToolInput{ToolDescription: "query"},
99+
expectCreate: false,
100+
createErr: "failed to upsert tools into store",
101+
},
102+
}
103+
104+
for _, tc := range tests {
105+
t.Run(tc.name, func(t *testing.T) {
106+
t.Parallel()
107+
108+
store := &mockToolStore{
109+
upsertFunc: tc.upsertFunc,
110+
searchFunc: tc.searchFunc,
111+
}
112+
113+
opt, err := NewDummyOptimizer(context.Background(), store, tc.tools)
114+
if !tc.expectCreate {
115+
require.Error(t, err)
116+
require.Contains(t, err.Error(), tc.createErr)
117+
return
118+
}
119+
require.NoError(t, err)
120+
121+
result, err := opt.FindTool(context.Background(), tc.input)
122+
if tc.expectErr {
123+
require.Error(t, err)
124+
require.Contains(t, err.Error(), tc.errContains)
125+
return
126+
}
127+
128+
require.NoError(t, err)
129+
var names []string
130+
for _, m := range result.Tools {
131+
names = append(names, m.Name)
132+
}
133+
require.ElementsMatch(t, tc.expectedNames, names)
134+
})
135+
}
136+
}
137+
15138
func TestDummyOptimizer_FindTool(t *testing.T) {
16139
t.Parallel()
17140

@@ -139,7 +262,7 @@ func TestDummyOptimizerFactory_SharedStorage(t *testing.T) {
139262
require.Len(t, result2.Tools, 1)
140263
require.Equal(t, "tool_b", result2.Tools[0].Name)
141264

142-
// Both tools exist in the shared store — verify by creating an optimizer with both in scope
265+
// Both tools exist in the shared store — verify by creating an optimizer with both in allowedTools
143266
opt3, err := factory(ctx, []server.ServerTool{
144267
{Tool: mcp.Tool{Name: "tool_a", Description: "Alpha tool"}},
145268
{Tool: mcp.Tool{Name: "tool_b", Description: "Beta tool"}},
@@ -154,6 +277,80 @@ func TestDummyOptimizerFactory_SharedStorage(t *testing.T) {
154277
require.ElementsMatch(t, []string{"tool_a", "tool_b"}, names)
155278
}
156279

280+
func TestNewDummyOptimizerFactoryWithStore(t *testing.T) {
281+
t.Parallel()
282+
283+
tests := []struct {
284+
name string
285+
sessionATools []server.ServerTool
286+
sessionBTools []server.ServerTool
287+
searchQuery string
288+
sessionAExpect []string
289+
sessionBExpect []string
290+
}{
291+
{
292+
name: "separate sessions see only their own tools",
293+
sessionATools: []server.ServerTool{
294+
{Tool: mcp.Tool{Name: "tool_alpha", Description: "Alpha tool"}},
295+
},
296+
sessionBTools: []server.ServerTool{
297+
{Tool: mcp.Tool{Name: "tool_beta", Description: "Beta tool"}},
298+
},
299+
searchQuery: "tool",
300+
sessionAExpect: []string{"tool_alpha"},
301+
sessionBExpect: []string{"tool_beta"},
302+
},
303+
{
304+
name: "overlapping tools are shared",
305+
sessionATools: []server.ServerTool{
306+
{Tool: mcp.Tool{Name: "shared_tool", Description: "Shared tool"}},
307+
{Tool: mcp.Tool{Name: "tool_a_only", Description: "A only"}},
308+
},
309+
sessionBTools: []server.ServerTool{
310+
{Tool: mcp.Tool{Name: "shared_tool", Description: "Shared tool"}},
311+
{Tool: mcp.Tool{Name: "tool_b_only", Description: "B only"}},
312+
},
313+
searchQuery: "tool",
314+
sessionAExpect: []string{"shared_tool", "tool_a_only"},
315+
sessionBExpect: []string{"shared_tool", "tool_b_only"},
316+
},
317+
}
318+
319+
for _, tc := range tests {
320+
t.Run(tc.name, func(t *testing.T) {
321+
t.Parallel()
322+
323+
store := NewInMemoryToolStore()
324+
factory := NewDummyOptimizerFactoryWithStore(store)
325+
ctx := context.Background()
326+
327+
optA, err := factory(ctx, tc.sessionATools)
328+
require.NoError(t, err)
329+
330+
optB, err := factory(ctx, tc.sessionBTools)
331+
require.NoError(t, err)
332+
333+
resultA, err := optA.FindTool(ctx, FindToolInput{ToolDescription: tc.searchQuery})
334+
require.NoError(t, err)
335+
336+
var namesA []string
337+
for _, m := range resultA.Tools {
338+
namesA = append(namesA, m.Name)
339+
}
340+
require.ElementsMatch(t, tc.sessionAExpect, namesA)
341+
342+
resultB, err := optB.FindTool(ctx, FindToolInput{ToolDescription: tc.searchQuery})
343+
require.NoError(t, err)
344+
345+
var namesB []string
346+
for _, m := range resultB.Tools {
347+
namesB = append(namesB, m.Name)
348+
}
349+
require.ElementsMatch(t, tc.sessionBExpect, namesB)
350+
})
351+
}
352+
}
353+
157354
func TestDummyOptimizer_CallTool(t *testing.T) {
158355
t.Parallel()
159356

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
-- SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
-- SPDX-License-Identifier: Apache-2.0
3+
4+
-- Capabilities table stores tool/resource/prompt metadata
5+
CREATE TABLE IF NOT EXISTS llm_capabilities (
6+
name TEXT PRIMARY KEY,
7+
description TEXT NOT NULL DEFAULT ''
8+
);
9+
10+
-- FTS5 virtual table for full-text search with BM25 ranking.
11+
-- tokenize='porter' uses the Porter stemming algorithm so that morphological
12+
-- variants of a word (e.g. "running", "runs") match the root form "run"; irregular forms such as "ran" are not conflated by suffix stripping.
13+
-- This improves recall for natural-language tool descriptions.
14+
CREATE VIRTUAL TABLE IF NOT EXISTS llm_capabilities_fts USING fts5(
15+
name,
16+
description,
17+
content=llm_capabilities,
18+
content_rowid=rowid,
19+
tokenize='porter'
20+
);
21+
22+
-- Triggers to keep FTS index in sync with llm_capabilities table
23+
CREATE TRIGGER IF NOT EXISTS llm_capabilities_after_insert AFTER INSERT ON llm_capabilities BEGIN
24+
INSERT INTO llm_capabilities_fts(rowid, name, description) VALUES (new.rowid, new.name, new.description);
25+
END;
26+
27+
CREATE TRIGGER IF NOT EXISTS llm_capabilities_after_delete AFTER DELETE ON llm_capabilities BEGIN
28+
INSERT INTO llm_capabilities_fts(llm_capabilities_fts, rowid, name, description) VALUES('delete', old.rowid, old.name, old.description);
29+
END;
30+
31+
CREATE TRIGGER IF NOT EXISTS llm_capabilities_after_update AFTER UPDATE ON llm_capabilities BEGIN
32+
INSERT INTO llm_capabilities_fts(llm_capabilities_fts, rowid, name, description) VALUES('delete', old.rowid, old.name, old.description);
33+
INSERT INTO llm_capabilities_fts(rowid, name, description) VALUES (new.rowid, new.name, new.description);
34+
END;

0 commit comments

Comments
 (0)