Skip to content

Commit 0928d23

Browse files
dynamic mcp: add registry and loading tools
Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 6986e50 commit 0928d23

6 files changed

Lines changed: 1448 additions & 0 deletions

File tree

mcp/bm25.go

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// Copyright (c) 2023-present Mattermost, Inc. All Rights Reserved.
2+
// See LICENSE.txt for license information.
3+
4+
package mcp
5+
6+
import (
7+
"math"
8+
"sort"
9+
"strings"
10+
"unicode"
11+
)
12+
13+
// BM25 tuning parameters used by (*BM25Index).Search.
const (
	// bm25K1 controls term-frequency saturation: it bounds how much repeated
	// occurrences of a query term can keep adding to a document's score.
	bm25K1 = 1.2
	// bm25B weights document-length normalization. With 0 the document length
	// is ignored; with 1 the length is fully normalized against the average
	// document length of the index.
	bm25B = 0.75
)
17+
18+
// BM25Document is a single piece of text handed to NewBM25Index for indexing.
type BM25Document struct {
	// ID identifies the document; it is echoed back in BM25Result.ID.
	ID string
	// Text is the content that gets tokenized and scored against queries.
	Text string
}
22+
23+
// BM25Result is one match returned by (*BM25Index).Search.
type BM25Result struct {
	// ID is the ID of the matching document.
	ID string
	// Score is the document's accumulated BM25 score for the query; higher
	// means more relevant.
	Score float64
}
27+
28+
// BM25Index is an in-memory BM25 index over a fixed set of documents. Build
// one with NewBM25Index and query it with Search.
type BM25Index struct {
	// documents holds the per-document statistics, in input order.
	documents []bm25IndexedDocument
	// documentCount is the number of documents passed to NewBM25Index,
	// including documents that produced no tokens.
	documentCount int
	// documentFreqs maps a token to the number of documents containing it.
	documentFreqs map[string]int
	// averageDocLength is the mean token count per document.
	averageDocLength float64
}

// bm25IndexedDocument is the per-document state kept inside a BM25Index.
type bm25IndexedDocument struct {
	// id is the ID of the source BM25Document.
	id string
	// termFreqs maps a token to its number of occurrences in the document.
	termFreqs map[string]int
	// tokenCount is the total number of tokens in the document, stored as a
	// float64 because it is only used in floating-point scoring arithmetic.
	tokenCount float64
}
40+
41+
func NewBM25Index(docs []BM25Document) *BM25Index {
42+
idx := &BM25Index{
43+
documents: make([]bm25IndexedDocument, 0, len(docs)),
44+
documentCount: len(docs),
45+
documentFreqs: make(map[string]int),
46+
}
47+
48+
var totalDocLength float64
49+
for _, doc := range docs {
50+
tokens := tokenizeBM25Text(doc.Text)
51+
termFreqs := make(map[string]int)
52+
for _, token := range tokens {
53+
termFreqs[token]++
54+
}
55+
56+
for token := range termFreqs {
57+
idx.documentFreqs[token]++
58+
}
59+
60+
tokenCount := float64(len(tokens))
61+
totalDocLength += tokenCount
62+
idx.documents = append(idx.documents, bm25IndexedDocument{
63+
id: doc.ID,
64+
termFreqs: termFreqs,
65+
tokenCount: tokenCount,
66+
})
67+
}
68+
69+
if idx.documentCount > 0 {
70+
idx.averageDocLength = totalDocLength / float64(idx.documentCount)
71+
}
72+
73+
return idx
74+
}
75+
76+
func (idx *BM25Index) Search(query string, limit int) []BM25Result {
77+
if idx == nil || idx.documentCount == 0 || idx.averageDocLength == 0 {
78+
return nil
79+
}
80+
81+
queryTokens := uniqueBM25Tokens(tokenizeBM25Text(query))
82+
if len(queryTokens) == 0 {
83+
return nil
84+
}
85+
86+
results := make([]BM25Result, 0, len(idx.documents))
87+
for _, doc := range idx.documents {
88+
var score float64
89+
for _, token := range queryTokens {
90+
tf := float64(doc.termFreqs[token])
91+
if tf == 0 {
92+
continue
93+
}
94+
95+
df := idx.documentFreqs[token]
96+
idf := math.Log(1 + (float64(idx.documentCount-df)+0.5)/(float64(df)+0.5))
97+
score += idf * (tf * (bm25K1 + 1)) / (tf + bm25K1*(1-bm25B+bm25B*doc.tokenCount/idx.averageDocLength))
98+
}
99+
100+
if score > 0 {
101+
results = append(results, BM25Result{
102+
ID: doc.id,
103+
Score: score,
104+
})
105+
}
106+
}
107+
108+
sort.Slice(results, func(i, j int) bool {
109+
if results[i].Score == results[j].Score {
110+
return results[i].ID < results[j].ID
111+
}
112+
return results[i].Score > results[j].Score
113+
})
114+
115+
if len(results) == 0 {
116+
return nil
117+
}
118+
119+
if limit > 0 && len(results) > limit {
120+
results = results[:limit]
121+
}
122+
123+
return results
124+
}
125+
126+
// tokenizeBM25Text splits text into lowercase tokens.
//
// A token is a maximal run of Unicode letters and numbers. Every other rune
// (whitespace, punctuation, underscores, symbols) acts as a separator, so
// "jira__get_issue" yields "jira", "get" and "issue". No word segmentation is
// attempted: contiguous text in scripts written without separators stays a
// single token.
//
// It returns nil when text contains no letters or numbers. Tokens that needed
// no lowercasing may share memory with text.
func tokenizeBM25Text(text string) []string {
	tokens := strings.FieldsFunc(text, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	})
	if len(tokens) == 0 {
		return nil
	}

	for i, token := range tokens {
		tokens[i] = strings.ToLower(token)
	}

	return tokens
}
148+
149+
// uniqueBM25Tokens returns tokens with duplicates removed, keeping the first
// occurrence of each token and preserving the original order. The input slice
// is not modified, and the result is always non-nil.
func uniqueBM25Tokens(tokens []string) []string {
	seen := make(map[string]struct{}, len(tokens))
	unique := make([]string, 0, len(tokens))
	for _, token := range tokens {
		if _, dup := seen[token]; dup {
			continue
		}
		seen[token] = struct{}{}
		unique = append(unique, token)
	}
	return unique
}

mcp/bm25_test.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Copyright (c) 2023-present Mattermost, Inc. All Rights Reserved.
2+
// See LICENSE.txt for license information.
3+
4+
package mcp
5+
6+
import (
7+
"testing"
8+
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
func TestBM25SearchRanksRelevantDocuments(t *testing.T) {
13+
idx := NewBM25Index([]BM25Document{
14+
{ID: "jira__get_issue", Text: "jira__get_issue get issue jira ticket"},
15+
{ID: "github__create_pull_request", Text: "github__create_pull_request create pull request"},
16+
{ID: "mattermost__search_users", Text: "mattermost__search_users search users"},
17+
})
18+
19+
results := idx.Search("jira issue", 10)
20+
21+
require.NotEmpty(t, results)
22+
require.Equal(t, "jira__get_issue", results[0].ID)
23+
}
24+
25+
func TestBM25SearchUsesNameAndDescription(t *testing.T) {
26+
idx := NewBM25Index([]BM25Document{
27+
{ID: "jira__get_issue", Text: "jira__get_issue get_issue"},
28+
{ID: "github__create_pull_request", Text: "opens a collaboration review"},
29+
})
30+
31+
nameResults := idx.Search("get issue", 10)
32+
require.NotEmpty(t, nameResults)
33+
require.Equal(t, "jira__get_issue", nameResults[0].ID)
34+
35+
descriptionResults := idx.Search("collaboration review", 10)
36+
require.NotEmpty(t, descriptionResults)
37+
require.Equal(t, "github__create_pull_request", descriptionResults[0].ID)
38+
}
39+
40+
func TestBM25SearchLimitAndTieBreak(t *testing.T) {
41+
idx := NewBM25Index([]BM25Document{
42+
{ID: "charlie", Text: "shared"},
43+
{ID: "bravo", Text: "shared"},
44+
{ID: "alpha", Text: "shared"},
45+
})
46+
47+
results := idx.Search("shared", 2)
48+
49+
require.Len(t, results, 2)
50+
require.Equal(t, "alpha", results[0].ID)
51+
require.Equal(t, "bravo", results[1].ID)
52+
}
53+
54+
func TestBM25EmptyQueryReturnsNil(t *testing.T) {
55+
idx := NewBM25Index([]BM25Document{
56+
{ID: "jira__get_issue", Text: "jira issue"},
57+
})
58+
59+
require.Nil(t, idx.Search("", 10))
60+
require.Nil(t, idx.Search(" ", 10))
61+
}
62+
63+
func TestBM25NoMatchingTokensReturnsNil(t *testing.T) {
64+
idx := NewBM25Index([]BM25Document{
65+
{ID: "jira__get_issue", Text: "jira issue"},
66+
})
67+
68+
require.Nil(t, idx.Search("github", 10))
69+
}
70+
71+
func TestBM25TokenizeNonLatin(t *testing.T) {
72+
idx := NewBM25Index([]BM25Document{
73+
{ID: "japanese", Text: "検索 ユーザー"},
74+
{ID: "chinese_contiguous", Text: "用户搜索"},
75+
})
76+
77+
japaneseResults := idx.Search("検索", 10)
78+
require.NotEmpty(t, japaneseResults)
79+
require.Equal(t, "japanese", japaneseResults[0].ID)
80+
81+
contiguousResults := idx.Search("用户搜索", 10)
82+
require.NotEmpty(t, contiguousResults)
83+
require.Equal(t, "chinese_contiguous", contiguousResults[0].ID)
84+
85+
// There is no CJK segmentation: a substring query does not match a contiguous token.
86+
require.Nil(t, idx.Search("用户", 10))
87+
}
88+
89+
func TestBM25TokenizeNamespacedSnakeNames(t *testing.T) {
90+
idx := NewBM25Index([]BM25Document{
91+
{ID: "jira__get_issue", Text: "jira__get_issue"},
92+
})
93+
94+
results := idx.Search("get issue", 10)
95+
96+
require.NotEmpty(t, results)
97+
require.Equal(t, "jira__get_issue", results[0].ID)
98+
}

0 commit comments

Comments
 (0)