Skip to content

Commit 771a034

Browse files
author
razvan
committed
feat(search): add match_reasons annotation (#2 from proposals)
- pkg/scoring: DetectMatchReasons() pure function
- case-insensitive token matching per field (symbol_name/signature/content/docstring)
- reuses FilterTokens for consistency with rest of scoring package
- 6 unit tests covering all edge cases
- SmartSearchInput: new include_reasons bool field
- serializeResults/resultToMap: accept query + includeReasons
- match_reasons added to result map only when include_reasons=true (zero overhead when not requested)
1 parent e98a5a1 commit 771a034

4 files changed

Lines changed: 118 additions & 5 deletions

File tree

internal/service/tools/smart_search.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ func (t *SmartSearchTool) Description() string {
4747
"Use 'mode'=\"strict_docs\" when searching for architectural plans or summaries. " +
4848
"Use 'mode'=\"all\" or omit for broad scans. " +
4949
"Set 'min_score' (0.0-1.0) to filter out low-relevance results. When omitted, an automatic threshold is applied: " +
50-
"if the top result scores above 0.70, results below 40% of the top score are automatically pruned."
50+
"if the top result scores above 0.70, results below 40% of the top score are automatically pruned. " +
51+
"Set 'include_reasons' to true to include a 'match_reasons' field in each result, explaining which fields " +
52+
"(symbol_name, signature, content, docstring) contributed to the match — useful for understanding result relevance."
5153
}
5254

5355
type SmartSearchInput struct {
@@ -57,6 +59,7 @@ type SmartSearchInput struct {
5759
MinScore float32 `json:"min_score,omitempty"`
5860
IncludeFullContent bool `json:"include_full_content,omitempty"`
5961
IncludeDocs bool `json:"include_docs,omitempty"`
62+
IncludeReasons bool `json:"include_reasons,omitempty"`
6063
Mode string `json:"mode,omitempty"`
6164
}
6265

@@ -125,7 +128,7 @@ func (t *SmartSearchTool) Execute(ctx context.Context, input SmartSearchInput) (
125128

126129
isFallback := sr.meta.collection == "fallback"
127130
response := t.buildResponseMeta(sr.meta, useCompact)
128-
serializeResults(&response, merged, useCompact, isFallback)
131+
serializeResults(&response, merged, useCompact, isFallback, query, input.IncludeReasons)
129132

130133
return response.JSON()
131134
}

internal/service/tools/smart_search_pipeline.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,9 @@ func (t *SmartSearchTool) buildResponseMeta(meta searchMetadata, useCompact bool
237237

238238
// resultToMap converts a mergedResult to the output map format.
239239
// includeContent controls whether the full source code is included.
240-
func resultToMap(m mergedResult, includeContent bool) map[string]any {
240+
// When includeReasons is true, a match_reasons field is added explaining
241+
// which payload fields (symbol_name, signature, content, docstring) matched the query.
242+
func resultToMap(m mergedResult, includeContent bool, query string, includeReasons bool) map[string]any {
241243
item := map[string]any{
242244
"score": m.score,
243245
"file_path": m.filePath,
@@ -257,6 +259,9 @@ func resultToMap(m mergedResult, includeContent bool) map[string]any {
257259
if m.source != "" {
258260
item["_source"] = m.source
259261
}
262+
if includeReasons && query != "" {
263+
item["match_reasons"] = scoring.DetectMatchReasons(query, m.name, m.signature, m.content, m.docstring)
264+
}
260265
return item
261266
}
262267

@@ -276,14 +281,15 @@ func buildResultsMessage(count int, useCompact, isFallback bool) string {
276281

277282
// serializeResults populates the ToolResponse with either compact or full result data,
278283
// calculates telemetry savings, and detects stale indexed files.
279-
func serializeResults(response *ToolResponse, merged []mergedResult, useCompact, isFallback bool) {
284+
// query and includeReasons control the optional match_reasons annotation per result.
285+
func serializeResults(response *ToolResponse, merged []mergedResult, useCompact, isFallback bool, query string, includeReasons bool) {
280286
data := make([]map[string]any, 0, len(merged))
281287
var baselineBytes, actualBytes int64
282288
seenFiles := make(map[string]bool)
283289
var staleFiles []string
284290

285291
for _, m := range merged {
286-
data = append(data, resultToMap(m, !useCompact))
292+
data = append(data, resultToMap(m, !useCompact, query, includeReasons))
287293

288294
if !useCompact {
289295
actualBytes += int64(len(m.content))

pkg/scoring/match_reasons.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package scoring
2+
3+
import "strings"
4+
5+
// ─── Match Reason Annotation ─────────────────────────────────────────────────
6+
7+
// MatchReasons reports, per payload field of a search result, whether a query
// token was found in that field. It lets a consumer (for example an AI agent)
// see why a result was returned and decide whether to request the full content
// or how much confidence to place in the hit.
//
// No field uses omitempty, so a false value is always written explicitly when
// the struct is serialized to JSON.
type MatchReasons struct {
	SymbolName bool `json:"symbol_name"` // a query token occurs in the symbol name
	Signature  bool `json:"signature"`   // a query token occurs in the function signature
	Content    bool `json:"content"`     // a query token occurs in the code body
	Docstring  bool `json:"docstring"`   // a query token occurs in the docstring/comments
}
16+
17+
// DetectMatchReasons returns which fields of a search result contain the query tokens.
18+
// It uses simple case-insensitive substring matching — the same heuristic used
19+
// by the fallback lexical scorer, intentionally kept fast and allocation-light.
20+
//
21+
// query is the original search query (will be lowercased internally).
22+
// name, signature, content, docstring are the corresponding payload fields.
23+
func DetectMatchReasons(query, name, signature, content, docstring string) MatchReasons {
24+
lower := strings.ToLower(query)
25+
tokens := FilterTokens(strings.Fields(lower))
26+
if len(tokens) == 0 {
27+
return MatchReasons{}
28+
}
29+
30+
containsAny := func(text string) bool {
31+
t := strings.ToLower(text)
32+
for _, tok := range tokens {
33+
if strings.Contains(t, tok) {
34+
return true
35+
}
36+
}
37+
return false
38+
}
39+
40+
return MatchReasons{
41+
SymbolName: containsAny(name),
42+
Signature: containsAny(signature),
43+
Content: containsAny(content),
44+
Docstring: containsAny(docstring),
45+
}
46+
}

pkg/scoring/match_reasons_test.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package scoring
2+
3+
import "testing"
4+
5+
func TestDetectMatchReasonsSymbolName(t *testing.T) {
6+
r := DetectMatchReasons("Calculator", "Calculator", "", "", "")
7+
if !r.SymbolName {
8+
t.Error("expected SymbolName=true")
9+
}
10+
if r.Signature || r.Content || r.Docstring {
11+
t.Error("expected other fields false")
12+
}
13+
}
14+
15+
func TestDetectMatchReasonsMultipleFields(t *testing.T) {
16+
r := DetectMatchReasons("auth token", "authenticate", "func authenticate(token string)", "", "validates auth")
17+
if !r.SymbolName {
18+
t.Error("expected SymbolName=true (auth in authenticate)")
19+
}
20+
if !r.Signature {
21+
t.Error("expected Signature=true (token in signature)")
22+
}
23+
if !r.Docstring {
24+
t.Error("expected Docstring=true (auth in docstring)")
25+
}
26+
if r.Content {
27+
t.Error("expected Content=false")
28+
}
29+
}
30+
31+
func TestDetectMatchReasonsCaseInsensitive(t *testing.T) {
32+
r := DetectMatchReasons("CALCULATOR", "Calculator", "", "", "")
33+
if !r.SymbolName {
34+
t.Error("expected case-insensitive match on SymbolName")
35+
}
36+
}
37+
38+
func TestDetectMatchReasonsShortTokensIgnored(t *testing.T) {
39+
// tokens "ab" and "x" are ≤2 chars → filtered out → no matches
40+
r := DetectMatchReasons("ab x", "ab", "x", "ab x", "ab")
41+
if r.SymbolName || r.Signature || r.Content || r.Docstring {
42+
t.Error("short tokens should be filtered, no match expected")
43+
}
44+
}
45+
46+
func TestDetectMatchReasonsEmptyQuery(t *testing.T) {
47+
r := DetectMatchReasons("", "Calculator", "func Calculator()", "body", "docs")
48+
if r.SymbolName || r.Signature || r.Content || r.Docstring {
49+
t.Error("empty query should produce no matches")
50+
}
51+
}
52+
53+
func TestDetectMatchReasonsNoMatch(t *testing.T) {
54+
r := DetectMatchReasons("Payment", "UserAuth", "func UserAuth()", "body code", "user authenticates")
55+
if r.SymbolName || r.Signature || r.Content || r.Docstring {
56+
t.Error("payment not in any field, expected all false")
57+
}
58+
}

0 commit comments

Comments
 (0)