Skip to content

Commit 433e6e5

Browse files
author
razvan
committed
fix: address Copilot review comments on PR #34
- Auto-enable IncludeDocs when mode=strict_docs - Use bufio.Scanner in readLines for memory efficiency - Use struct groupKey instead of string separator for collision safety - Extract shared isDocSymbolType/isDocExtension helpers for consistency - Remove non-doc extensions (.sh, .sql, .css, .scss, .svelte) from isDocFile - Fix misleading comment about code_block in test - Add error check for os.WriteFile in test
1 parent d735d3b commit 433e6e5

2 files changed

Lines changed: 89 additions & 54 deletions

File tree

internal/service/tools/smart_search.go

Lines changed: 85 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package tools
22

33
import (
4+
"bufio"
45
"context"
56
"errors"
67
"fmt"
@@ -98,6 +99,12 @@ func (t *SmartSearchTool) Execute(ctx context.Context, input SmartSearchInput) (
9899
limit = input.Limit
99100
}
100101

102+
// When strict_docs mode is requested, automatically enable docs search
103+
// so that the semantic engine actually fetches documentation results.
104+
if input.Mode == "strict_docs" {
105+
input.IncludeDocs = true
106+
}
107+
101108
// Run both search strategies in parallel
102109
type searchResult struct {
103110
label string
@@ -180,12 +187,13 @@ func (t *SmartSearchTool) Execute(ctx context.Context, input SmartSearchInput) (
180187
// Apply mode filtering
181188
var filtered []mergedResult
182189
for _, m := range merged {
183-
// Strict code mode: ignore completely any markdown or documentation type
184-
if input.Mode == "strict_code" && (m.symbolType == "documentation" || m.symbolType == "markdown" || m.symbolType == "code_block" || strings.HasSuffix(strings.ToLower(m.filePath), ".md") || strings.HasSuffix(strings.ToLower(m.filePath), ".html")) {
190+
isDoc := isDocSymbolType(m.symbolType) || isDocExtension(m.filePath)
191+
// Strict code mode: ignore completely any documentation type or doc file
192+
if input.Mode == "strict_code" && isDoc {
185193
continue
186194
}
187195
// Strict docs mode: ignore anything that isn't documentation
188-
if input.Mode == "strict_docs" && !(m.symbolType == "documentation" || m.symbolType == "markdown" || m.symbolType == "code_block" || strings.HasSuffix(strings.ToLower(m.filePath), ".md") || strings.HasSuffix(strings.ToLower(m.filePath), ".html")) {
196+
if input.Mode == "strict_docs" && !isDoc {
189197
continue
190198
}
191199
filtered = append(filtered, m)
@@ -481,26 +489,56 @@ func (t *SmartSearchTool) handleSearchError(err error, workspaceRoot, workspaceI
481489
return response.JSON()
482490
}
483491

484-
// readLines reads a specific range of lines from a file.
485-
// Lines are 1-indexed.
492+
// isDocSymbolType returns true if the symbol type represents documentation content.
493+
func isDocSymbolType(symbolType string) bool {
494+
return symbolType == "documentation" || symbolType == "code_block" || symbolType == "markdown"
495+
}
496+
497+
// isDocExtension returns true if the file path has a documentation or structured text extension.
498+
func isDocExtension(filePath string) bool {
499+
ext := strings.ToLower(filepath.Ext(filePath))
500+
switch ext {
501+
case ".md", ".markdown", ".html", ".htm",
502+
".yaml", ".yml", ".json", ".xml",
503+
".toml", ".rst":
504+
return true
505+
}
506+
return false
507+
}
508+
509+
// readLines reads a specific range of lines from a file using a buffered scanner
510+
// to avoid loading the entire file into memory. Lines are 1-indexed.
486511
func readLines(filePath string, startLine, endLine int) (string, error) {
487-
content, err := os.ReadFile(filePath)
512+
if startLine < 1 || endLine < startLine {
513+
return "", fmt.Errorf("invalid line range %d-%d", startLine, endLine)
514+
}
515+
516+
f, err := os.Open(filePath)
488517
if err != nil {
489518
return "", err
490519
}
491-
492-
lines := strings.Split(string(content), "\n")
493-
if startLine < 1 {
494-
startLine = 1
520+
defer f.Close()
521+
522+
var collected []string
523+
scanner := bufio.NewScanner(f)
524+
lineNum := 0
525+
for scanner.Scan() {
526+
lineNum++
527+
if lineNum > endLine {
528+
break
529+
}
530+
if lineNum >= startLine {
531+
collected = append(collected, scanner.Text())
532+
}
495533
}
496-
if endLine > len(lines) {
497-
endLine = len(lines)
534+
if err := scanner.Err(); err != nil {
535+
return "", err
498536
}
499-
if startLine > endLine || startLine > len(lines) {
500-
return "", fmt.Errorf("invalid line range")
537+
if len(collected) == 0 {
538+
return "", fmt.Errorf("invalid line range: file has %d lines, requested %d-%d", lineNum, startLine, endLine)
501539
}
502-
503-
return strings.Join(lines[startLine-1:endLine], "\n"), nil
540+
541+
return strings.Join(collected, "\n"), nil
504542
}
505543

506544
// groupDocsByTree aggregates "documentation" and "code_block" chunks
@@ -512,39 +550,35 @@ func (t *SmartSearchTool) groupDocsByTree(results []mergedResult) []mergedResult
512550
}
513551

514552
var out []mergedResult
515-
516-
// Groups are keyed by: filePath_|_signature -> slice of mergedResult indices in groups slice
517-
type docGroup struct {
553+
554+
type groupKey struct {
518555
filePath string
519556
signature string
520-
items []*mergedResult
521-
maxScore float32
522-
minLine int
523-
maxLine int
524-
source string
525557
}
526-
527-
groupsMap := make(map[string]*docGroup)
528-
var orderedGroups []string // keep track of the first time we see a group to maintain rough sorting
558+
559+
type docGroup struct {
560+
key groupKey
561+
items []*mergedResult
562+
maxScore float32
563+
minLine int
564+
maxLine int
565+
source string
566+
}
567+
568+
groupsMap := make(map[groupKey]*docGroup)
569+
var orderedGroups []groupKey // keep track of the first time we see a group to maintain rough sorting
529570

530571
for i := range results {
531572
res := &results[i]
532-
533-
// Only group documentation types and files ending in .md or .html
534-
ext := strings.ToLower(filepath.Ext(res.filePath))
535-
isDocFile := ext == ".md" || ext == ".markdown" || ext == ".html" || ext == ".htm" ||
536-
ext == ".yaml" || ext == ".yml" || ext == ".json" || ext == ".xml" ||
537-
ext == ".toml" || ext == ".rst" || ext == ".css" || ext == ".scss" || ext == ".svelte" || ext == ".sql" || ext == ".sh"
538-
539-
isDocType := res.symbolType == "documentation" || res.symbolType == "code_block" || res.symbolType == "markdown"
540-
541-
if !isDocType || !isDocFile || res.signature == "" {
573+
574+
// Only group documentation types and documentation/structured text files
575+
if !isDocSymbolType(res.symbolType) || !isDocExtension(res.filePath) || res.signature == "" {
542576
// Pass-through code or items without signature
543577
out = append(out, *res)
544578
continue
545579
}
546-
547-
key := fmt.Sprintf("%s_|_%s", res.filePath, res.signature)
580+
581+
key := groupKey{filePath: res.filePath, signature: res.signature}
548582
if g, exists := groupsMap[key]; exists {
549583
g.items = append(g.items, res)
550584
if res.score > g.maxScore {
@@ -569,13 +603,12 @@ func (t *SmartSearchTool) groupDocsByTree(results []mergedResult) []mergedResult
569603
maxL = 1
570604
}
571605
groupsMap[key] = &docGroup{
572-
filePath: res.filePath,
573-
signature: res.signature,
574-
items: []*mergedResult{res},
575-
maxScore: res.score,
576-
minLine: minL,
577-
maxLine: maxL,
578-
source: res.source,
606+
key: key,
607+
items: []*mergedResult{res},
608+
maxScore: res.score,
609+
minLine: minL,
610+
maxLine: maxL,
611+
source: res.source,
579612
}
580613
orderedGroups = append(orderedGroups, key)
581614
}
@@ -584,23 +617,23 @@ func (t *SmartSearchTool) groupDocsByTree(results []mergedResult) []mergedResult
584617
// Reconstruct the grouped items
585618
for _, key := range orderedGroups {
586619
g := groupsMap[key]
587-
620+
588621
if len(g.items) == 1 {
589622
// Nothing to merge, just append
590623
out = append(out, *g.items[0])
591624
continue
592625
}
593-
626+
594627
// Multiple chunks in this group. Let's merge them!
595628
// Attempt to read the full continuous block from the file
596629
fullContent := ""
597630
if g.minLine > 0 && g.maxLine >= g.minLine {
598-
content, err := readLines(g.filePath, g.minLine, g.maxLine)
631+
content, err := readLines(g.key.filePath, g.minLine, g.maxLine)
599632
if err == nil {
600633
fullContent = content
601634
}
602635
}
603-
636+
604637
// If reading from disk failed, append the contents manually with an ellipsis
605638
if fullContent == "" {
606639
var contents []string
@@ -620,10 +653,10 @@ func (t *SmartSearchTool) groupDocsByTree(results []mergedResult) []mergedResult
620653
merged := mergedResult{
621654
id: fmt.Sprintf("merged_%s_%d_%d", baseItem.id, g.minLine, g.maxLine),
622655
score: g.maxScore,
623-
filePath: g.filePath,
656+
filePath: g.key.filePath,
624657
name: baseItem.name,
625658
symbolType: "documentation_merged",
626-
signature: g.signature,
659+
signature: g.key.signature,
627660
pkg: baseItem.pkg,
628661
docstring: fmt.Sprintf("Merged %d chunks spanning %d lines.", len(g.items), g.maxLine-g.minLine+1),
629662
content: fullContent,

internal/service/tools/smart_search_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ func TestGroupDocsByTree(t *testing.T) {
1717
for i := 0; i < 20; i++ {
1818
lines[i] = "Line " + string(rune('A'+i)) // Line A, Line B, etc.
1919
}
20-
os.WriteFile(tempFilePath, []byte(strings.Join(lines, "\n")), 0644)
20+
if err := os.WriteFile(tempFilePath, []byte(strings.Join(lines, "\n")), 0644); err != nil {
21+
t.Fatalf("failed to create test file %s: %v", tempFilePath, err)
22+
}
2123

2224
// Create tool instance (we only need the groupDocsByTree method)
2325
tool := &SmartSearchTool{}
@@ -57,7 +59,7 @@ func TestGroupDocsByTree(t *testing.T) {
5759
id: "chunk_3", filePath: tempFilePath, symbolType: "documentation", signature: "### Setup",
5860
startLine: 8, endLine: 10, score: 0.5,
5961
},
60-
// This one is code, ignore grouping
62+
// code_block is also a doc type, so it gets grouped under "### Intro"
6163
{
6264
id: "chunk_4", filePath: tempFilePath, symbolType: "code_block", signature: "### Intro",
6365
startLine: 7, endLine: 7, score: 0.85,

0 commit comments

Comments
 (0)