Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ data/
test_data/
qdrant_data/
barou/
.agent/

# Binaries
/cmd/agent-runner/agent-runner
Expand Down Expand Up @@ -59,3 +58,8 @@ temp/
# Scripts
scripts/
.ragcode/
.agent/skills/

# Debug logs
startup_debug.txt
windsurf_debug_log.txt
34 changes: 24 additions & 10 deletions cmd/install/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,9 +511,9 @@ func installBinary() {
}
var binDir string
if runtime.GOOS == "windows" {
binDir = filepath.Join(home, ".local", "share", "ragcode", "bin")
binDir = filepath.Join(home, installDirName, "bin")
} else {
binDir = filepath.Join(home, ".local", "share", "ragcode", "bin")
binDir = filepath.Join(home, installDirName, "bin")
}
if err := os.MkdirAll(binDir, 0755); err != nil {
fail(fmt.Sprintf("Could not create bin directory: %v", err))
Expand Down Expand Up @@ -588,10 +588,13 @@ func copyFile(src, dst string) error {
return err
}
}
defer destFile.Close()
_, copyErr := io.Copy(destFile, sourceFile)
closeErr := destFile.Close()

_, err = io.Copy(destFile, sourceFile)
return err
if copyErr != nil {
return copyErr
}
return closeErr
}

// downloadAndExtractBinary fetches the release archive and extracts the binary.
Expand Down Expand Up @@ -661,11 +664,17 @@ func downloadAndExtractBinary(dest string) bool {
warn(fmt.Sprintf("Could not create destination file: %v", err))
return false
}
defer outFile.Close()
cmd.Stdout = outFile

if err := cmd.Run(); err != nil {
warn(fmt.Sprintf("Failed to extract binary: %v", err))
runErr := cmd.Run()
closeErr := outFile.Close()

if runErr != nil {
warn(fmt.Sprintf("Failed to extract binary: %v", runErr))
return false
}
if closeErr != nil {
warn(fmt.Sprintf("Failed to finalise binary file: %v", closeErr))
return false
}

Expand Down Expand Up @@ -707,10 +716,15 @@ func addToPath(binDir string) {
warn(fmt.Sprintf("Could not update shell config: %v", err))
return
}
defer f.Close()

if _, err := f.WriteString(fmt.Sprintf("\nexport PATH=\"%s:$PATH\"\n", binDir)); err != nil {
warn(fmt.Sprintf("Could not write to shell config: %v", err))
if cerr := f.Close(); cerr != nil {
warn(fmt.Sprintf("Could not finalise shell config after write failure: %v", cerr))
}
return
}
if err := f.Close(); err != nil {
warn(fmt.Sprintf("Could not finalise shell config: %v", err))
} else {
success(fmt.Sprintf("Added to %s (restart shell to apply)", shellConfig))
}
Expand Down
130 changes: 111 additions & 19 deletions cmd/rag-code-mcp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"os/signal"
"path/filepath"
"strings"
"sync"
"syscall"
"time"

Expand Down Expand Up @@ -364,16 +365,6 @@ workspace:
}

func main() {
// AGGRESSIVE STARTUP DEBUG
f, _ := os.Create("/tmp/ragcode-startup.txt")
cwd, _ := os.Getwd()
exe, _ := os.Executable()
fmt.Fprintf(f, "Time: %s\n", time.Now())
fmt.Fprintf(f, "Exe: %s\n", exe)
fmt.Fprintf(f, "CWD: %s\n", cwd)
fmt.Fprintf(f, "Args: %v\n", os.Args)
f.Close()

// Define flags
configPath := flag.String("config", "config.yaml", "Path to configuration file")
ollamaBaseURLFlag := flag.String("ollama-base-url", "", "Ollama base URL (overrides config/env)")
Expand Down Expand Up @@ -437,7 +428,7 @@ func main() {
// Handle update flag
if *updateFlag {
fmt.Println("Checking for updates...")
info, err := updater.CheckForUpdates(Version)
info, err := updater.CheckForUpdates(context.Background(), Version, true)
if err != nil {
log.Fatalf("Failed to check for updates: %v", err)
}
Expand All @@ -447,8 +438,26 @@ func main() {
}

fmt.Printf("Found new version: %s\nDownloading...\n", info.LatestVersion)
tempFile := filepath.Join(os.TempDir(), "ragcode_update.tar.gz")
if err := info.DownloadAndVerify(tempFile); err != nil {

// Determine extension from asset URL
ext := ".tar.gz"
if strings.HasSuffix(info.AssetURL, ".zip") {
ext = ".zip"
}
// Create a unique temporary file securely
tmp, err := os.CreateTemp("", "ragcode_update_*"+ext)
if err != nil {
log.Fatalf("Failed to create temporary file for update: %v", err)
}
tempFile := tmp.Name()
// We only need the path; close the file descriptor
if err := tmp.Close(); err != nil {
log.Fatalf("Failed to close temporary file for update: %v", err)
}
// Ensure the temporary file is removed after applying the update
defer os.Remove(tempFile)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The defer os.Remove(tempFile) will not execute because os.Exit(0) is called at the end of this block (line 470). In Go, defer statements are not run when os.Exit() is called. Since this is a short-lived CLI path, you should manually call os.Remove(tempFile) before os.Exit(0) or wrap this logic in a function to ensure the temporary archive is cleaned up.


if err := info.DownloadAndVerify(context.Background(), tempFile); err != nil {
log.Fatalf("Update failed: %v", err)
}

Expand Down Expand Up @@ -477,12 +486,7 @@ func main() {
}

// Background update check
go func() {
info, err := updater.CheckForUpdates(Version)
if err == nil && info != nil {
logger.Info("🌟 New version available: %s. Run 'rag-code-mcp --update' to upgrade.", info.LatestVersion)
}
}()
triggerBackgroundUpdateCheck()

// Apply logging settings from config unless env vars already override them
applyLoggingConfig(cfg.Logging)
Expand Down Expand Up @@ -643,6 +647,11 @@ func main() {

indexWorkspaceTool := tools.NewIndexWorkspaceTool(workspaceManager)

listSkillsTool := tools.NewListSkillsTool()
installSkillTool := tools.NewInstallSkillTool(workspaceManager)
checkUpdateTool := tools.NewCheckUpdateTool(Version)
applyUpdateTool := tools.NewApplyUpdateTool(Version)

// Example: use typed ToolHandlerFor for search_code
registerSearchCodeToolTyped(server, searchTool, cfg)

Expand All @@ -655,6 +664,10 @@ func main() {
registerAgentTool(server, searchDocsTool, cfg)
registerAgentTool(server, hybridTool, cfg)
registerAgentTool(server, indexWorkspaceTool, cfg)
registerAgentTool(server, listSkillsTool, cfg)
registerAgentTool(server, installSkillTool, cfg)
registerAgentTool(server, checkUpdateTool, cfg)
registerAgentTool(server, applyUpdateTool, cfg)
Comment on lines 650 to +670
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description/title focuses on embedding payload truncation (#53), but this change set also adds a skills system and updater tools (list_skills/install_skill/check_update/apply_update) plus background update checks. This looks like a significant scope increase that isn't reflected in the PR metadata; please update the PR description (and possibly the title) to cover these additions, or split the updater/skills changes into a separate PR to keep review and rollback risk manageable.

Copilot uses AI. Check for mistakes.

if err := registerFileResources(server); err != nil {
log.Fatalf("Failed to register resources: %v", err)
Expand Down Expand Up @@ -708,6 +721,9 @@ func registerSearchCodeToolTyped(server *mcp.Server, tool *tools.SearchLocalInde

logger.Info("✅ Tool '%s' completed in %v", tool.Name(), duration)

// Trigger background update check (non-blocking)
triggerBackgroundUpdateCheck()

return nil, SearchCodeOutput{Results: result}, nil
})
}
Expand Down Expand Up @@ -751,6 +767,9 @@ func registerAgentTool(server *mcp.Server, tool MCPTool, cfg *config.Config) {

logger.Info("✅ Tool '%s' completed in %v", tool.Name(), duration)

// Trigger background update check (non-blocking)
triggerBackgroundUpdateCheck()

return &mcp.CallToolResult{
Content: []mcp.Content{
&mcp.TextContent{Text: result},
Expand Down Expand Up @@ -1097,6 +1116,54 @@ func getToolSchema(toolName string) map[string]interface{} {
"required": []string{"query"},
}

case "list_skills":
return map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{},
}

case "install_skill":
return map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"skill_id": map[string]interface{}{
"type": "string",
"description": "The ID of the skill to install or uninstall",
},
"active": map[string]interface{}{
"type": "boolean",
"description": "True to install the skill, false to uninstall it",
},
"file_path": map[string]interface{}{
"type": "string",
"description": "Optional: file path to help detect workspace context",
},
},
"required": []string{"skill_id", "active"},
}

case "check_update":
return map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"force": map[string]interface{}{
"type": "boolean",
"description": "Force check ignoring cache (default: false)",
},
},
}

case "apply_update":
return map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"force": map[string]interface{}{
"type": "boolean",
"description": "Force update even if version matches (default: false)",
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tool schema for apply_update documents force as defaulting to false, but ApplyUpdateTool.Execute defaults force to true when the argument is omitted. This mismatch can lead to surprising behavior for MCP clients relying on the schema. Align the schema description with the implementation, or change the tool default to match the documented default.

Suggested change
"description": "Force update even if version matches (default: false)",
"description": "Force update even if version matches (default: true)",

Copilot uses AI. Check for mistakes.
},
},
}

default:
return map[string]interface{}{
"type": "object",
Expand Down Expand Up @@ -1252,6 +1319,7 @@ func ensureIDERules(cfg *config.Config, filePath string) {
- Always provide 'file_path' to tools to ensure they detect the correct project context.
- Use 'hybrid_search' if looking for exact variable names or error messages.
- If the tool says "workspace not indexed", use 'index_workspace' once.
- **Skills System**: Use 'list_skills' to see available AI behaviors and 'install_skill' to enable them in this workspace (e.g., 'ragcode-priority', 'ragcode-update').
`

// 3. Define target rule files
Expand Down Expand Up @@ -1287,3 +1355,27 @@ func ensureIDERules(cfg *config.Config, filePath string) {
}
}
}

var (
lastUpdateCheck time.Time
lastUpdateCheckMutex sync.Mutex
)

// triggerBackgroundUpdateCheck launches a fire-and-forget update check in its
// own goroutine, at most once per hour within the current process.
//
// The time of the last launch is kept in lastUpdateCheck and guarded by
// lastUpdateCheckMutex; a call arriving less than an hour after the previous
// launch returns without doing anything. This throttle only limits how many
// goroutines this session spawns; any longer-term caching (the original note
// mentions a 24h window) is left to updater.CheckForUpdates.
//
// When the check returns a non-nil result without error, a "new version
// available" hint is logged. An error or a nil result is dropped silently.
func triggerBackgroundUpdateCheck() {
	const minInterval = time.Hour

	// Decide under the lock, then release it before spawning the goroutine.
	lastUpdateCheckMutex.Lock()
	due := time.Since(lastUpdateCheck) >= minInterval
	if due {
		lastUpdateCheck = time.Now()
	}
	lastUpdateCheckMutex.Unlock()

	if !due {
		return
	}

	go func() {
		release, checkErr := updater.CheckForUpdates(context.Background(), Version, false)
		if checkErr != nil || release == nil {
			return
		}
		logger.Info("🌟 New version available: %s. Run 'rag-code-mcp --update' or use the 'apply_update' tool to upgrade.", release.LatestVersion)
	}()
}
65 changes: 60 additions & 5 deletions internal/ragcode/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"hash/fnv"
"log"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Add unicode/utf8 to the imports to support memory-efficient rune counting and prefixing in the buildEmbedText function.

Suggested change
"log"
"log"
"unicode/utf8"

"path/filepath"
"strings"

Expand All @@ -13,6 +14,57 @@ import (
"github.com/doITmagic/rag-code-mcp/internal/memory"
)

// maxEmbedChars is the maximum number of Unicode characters sent to the embedding
// model. Common models (e.g. nomic-embed-text) have an 8,192-token context window
// (~4 chars/token → ~32,768 chars). We use 30,000 to give ~8% headroom
// ((32,768 - 30,000) / 32,768) and stay compatible with smaller-window models.
const maxEmbedChars = 30_000

// buildEmbedText constructs the text to embed for a CodeChunk and caps it at
// maxChars runes (rune-safe, UTF-8 correct) to avoid exceeding the model's
// context window. Metadata (docstring, signature) is always preserved in full;
// only Code is truncated when the total exceeds maxChars. Consequently, if the
// metadata alone is longer than maxChars, the result still exceeds maxChars.
// Returns (text, wasTruncated).
func buildEmbedText(ch codetypes.CodeChunk, maxChars int) (string, bool) {
meta := strings.TrimSpace(strings.Join(filterNonEmpty([]string{
ch.Docstring,
ch.Signature,
}), "\n\n"))

var full string
if ch.Code != "" {
if meta != "" {
full = meta + "\n\n" + ch.Code
} else {
full = ch.Code
}
} else {
full = meta
}

runes := []rune(full)
if len(runes) <= maxChars {
return full, false
}
Comment on lines +45 to +48
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Converting the entire full string to a []rune slice just to check its length is memory-intensive for large code chunks. Consider using utf8.RuneCountInString(full) which calculates the rune count without allocating a new slice.

	if utf8.RuneCountInString(full) <= maxChars {
		return full, false
	}


// Truncate only the Code portion — keep metadata intact.
metaWithSep := meta
if meta != "" && ch.Code != "" {
metaWithSep = meta + "\n\n"
}
metaRunes := []rune(metaWithSep)
remaining := maxChars - len(metaRunes)
if remaining < 0 {
remaining = 0
}
Comment on lines +55 to +59
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

buildEmbedText() does not guarantee the returned text stays within maxChars when the metadata (docstring/signature) alone exceeds the limit. In that case metaWithSep is returned in full and can still overflow the embedding model context window, reintroducing the original failure mode. Consider enforcing the cap unconditionally (e.g., always keep signature, truncate docstring next, then code) and add a test where Docstring length > maxChars.

Copilot uses AI. Check for mistakes.
codeRunes := []rune(ch.Code)
if remaining > len(codeRunes) {
remaining = len(codeRunes)
}
Comment on lines +60 to +63
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

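Converting the potentially massive ch.Code string to []rune can lead to high memory usage or OOM for very large files. Since you only need a prefix of at most remaining runes, you can optimize this by converting only a safe byte-prefix of the string: a UTF-8 rune occupies at most 4 bytes, so the first remaining*4 bytes always contain the first remaining runes in full (or the whole string, if it is shorter).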

	codePrefix := ch.Code
	if len(codePrefix) > remaining*4 {
		codePrefix = codePrefix[:remaining*4]
	}
	codeRunes := []rune(codePrefix)
	if remaining > len(codeRunes) {
		remaining = len(codeRunes)
	}

truncated := metaWithSep + string(codeRunes[:remaining])
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If the metadata (metaWithSep) alone exceeds maxChars, the remaining count becomes 0, but the full metadata is still returned. This means the payload could still exceed the model's context window. While metadata is typically small, a strict cap would be safer to prevent the context overflow error mentioned in the PR description.

return truncated, true
Comment on lines +34 to +65
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

buildEmbedText() eagerly constructs full = meta + "\n\n" + ch.Code and then later converts the whole string to []rune. For very large ch.Code values this forces a full copy/allocation (and a full rune slice) even though the result will be truncated, which can significantly increase memory/CPU during indexing. Prefer computing rune lengths and truncating the code portion without concatenating/allocating the full payload up front (e.g., build only the needed prefix up to maxChars).

Suggested change
var full string
if ch.Code != "" {
if meta != "" {
full = meta + "\n\n" + ch.Code
} else {
full = ch.Code
}
} else {
full = meta
}
runes := []rune(full)
if len(runes) <= maxChars {
return full, false
}
// Truncate only the Code portion — keep metadata intact.
metaWithSep := meta
if meta != "" && ch.Code != "" {
metaWithSep = meta + "\n\n"
}
metaRunes := []rune(metaWithSep)
remaining := maxChars - len(metaRunes)
if remaining < 0 {
remaining = 0
}
codeRunes := []rune(ch.Code)
if remaining > len(codeRunes) {
remaining = len(codeRunes)
}
truncated := metaWithSep + string(codeRunes[:remaining])
return truncated, true
metaWithSep := meta
if meta != "" && ch.Code != "" {
metaWithSep = meta + "\n\n"
}
metaRuneCount := len([]rune(metaWithSep))
if ch.Code == "" {
if metaRuneCount <= maxChars {
return meta, false
}
return meta, true
}
remaining := maxChars - metaRuneCount
if remaining < 0 {
return metaWithSep, true
}
codeRuneCount := 0
for idx := range ch.Code {
if codeRuneCount == remaining {
return metaWithSep + ch.Code[:idx], true
}
codeRuneCount++
}
return metaWithSep + ch.Code, false

Copilot uses AI. Check for mistakes.
}

// Indexer indexes CodeChunks into LongTermMemory using an embedding Provider.
type Indexer struct {
analyzer codetypes.PathAnalyzer
Expand All @@ -34,14 +86,17 @@ func (i *Indexer) IndexPaths(ctx context.Context, paths []string, sourceTag stri

indexed := 0
for _, ch := range chunks {
text := strings.TrimSpace(strings.Join(filterNonEmpty([]string{
ch.Docstring,
ch.Signature,
ch.Code,
}), "\n\n"))
text, wasTruncated := buildEmbedText(ch, maxEmbedChars)
text = strings.TrimSpace(text)
if text == "" {
continue
}
if wasTruncated {
// Log a warning so users know which symbols were partially indexed.
// The full code is still accessible via rag_read_file_context.
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment references rag_read_file_context, but that tool name doesn't appear to exist in this codebase (the MCP tool is get_code_context). This can confuse users reading logs; consider updating the comment to the correct tool name or removing it.

Suggested change
// The full code is still accessible via rag_read_file_context.
// The full code is still accessible via get_code_context.

Copilot uses AI. Check for mistakes.
log.Printf("[WARN] embed text truncated for %s (%s:%d-%d) — content exceeds model context window",
ch.Name, filepath.Base(ch.FilePath), ch.StartLine, ch.EndLine)
}

emb, err := i.embedder.Embed(ctx, text)
if err != nil {
Expand Down
Loading
Loading