Skip to content

Commit beeb729

Browse files
committed
feat: add Gmail channel, implement goal restart API, and improve model configuration UI
1 parent 50fc2c2 commit beeb729

23 files changed

Lines changed: 764 additions & 139 deletions

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
![GitHub stars](https://img.shields.io/github/stars/grasberg/sofia?style=social)
2+
![GitHub forks](https://img.shields.io/github/forks/grasberg/sofia?style=social)
3+
![License](https://img.shields.io/github/license/grasberg/sofia)
4+
![Go Version](https://img.shields.io/github/go-mod/go-version/grasberg/sofia)
5+
![Last Commit](https://img.shields.io/github/last-commit/grasberg/sofia)
6+
17
# Sofia - AI Workspace Assistant 🧠✨
28

39
![Version](https://img.shields.io/badge/version-v0.0.145-blue)

pkg/agent/instance.go

Lines changed: 74 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ type AgentInstance struct {
3838
SkillsFilter []string
3939
IsLocalModel bool
4040
PurposePrompt string
41-
Candidates []providers.FallbackCandidate
42-
Summarization config.SummarizationConfig
43-
ThinkingBudget int
41+
Candidates []providers.FallbackCandidate
42+
CandidateProviders map[string]providers.LLMProvider // "provider/model" → provider
43+
Summarization config.SummarizationConfig
44+
ThinkingBudget int
4445
}
4546

4647
// NewAgentInstance creates an agent instance from config.
@@ -170,13 +171,38 @@ func NewAgentInstance(
170171
temperature = *defaults.Temperature
171172
}
172173

173-
// Resolve fallback candidates
174+
// Resolve fallback candidates.
175+
// Use full "protocol/model" strings (not aliases) so ResolveCandidates
176+
// can extract the provider from the model string.
174177
modelCfg := providers.ModelConfig{
175-
Primary: model,
176-
Fallbacks: fallbacks,
178+
Primary: resolveModelFullString(model, cfg),
179+
Fallbacks: resolveModelFullStrings(fallbacks, cfg),
177180
}
178181
candidates := providers.ResolveCandidates(modelCfg, defaults.Provider)
179182

183+
// Build per-candidate providers so the fallback chain can switch between
184+
// different API endpoints (e.g. Ollama Cloud primary → OpenRouter fallback).
185+
candidateProviders := make(map[string]providers.LLMProvider)
186+
for _, c := range candidates {
187+
key := providers.ModelKey(c.Provider, c.Model)
188+
fullModel := c.Provider + "/" + c.Model
189+
mc := findModelConfigByModel(cfg, fullModel)
190+
if mc == nil {
191+
// Try lookup by alias as fallback
192+
if found, err := cfg.GetModelConfig(c.Model); err == nil {
193+
mc = found
194+
}
195+
}
196+
if mc != nil {
197+
if mc.Workspace == "" {
198+
mc.Workspace = cfg.WorkspacePath()
199+
}
200+
if p, _, err := providers.CreateProviderFromConfig(mc); err == nil && p != nil {
201+
candidateProviders[key] = p
202+
}
203+
}
204+
}
205+
180206
// If this agent has a custom model that differs from the default, create a
181207
// per-agent provider from its model config. This allows different agents to
182208
// use different API keys or providers without sharing the global provider.
@@ -251,15 +277,54 @@ func NewAgentInstance(
251277
SkillsFilter: skillsFilter,
252278
IsLocalModel: isLocal,
253279
PurposePrompt: contextBuilder.purposeInstructions,
254-
Candidates: candidates,
255-
Summarization: summarization,
256-
ThinkingBudget: thinkingBudget,
280+
Candidates: candidates,
281+
CandidateProviders: candidateProviders,
282+
Summarization: summarization,
283+
ThinkingBudget: thinkingBudget,
257284
}
258285
}
259286

260287
// resolveAgentModelID resolves the raw model ID (without protocol prefix) for a given alias.
261288
// It looks up the alias in cfg.ModelList; if found, it extracts the model ID from the
262289
// Model field (e.g. "openai/gpt-4o" -> "gpt-4o"). Falls back to the alias itself if not found.
290+
// findModelConfigByModel searches ModelList by the Model field (protocol/model-id)
291+
// rather than the ModelName alias.
292+
func findModelConfigByModel(cfg *config.Config, model string) *config.ModelConfig {
293+
for i := range cfg.ModelList {
294+
if cfg.ModelList[i].Model == model {
295+
mc := cfg.ModelList[i] // copy
296+
return &mc
297+
}
298+
}
299+
return nil
300+
}
301+
302+
// resolveModelFullString resolves a model alias to its full "protocol/model"
303+
// string from the model list. If the alias is not found, it's returned as-is
304+
// (it may already be a full model string).
305+
func resolveModelFullString(alias string, cfg *config.Config) string {
306+
if alias == "" {
307+
return alias
308+
}
309+
mc, err := cfg.GetModelConfig(alias)
310+
if err == nil && mc != nil && mc.Model != "" {
311+
return mc.Model
312+
}
313+
return alias
314+
}
315+
316+
// resolveModelFullStrings resolves a slice of model aliases.
317+
func resolveModelFullStrings(aliases []string, cfg *config.Config) []string {
318+
if len(aliases) == 0 {
319+
return aliases
320+
}
321+
out := make([]string, len(aliases))
322+
for i, a := range aliases {
323+
out[i] = resolveModelFullString(a, cfg)
324+
}
325+
return out
326+
}
327+
263328
func resolveAgentModelID(alias string, cfg *config.Config) string {
264329
if alias == "" {
265330
return ""

pkg/agent/loop.go

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"fmt"
1212
"os"
1313
"path/filepath"
14-
"strings"
1514
"sync"
1615
"sync/atomic"
1716
"time"
@@ -186,18 +185,11 @@ func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers
186185
cooldown := providers.NewCooldownTracker()
187186
fallbackChain := providers.NewFallbackChain(cooldown)
188187

189-
// Set up semantic tool matcher if the provider supports embeddings and
190-
// has access to a real embedding model. Local providers like Ollama
191-
// typically don't host embedding models, so the keyword matcher is used instead.
188+
// Tool filtering uses the keyword matcher (tools.KeywordMatchTools) which
189+
// works locally without any API calls. The semantic matcher that called
190+
// OpenAI's text-embedding-3-small has been removed to avoid external
191+
// dependencies and wasted API round-trips on non-OpenAI providers.
192192
var semanticMatcher *tools.SemanticMatcher
193-
if embProvider, ok := provider.(providers.EmbeddingProvider); ok {
194-
modelName := cfg.Agents.Defaults.GetModelName()
195-
mc, _ := cfg.GetModelConfig(modelName)
196-
isLocal := mc != nil && (strings.Contains(mc.APIBase, "localhost") || strings.Contains(mc.APIBase, "127.0.0.1"))
197-
if !isLocal {
198-
semanticMatcher = tools.NewSemanticMatcher(embProvider, "text-embedding-3-small")
199-
}
200-
}
201193

202194
// Create state manager using default agent's workspace for channel recording
203195
defaultAgent := registry.GetDefaultAgent()
@@ -237,11 +229,6 @@ func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers
237229
toolStatsPath := filepath.Join(filepath.Dir(memDBPath), "tool_stats.json")
238230
toolTracker := tools.NewToolTracker(toolStatsPath)
239231

240-
// Attach tracker to semantic matcher for usage-based ranking
241-
if semanticMatcher != nil {
242-
semanticMatcher.SetTracker(toolTracker)
243-
}
244-
245232
// Set up Audit Logger for tool call tracing
246233
auditDBPath := filepath.Join(filepath.Dir(memDBPath), "audit.db")
247234
auditLog, auditErr := audit.NewAuditLogger(auditDBPath)
@@ -774,3 +761,4 @@ func newAgentInstanceFromEvolution(
774761
}
775762
return NewAgentInstance(&agentCfg, &cfg.Agents.Defaults, cfg, provider, memDB, nil)
776763
}
764+

pkg/agent/loop_llm.go

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,15 @@ func (al *AgentLoop) runLLMIteration(
332332
candidates = al.providerRanker.Rank(candidates)
333333
}
334334
fbResult, fbErr := al.fallback.Execute(ctx, candidates,
335-
func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
336-
return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts)
335+
func(ctx context.Context, candidateProvider, model string) (*providers.LLMResponse, error) {
336+
// Use the candidate-specific provider if available,
337+
// so fallback can switch between different API endpoints.
338+
p := agent.Provider
339+
key := providers.ModelKey(candidateProvider, model)
340+
if cp, ok := agent.CandidateProviders[key]; ok {
341+
p = cp
342+
}
343+
return p.Chat(ctx, messages, providerToolDefs, model, llmOpts)
337344
},
338345
)
339346
if fbErr != nil {
@@ -373,9 +380,56 @@ func (al *AgentLoop) runLLMIteration(
373380
}
374381

375382
errMsg := strings.ToLower(err.Error())
376-
isContextError := strings.Contains(errMsg, "token") ||
377-
strings.Contains(errMsg, "invalidparameter") ||
378-
strings.Contains(errMsg, "length")
383+
384+
// Check rate limit FIRST — messages like "The Token Plan is
385+
// designed for…" contain the word "token" and must not be
386+
// misclassified as context-window errors.
387+
isRateLimit := strings.Contains(errMsg, "rate_limit") ||
388+
strings.Contains(errMsg, "rate limit") ||
389+
strings.Contains(errMsg, "too many requests")
390+
391+
if isRateLimit && retry < maxRetries {
392+
waitSec := 10 * (retry + 1)
393+
logger.WarnCF(agentComp, "Rate limit hit, backing off before retry", map[string]any{
394+
"error": err.Error(),
395+
"retry": retry,
396+
"wait_seconds": waitSec,
397+
})
398+
if retry == 0 && !constants.IsInternalChannel(opts.Channel) {
399+
al.bus.PublishOutbound(bus.OutboundMessage{
400+
Channel: opts.Channel,
401+
ChatID: opts.ChatID,
402+
Content: "Rate limited by provider. Retrying shortly...",
403+
})
404+
}
405+
select {
406+
case <-time.After(time.Duration(waitSec) * time.Second):
407+
case <-ctx.Done():
408+
return "", iteration, errorCount, ctx.Err()
409+
}
410+
continue
411+
}
412+
413+
// Invalid tool call ID — typically caused by context compression
414+
// orphaning tool_use / tool_result pairs. Sanitize and retry.
415+
isToolIDError := strings.Contains(errMsg, "tool_use_id") ||
416+
strings.Contains(errMsg, "tool_use.id") ||
417+
strings.Contains(errMsg, "tool call id")
418+
419+
if isToolIDError && retry < maxRetries {
420+
logger.WarnCF(agentComp, "Invalid tool call ID detected, sanitizing messages", map[string]any{
421+
"error": err.Error(),
422+
"retry": retry,
423+
"msg_count": len(messages),
424+
})
425+
messages = sanitizeToolCallIDs(messages)
426+
continue
427+
}
428+
429+
isContextError := !isRateLimit &&
430+
(strings.Contains(errMsg, "token") ||
431+
strings.Contains(errMsg, "invalidparameter") ||
432+
strings.Contains(errMsg, "length"))
379433

380434
if isContextError && retry < maxRetries {
381435
logger.WarnCF(agentComp, "Context window error detected, attempting compression", map[string]any{

pkg/agent/loop_query.go

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,7 @@ func (al *AgentLoop) ListGoals(agentID string) ([]*autonomy.Goal, error) {
4242
if agentID != "" {
4343
return gm.ListAllGoals(agentID)
4444
}
45-
// Collect goals from all agents
46-
var allGoals []*autonomy.Goal
47-
for _, id := range al.getRegistry().ListAgentIDs() {
48-
goals, err := gm.ListAllGoals(id)
49-
if err != nil {
50-
continue
51-
}
52-
allGoals = append(allGoals, goals...)
53-
}
54-
return allGoals, nil
45+
return gm.ListAllGoalsGlobal()
5546
}
5647

5748
func (al *AgentLoop) GetStartupInfo() map[string]any {
@@ -200,6 +191,41 @@ func (al *AgentLoop) UpdateGoalStatus(goalID int64, status string) error {
200191
return err
201192
}
202193

194+
// RestartGoal transitions a failed goal back to active and resets its plan so
195+
// the autonomy tick re-dispatches the failed steps.
196+
func (al *AgentLoop) RestartGoal(goalID int64) error {
197+
if al.memDB == nil {
198+
return fmt.Errorf("memory database not available")
199+
}
200+
gm := autonomy.NewGoalManager(al.memDB)
201+
202+
goal, err := gm.GetGoalByID(goalID)
203+
if err != nil {
204+
return err
205+
}
206+
if goal == nil {
207+
return fmt.Errorf("goal %d not found", goalID)
208+
}
209+
210+
// Reset the linked plan's failed steps back to pending.
211+
if pm := al.planManager; pm != nil {
212+
if plan := pm.GetPlanByGoalID(goalID); plan != nil {
213+
pm.ResetPlan(plan.ID)
214+
}
215+
}
216+
217+
// Set phase back to implement so the tick picks it up.
218+
if err := gm.UpdateGoalPhase(goalID, autonomy.GoalPhaseImplement); err != nil {
219+
return fmt.Errorf("resetting goal phase: %w", err)
220+
}
221+
222+
// Transition status to active.
223+
if _, err := gm.UpdateGoalStatus(goalID, autonomy.GoalStatusActive); err != nil {
224+
return fmt.Errorf("reactivating goal: %w", err)
225+
}
226+
return nil
227+
}
228+
203229
// DeleteGoal removes a goal and its log from the web UI.
204230
func (al *AgentLoop) DeleteGoal(goalID int64) error {
205231
if al.memDB == nil {

pkg/agent/loop_summarize.go

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) {
8888
threshold := agent.ContextWindow * agent.Summarization.ForceTriggerPctOrDefault() / 100
8989

9090
if tokenEstimate <= threshold {
91-
// Tool result truncation was sufficient
91+
// Tool result truncation was sufficient — sanitize IDs before saving.
92+
newHistory = sanitizeToolCallIDs(newHistory)
9293
agent.Sessions.SetHistory(sessionKey, newHistory)
9394
agent.Sessions.Save(sessionKey)
9495
logger.InfoCF("agent", "Context compression: truncated tool results", map[string]any{
@@ -114,6 +115,9 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) {
114115
newHistory = append(newHistory, enhancedHead...)
115116
newHistory = append(newHistory, tail...)
116117

118+
// Dropping the middle can orphan tool_use / tool_result pairs.
119+
newHistory = sanitizeToolCallIDs(newHistory)
120+
117121
agent.Sessions.SetHistory(sessionKey, newHistory)
118122
agent.Sessions.Save(sessionKey)
119123

@@ -124,6 +128,59 @@ func (al *AgentLoop) forceCompression(agent *AgentInstance, sessionKey string) {
124128
})
125129
}
126130

131+
// sanitizeToolCallIDs removes orphaned tool_use / tool_result pairs from a
// message slice. A tool_result whose ToolCallID has no matching assistant
// tool_use is dropped entirely. An assistant tool_use whose ID has no matching
// tool_result is stripped from the message's ToolCalls; the assistant message
// itself is always kept, even if no tool calls remain after stripping.
func sanitizeToolCallIDs(messages []providers.Message) []providers.Message {
	// Pass 1: collect all tool_use IDs from assistant messages.
	toolUseIDs := make(map[string]bool)
	for _, m := range messages {
		if m.Role == "assistant" {
			for _, tc := range m.ToolCalls {
				if tc.ID != "" {
					toolUseIDs[tc.ID] = true
				}
			}
		}
	}

	// Pass 2: collect all tool_result IDs.
	toolResultIDs := make(map[string]bool)
	for _, m := range messages {
		if m.Role == "tool" && m.ToolCallID != "" {
			toolResultIDs[m.ToolCallID] = true
		}
	}

	// Pass 3: rebuild the slice, dropping/stripping orphans.
	out := make([]providers.Message, 0, len(messages))
	for _, m := range messages {
		switch {
		case m.Role == "tool" && m.ToolCallID != "" && !toolUseIDs[m.ToolCallID]:
			// Orphaned tool result — no matching assistant tool_use; drop it.
			continue

		case m.Role == "assistant" && len(m.ToolCalls) > 0:
			// Strip tool calls that have no matching tool result.
			var valid []providers.ToolCall
			for _, tc := range m.ToolCalls {
				if tc.ID != "" && toolResultIDs[tc.ID] {
					valid = append(valid, tc)
				}
			}
			if len(valid) != len(m.ToolCalls) {
				// shallow copy: only the ToolCalls field is replaced, so the
				// original message in the input slice is left untouched.
				cleaned := m
				cleaned.ToolCalls = valid
				out = append(out, cleaned)
				continue
			}
			// All tool calls were valid — fall through and keep m as-is.
		}

		out = append(out, m)
	}
	return out
}
183+
127184
// safeCutPoint adjusts a cut index forward so the kept messages don't start
128185
// with an orphaned tool result or sit between an assistant tool-call and its
129186
// results. It returns the adjusted index.

0 commit comments

Comments
 (0)