Add token limit handling to prevent 8k token overflow

yosef-sa · yosef-sa · commit dac7a361f915 · 2026-01-18T10:38:48.000+02:00
Problem:
- GitHub Models API has an 8k token limit for entire requests
- Large git diffs can exceed this limit, causing API failures
- Users experience failures when staging large changes

Solution:
- Added token estimation using a length-based heuristic (1 token ≈ 4 bytes of UTF-8 text, i.e. 4 ASCII characters)
- Implemented truncation logic that preserves UTF-8 boundaries
- Added intelligent content prioritization when over limit

Implementation Details:
- estimateTokens(): Approximates tokens for any text content
- truncateToTokenLimit(): Safely truncates text with ellipsis indicator
- Modified GenerateCommitMessage() to:
  * Estimate tokens for prompt templates + changes + examples
  * Reserve tokens for templates (with buffer)
  * Reserve up to 20% of the remaining tokens for examples when present (never more than the examples actually need)
  * Truncate changes to fit remaining budget
  * Display warning when truncation occurs

Benefits:
- Prevents API failures from token overflow
- Maintains functionality by preserving maximum content
- User-friendly with clear truncation warnings
- No external dependencies, follows existing code style
- Gracefully handles both changes-only and changes+examples scenarios
diff --git a/internal/llm/client.go b/internal/llm/client.go
@@ -18,6 +18,47 @@ import (
 //go:embed commitmsg.prompt.yml
 var commitmsgPromptYAML []byte
 
+const (
+	maxTokens     = 8000 // budget for one whole request: prompt templates + changes + examples
+	tokensPerChar = 0.25 // 1 token ≈ 4 characters; estimateTokens applies this to len(text), i.e. bytes
+)
+
+// estimateTokens returns a rough token count for text: its length in bytes scaled by tokensPerChar, rounded toward zero.
+func estimateTokens(text string) int {
+	return int(tokensPerChar * float64(len(text)))
+}
+
+// truncateToTokenLimit shortens text so its estimated token count fits maxTokens.
+// Text that already fits is returned unchanged; otherwise the result is a prefix
+// of text cut on a UTF-8 rune boundary, followed by "...". A budget too small to
+// hold any content (including a negative maxTokens) yields just "...".
+func truncateToTokenLimit(text string, maxTokens int) string {
+	if estimateTokens(text) <= maxTokens {
+		return text
+	}
+
+	const ellipsis = "..."
+
+	// estimateTokens measures len(text) in bytes, so the budget must be in bytes
+	// as well. Counting runes here would let multi-byte text (CJK, emoji) stay
+	// far above the limit after truncation.
+	budget := int(float64(maxTokens)/tokensPerChar) - len(ellipsis)
+	if budget < 0 {
+		budget = 0
+	}
+
+	// Ranging over a string yields the byte offset at which each rune starts.
+	// Keep the largest offset that fits the budget so no rune is split.
+	cut := 0
+	for i := range text {
+		if i > budget {
+			break
+		}
+		cut = i
+	}
+	return text[:cut] + ellipsis
+}
+
 // PromptConfig represents the structure of the prompt configuration file.
 type PromptConfig struct {
 	Name            string          `yaml:"name"`
@@ -103,17 +144,78 @@ func (c *Client) GenerateCommitMessage(
 	selectedModel := model
 	selectedLanguage := language
 
+	// Estimate tokens and truncate if necessary to stay under 8k limit
+	truncatedChanges := changesSummary
+	truncatedExamples := examples
+
+	// Estimate tokens for the prompt template (without placeholders)
+	promptTemplateTokens := 0
+	for _, msg := range promptConfig.Messages {
+		content := msg.Content
+		content = strings.ReplaceAll(content, "{{changes}}", "")
+		content = strings.ReplaceAll(content, "{{language}}", selectedLanguage)
+		content = strings.ReplaceAll(content, "{{examples}}", "")
+		promptTemplateTokens += estimateTokens(content)
+	}
+
+	// Estimate tokens for changes and examples
+	changesTokens := estimateTokens(changesSummary)
+	examplesTokens := 0
+	if examples != "" {
+		examplesTokens = estimateTokens(createExamplesString(examples))
+	}
+
+	totalTokens := promptTemplateTokens + changesTokens + examplesTokens
+
+	if totalTokens > maxTokens {
+		fmt.Println("  Warning: Content truncated to fit token limit")
+
+		// Reserve tokens for prompt templates (add some buffer)
+		reservedTokens := promptTemplateTokens + 500 // buffer for template processing
+
+		remainingTokens := maxTokens - reservedTokens
+		if remainingTokens < 0 {
+			remainingTokens = 0
+		}
+
+		// Prioritize examples if present, otherwise use all remaining for changes
+		if examplesTokens > 0 {
+			// Reserve some tokens for examples (up to 20% of remaining)
+			examplesReserved := int(float64(remainingTokens) * 0.2)
+			if examplesReserved > examplesTokens {
+				examplesReserved = examplesTokens
+			}
+
+			remainingTokens -= examplesReserved
+
+			// Truncate examples if needed
+			if examplesReserved < examplesTokens {
+				truncatedExamples = truncateToTokenLimit(examples, examplesReserved)
+			}
+
+			// Truncate changes with remaining tokens
+			if remainingTokens < changesTokens {
+				truncatedChanges = truncateToTokenLimit(changesSummary, remainingTokens)
+			}
+		} else {
+			// No examples, use all remaining tokens for changes
+			if remainingTokens < changesTokens {
+				truncatedChanges = truncateToTokenLimit(changesSummary, remainingTokens)
+			}
+		}
+	}
+
 	// Build messages from the prompt config, replacing template variables
 	messages := make([]Message, len(promptConfig.Messages))
 	for i, msg := range promptConfig.Messages {
 		content := msg.Content
 		// Replace the template variables
-		content = strings.ReplaceAll(content, "{{changes}}", changesSummary)
+		content = strings.ReplaceAll(content, "{{changes}}", truncatedChanges)
 		content = strings.ReplaceAll(content, "{{language}}", selectedLanguage)
 
-		if examples != "" && strings.Contains(content, "{{examples}}") {
+		if truncatedExamples != "" && strings.Contains(content, "{{examples}}") {
 			// If examples are provided, replace the {{examples}} placeholder
-			content = strings.ReplaceAll(content, "{{examples}}", createExamplesString(examples))
+			content = strings.ReplaceAll(content, "{{examples}}", createExamplesString(truncatedExamples))
 		} else {
 			// If no examples are provided, remove the {{examples}} placeholder
 			content = strings.ReplaceAll(content, "{{examples}}", "")