feat: Implement token budgeting and context optimization to maximize ROI from token budget using tiktoken.

deepakdgupta1 · deepakdgupta1 · commit 566a2550ad3a · 2025-12-17T19:29:45.000+05:30
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,6 +12,7 @@ dependencies = [
     "tree-sitter==0.21.3",
     "tree-sitter-languages>=1.10.0",
     "GitPython>=3.1.0",
+    "tiktoken>=0.7.0",
 ]
 
 [project.scripts]
diff --git a/src/knowcode/cli.py b/src/knowcode/cli.py
@@ -188,12 +188,12 @@ def query(query_type: str, target: str, store: str, as_json: bool) -> None:
     help="Path to knowledge store file or directory",
 )
 @click.option(
-    "--max-chars", "-m",
+    "--max-tokens", "-m",
     type=int,
-    default=8000,
-    help="Maximum characters in context (default: 8000)",
+    default=2000,
+    help="Maximum tokens in context (default: 2000)",
 )
-def context(target: str, store: str, max_chars: int) -> None:
+def context(target: str, store: str, max_tokens: int) -> None:
     """Generate context bundle for an entity.
 
     TARGET: Entity ID or search pattern
@@ -204,7 +204,7 @@ def context(target: str, store: str, max_chars: int) -> None:
         click.echo("Error: Knowledge store not found. Run 'knowcode analyze' first.", err=True)
         sys.exit(1)
 
-    synthesizer = ContextSynthesizer(knowledge, max_chars=max_chars)
+    synthesizer = ContextSynthesizer(knowledge, max_tokens=max_tokens)
 
     # Try exact match first
     entity = knowledge.get_entity(target)
@@ -222,7 +222,7 @@ def context(target: str, store: str, max_chars: int) -> None:
     bundle = synthesizer.synthesize(entity.id)
     if bundle:
         click.echo(bundle.context_text)
-        click.echo(f"\n--- {bundle.total_chars} chars, {len(bundle.included_entities)} entities ---", err=True)
+        click.echo(f"\n--- {bundle.total_chars} chars, {bundle.total_tokens} tokens, {len(bundle.included_entities)} entities ---", err=True)
         if bundle.truncated:
             click.echo("(truncated)", err=True)
 
diff --git a/src/knowcode/context_synthesizer.py b/src/knowcode/context_synthesizer.py
@@ -5,6 +5,7 @@
 
 from knowcode.knowledge_store import KnowledgeStore
 from knowcode.models import Entity, EntityKind
+from knowcode.token_counter import TokenCounter
 
 
 @dataclass
@@ -15,27 +16,31 @@ class ContextBundle:
     context_text: str
     included_entities: list[str]
     total_chars: int
+    total_tokens: int
     truncated: bool
 
 
 class ContextSynthesizer:
     """Synthesizes context bundles for entities."""
 
-    DEFAULT_MAX_CHARS = 8000  # Rough proxy for ~2K tokens
+    DEFAULT_MAX_TOKENS = 2000
 
     def __init__(
         self,
         store: KnowledgeStore,
-        max_chars: int = DEFAULT_MAX_CHARS,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
+        model: str = "gpt-4",
     ) -> None:
         """Initialize context synthesizer.
 
         Args:
             store: Knowledge store to query.
-            max_chars: Maximum characters in context bundle.
+            max_tokens: Maximum tokens in context bundle.
+            model: Model name for token counting.
         """
         self.store = store
-        self.max_chars = max_chars
+        self.max_tokens = max_tokens
+        self.tokenizer = TokenCounter(model)
 
     def synthesize(self, entity_id: str) -> Optional[ContextBundle]:
         """Synthesize context bundle for an entity.
@@ -52,80 +57,114 @@ def synthesize(self, entity_id: str) -> Optional[ContextBundle]:
 
         sections: list[str] = []
         included: list[str] = [entity_id]
-        truncated = False
-
-        # Section 1: Entity header
-        sections.append(self._format_entity_header(entity))
-
-        # Section 2: Docstring/description
+        
+        # Sections are appended in priority order, which is also the order they are displayed in.
+        # After the header, the remaining token budget is checked before each addition.
+        
+        # Priority 1: Entity Core (Header, Signature, Description)
+        header = self._format_entity_header(entity)
+        current_tokens = self.tokenizer.count_tokens(header)
+        sections.append(header)
+        
+        desc = ""
         if entity.docstring:
-            sections.append(f"## Description\n\n{entity.docstring}")
-
-        # Section 3: Signature (for functions/methods)
+            desc = f"## Description\n\n{entity.docstring}"
+            
+        sig = ""
         if entity.signature:
-            sections.append(f"## Signature\n\n```python\n{entity.signature}\n```")
-
-        # Section 4: Source code (if available and fits)
+            sig = f"## Signature\n\n```python\n{entity.signature}\n```"
+            
+        # Add high priority sections if they fit
+        if desc:
+            t = self.tokenizer.count_tokens(desc)
+            if current_tokens + t < self.max_tokens:
+                sections.append(desc)
+                current_tokens += t
+        
+        if sig:
+            t = self.tokenizer.count_tokens(sig)
+            if current_tokens + t < self.max_tokens:
+                sections.append(sig)
+                current_tokens += t
+
+        # Priority 2: Source Code (Huge consumer, often truncated)
         if entity.source_code:
-            code_section = f"## Source Code\n\n```python\n{entity.source_code}\n```"
-            if self._would_fit(sections, code_section):
-                sections.append(code_section)
-
-        # Section 5: Parent context
+            code_header = "## Source Code\n\n```python\n"
+            code_footer = "\n```"
+            overhead = self.tokenizer.count_tokens(code_header + code_footer)
+            remaining = self.max_tokens - current_tokens - overhead
+            
+            if remaining > 100: # Only add if we have decent space
+                code_body = entity.source_code
+                code_tokens = self.tokenizer.count_tokens(code_body)
+                
+                if code_tokens > remaining:
+                    code_body = self.tokenizer.truncate(code_body, remaining) + "\n# ... (truncated)"
+                    # The content was truncated here, but no flag is set at this point;
+                    # the final budget-exhaustion check is relied on to report truncation.
+                
+                sections.append(f"{code_header}{code_body}{code_footer}")
+                current_tokens += self.tokenizer.count_tokens(sections[-1])
+            else:
+                 # Skipped source code due to budget
+                 # We consider this truncation/loss of info
+                 pass 
+
+        # Priority 3: Parent Context
         parent = self.store.get_parent(entity_id)
         if parent:
             parent_section = self._format_parent_context(parent)
-            if self._would_fit(sections, parent_section):
+            t = self.tokenizer.count_tokens(parent_section)
+            if current_tokens + t < self.max_tokens:
                 sections.append(parent_section)
                 included.append(parent.id)
-
-        # Section 6: Callers (who calls this?)
+                current_tokens += t
+
+        # Priority 4: Relationships (Callers, Callees, Children)
+        # We add them greedily until the budget is exhausted
+        
+        # Unified list of potential sections
+        rel_sections = []
+        
         callers = self.store.get_callers(entity_id)
         if callers:
-            callers_section = self._format_callers(callers)
-            if self._would_fit(sections, callers_section):
-                sections.append(callers_section)
-                included.extend(c.id for c in callers)
+            rel_sections.append((self._format_callers(callers), [c.id for c in callers]))
 
-        # Section 7: Callees (what does this call?)
         callees = self.store.get_callees(entity_id)
         if callees:
-            callees_section = self._format_callees(callees)
-            if self._would_fit(sections, callees_section):
-                sections.append(callees_section)
-                included.extend(c.id for c in callees)
-
-        # Section 8: Children (for classes/modules)
+             rel_sections.append((self._format_callees(callees), [c.id for c in callees]))
+             
         if entity.kind in {EntityKind.CLASS, EntityKind.MODULE, EntityKind.DOCUMENT}:
             children = self.store.get_children(entity_id)
             if children:
-                children_section = self._format_children(children)
-                if self._would_fit(sections, children_section):
-                    sections.append(children_section)
-                    included.extend(c.id for c in children)
+                rel_sections.append((self._format_children(children), [c.id for c in children]))
+
+        is_truncated = False
+        
+        for text, ids in rel_sections:
+            t = self.tokenizer.count_tokens(text)
+            if current_tokens + t < self.max_tokens:
+                sections.append(text)
+                included.extend(ids)
+                current_tokens += t
+            else:
+                is_truncated = True
 
-        # Build final context
         context_text = "\n\n---\n\n".join(sections)
-
-        # Final truncation if still too long
-        if len(context_text) > self.max_chars:
-            context_text = context_text[: self.max_chars - 20] + "\n\n[TRUNCATED]"
-            truncated = True
+        
+        # Check if we skipped source code but had it
+        if entity.source_code and "## Source Code" not in context_text:
+             is_truncated = True
 
         return ContextBundle(
             target_entity=entity,
             context_text=context_text,
             included_entities=included,
             total_chars=len(context_text),
-            truncated=truncated,
+            total_tokens=current_tokens,
+            truncated=is_truncated or (current_tokens >= self.max_tokens),
         )
 
-    def _would_fit(self, current_sections: list[str], new_section: str) -> bool:
-        """Check if adding a section would stay within budget."""
-        current_len = sum(len(s) for s in current_sections)
-        new_len = current_len + len(new_section) + 10  # +10 for separators
-        return new_len < self.max_chars
-
     def _format_entity_header(self, entity: Entity) -> str:
         """Format entity header."""
         lines = [
diff --git a/src/knowcode/token_counter.py b/src/knowcode/token_counter.py
@@ -0,0 +1,58 @@
+"""Token counting utility using tiktoken."""
+
+from functools import lru_cache
+from typing import Optional
+
+import tiktoken
+
+
+class TokenCounter:
+    """Token counter utility."""
+
+    DEFAULT_MODEL = "gpt-4"
+
+    def __init__(self, model: str = DEFAULT_MODEL) -> None:
+        """Initialize token counter.
+
+        Args:
+            model: Model name to use for encoding.
+        """
+        self.model = model
+        try:
+            self.encoding = tiktoken.encoding_for_model(model)
+        except KeyError:
+            # Fallback to cl100k_base (used by gpt-4, gpt-3.5-turbo)
+            self.encoding = tiktoken.get_encoding("cl100k_base")
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens in text.
+
+        Args:
+            text: Text to count tokens for.
+
+        Returns:
+            Number of tokens.
+        """
+        if not text:
+            return 0
+        return len(self.encoding.encode(text))
+
+    def truncate(self, text: str, max_tokens: int) -> str:
+        """Truncate text to max_tokens.
+
+        Args:
+            text: Text to truncate.
+            max_tokens: Maximum tokens allowed.
+
+        Returns:
+            Truncated text.
+        """
+        if not text:
+            return ""
+            
+        tokens = self.encoding.encode(text)
+        if len(tokens) <= max_tokens:
+            return text
+            
+        truncated_tokens = tokens[:max_tokens]
+        return self.encoding.decode(truncated_tokens)
diff --git a/tests/test_token_optimization.py b/tests/test_token_optimization.py
@@ -0,0 +1,76 @@
+"""Tests for Token Counter and Context Synthesizer."""
+
+import pytest
+from unittest.mock import MagicMock
+from knowcode.token_counter import TokenCounter
+from knowcode.context_synthesizer import ContextSynthesizer, ContextBundle
+from knowcode.models import Entity, EntityKind, Location
+
+def test_token_counter():
+    """Test functionality of TokenCounter."""
+    counter = TokenCounter()
+    
+    text = "Hello world"
+    tokens = counter.count_tokens(text)
+    assert tokens > 0
+    
+    truncated = counter.truncate(text, max_tokens=1)
+    assert counter.count_tokens(truncated) == 1
+    assert truncated != text
+
+def test_context_synthesizer_budget():
+    """Test standard budgeting logic."""
+    store = MagicMock()
+    
+    # Create a mock entity with huge source code
+    large_code = "print('hello')\n" * 1000
+    entity = Entity(
+        id="test::Foo",
+        kind=EntityKind.CLASS,
+        name="Foo",
+        qualified_name="Foo",
+        location=Location("test.py", 1, 1000),
+        source_code=large_code
+    )
+    store.get_entity.return_value = entity
+    store.get_parent.return_value = None
+    store.get_callers.return_value = []
+    store.get_callees.return_value = []
+    store.get_children.return_value = []
+    
+    # Low budget
+    synthesizer = ContextSynthesizer(store, max_tokens=50)
+    bundle = synthesizer.synthesize("test::Foo")
+    
+    assert bundle is not None
+    assert bundle.total_tokens <= 50
+    assert bundle.truncated is True
+    # The text itself might not say 'truncated' if we omitted the whole section
+    # assert "truncated" in bundle.context_text
+
+def test_context_synthesizer_priority():
+    """Test that header is preserved even if code is truncated."""
+    store = MagicMock()
+    
+    entity = Entity(
+        id="test::Bar",
+        kind=EntityKind.FUNCTION,
+        name="bar",
+        qualified_name="bar",
+        location=Location("test.py", 1, 10),
+        source_code="def bar():\n    pass # very long code...",
+        docstring="Checks that header is kept."
+    )
+    store.get_entity.return_value = entity
+    store.get_parent.return_value = None
+    store.get_callers.return_value = []
+    store.get_callees.return_value = []
+    store.get_children.return_value = []
+    
+    synthesizer = ContextSynthesizer(store, max_tokens=100)
+    bundle = synthesizer.synthesize("test::Bar")
+    
+    assert bundle is not None
+    # Ensure header info is present
+    assert "# Function: `bar`" in bundle.context_text
+    assert "**File**: `test.py`" in bundle.context_text
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,7 @@ dependencies = [`
`12`	`12`	`"tree-sitter==0.21.3",`
`13`	`13`	`"tree-sitter-languages>=1.10.0",`
`14`	`14`	`"GitPython>=3.1.0",`
	`15`	`+ "tiktoken>=0.7.0",`
`15`	`16`	`]`
`16`	`17`
`17`	`18`	`[project.scripts]`