Token optimization

piyushroshan · piyushroshan · commit df8cb3f1f62d · 2026-04-02T21:09:33.000+05:30
AI-Session-Id: 67b21795-459d-4390-867a-08427ddf8555
AI-Tool: claude-code
AI-Model: unknown
diff --git a/services/chatbot/src/chatbot/agent_utils.py b/services/chatbot/src/chatbot/agent_utils.py
@@ -1,4 +1,5 @@
 import json
+import logging
 
 from langchain.agents import AgentState
 from langchain.agents.middleware.types import before_model
@@ -7,7 +8,11 @@
 
 from .config import Config
 
+logger = logging.getLogger(__name__)
+
 INDIVIDUAL_MIN_LENGTH = 100
+# Approximate characters per token across providers
+CHARS_PER_TOKEN = 4
 
 
 def collect_long_strings(obj):
@@ -88,3 +93,53 @@ def truncate_tool_messages(state: AgentState, runtime: Runtime) -> AgentState:
         else:
             modified_messages.append(msg)
     return {"messages": modified_messages}
+
+
+def _estimate_tokens(text):
+    """Approximate the token count of ``text`` as len(text) // CHARS_PER_TOKEN."""
+    return divmod(len(text), CHARS_PER_TOKEN)[0]
+
+
+def _message_content(msg):
+    """Return a message's content as text: None -> '', non-str -> str(content)."""
+    is_mapping = isinstance(msg, dict)
+    raw = msg.get("content") if is_mapping else getattr(msg, "content", None)
+    return raw if isinstance(raw, str) else "" if raw is None else str(raw)
+
+
+def trim_messages_to_token_limit(messages):
+    """
+    Drop the oldest messages until the estimated token total fits the budget
+    of Config.MAX_CONTENT_LENGTH // CHARS_PER_TOKEN tokens.
+
+    Args:
+        messages: Sequence of message dicts or objects, oldest first.
+
+    Returns:
+        ``messages`` itself when it is empty or already within budget,
+        otherwise a new list of the most recent messages. The last message is
+        always kept, even if it alone exceeds the budget.
+    """
+    if not messages:
+        return messages
+
+    max_tokens = Config.MAX_CONTENT_LENGTH // CHARS_PER_TOKEN
+    token_counts = [_estimate_tokens(_message_content(m)) for m in messages]
+    total_tokens = sum(token_counts)
+    if total_tokens <= max_tokens:
+        return messages
+
+    # Walk forward with a running total rather than re-summing and pop(0)-ing
+    # on every iteration, which made trimming quadratic in the history length.
+    remaining, drop = total_tokens, 0
+    while drop < len(token_counts) - 1 and remaining > max_tokens:
+        remaining -= token_counts[drop]
+        drop += 1
+    trimmed = list(messages)[drop:]
+
+    logger.info(
+        "Trimmed conversation history from %d to %d messages "
+        "(estimated tokens: %d -> %d, limit: %d)",
+        len(messages), len(trimmed), total_tokens, remaining, max_tokens,
+    )
+    return trimmed
diff --git a/services/chatbot/src/chatbot/chat_api.py b/services/chatbot/src/chatbot/chat_api.py
@@ -4,6 +4,7 @@
 
 from quart import Blueprint, jsonify, request
 
+from .agent_utils import trim_messages_to_token_limit
 from .chat_service import (delete_chat_history, get_chat_history,
                            process_user_message)
 from .config import Config
@@ -229,8 +230,7 @@ async def state():
             "Provider API key for session %s: %s", session_id, provider_api_key[:5]
         )
         chat_history = await get_chat_history(session_id)
-        # Limit chat history to last 20 messages
-        chat_history = chat_history[-20:]
+        chat_history = trim_messages_to_token_limit(chat_history)
         return (
             jsonify(
                 {
@@ -259,16 +259,15 @@ async def history():
     provider_api_key = await get_api_key(session_id)
     if provider in {"openai", "anthropic"} and provider_api_key:
         chat_history = await get_chat_history(session_id)
-        # Limit chat history to last 20 messages
-        chat_history = chat_history[-20:]
+        chat_history = trim_messages_to_token_limit(chat_history)
         return jsonify({"chat_history": chat_history}), 200
     if provider in {"openai", "anthropic"}:
         return (
             jsonify({"chat_history": []}),
             200,
         )
     chat_history = await get_chat_history(session_id)
-    chat_history = chat_history[-20:] if chat_history else []
+    chat_history = trim_messages_to_token_limit(chat_history) if chat_history else []
     return jsonify({"chat_history": chat_history}), 200
 
 
diff --git a/services/chatbot/src/chatbot/chat_service.py b/services/chatbot/src/chatbot/chat_service.py
@@ -3,6 +3,7 @@
 
 from langgraph.graph.message import Messages
 
+from .agent_utils import trim_messages_to_token_limit
 from .config import Config
 from .extensions import db
 from .langgraph_agent import execute_langgraph_agent
@@ -80,8 +81,7 @@ async def process_user_message(session_id, user_message, api_key, model_name, us
     )
     logger.debug("Added messages to Chroma collection - session_id: %s", session_id)
 
-    # Limit chat history to last 20 messages
-    history = history[-20:]
+    history = trim_messages_to_token_limit(history)
     await update_chat_history(session_id, history)
     logger.info(
         "Message processing complete - session_id: %s, response_id: %s, history_count: %d",
diff --git a/services/chatbot/src/chatbot/config.py b/services/chatbot/src/chatbot/config.py
@@ -34,6 +34,6 @@ class Config:
     AWS_ROLE_SESSION_NAME = os.getenv("AWS_ROLE_SESSION_NAME", "crapi-chatbot-session")
     VERTEX_PROJECT = os.getenv("VERTEX_PROJECT", "")
     VERTEX_LOCATION = os.getenv("VERTEX_LOCATION", "")
-    MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH", 50000))
+    MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH", 100000))
     CHROMA_HOST = CHROMA_HOST
     CHROMA_PORT = CHROMA_PORT
diff --git a/services/chatbot/src/chatbot/langgraph_agent.py b/services/chatbot/src/chatbot/langgraph_agent.py
@@ -11,7 +11,7 @@
 from langchain_mistralai import ChatMistralAI
 from langchain_openai import AzureChatOpenAI, ChatOpenAI
 
-from .agent_utils import truncate_tool_messages
+from .agent_utils import trim_messages_to_token_limit, truncate_tool_messages
 from .aws_credentials import get_bedrock_credentials_kwargs
 from .config import Config
 from .extensions import postgresdb
@@ -263,6 +263,7 @@ async def execute_langgraph_agent(
         len(messages),
     )
     agent = await build_langgraph_agent(api_key, model_name, user_jwt)
+    messages = trim_messages_to_token_limit(messages)
     logger.debug("Invoking agent with %d messages", len(messages))
     response = await agent.ainvoke({"messages": messages})
     logger.info(