Skip to content

Commit df8cb3f

Browse files
committed
Token optimization
AI-Session-Id: 67b21795-459d-4390-867a-08427ddf8555 AI-Tool: claude-code AI-Model: unknown
1 parent 2ab5bf6 commit df8cb3f

5 files changed

Lines changed: 64 additions & 9 deletions

File tree

services/chatbot/src/chatbot/agent_utils.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import logging
23

34
from langchain.agents import AgentState
45
from langchain.agents.middleware.types import before_model
@@ -7,7 +8,11 @@
78

89
from .config import Config
910

11+
logger = logging.getLogger(__name__)
12+
1013
INDIVIDUAL_MIN_LENGTH = 100
14+
# Approximate characters per token across providers
15+
CHARS_PER_TOKEN = 4
1116

1217

1318
def collect_long_strings(obj):
@@ -88,3 +93,53 @@ def truncate_tool_messages(state: AgentState, runtime: Runtime) -> AgentState:
8893
else:
8994
modified_messages.append(msg)
9095
return {"messages": modified_messages}
96+
97+
98+
def _estimate_tokens(text):
    """Estimate a token count using a character-based approximation.

    Args:
        text: Message content. Normally a string; ``None`` and non-string
            values (for example a list of content blocks) are tolerated.

    Returns:
        The character count floor-divided by ``CHARS_PER_TOKEN``. ``None``
        yields 0.
    """
    if text is None:
        # Messages may carry no content at all; treat that as zero tokens
        # instead of letting len(None) raise TypeError.
        return 0
    if not isinstance(text, (str, bytes)):
        # len() of a list/dict counts items, not characters, which would
        # badly under-estimate structured content. Measure its string form.
        text = str(text)
    return len(text) // CHARS_PER_TOKEN
101+
102+
103+
def _message_content(msg):
104+
"""Extract text content from a message dict or object."""
105+
if isinstance(msg, dict):
106+
return msg.get("content", "")
107+
return getattr(msg, "content", "")
108+
109+
110+
def trim_messages_to_token_limit(messages):
    """Trim conversation history from the oldest end to fit the token budget.

    The budget is ``Config.MAX_CONTENT_LENGTH // CHARS_PER_TOKEN`` estimated
    tokens. Messages are dropped from the front until the estimate fits or
    only one message remains; the most recent message is always kept, even
    if it alone exceeds the budget.

    Args:
        messages: Sequence of message dicts or message objects, oldest first.

    Returns:
        ``messages`` itself when it is empty or already within budget;
        otherwise a new list holding the retained tail of the conversation.
    """
    if not messages:
        return messages

    max_tokens = Config.MAX_CONTENT_LENGTH // CHARS_PER_TOKEN

    # Estimate per-message tokens once up front.
    token_counts = [_estimate_tokens(_message_content(m)) for m in messages]
    total_tokens = sum(token_counts)

    if total_tokens <= max_tokens:
        return messages

    # Walk forward from the oldest message with a running total instead of
    # re-summing and pop(0)-ing on every iteration (which is quadratic).
    # NOTE(review): trimming is purely positional, so a leading system
    # message or one half of a tool-call/tool-result pair may be dropped;
    # confirm that callers and providers tolerate that.
    remaining_tokens = total_tokens
    drop_count = 0
    last_index = len(token_counts) - 1  # never drop the final message
    while drop_count < last_index and remaining_tokens > max_tokens:
        remaining_tokens -= token_counts[drop_count]
        drop_count += 1

    trimmed = list(messages)[drop_count:]

    logger.info(
        "Trimmed conversation history from %d to %d messages "
        "(estimated tokens: %d -> %d, limit: %d)",
        len(messages),
        len(trimmed),
        total_tokens,
        remaining_tokens,
        max_tokens,
    )
    return trimmed

services/chatbot/src/chatbot/chat_api.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from quart import Blueprint, jsonify, request
66

7+
from .agent_utils import trim_messages_to_token_limit
78
from .chat_service import (delete_chat_history, get_chat_history,
89
process_user_message)
910
from .config import Config
@@ -229,8 +230,7 @@ async def state():
229230
"Provider API key for session %s: %s", session_id, provider_api_key[:5]
230231
)
231232
chat_history = await get_chat_history(session_id)
232-
# Limit chat history to last 20 messages
233-
chat_history = chat_history[-20:]
233+
chat_history = trim_messages_to_token_limit(chat_history)
234234
return (
235235
jsonify(
236236
{
@@ -259,16 +259,15 @@ async def history():
259259
provider_api_key = await get_api_key(session_id)
260260
if provider in {"openai", "anthropic"} and provider_api_key:
261261
chat_history = await get_chat_history(session_id)
262-
# Limit chat history to last 20 messages
263-
chat_history = chat_history[-20:]
262+
chat_history = trim_messages_to_token_limit(chat_history)
264263
return jsonify({"chat_history": chat_history}), 200
265264
if provider in {"openai", "anthropic"}:
266265
return (
267266
jsonify({"chat_history": []}),
268267
200,
269268
)
270269
chat_history = await get_chat_history(session_id)
271-
chat_history = chat_history[-20:] if chat_history else []
270+
chat_history = trim_messages_to_token_limit(chat_history) if chat_history else []
272271
return jsonify({"chat_history": chat_history}), 200
273272

274273

services/chatbot/src/chatbot/chat_service.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from langgraph.graph.message import Messages
55

6+
from .agent_utils import trim_messages_to_token_limit
67
from .config import Config
78
from .extensions import db
89
from .langgraph_agent import execute_langgraph_agent
@@ -80,8 +81,7 @@ async def process_user_message(session_id, user_message, api_key, model_name, us
8081
)
8182
logger.debug("Added messages to Chroma collection - session_id: %s", session_id)
8283

83-
# Limit chat history to last 20 messages
84-
history = history[-20:]
84+
history = trim_messages_to_token_limit(history)
8585
await update_chat_history(session_id, history)
8686
logger.info(
8787
"Message processing complete - session_id: %s, response_id: %s, history_count: %d",

services/chatbot/src/chatbot/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,6 @@ class Config:
3434
AWS_ROLE_SESSION_NAME = os.getenv("AWS_ROLE_SESSION_NAME", "crapi-chatbot-session")
3535
VERTEX_PROJECT = os.getenv("VERTEX_PROJECT", "")
3636
VERTEX_LOCATION = os.getenv("VERTEX_LOCATION", "")
37-
MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH", 50000))
37+
MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH", 100000))
3838
CHROMA_HOST = CHROMA_HOST
3939
CHROMA_PORT = CHROMA_PORT

services/chatbot/src/chatbot/langgraph_agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from langchain_mistralai import ChatMistralAI
1212
from langchain_openai import AzureChatOpenAI, ChatOpenAI
1313

14-
from .agent_utils import truncate_tool_messages
14+
from .agent_utils import trim_messages_to_token_limit, truncate_tool_messages
1515
from .aws_credentials import get_bedrock_credentials_kwargs
1616
from .config import Config
1717
from .extensions import postgresdb
@@ -263,6 +263,7 @@ async def execute_langgraph_agent(
263263
len(messages),
264264
)
265265
agent = await build_langgraph_agent(api_key, model_name, user_jwt)
266+
messages = trim_messages_to_token_limit(messages)
266267
logger.debug("Invoking agent with %d messages", len(messages))
267268
response = await agent.ainvoke({"messages": messages})
268269
logger.info(

0 commit comments

Comments
 (0)