Skip to content

Commit a657694

Browse files
authored
feat(tiktoken): improve token counting accuracy (#1382)
Use more accurate token prediction when tiktoken core is not available.

Signed-off-by: Tomas Slusny <slusnucky@gmail.com>
1 parent c4b2e03 commit a657694

2 files changed

Lines changed: 7 additions & 2 deletions

File tree

lua/CopilotChat/client.lua

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,10 @@ function Client:ask(prompt, opts)
362362
local resource_tokens = #resource_messages > 0 and tiktoken:count(resource_messages[1].content) or 0
363363
local required_tokens = prompt_tokens + system_tokens + resource_tokens
364364

365+
log.debug('Prompt tokens:', prompt_tokens)
366+
log.debug('System tokens:', system_tokens)
367+
log.debug('Resource tokens:', resource_tokens)
368+
365369
-- Calculate how many tokens we can use for history
366370
local history_limit = max_tokens - required_tokens
367371
local history_tokens = 0

lua/CopilotChat/tiktoken.lua

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
local log = require('plenary.log')
12
local notify = require('CopilotChat.notify')
23
local utils = require('CopilotChat.utils')
34
local curl = require('CopilotChat.utils.curl')
@@ -105,12 +106,12 @@ end
105106
---@return number
106107
function Tiktoken:count(prompt)
107108
if not self.tiktoken_core then
108-
return math.ceil(#prompt * 0.5) -- Fallback to 1/2 character count
109+
return math.ceil(#prompt / 4)
109110
end
110111

111112
local tokens = self:encode(prompt)
112113
if not tokens then
113-
return math.ceil(#prompt * 0.5) -- Fallback to 1/2 character count
114+
return math.ceil(#prompt / 4)
114115
end
115116
return #tokens
116117
end

0 commit comments

Comments (0)