Skip to content

Commit 0487ec4

Browse files
committed
feat: add token counter class
1 parent a9bdd72 commit 0487ec4

4 files changed

Lines changed: 63 additions & 0 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .openai_resources import OpenAIResources
2+
3+
# Public API of this package: only the resource-kind constants class.
__all__ = ["OpenAIResources"]
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/usr/bin/python3
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
5+
#
6+
# @Author : Sepine Tam (谭淞)
7+
# @Email : sepinetam@gmail.com
8+
# @File : openai_resources.py
9+
10+
class OpenAIResources:
    """Namespace of string constants naming the supported resource kinds."""

    # Identifier used for chat resources.
    CHAT: str = "chat"

    # Identifier used for embedding resources.
    EMBEDDING: str = "embedding"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .count_token import CountToken
2+
3+
# Public API of this package: only the token counter class.
__all__ = ["CountToken"]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/usr/bin/python3
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
5+
#
6+
# @Author : Sepine Tam (谭淞)
7+
# @Email : sepinetam@gmail.com
8+
# @File : count_token.py
9+
10+
from typing import Dict, List
11+
12+
from tiktoken import Encoding, get_encoding
13+
14+
from ..core import OpenAIResources
15+
16+
17+
class CountToken:
    """Count tokens in text with a tiktoken encoding chosen per resource kind.

    The ``method`` given at construction selects an entry of
    ``ENCODING_MODEL_MAPPING``; a value that is not a key of that mapping
    falls back to the chat encoding.
    """

    # Resource kind -> tiktoken encoding. Both encodings are obtained through
    # ``get_encoding`` while the class body executes.
    ENCODING_MODEL_MAPPING: Dict[str, Encoding] = {
        OpenAIResources.CHAT: get_encoding("o200k_base"),
        OpenAIResources.EMBEDDING: get_encoding("cl100k_base"),
    }

    def __init__(self, method: OpenAIResources | str = OpenAIResources.CHAT):
        """Remember which resource kind this counter tokenizes for."""
        self.method = method

    @property
    def encoding_fn(self) -> Encoding:
        """Return the encoding registered for ``self.method``, else the chat one."""
        registry = self.ENCODING_MODEL_MAPPING
        # Resolved up front, exactly as a ``dict.get`` default argument would be.
        chat_encoding = registry[OpenAIResources.CHAT]
        return registry.get(self.method, chat_encoding)

    def encoding(self, text: str) -> List[int]:
        """Encode *text* into a list of token ids."""
        encoder = self.encoding_fn
        return encoder.encode(text)

    def token(self, text: str) -> int:
        """Return how many tokens *text* encodes to."""
        token_ids = self.encoding(text)
        return len(token_ids)
37+
38+
39+
if __name__ == "__main__":
    # Manual smoke check: print the token count of a sample string using the
    # encoding registered for embeddings.
    count_token = CountToken(OpenAIResources.EMBEDDING)
    sample_token_count = count_token.token("Hello, world!")
    print(sample_token_count)

0 commit comments

Comments
 (0)