Skip to content

Commit 317d721

Browse files
author
root
committed
Adjust prompt token estimation formula
1 parent 89a0d13 commit 317d721

1 file changed

Lines changed: 28 additions & 9 deletions

File tree

src/proxy.rs

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -794,20 +794,39 @@ fn estimate_tokens_from_text(text: &str) -> Option<i64> {
794794
return None;
795795
}
796796

797-
let mut estimate = 0_i64;
797+
let mut ascii_chars = 0.0_f64;
798+
let mut cjk_chars = 0.0_f64;
799+
let mut spaces = 0.0_f64;
800+
798801
for ch in trimmed.chars() {
799-
if ch.is_ascii_whitespace() {
800-
continue;
801-
}
802-
if ch.is_ascii() {
803-
estimate += 1;
802+
if ch.is_whitespace() {
803+
spaces += 1.0;
804+
} else if is_cjk_character(ch) {
805+
cjk_chars += 1.0;
806+
} else if ch.is_ascii() {
807+
ascii_chars += 1.0;
804808
} else {
805-
estimate += 2;
809+
ascii_chars += 1.0;
806810
}
807811
}
808812

809-
let estimated_tokens = ((estimate + 3) / 4).max(1);
810-
Some(estimated_tokens)
813+
let estimated_tokens = (0.28 * ascii_chars + 1.4 * cjk_chars + 0.15 * spaces + 4.0).ceil() as i64;
814+
Some(estimated_tokens.max(1))
815+
}
816+
817+
/// Returns `true` when `ch` is a CJK ideograph.
///
/// Matches the CJK Unified Ideographs block, Extensions A through I, and both
/// compatibility-ideograph blocks (BMP and supplementary plane). Kana, Hangul,
/// and CJK punctuation are intentionally not matched here.
fn is_cjk_character(ch: char) -> bool {
    matches!(
        ch as u32,
        0x3400..=0x4DBF // CJK Unified Ideographs Extension A
            | 0x4E00..=0x9FFF // CJK Unified Ideographs
            | 0xF900..=0xFAFF // CJK Compatibility Ideographs
            | 0x20000..=0x2A6DF // Extension B
            | 0x2A700..=0x2B73F // Extension C
            | 0x2B740..=0x2B81F // Extension D
            | 0x2B820..=0x2CEAF // Extension E
            | 0x2CEB0..=0x2EBEF // Extension F
            | 0x2EBF0..=0x2EE5F // Extension I
            | 0x2F800..=0x2FA1F // CJK Compatibility Ideographs Supplement
            | 0x30000..=0x3134F // Extension G
            | 0x31350..=0x323AF // Extension H
    )
}
812831

813832
async fn collect_stream_bytes(

0 commit comments

Comments
 (0)