Skip to content

Commit 716c165

Browse files
authored
Merge pull request #1861 from tisnik/lcore-2493
LCORE-2478: Basic token estimator benchmarks
2 parents aff9214 + 1e4d489 commit 716c165

1 file changed

Lines changed: 58 additions & 0 deletions

File tree

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""Benchmarks for token estimator."""
2+
3+
from pytest_benchmark.fixture import BenchmarkFixture
4+
5+
from utils.token_estimator import (
6+
estimate_tokens,
7+
)
8+
9+
# Sample paragraph used as the base input for the lorem-ipsum benchmarks;
# the larger benchmarks repeat this text to scale the input size.
LOREM_IPSUM = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.
"""
17+
18+
19+
def test_estimate_empty_string(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``estimate_tokens`` with an empty string as input.

    Parameters:
        benchmark: pytest-benchmark fixture that repeatedly invokes the
            callable with the given argument and records timings.
    """
    benchmark(estimate_tokens, "")
23+
24+
25+
def test_estimate_hello_world(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``estimate_tokens`` with the short text "Hello world".

    Parameters:
        benchmark: pytest-benchmark fixture that repeatedly invokes the
            callable with the given argument and records timings.
    """
    benchmark(estimate_tokens, "Hello world")
29+
30+
31+
def test_pangram(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``estimate_tokens`` with a one-sentence English pangram.

    Only timing is measured here; the returned estimate is not asserted.

    Parameters:
        benchmark: pytest-benchmark fixture that repeatedly invokes the
            callable with the given argument and records timings.
    """
    pangram = "The quick brown fox jumps over the lazy dog."
    benchmark(estimate_tokens, pangram)
35+
36+
37+
def test_lorem_ipsum(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``estimate_tokens`` with one copy of ``LOREM_IPSUM``.

    Only timing is measured here; the returned estimate is not asserted.

    Parameters:
        benchmark: pytest-benchmark fixture that repeatedly invokes the
            callable with the given argument and records timings.
    """
    benchmark(estimate_tokens, LOREM_IPSUM)
41+
42+
43+
def test_lorem_ipsum_times_10_times(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``estimate_tokens`` with ``LOREM_IPSUM`` repeated 10 times.

    Only timing is measured here; the returned estimate is not asserted.

    Parameters:
        benchmark: pytest-benchmark fixture that repeatedly invokes the
            callable with the given argument and records timings.
    """
    # Build the input before handing it to the fixture so that string
    # repetition is not part of the measured call.
    repeated_text = LOREM_IPSUM * 10
    benchmark(estimate_tokens, repeated_text)
47+
48+
49+
def test_lorem_ipsum_times_100_times(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``estimate_tokens`` with ``LOREM_IPSUM`` repeated 100 times.

    Only timing is measured here; the returned estimate is not asserted.

    Parameters:
        benchmark: pytest-benchmark fixture that repeatedly invokes the
            callable with the given argument and records timings.
    """
    # Build the input before handing it to the fixture so that string
    # repetition is not part of the measured call.
    repeated_text = LOREM_IPSUM * 100
    benchmark(estimate_tokens, repeated_text)
53+
54+
55+
def _test_lorem_ipsum_times_1000_times(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``estimate_tokens`` with ``LOREM_IPSUM`` repeated 1000 times.

    The leading underscore means the name does not match pytest's default
    ``test*`` function pattern, so this benchmark is not collected.
    NOTE(review): presumably disabled because the input is too large for
    routine runs - confirm before renaming to re-enable.

    Only timing is measured here; the returned estimate is not asserted.

    Parameters:
        benchmark: pytest-benchmark fixture that repeatedly invokes the
            callable with the given argument and records timings.
    """
    # Build the input before handing it to the fixture so that string
    # repetition is not part of the measured call.
    repeated_text = LOREM_IPSUM * 1000
    benchmark(estimate_tokens, repeated_text)

0 commit comments

Comments
 (0)