|
| 1 | +"""Benchmarks for token estimator.""" |
| 2 | + |
| 3 | +from pytest_benchmark.fixture import BenchmarkFixture |
| 4 | + |
| 5 | +from utils.token_estimator import ( |
| 6 | + estimate_tokens, |
| 7 | +) |
| 8 | + |
# Sample paragraph used as benchmark input. The literal begins and ends with a
# newline; the larger benchmarks in this module repeat it with ``*``.
LOREM_IPSUM = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.
"""
| 17 | + |
| 18 | + |
def test_estimate_empty_string(benchmark: BenchmarkFixture) -> None:
    """Measure ``estimate_tokens`` on an empty string.

    Args:
        benchmark: The pytest-benchmark fixture; it invokes the target
            callable with the given argument and records the timing.
    """
    benchmark(estimate_tokens, "")
| 23 | + |
| 24 | + |
def test_estimate_hello_world(benchmark: BenchmarkFixture) -> None:
    """Measure ``estimate_tokens`` on the short phrase "Hello world".

    Args:
        benchmark: The pytest-benchmark fixture; it invokes the target
            callable with the given argument and records the timing.
    """
    benchmark(estimate_tokens, "Hello world")
| 29 | + |
| 30 | + |
def test_pangram(benchmark: BenchmarkFixture) -> None:
    """Benchmark for an English pangram as input.

    This only times ``estimate_tokens``; it does not assert anything about
    the returned token count.

    Args:
        benchmark: The pytest-benchmark fixture; it invokes the target
            callable with the given argument and records the timing.
    """
    pangram = "The quick brown fox jumps over the lazy dog."
    benchmark(estimate_tokens, pangram)
| 35 | + |
| 36 | + |
def test_lorem_ipsum(benchmark: BenchmarkFixture) -> None:
    """Benchmark for a single lorem ipsum paragraph as input.

    This only times ``estimate_tokens``; it does not assert anything about
    the returned token count.

    Args:
        benchmark: The pytest-benchmark fixture; it invokes the target
            callable with the given argument and records the timing.
    """
    benchmark(estimate_tokens, LOREM_IPSUM)
| 41 | + |
| 42 | + |
def test_lorem_ipsum_times_10_times(benchmark: BenchmarkFixture) -> None:
    """Benchmark for the lorem ipsum paragraph repeated 10 times as input.

    This only times ``estimate_tokens``; it does not assert anything about
    the returned token count.

    Args:
        benchmark: The pytest-benchmark fixture; it invokes the target
            callable with the given argument and records the timing.
    """
    # Build the input before handing it to the fixture so that string
    # construction is not part of the measured call.
    repeated_text = LOREM_IPSUM * 10
    benchmark(estimate_tokens, repeated_text)
| 47 | + |
| 48 | + |
def test_lorem_ipsum_times_100_times(benchmark: BenchmarkFixture) -> None:
    """Benchmark for the lorem ipsum paragraph repeated 100 times as input.

    This only times ``estimate_tokens``; it does not assert anything about
    the returned token count.

    Args:
        benchmark: The pytest-benchmark fixture; it invokes the target
            callable with the given argument and records the timing.
    """
    # Build the input before handing it to the fixture so that string
    # construction is not part of the measured call.
    repeated_text = LOREM_IPSUM * 100
    benchmark(estimate_tokens, repeated_text)
| 53 | + |
| 54 | + |
def _test_lorem_ipsum_times_1000_times(benchmark: BenchmarkFixture) -> None:
    """Benchmark for the lorem ipsum paragraph repeated 1000 times as input.

    This only times ``estimate_tokens``; it does not assert anything about
    the returned token count.

    The leading underscore means the name does not match pytest's default
    ``test*`` function pattern, so this benchmark is not collected.
    NOTE(review): presumably disabled on purpose (e.g. runtime) -- confirm.

    Args:
        benchmark: The pytest-benchmark fixture; it invokes the target
            callable with the given argument and records the timing.
    """
    # Build the input before handing it to the fixture so that string
    # construction is not part of the measured call.
    repeated_text = LOREM_IPSUM * 1000
    benchmark(estimate_tokens, repeated_text)
0 commit comments