Skip to content

Commit 39297d9

Browse files
committed
Benchmarks for large sets with 10000 lines
1 parent 6358705 commit 39297d9

1 file changed

Lines changed: 85 additions & 0 deletions

File tree

tests/benchmarks/test_token_estimator.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,21 @@ def test_lorem_ipsum_times_1000_times(benchmark: BenchmarkFixture) -> None:
123123
benchmark(estimate_tokens, input_string)
124124

125125

126+
def test_lorem_ipsum_times_2000_times(benchmark: BenchmarkFixture) -> None:
    """Benchmark token estimation on the Lorem Ipsum text repeated 2000 times.

    Parameters:
    ----------
        benchmark (BenchmarkFixture): pytest-benchmark fixture.

    Returns:
    -------
        None
    """
    # Build the enlarged input once, outside the measured call, so that only
    # estimate_tokens itself is handed to the benchmark fixture.
    repeated_text = LOREM_IPSUM * 2000
    benchmark(estimate_tokens, repeated_text)
139+
140+
126141
def benchmark_file_tokenization(benchmark: BenchmarkFixture, filename: str) -> None:
127142
"""Read the given file and tokenize it as benchmark.
128143
@@ -184,6 +199,20 @@ def test_xml_file_1000_lines(benchmark: BenchmarkFixture) -> None:
184199
benchmark_file_tokenization(benchmark, "xml_1000_lines.xml")
185200

186201

202+
def test_xml_file_10000_lines(benchmark: BenchmarkFixture) -> None:
    """Benchmark tokenizing the XML fixture file named for 10000 lines.

    Delegates to ``benchmark_file_tokenization`` with the XML fixture's
    file name.

    Parameters:
    ----------
        benchmark (BenchmarkFixture): pytest-benchmark fixture.

    Returns:
    -------
        None
    """
    fixture_filename = "xml_10000_lines.xml"
    benchmark_file_tokenization(benchmark, fixture_filename)
214+
215+
187216
def test_yaml_file_10_lines(benchmark: BenchmarkFixture) -> None:
188217
"""Test tokenizing YAML file containing just 10 lines.
189218
@@ -226,6 +255,20 @@ def test_yaml_file_1000_lines(benchmark: BenchmarkFixture) -> None:
226255
benchmark_file_tokenization(benchmark, "yaml_1000_lines.yml")
227256

228257

258+
def test_yaml_file_10000_lines(benchmark: BenchmarkFixture) -> None:
    """Benchmark tokenizing the YAML fixture file named for 10000 lines.

    Delegates to ``benchmark_file_tokenization`` with the YAML fixture's
    file name.

    Parameters:
    ----------
        benchmark (BenchmarkFixture): pytest-benchmark fixture.

    Returns:
    -------
        None
    """
    fixture_filename = "yaml_10000_lines.yml"
    benchmark_file_tokenization(benchmark, fixture_filename)
270+
271+
229272
def test_json_file_10_lines(benchmark: BenchmarkFixture) -> None:
230273
"""Test tokenizing JSON file containing just 10 lines.
231274
@@ -268,6 +311,20 @@ def test_json_file_1000_lines(benchmark: BenchmarkFixture) -> None:
268311
benchmark_file_tokenization(benchmark, "json_1000_lines.json")
269312

270313

314+
def test_json_file_10000_lines(benchmark: BenchmarkFixture) -> None:
    """Benchmark tokenizing the JSON fixture file named for 10000 lines.

    Delegates to ``benchmark_file_tokenization`` with the JSON fixture's
    file name.

    Parameters:
    ----------
        benchmark (BenchmarkFixture): pytest-benchmark fixture.

    Returns:
    -------
        None
    """
    fixture_filename = "json_10000_lines.json"
    benchmark_file_tokenization(benchmark, fixture_filename)
326+
327+
271328
def test_python_source_10_lines(benchmark: BenchmarkFixture) -> None:
272329
"""Test tokenizing Python script containing just 10 lines.
273330
@@ -310,6 +367,20 @@ def test_python_source_1000_lines(benchmark: BenchmarkFixture) -> None:
310367
benchmark_file_tokenization(benchmark, "python_1000_lines.py")
311368

312369

370+
def test_python_source_10000_lines(benchmark: BenchmarkFixture) -> None:
    """Benchmark tokenizing the Python fixture script named for 10000 lines.

    Delegates to ``benchmark_file_tokenization`` with the Python fixture's
    file name.

    Parameters:
    ----------
        benchmark (BenchmarkFixture): pytest-benchmark fixture.

    Returns:
    -------
        None
    """
    fixture_filename = "python_10000_lines.py"
    benchmark_file_tokenization(benchmark, fixture_filename)
382+
383+
313384
def test_javascript_source_10_lines(benchmark: BenchmarkFixture) -> None:
314385
"""Test tokenizing JavaScript script containing just 10 lines.
315386
@@ -350,3 +421,17 @@ def test_javascript_source_1000_lines(benchmark: BenchmarkFixture) -> None:
350421
None
351422
"""
352423
benchmark_file_tokenization(benchmark, "js_1000_lines.js")
424+
425+
426+
def test_javascript_source_10000_lines(benchmark: BenchmarkFixture) -> None:
    """Benchmark tokenizing the JavaScript fixture script named for 10000 lines.

    Delegates to ``benchmark_file_tokenization`` with the JavaScript
    fixture's file name.

    Parameters:
    ----------
        benchmark (BenchmarkFixture): pytest-benchmark fixture.

    Returns:
    -------
        None
    """
    fixture_filename = "js_10000_lines.js"
    benchmark_file_tokenization(benchmark, fixture_filename)

0 commit comments

Comments
 (0)