Skip to content

Commit 2e2e19f

Browse files
committed
bench: add libcst visitor benchmarks for multi-file and full pipeline
- test_benchmark_libcst_multi_file: discover_functions + get_code_optimization_context across 10 real source files - test_benchmark_libcst_pipeline: full discover → extract → replace → merge pipeline on one file
1 parent 1a25f05 commit 2e2e19f

2 files changed

Lines changed: 131 additions & 0 deletions

File tree

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
"""Benchmark libcst visitor performance across many files.
2+
3+
Exercises the visitor-heavy codepaths that benefit from the libcst dispatch
4+
table cache: discover_functions + get_code_optimization_context on multiple
5+
real source files.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
from pathlib import Path
11+
12+
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
13+
from codeflash.languages.python.context.code_context_extractor import get_code_optimization_context
14+
from codeflash.languages.python.support import PythonSupport
15+
from codeflash.models.models import FunctionParent
16+
17+
# Real source files from the codeflash codebase, chosen for size and visitor diversity.
_CODEFLASH_ROOT = Path(__file__).parent.parent.parent.resolve() / "codeflash"

# One benchmark target per source file: (file_path, class_name, function_name).
# class_name is None for a module-level function; otherwise it names the class
# that contains the target method.
_TARGETS: list[tuple[Path, str | None, str]] = [
    (
        _CODEFLASH_ROOT / "languages" / "function_optimizer.py",
        "FunctionOptimizer",
        "replace_function_and_helpers_with_optimized_code",
    ),
    (
        _CODEFLASH_ROOT / "languages" / "python" / "context" / "code_context_extractor.py",
        None,
        "get_code_optimization_context",
    ),
    (
        _CODEFLASH_ROOT / "languages" / "python" / "support.py",
        "PythonSupport",
        "discover_functions",
    ),
    (
        _CODEFLASH_ROOT / "languages" / "python" / "static_analysis" / "code_extractor.py",
        None,
        "add_global_assignments",
    ),
    (
        _CODEFLASH_ROOT / "languages" / "python" / "static_analysis" / "code_replacer.py",
        None,
        "replace_functions_in_file",
    ),
    (
        _CODEFLASH_ROOT / "code_utils" / "instrument_existing_tests.py",
        None,
        "inject_profiling_into_existing_test",
    ),
    (
        _CODEFLASH_ROOT / "benchmarking" / "compare.py",
        None,
        "compare_branches",
    ),
    (
        _CODEFLASH_ROOT / "models" / "models.py",
        None,
        "get_comment_prefix",
    ),
    (
        _CODEFLASH_ROOT / "discovery" / "discover_unit_tests.py",
        None,
        "discover_unit_tests",
    ),
    (
        _CODEFLASH_ROOT / "languages" / "base.py",
        None,
        "convert_parents_to_tuple",
    ),
]

# Files scanned by the discovery benchmark. Derived from _TARGETS so every path
# is paired with its target by construction instead of by matching list indices.
_SOURCE_FILES: list[Path] = [target_path for target_path, _, _ in _TARGETS]
47+
48+
49+
def _discover_all() -> None:
    """Run ``discover_functions`` on every file in ``_SOURCE_FILES``.

    Each file is read from disk and handed, together with its path, to one
    shared ``PythonSupport`` instance. The discovery results are discarded.
    """
    support = PythonSupport()
    for file_path in _SOURCE_FILES:
        # Python source defaults to UTF-8 (PEP 3120); decode explicitly so the
        # read does not depend on the platform's locale encoding.
        source = file_path.read_text(encoding="utf-8")
        support.discover_functions(source=source, file_path=file_path)
55+
56+
57+
def _extract_all_contexts() -> None:
    """Call ``get_code_optimization_context`` once for each entry in ``_TARGETS``.

    The project root passed to the extractor is the parent directory of
    ``_CODEFLASH_ROOT``. A target with a class name gets a single ``ClassDef``
    parent; a module-level target gets an empty parent list.
    """
    root = _CODEFLASH_ROOT.parent
    for path, owner, name in _TARGETS:
        if owner:
            enclosing = [FunctionParent(name=owner, type="ClassDef")]
        else:
            enclosing = []
        target = FunctionToOptimize(
            function_name=name,
            file_path=path,
            parents=enclosing,
            starting_line=None,
            ending_line=None,
        )
        get_code_optimization_context(target, root)
66+
67+
68+
def test_benchmark_discover_functions_multi_file(benchmark) -> None:
    """Benchmark one ``_discover_all`` pass over the 10 source files.

    ``benchmark`` is called with the function to time (presumably the
    pytest-benchmark fixture -- confirm against the test configuration).
    """
    benchmark(_discover_all)
71+
72+
73+
def test_benchmark_extract_context_multi_file(benchmark) -> None:
    """Benchmark one ``_extract_all_contexts`` pass: 10 functions across 10 files.

    ``benchmark`` is called with the function to time (presumably the
    pytest-benchmark fixture -- confirm against the test configuration).
    """
    benchmark(_extract_all_contexts)
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Benchmark the full libcst-heavy pipeline on a single file.
2+
3+
Runs discover → extract context → replace functions → add global assignments
4+
in sequence, exercising ~15 distinct visitor/transformer classes in one pass.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from pathlib import Path
10+
11+
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
12+
from codeflash.languages.python.context.code_context_extractor import get_code_optimization_context
13+
from codeflash.languages.python.static_analysis.code_extractor import add_global_assignments
14+
from codeflash.languages.python.static_analysis.code_replacer import replace_functions_in_file
15+
from codeflash.languages.python.support import PythonSupport
16+
17+
# Directory three parents up from this file; the "codeflash" directory sits directly under it.
_PROJECT_ROOT = Path(__file__).parent.parent.parent.resolve()
_CODEFLASH_ROOT = _PROJECT_ROOT / "codeflash"

# Primary input of the pipeline benchmark: the file that is parsed, and the
# function inside it that is targeted.
_TARGET_FILE = _CODEFLASH_ROOT.joinpath("languages", "python", "static_analysis", "code_extractor.py")
_TARGET_FUNC = "add_global_assignments"

# Second real file whose text stands in as the "optimized" source for the replace/merge steps.
_SECOND_FILE = _CODEFLASH_ROOT.joinpath("languages", "python", "static_analysis", "code_replacer.py")
26+
27+
28+
def _run_pipeline() -> None:
    """Simulate a single-file optimization pass through the full visitor pipeline.

    Steps, in order:

    1. discover functions in ``_TARGET_FILE``;
    2. extract the optimization context for ``_TARGET_FUNC``;
    3. replace ``_TARGET_FUNC`` in the target source, using the text of
       ``_SECOND_FILE`` as the stand-in "optimized" code;
    4. call ``add_global_assignments`` with the second source and the target
       source.

    Every result is discarded; the function exists only to be timed.
    """
    # Python source defaults to UTF-8 (PEP 3120); decode explicitly so the
    # reads do not depend on the platform's locale encoding.
    source = _TARGET_FILE.read_text(encoding="utf-8")
    source2 = _SECOND_FILE.read_text(encoding="utf-8")

    # 1. Discover functions (FunctionVisitor + MetadataWrapper)
    # The result is not consumed by later steps, so it is not bound to a name.
    PythonSupport().discover_functions(source=source, file_path=_TARGET_FILE)

    # 2. Extract code optimization context (multiple collectors + dependency resolver)
    fto = FunctionToOptimize(
        function_name=_TARGET_FUNC,
        file_path=_TARGET_FILE,
        parents=[],
        starting_line=None,
        ending_line=None,
    )
    get_code_optimization_context(fto, _PROJECT_ROOT)

    # 3. Replace functions (GlobalFunctionCollector + GlobalFunctionTransformer)
    # Only the module-level target function is named for replacement.
    replace_functions_in_file(
        source_code=source,
        original_function_names=[_TARGET_FUNC],
        optimized_code=source2,
        preexisting_objects=set(),
    )

    # 4. Add global assignments (6 visitors/transformers)
    add_global_assignments(source2, source)
52+
53+
54+
def test_benchmark_full_pipeline(benchmark) -> None:
    """Benchmark one ``_run_pipeline`` pass: discover → extract → replace → merge on one file.

    ``benchmark`` is called with the function to time (presumably the
    pytest-benchmark fixture -- confirm against the test configuration).
    """
    benchmark(_run_pipeline)

0 commit comments

Comments
 (0)