# codeflash/languages/python/function_optimizer.py
# Source: GitHub repository codeflash-ai/codeflash, at revision 4ccbffe4f3eed94b028870bd618f21d44739836c.

from __future__ import annotations

import ast
from pathlib import Path
from typing import TYPE_CHECKING

from codeflash.cli_cmds.console import console, logger
from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE
from codeflash.languages.python.context.unused_definition_remover import (
    detect_unused_helper_functions,
    revert_unused_helper_functions,
)
from codeflash.languages.python.optimizer import resolve_python_function_ast
from codeflash.languages.python.static_analysis.code_extractor import get_opt_review_metrics, is_numerical_code
from codeflash.languages.python.static_analysis.code_replacer import (
    add_custom_marker_to_all_tests,
    modify_autouse_fixture,
)
from codeflash.languages.python.static_analysis.line_profile_utils import add_decorator_imports, contains_jit_decorator
from codeflash.models.models import TestingMode, TestResults
from codeflash.optimization.function_optimizer import FunctionOptimizer
from codeflash.verification.parse_test_output import calculate_function_throughput_from_test_results

if TYPE_CHECKING:
    from typing import Any

    from codeflash.languages.base import Language
    from codeflash.models.function_types import FunctionParent
    from codeflash.models.models import (
        CodeOptimizationContext,
        CodeStringsMarkdown,
        ConcurrencyMetrics,
        CoverageData,
        OriginalCodeBaseline,
        TestDiff,
    )


class PythonFunctionOptimizer(FunctionOptimizer):
    """``FunctionOptimizer`` subclass wired to the Python-specific helpers imported by this module.

    Each method delegates to a Python static-analysis, instrumentation or
    verification helper. NOTE(review): most methods presumably override hooks
    declared on ``FunctionOptimizer``; the base class is not visible here.
    """

    def _resolve_function_ast(
        self, source_code: str, function_name: str, parents: list[FunctionParent]
    ) -> ast.FunctionDef | ast.AsyncFunctionDef | None:
        """Parse ``source_code`` and resolve ``function_name`` inside the resulting module AST.

        The lookup is delegated to ``resolve_python_function_ast``, which receives
        the function name, its ``parents`` and the parsed module.
        """
        return resolve_python_function_ast(function_name, parents, ast.parse(source_code))

    def analyze_code_characteristics(self, code_context: CodeOptimizationContext) -> None:
        """Store on ``self.is_numerical_code`` the result of ``is_numerical_code`` for the writable code."""
        writable_code = code_context.read_writable_code.flat
        self.is_numerical_code = is_numerical_code(code_string=writable_code)

    def get_optimization_review_metrics(
        self,
        source_code: str,
        file_path: Path,
        qualified_name: str,
        project_root: Path,
        tests_root: Path,
        language: Language,
    ) -> str:
        """Return the review metrics produced by ``get_opt_review_metrics`` for the given arguments."""
        review_metrics = get_opt_review_metrics(
            source_code, file_path, qualified_name, project_root, tests_root, language
        )
        return review_metrics

    def instrument_test_fixtures(self, test_paths: list[Path]) -> dict[Path, list[str]] | None:
        """Run ``modify_autouse_fixture`` and ``add_custom_marker_to_all_tests`` on ``test_paths``.

        Returns:
            Whatever ``modify_autouse_fixture`` returned (named "original conftest
            content" in the original code).
        """
        logger.info("Disabling all autouse fixtures associated with the generated test files")
        conftest_backup = modify_autouse_fixture(test_paths)

        logger.info("Add custom marker to generated test files")
        add_custom_marker_to_all_tests(test_paths)

        return conftest_backup

    def instrument_capture(self, file_path_to_helper_classes: dict[Path, set[str]]) -> None:
        """Call ``instrument_codeflash_capture`` for the target function and its helper classes."""
        # Imported at call time, as in the rest of this class for verification helpers.
        from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture

        target = self.function_to_optimize
        tests_root = self.test_cfg.tests_root
        instrument_codeflash_capture(target, file_path_to_helper_classes, tests_root)

    def should_check_coverage(self) -> bool:
        """Always report that coverage should be checked."""
        return True

    def collect_async_metrics(
        self,
        benchmarking_results: TestResults,
        code_context: CodeOptimizationContext,
        helper_code: dict[Path, str],
        test_env: dict[str, str],
    ) -> tuple[int | None, ConcurrencyMetrics | None]:
        """Compute throughput and concurrency metrics when the target function is async.

        Returns:
            ``(None, None)`` when ``self.function_to_optimize.is_async`` is falsy.
            Otherwise the throughput computed from ``benchmarking_results`` and the
            value returned by ``self.run_concurrency_benchmark``.
        """
        target = self.function_to_optimize
        if not target.is_async:
            return None, None

        async_throughput = calculate_function_throughput_from_test_results(
            benchmarking_results, target.function_name
        )
        logger.debug(f"Async function throughput: {async_throughput} calls/second")

        concurrency_metrics = self.run_concurrency_benchmark(
            code_context=code_context, original_helper_code=helper_code, test_env=test_env
        )
        # The benchmark result may be falsy; only log its details when present.
        if concurrency_metrics:
            logger.debug(
                f"Concurrency metrics: ratio={concurrency_metrics.concurrency_ratio:.2f}, "
                f"seq={concurrency_metrics.sequential_time_ns}ns, conc={concurrency_metrics.concurrent_time_ns}ns"
            )

        return async_throughput, concurrency_metrics

    def instrument_async_for_mode(self, mode: TestingMode) -> None:
        """Apply ``add_async_decorator_to_function`` to the target function for ``mode``."""
        from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function

        target = self.function_to_optimize
        add_async_decorator_to_function(target.file_path, target, mode, project_root=self.project_root)

    def should_skip_sqlite_cleanup(self, testing_type: TestingMode, optimization_iteration: int) -> bool:
        """Always return ``False``; both arguments are ignored."""
        return False

    def parse_line_profile_test_results(
        self, line_profiler_output_file: Path | None
    ) -> tuple[TestResults | dict, CoverageData | None]:
        """Return the output of ``parse_line_profile_results`` for ``line_profiler_output_file``."""
        from codeflash.verification.parse_line_profile_test_output import parse_line_profile_results

        parsed = parse_line_profile_results(line_profiler_output_file=line_profiler_output_file)
        return parsed

    def compare_candidate_results(
        self,
        baseline_results: OriginalCodeBaseline,
        candidate_behavior_results: TestResults,
        optimization_candidate_index: int,
    ) -> tuple[bool, list[TestDiff]]:
        """Compare baseline behavior results with the candidate's via ``compare_test_results``.

        ``optimization_candidate_index`` is accepted but not used by this implementation.
        """
        from codeflash.verification.equivalence import compare_test_results

        baseline_behavior = baseline_results.behavior_test_results
        return compare_test_results(baseline_behavior, candidate_behavior_results)

    def replace_function_and_helpers_with_optimized_code(
        self,
        code_context: CodeOptimizationContext,
        optimized_code: CodeStringsMarkdown,
        original_helper_code: dict[Path, str],
    ) -> bool:
        """Run the base-class replacement, then revert helpers detected as unused.

        Returns:
            The value returned by the base-class implementation.
        """
        replaced = super().replace_function_and_helpers_with_optimized_code(
            code_context, optimized_code, original_helper_code
        )

        stale_helpers = detect_unused_helper_functions(self.function_to_optimize, code_context, optimized_code)
        if stale_helpers:
            revert_unused_helper_functions(self.project_root, stale_helpers, original_helper_code)

        return replaced

    def line_profiler_step(
        self, code_context: CodeOptimizationContext, original_helper_code: dict[Path, str], candidate_index: int
    ) -> dict[str, Any]:
        """Run the tests in line-profile mode and return the parsed profile.

        The run is skipped when the target file, or any file keyed in
        ``original_helper_code``, contains a JIT decorator according to
        ``contains_jit_decorator``. After the run, whether it succeeded or raised,
        ``self.write_code_and_helpers`` is called with the stored original source
        and ``original_helper_code``.

        Returns:
            ``{"timings": {}, "unit": 0, "str_out": ""}`` when the run is skipped or
            when it yields a ``TestResults`` without any test results (logged as a
            timeout); otherwise the first element returned by
            ``self.run_and_parse_tests``.
        """
        # Built once per call; at most one exit path returns it.
        empty_profile = {"timings": {}, "unit": 0, "str_out": ""}

        fto_source = Path(self.function_to_optimize.file_path).read_text("utf-8")
        if contains_jit_decorator(fto_source):
            logger.info(
                f"Skipping line profiler for {self.function_to_optimize.function_name} - code contains JIT decorator"
            )
            return empty_profile

        # Lazy generator: helper files are read in order and scanning stops at the first hit.
        helper_has_jit = any(
            contains_jit_decorator(Path(helper_path).read_text("utf-8")) for helper_path in original_helper_code
        )
        if helper_has_jit:
            logger.info(
                f"Skipping line profiler for {self.function_to_optimize.function_name} - helper code contains JIT decorator"
            )
            return empty_profile

        try:
            console.rule()

            profiler_env = self.get_test_env(
                codeflash_loop_index=0, codeflash_test_iteration=candidate_index, codeflash_tracer_disable=1
            )
            profiler_output_path = add_decorator_imports(self.function_to_optimize, code_context)
            profile_output, _ = self.run_and_parse_tests(
                testing_type=TestingMode.LINE_PROFILE,
                test_env=profiler_env,
                test_files=self.test_files,
                optimization_iteration=0,
                testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
                enable_coverage=False,
                code_context=code_context,
                line_profiler_output_file=profiler_output_path,
            )
        finally:
            # Always write the stored original source and helper code back.
            # NOTE(review): presumably undoes edits made by add_decorator_imports - confirm.
            self.write_code_and_helpers(
                self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
            )

        no_results = isinstance(profile_output, TestResults) and not profile_output.test_results
        if no_results:
            logger.warning(
                f"Timeout occurred while running line profiler for original function {self.function_to_optimize.function_name}"
            )
            return empty_profile

        if profile_output["str_out"] == "":
            logger.warning(
                f"Couldn't run line profiler for original function {self.function_to_optimize.function_name}"
            )
        return profile_output