-
Notifications
You must be signed in to change notification settings - Fork 26
Optimization effort #978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimization effort #978
Changes from all commits
1018a69
4fe32d7
444aab2
3e20a37
f4be23b
2f7fc60
cc9316d
a040940
10f3630
a126d9e
18e0b24
8afe34f
54cf458
8455bed
c18af78
2a86446
ca9769c
0a33bc1
6047b81
487d7a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,18 +1,20 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from enum import Enum | ||
| from typing import Any, Union | ||
|
|
||
| MAX_TEST_RUN_ITERATIONS = 5 | ||
| INDIVIDUAL_TESTCASE_TIMEOUT = 15 | ||
| MAX_FUNCTION_TEST_SECONDS = 60 | ||
| N_CANDIDATES = 5 | ||
| MIN_IMPROVEMENT_THRESHOLD = 0.05 | ||
| MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput | ||
| MAX_TEST_FUNCTION_RUNS = 50 | ||
| MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms | ||
| N_TESTS_TO_GENERATE = 2 | ||
| TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget | ||
| COVERAGE_THRESHOLD = 60.0 | ||
| MIN_TESTCASE_PASSED_THRESHOLD = 6 | ||
| REPEAT_OPTIMIZATION_PROBABILITY = 0.1 | ||
| DEFAULT_IMPORTANCE_THRESHOLD = 0.001 | ||
| N_CANDIDATES_LP = 6 | ||
|
|
||
| # pytest loop stability | ||
| # For now, we use strict thresholds (large windows and low tolerances), since this is still experimental. | ||
|
|
@@ -21,44 +23,82 @@ | |
| STABILITY_SPREAD_TOLERANCE = 0.0025 # 0.25% window spread | ||
|
|
||
| # Refinement | ||
| REFINE_ALL_THRESHOLD = 2 # when valid optimizations count is 2 or less, refine all optimizations | ||
| REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1) # (runtime, diff), runtime is more important than diff by a factor of 2 | ||
| TOP_N_REFINEMENTS = 0.45 # top 45% of valid optimizations (based on the weighted score) are refined | ||
|
|
||
| # LSP-specific | ||
| N_CANDIDATES_LSP = 3 | ||
| N_TESTS_TO_GENERATE_LSP = 2 | ||
| TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid in increase in performance reporting | ||
| N_CANDIDATES_LP_LSP = 3 | ||
|
|
||
| # setting this value to 1 will disable repair if there is at least one correct candidate | ||
| MIN_CORRECT_CANDIDATES = 2 | ||
|
|
||
| # Code repair | ||
| REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4 # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more stricted) | ||
| MAX_REPAIRS_PER_TRACE = 4 # maximum number of repairs we will do for each function | ||
|
|
||
| # Adaptive optimization | ||
| # TODO (ali): make this configurable with effort arg once the PR is merged | ||
| ADAPTIVE_OPTIMIZATION_THRESHOLD = 2 # Max adaptive optimizations per single candidate tree (for example : optimize -> refine -> adaptive -> another adaptive). | ||
| # MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = 4 # maximum number of adaptive optimizations we will do for each function (this can be 2 adaptive optimizations for 2 candidates for example) | ||
| MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = ( | ||
| 0 # disable adaptive optimizations until we have this value controlled by the effort arg | ||
| ) | ||
|
|
||
| MAX_N_CANDIDATES = 5 | ||
| MAX_N_CANDIDATES_LP = 6 | ||
|
|
||
| try: | ||
| from codeflash.lsp.helpers import is_LSP_enabled | ||
|
|
||
| _IS_LSP_ENABLED = is_LSP_enabled() | ||
| except ImportError: | ||
| _IS_LSP_ENABLED = False | ||
|
|
||
| N_CANDIDATES_EFFECTIVE = min(N_CANDIDATES_LSP if _IS_LSP_ENABLED else N_CANDIDATES, MAX_N_CANDIDATES) | ||
| N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP) | ||
| N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE | ||
| TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME | ||
|
|
||
| MAX_CONTEXT_LEN_REVIEW = 1000 | ||
|
|
||
|
|
||
| class EffortLevel(str, Enum): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this way is fine, another way of setting these things is via a yaml file, you could have multiple of them with different combinations of parameters. it's a subjective opinion, this way of implementation is also fine.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yaml makes more sense for configurations, will implement it in a separate PR, and maybe move the rest of config values to yaml not just effort-related
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yaml makes more sense for configurations, will implement it |
||
| LOW = "low" | ||
| MEDIUM = "medium" | ||
| HIGH = "high" | ||
|
|
||
|
|
||
| class EffortKeys(str, Enum): | ||
| N_OPTIMIZER_CANDIDATES = "N_OPTIMIZER_CANDIDATES" | ||
| N_OPTIMIZER_LP_CANDIDATES = "N_OPTIMIZER_LP_CANDIDATES" | ||
| N_GENERATED_TESTS = "N_GENERATED_TESTS" | ||
| MAX_CODE_REPAIRS_PER_TRACE = "MAX_CODE_REPAIRS_PER_TRACE" | ||
| REPAIR_UNMATCHED_PERCENTAGE_LIMIT = "REPAIR_UNMATCHED_PERCENTAGE_LIMIT" | ||
| TOP_VALID_CANDIDATES_FOR_REFINEMENT = "TOP_VALID_CANDIDATES_FOR_REFINEMENT" | ||
| ADAPTIVE_OPTIMIZATION_THRESHOLD = "ADAPTIVE_OPTIMIZATION_THRESHOLD" | ||
| MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = "MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE" | ||
|
|
||
|
|
||
| EFFORT_VALUES: dict[str, dict[EffortLevel, Any]] = { | ||
| EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6}, | ||
| EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7}, | ||
| # we don't use effort with generated tests for now | ||
| EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2}, | ||
| # maximum number of repairs we will do for each function (in case the valid candidates is less than MIN_CORRECT_CANDIDATES) | ||
| EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 5}, | ||
| # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more stricted) | ||
| # on the low effort we lower the limit to 20% to be more strict (less repairs, less time) | ||
| EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: { | ||
| EffortLevel.LOW: 0.2, | ||
| EffortLevel.MEDIUM: 0.3, | ||
| EffortLevel.HIGH: 0.4, | ||
| }, | ||
| # Top valid candidates for refinements | ||
| EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4}, | ||
| # max number of adaptive optimization calls to make per a single candidates tree | ||
| EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 0, EffortLevel.HIGH: 2}, | ||
| # max number of adaptive optimization calls to make per a single trace | ||
| EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: { | ||
| EffortLevel.LOW: 0, | ||
| EffortLevel.MEDIUM: 0, | ||
| EffortLevel.HIGH: 4, | ||
| }, | ||
| } | ||
|
|
||
|
|
||
| def get_effort_value(key: EffortKeys, effort: Union[EffortLevel, str]) -> Any: # noqa: ANN401 | ||
| key_str = key.value | ||
|
|
||
| if isinstance(effort, str): | ||
| try: | ||
| effort = EffortLevel(effort) | ||
| except ValueError: | ||
| msg = f"Invalid effort level: {effort}" | ||
| raise ValueError(msg) from None | ||
|
|
||
| if key_str not in EFFORT_VALUES: | ||
| msg = f"Invalid key: {key_str}" | ||
| raise ValueError(msg) | ||
|
|
||
| return EFFORT_VALUES[key_str][effort] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,7 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import os | ||
| import shutil | ||
| import subprocess | ||
| import sys | ||
| import tempfile | ||
| import time | ||
| from functools import cache | ||
| from io import StringIO | ||
|
|
@@ -16,7 +13,6 @@ | |
| from unidiff import PatchSet | ||
|
|
||
| from codeflash.cli_cmds.console import logger | ||
| from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE | ||
|
|
||
| if TYPE_CHECKING: | ||
| from git import Repo | ||
|
|
@@ -195,36 +191,6 @@ def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *, | |
| return True | ||
|
|
||
|
|
||
| def create_worktree_root_dir(module_root: Path) -> tuple[Path | None, Path | None]: | ||
| git_root = git_root_dir() if check_running_in_git_repo(module_root) else None | ||
| worktree_root_dir = Path(tempfile.mkdtemp()) if git_root else None | ||
| return git_root, worktree_root_dir | ||
|
|
||
|
|
||
| def create_git_worktrees( | ||
| git_root: Path | None, worktree_root_dir: Path | None, module_root: Path | ||
| ) -> tuple[Path | None, list[Path]]: | ||
| if git_root and worktree_root_dir: | ||
| worktree_root = Path(tempfile.mkdtemp(dir=worktree_root_dir)) | ||
| worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(N_CANDIDATES_EFFECTIVE + 1)] | ||
| for worktree in worktrees: | ||
| subprocess.run(["git", "worktree", "add", "-d", worktree], cwd=module_root, check=True) | ||
| else: | ||
| worktree_root = None | ||
| worktrees = [] | ||
| return worktree_root, worktrees | ||
|
|
||
|
|
||
| def remove_git_worktrees(worktree_root: Path | None, worktrees: list[Path]) -> None: | ||
| try: | ||
| for worktree in worktrees: | ||
| subprocess.run(["git", "worktree", "remove", "-f", worktree], check=True) | ||
| except subprocess.CalledProcessError as e: | ||
| logger.warning(f"Error removing worktrees: {e}") | ||
| if worktree_root: | ||
| shutil.rmtree(worktree_root) | ||
|
|
||
|
|
||
|
Comment on lines
-198
to
-227
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. old unused methods |
||
| def get_last_commit_author_if_pr_exists(repo: Repo | None = None) -> str | None: | ||
| """Return the author's name of the last commit in the current branch if PR_NUMBER is set. | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
backwards api endpoint compatibility should be fine, you can confirm it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, I did set default values in the aiservice