Merge pull request #978 from codeflash-ai/optimization-effort

mohammedahmed18 · web-flow · commit 9eec46a3e0cd · 2026-01-12T22:09:22.000+02:00
Optimization effort
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
@@ -16,7 +16,6 @@
 from codeflash.code_utils.env_utils import get_codeflash_api_key
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.code_utils.time_utils import humanize_runtime
-from codeflash.lsp.helpers import is_LSP_enabled
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
 from codeflash.models.models import (
     AIServiceRefinerRequest,
@@ -131,6 +130,7 @@ def optimize_python_code(  # noqa: D417
         experiment_metadata: ExperimentMetadata | None = None,
         *,
         is_async: bool = False,
+        n_candidates: int = 5,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance by making a request to the Django endpoint.
 
@@ -141,6 +141,7 @@ def optimize_python_code(  # noqa: D417
         - trace_id (str): Trace id of optimization run
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
         - is_async (bool): Whether the function being optimized is async
+        - n_candidates (int): Number of candidates to generate
 
         Returns
         -------
@@ -163,10 +164,10 @@ def optimize_python_code(  # noqa: D417
             "repo_owner": git_repo_owner,
             "repo_name": git_repo_name,
             "is_async": is_async,
-            "lsp_mode": is_LSP_enabled(),
             "call_sequence": self.get_next_sequence(),
+            "n_candidates": n_candidates,
         }
-        logger.debug(f"Sending optimize request: trace_id={trace_id}, lsp_mode={payload['lsp_mode']}")
+        logger.debug(f"Sending optimize request: trace_id={trace_id}, n_candidates={payload['n_candidates']}")
 
         try:
             response = self.make_ai_service_request("/optimize", payload=payload, timeout=self.timeout)
@@ -198,6 +199,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
         dependency_code: str,
         trace_id: str,
         line_profiler_results: str,
+        n_candidates: int,
         experiment_metadata: ExperimentMetadata | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance using line profiler results.
@@ -209,6 +211,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
         - trace_id (str): Trace id of optimization run
         - line_profiler_results (str): Line profiler output to guide optimization
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+        - n_candidates (int): Number of candidates to generate
 
         Returns
         -------
@@ -225,12 +228,12 @@ def optimize_python_code_line_profiler(  # noqa: D417
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
+            "n_candidates": n_candidates,
             "line_profiler_results": line_profiler_results,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
             "codeflash_version": codeflash_version,
-            "lsp_mode": is_LSP_enabled(),
             "call_sequence": self.get_next_sequence(),
         }
 
diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
@@ -107,6 +107,9 @@ def parse_args() -> Namespace:
         action="store_true",
         help="(Deprecated) Async function optimization is now enabled by default. This flag is ignored.",
     )
+    parser.add_argument(
+        "--effort", type=str, help="Effort level for optimization", choices=["low", "medium", "high"], default="medium"
+    )
 
     args, unknown_args = parser.parse_known_args()
     sys.argv[:] = [sys.argv[0], *unknown_args]
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
@@ -1,18 +1,20 @@
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any, Union
+
 MAX_TEST_RUN_ITERATIONS = 5
 INDIVIDUAL_TESTCASE_TIMEOUT = 15
 MAX_FUNCTION_TEST_SECONDS = 60
-N_CANDIDATES = 5
 MIN_IMPROVEMENT_THRESHOLD = 0.05
 MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10  # 10% minimum improvement for async throughput
 MAX_TEST_FUNCTION_RUNS = 50
 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6  # 100ms
-N_TESTS_TO_GENERATE = 2
 TOTAL_LOOPING_TIME = 10.0  # 10 second candidate benchmarking budget
 COVERAGE_THRESHOLD = 60.0
 MIN_TESTCASE_PASSED_THRESHOLD = 6
 REPEAT_OPTIMIZATION_PROBABILITY = 0.1
 DEFAULT_IMPORTANCE_THRESHOLD = 0.001
-N_CANDIDATES_LP = 6
 
 # pytest loop stability
 # For now, we use strict thresholds (large windows and low tolerances), since this is still experimental.
@@ -21,44 +23,82 @@
 STABILITY_SPREAD_TOLERANCE = 0.0025  # 0.25% window spread
 
 # Refinement
-REFINE_ALL_THRESHOLD = 2  # when valid optimizations count is 2 or less, refine all optimizations
 REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1)  # (runtime, diff), runtime is more important than diff by a factor of 2
-TOP_N_REFINEMENTS = 0.45  # top 45% of valid optimizations (based on the weighted score) are refined
 
 # LSP-specific
-N_CANDIDATES_LSP = 3
-N_TESTS_TO_GENERATE_LSP = 2
 TOTAL_LOOPING_TIME_LSP = 10.0  # Kept same timing for LSP mode to avoid in increase in performance reporting
-N_CANDIDATES_LP_LSP = 3
 
 # setting this value to 1 will disable repair if there is at least one correct candidate
 MIN_CORRECT_CANDIDATES = 2
 
-# Code repair
-REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4  # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more stricted)
-MAX_REPAIRS_PER_TRACE = 4  # maximum number of repairs we will do for each function
-
-# Adaptive optimization
-# TODO (ali): make this configurable with effort arg once the PR is merged
-ADAPTIVE_OPTIMIZATION_THRESHOLD = 2  # Max adaptive optimizations per single candidate tree (for example : optimize -> refine -> adaptive -> another adaptive).
-# MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = 4  # maximum number of adaptive optimizations we will do for each function (this can be 2 adaptive optimizations for 2 candidates for example)
-MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = (
-    0  # disable adaptive optimizations until we have this value controlled by the effort arg
-)
-
-MAX_N_CANDIDATES = 5
-MAX_N_CANDIDATES_LP = 6
-
 try:
     from codeflash.lsp.helpers import is_LSP_enabled
 
     _IS_LSP_ENABLED = is_LSP_enabled()
 except ImportError:
     _IS_LSP_ENABLED = False
 
-N_CANDIDATES_EFFECTIVE = min(N_CANDIDATES_LSP if _IS_LSP_ENABLED else N_CANDIDATES, MAX_N_CANDIDATES)
-N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP)
-N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE
 TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME
 
 MAX_CONTEXT_LEN_REVIEW = 1000
+
+
+class EffortLevel(str, Enum):
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
+class EffortKeys(str, Enum):
+    N_OPTIMIZER_CANDIDATES = "N_OPTIMIZER_CANDIDATES"
+    N_OPTIMIZER_LP_CANDIDATES = "N_OPTIMIZER_LP_CANDIDATES"
+    N_GENERATED_TESTS = "N_GENERATED_TESTS"
+    MAX_CODE_REPAIRS_PER_TRACE = "MAX_CODE_REPAIRS_PER_TRACE"
+    REPAIR_UNMATCHED_PERCENTAGE_LIMIT = "REPAIR_UNMATCHED_PERCENTAGE_LIMIT"
+    TOP_VALID_CANDIDATES_FOR_REFINEMENT = "TOP_VALID_CANDIDATES_FOR_REFINEMENT"
+    ADAPTIVE_OPTIMIZATION_THRESHOLD = "ADAPTIVE_OPTIMIZATION_THRESHOLD"
+    MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = "MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE"
+
+
+EFFORT_VALUES: dict[str, dict[EffortLevel, Any]] = {
+    EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6},
+    EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7},
+    # we don't use effort with generated tests for now
+    EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2},
+    # maximum number of repairs we will do for each function (in case the valid candidates is less than MIN_CORRECT_CANDIDATES)
+    EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 5},
+    # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more stricted)
+    # on the low effort we lower the limit to 20% to be more strict (less repairs, less time)
+    EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
+        EffortLevel.LOW: 0.2,
+        EffortLevel.MEDIUM: 0.3,
+        EffortLevel.HIGH: 0.4,
+    },
+    # Top valid candidates for refinements
+    EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
+    # max number of adaptive optimization calls to make per a single candidates tree
+    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 0, EffortLevel.HIGH: 2},
+    # max number of adaptive optimization calls to make per a single trace
+    EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
+        EffortLevel.LOW: 0,
+        EffortLevel.MEDIUM: 0,
+        EffortLevel.HIGH: 4,
+    },
+}
+
+
+def get_effort_value(key: EffortKeys, effort: Union[EffortLevel, str]) -> Any:  # noqa: ANN401
+    key_str = key.value
+
+    if isinstance(effort, str):
+        try:
+            effort = EffortLevel(effort)
+        except ValueError:
+            msg = f"Invalid effort level: {effort}"
+            raise ValueError(msg) from None
+
+    if key_str not in EFFORT_VALUES:
+        msg = f"Invalid key: {key_str}"
+        raise ValueError(msg)
+
+    return EFFORT_VALUES[key_str][effort]
diff --git a/codeflash/code_utils/git_utils.py b/codeflash/code_utils/git_utils.py
@@ -1,10 +1,7 @@
 from __future__ import annotations
 
 import os
-import shutil
-import subprocess
 import sys
-import tempfile
 import time
 from functools import cache
 from io import StringIO
@@ -16,7 +13,6 @@
 from unidiff import PatchSet
 
 from codeflash.cli_cmds.console import logger
-from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE
 
 if TYPE_CHECKING:
     from git import Repo
@@ -195,36 +191,6 @@ def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *,
     return True
 
 
-def create_worktree_root_dir(module_root: Path) -> tuple[Path | None, Path | None]:
-    git_root = git_root_dir() if check_running_in_git_repo(module_root) else None
-    worktree_root_dir = Path(tempfile.mkdtemp()) if git_root else None
-    return git_root, worktree_root_dir
-
-
-def create_git_worktrees(
-    git_root: Path | None, worktree_root_dir: Path | None, module_root: Path
-) -> tuple[Path | None, list[Path]]:
-    if git_root and worktree_root_dir:
-        worktree_root = Path(tempfile.mkdtemp(dir=worktree_root_dir))
-        worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(N_CANDIDATES_EFFECTIVE + 1)]
-        for worktree in worktrees:
-            subprocess.run(["git", "worktree", "add", "-d", worktree], cwd=module_root, check=True)
-    else:
-        worktree_root = None
-        worktrees = []
-    return worktree_root, worktrees
-
-
-def remove_git_worktrees(worktree_root: Path | None, worktrees: list[Path]) -> None:
-    try:
-        for worktree in worktrees:
-            subprocess.run(["git", "worktree", "remove", "-f", worktree], check=True)
-    except subprocess.CalledProcessError as e:
-        logger.warning(f"Error removing worktrees: {e}")
-    if worktree_root:
-        shutil.rmtree(worktree_root)
-
-
 def get_last_commit_author_if_pr_exists(repo: Repo | None = None) -> str | None:
     """Return the author's name of the last commit in the current branch if PR_NUMBER is set.
 
diff --git a/codeflash/lsp/server.py b/codeflash/lsp/server.py
@@ -7,6 +7,7 @@
 from pygls.lsp.server import LanguageServer
 from pygls.protocol import LanguageServerProtocol
 
+from codeflash.code_utils.config_consts import EffortLevel
 from codeflash.either import Result
 from codeflash.models.models import CodeOptimizationContext
 
@@ -37,6 +38,7 @@ def prepare_optimizer_arguments(self, config_file: Path) -> None:
         args.config_file = config_file
         args.no_pr = True  # LSP server should not create PRs
         args.worktree = True
+        args.effort = EffortLevel.LOW.value  # low effort for high speed
         self.args = args
         # avoid initializing the optimizer during initialization, because it can cause an error if the api key is invalid
 
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
diff --git a/codeflash/tracer.py b/codeflash/tracer.py

Original file line number	Diff line number	Diff line change
`@@ -107,6 +107,9 @@ def parse_args() -> Namespace:`
`107`	`107`	`action="store_true",`
`108`	`108`	`help="(Deprecated) Async function optimization is now enabled by default. This flag is ignored.",`
`109`	`109`	`)`
	`110`	`+ parser.add_argument(`
	`111`	`+ "--effort", type=str, help="Effort level for optimization", choices=["low", "medium", "high"], default="medium"`
	`112`	`+ )`
`110`	`113`
`111`	`114`	`args, unknown_args = parser.parse_known_args()`
`112`	`115`	`sys.argv[:] = [sys.argv[0], *unknown_args]`