Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
1018a69
optimization effort
mohammedahmed18 Dec 18, 2025
4fe32d7
Merge branch 'main' of github.com:codeflash-ai/codeflash into optimiz…
mohammedahmed18 Dec 19, 2025
444aab2
Merge branch 'main' of github.com:codeflash-ai/codeflash into optimiz…
mohammedahmed18 Dec 19, 2025
3e20a37
more effort values
mohammedahmed18 Dec 19, 2025
f4be23b
fix
mohammedahmed18 Dec 19, 2025
2f7fc60
set the right effort level for each case
mohammedahmed18 Dec 30, 2025
cc9316d
Merge branch 'main' of github.com:codeflash-ai/codeflash into optimiz…
mohammedahmed18 Dec 31, 2025
a040940
Merge branch 'main' of github.com:codeflash-ai/codeflash into optimiz…
mohammedahmed18 Jan 1, 2026
10f3630
Merge branch 'main' of github.com:codeflash-ai/codeflash into optimiz…
mohammedahmed18 Jan 5, 2026
a126d9e
number of candidates for model distribution & control adaptive optimi…
mohammedahmed18 Jan 6, 2026
18e0b24
default effort value for function optimizer
mohammedahmed18 Jan 6, 2026
8afe34f
fix enum python issue
mohammedahmed18 Jan 6, 2026
54cf458
merge main into optimization-effort
mohammedahmed18 Jan 6, 2026
8455bed
Merge branch 'main' of https://github.com/codeflash-ai/codeflash into…
mohammedahmed18 Jan 6, 2026
c18af78
fixes
mohammedahmed18 Jan 7, 2026
2a86446
formatting and linting
mohammedahmed18 Jan 7, 2026
ca9769c
modify effort values
mohammedahmed18 Jan 7, 2026
0a33bc1
disable adaptive optimization for medium effort
mohammedahmed18 Jan 7, 2026
6047b81
Merge branch 'main' into optimization-effort
mohammedahmed18 Jan 9, 2026
487d7a0
Merge branch 'main' into optimization-effort
mohammedahmed18 Jan 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions codeflash/api/aiservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from codeflash.code_utils.env_utils import get_codeflash_api_key
from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
from codeflash.code_utils.time_utils import humanize_runtime
from codeflash.lsp.helpers import is_LSP_enabled
from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import (
AIServiceRefinerRequest,
Expand Down Expand Up @@ -131,6 +130,7 @@ def optimize_python_code( # noqa: D417
experiment_metadata: ExperimentMetadata | None = None,
*,
is_async: bool = False,
n_candidates: int = 5,
) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance by making a request to the Django endpoint.

Expand All @@ -141,6 +141,7 @@ def optimize_python_code( # noqa: D417
- trace_id (str): Trace id of optimization run
- experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
- is_async (bool): Whether the function being optimized is async
- n_candidates (int): Number of candidates to generate

Returns
-------
Expand All @@ -163,10 +164,10 @@ def optimize_python_code( # noqa: D417
"repo_owner": git_repo_owner,
"repo_name": git_repo_name,
"is_async": is_async,
"lsp_mode": is_LSP_enabled(),
"call_sequence": self.get_next_sequence(),
"n_candidates": n_candidates,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

backwards api endpoint compatibility should be fine, you can confirm it

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I did set default values in the aiservice

}
logger.debug(f"Sending optimize request: trace_id={trace_id}, lsp_mode={payload['lsp_mode']}")
logger.debug(f"Sending optimize request: trace_id={trace_id}, n_candidates={payload['n_candidates']}")

try:
response = self.make_ai_service_request("/optimize", payload=payload, timeout=self.timeout)
Expand Down Expand Up @@ -198,6 +199,7 @@ def optimize_python_code_line_profiler( # noqa: D417
dependency_code: str,
trace_id: str,
line_profiler_results: str,
n_candidates: int,
experiment_metadata: ExperimentMetadata | None = None,
) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance using line profiler results.
Expand All @@ -209,6 +211,7 @@ def optimize_python_code_line_profiler( # noqa: D417
- trace_id (str): Trace id of optimization run
- line_profiler_results (str): Line profiler output to guide optimization
- experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
- n_candidates (int): Number of candidates to generate

Returns
-------
Expand All @@ -225,12 +228,12 @@ def optimize_python_code_line_profiler( # noqa: D417
payload = {
"source_code": source_code,
"dependency_code": dependency_code,
"n_candidates": n_candidates,
"line_profiler_results": line_profiler_results,
"trace_id": trace_id,
"python_version": platform.python_version(),
"experiment_metadata": experiment_metadata,
"codeflash_version": codeflash_version,
"lsp_mode": is_LSP_enabled(),
"call_sequence": self.get_next_sequence(),
}

Expand Down
3 changes: 3 additions & 0 deletions codeflash/cli_cmds/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ def parse_args() -> Namespace:
action="store_true",
help="(Deprecated) Async function optimization is now enabled by default. This flag is ignored.",
)
parser.add_argument(
"--effort", type=str, help="Effort level for optimization", choices=["low", "medium", "high"], default="medium"
)

args, unknown_args = parser.parse_known_args()
sys.argv[:] = [sys.argv[0], *unknown_args]
Expand Down
92 changes: 66 additions & 26 deletions codeflash/code_utils/config_consts.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
from __future__ import annotations

from enum import Enum
from typing import Any, Union

MAX_TEST_RUN_ITERATIONS = 5
INDIVIDUAL_TESTCASE_TIMEOUT = 15
MAX_FUNCTION_TEST_SECONDS = 60
N_CANDIDATES = 5
MIN_IMPROVEMENT_THRESHOLD = 0.05
MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput
MAX_TEST_FUNCTION_RUNS = 50
MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms
N_TESTS_TO_GENERATE = 2
TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget
COVERAGE_THRESHOLD = 60.0
MIN_TESTCASE_PASSED_THRESHOLD = 6
REPEAT_OPTIMIZATION_PROBABILITY = 0.1
DEFAULT_IMPORTANCE_THRESHOLD = 0.001
N_CANDIDATES_LP = 6

# pytest loop stability
# For now, we use strict thresholds (large windows and low tolerances), since this is still experimental.
Expand All @@ -21,44 +23,82 @@
STABILITY_SPREAD_TOLERANCE = 0.0025 # 0.25% window spread

# Refinement
REFINE_ALL_THRESHOLD = 2 # when valid optimizations count is 2 or less, refine all optimizations
REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1) # (runtime, diff), runtime is more important than diff by a factor of 2
TOP_N_REFINEMENTS = 0.45 # top 45% of valid optimizations (based on the weighted score) are refined

# LSP-specific
N_CANDIDATES_LSP = 3
N_TESTS_TO_GENERATE_LSP = 2
TOTAL_LOOPING_TIME_LSP = 10.0 # Kept the same timing for LSP mode to avoid an increase in performance reporting
N_CANDIDATES_LP_LSP = 3

# setting this value to 1 will disable repair if there is at least one correct candidate
MIN_CORRECT_CANDIDATES = 2

# Code repair
REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4 # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair stricter)
MAX_REPAIRS_PER_TRACE = 4 # maximum number of repairs we will do for each function

# Adaptive optimization
# TODO (ali): make this configurable with effort arg once the PR is merged
ADAPTIVE_OPTIMIZATION_THRESHOLD = 2 # Max adaptive optimizations per single candidate tree (for example : optimize -> refine -> adaptive -> another adaptive).
# MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = 4 # maximum number of adaptive optimizations we will do for each function (this can be 2 adaptive optimizations for 2 candidates for example)
MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = (
0 # disable adaptive optimizations until we have this value controlled by the effort arg
)

MAX_N_CANDIDATES = 5
MAX_N_CANDIDATES_LP = 6

try:
from codeflash.lsp.helpers import is_LSP_enabled

_IS_LSP_ENABLED = is_LSP_enabled()
except ImportError:
_IS_LSP_ENABLED = False

N_CANDIDATES_EFFECTIVE = min(N_CANDIDATES_LSP if _IS_LSP_ENABLED else N_CANDIDATES, MAX_N_CANDIDATES)
N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP)
N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE
TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME

MAX_CONTEXT_LEN_REVIEW = 1000


class EffortLevel(str, Enum):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this way is fine, another way of setting these things is via a yaml file, you could have multiple of them with different combinations of parameters. it's a subjective opinion, this way of implementation is also fine.

Copy link
Copy Markdown
Contributor Author

@mohammedahmed18 mohammedahmed18 Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yaml makes more sense for configurations, will implement it in a separate PR, and maybe move the rest of config values to yaml not just effort-related

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yaml makes more sense for configurations, will implement it

LOW = "low"
MEDIUM = "medium"
HIGH = "high"


class EffortKeys(str, Enum):
    """Names of the tunable parameters that are looked up per effort level.

    Each member's value is the string used as a top-level key in
    ``EFFORT_VALUES``; the member name and its value are always identical.
    """

    # Candidate / test generation counts
    N_OPTIMIZER_CANDIDATES = "N_OPTIMIZER_CANDIDATES"
    N_OPTIMIZER_LP_CANDIDATES = "N_OPTIMIZER_LP_CANDIDATES"
    N_GENERATED_TESTS = "N_GENERATED_TESTS"

    # Code repair
    MAX_CODE_REPAIRS_PER_TRACE = "MAX_CODE_REPAIRS_PER_TRACE"
    REPAIR_UNMATCHED_PERCENTAGE_LIMIT = "REPAIR_UNMATCHED_PERCENTAGE_LIMIT"

    # Refinement
    TOP_VALID_CANDIDATES_FOR_REFINEMENT = "TOP_VALID_CANDIDATES_FOR_REFINEMENT"

    # Adaptive optimization
    ADAPTIVE_OPTIMIZATION_THRESHOLD = "ADAPTIVE_OPTIMIZATION_THRESHOLD"
    MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = "MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE"


# Per-parameter lookup table: outer key is the string value of an EffortKeys member,
# inner key is the EffortLevel, and the inner value is the setting used at that level.
EFFORT_VALUES: dict[str, dict[EffortLevel, Any]] = {
    EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6},
    EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7},
    # we don't use effort with generated tests for now
    EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2},
    # maximum number of repairs we will do for each function (in case the number of valid candidates is less than MIN_CORRECT_CANDIDATES)
    EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 5},
    # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair stricter)
    # on the low effort we lower the limit to 20% to be more strict (fewer repairs, less time)
    EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
        EffortLevel.LOW: 0.2,
        EffortLevel.MEDIUM: 0.3,
        EffortLevel.HIGH: 0.4,
    },
    # Top valid candidates for refinements
    # (keyed by `.value` like every other entry so that all outer keys are plain strings, as the annotation states)
    EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT.value: {
        EffortLevel.LOW: 2,
        EffortLevel.MEDIUM: 3,
        EffortLevel.HIGH: 4,
    },
    # max number of adaptive optimization calls to make per single candidate tree
    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 0, EffortLevel.HIGH: 2},
    # max number of adaptive optimization calls to make per single trace
    EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
        EffortLevel.LOW: 0,
        EffortLevel.MEDIUM: 0,
        EffortLevel.HIGH: 4,
    },
}


def get_effort_value(key: EffortKeys, effort: Union[EffortLevel, str]) -> Any:  # noqa: ANN401
    """Return the setting stored in ``EFFORT_VALUES`` for ``key`` at the given effort level.

    Parameters
    ----------
    key : EffortKeys
        The parameter to look up; its ``.value`` is used as the table key.
    effort : EffortLevel | str
        The effort level, either as an enum member or as its string value
        (``"low"``, ``"medium"``, ``"high"``).

    Raises
    ------
    ValueError
        If ``effort`` is a string that is not a valid ``EffortLevel`` value, or if
        ``key`` has no entry in ``EFFORT_VALUES``.

    """
    table_key = key.value

    level = effort
    if isinstance(level, str):
        # Strings are normalised to the enum member before the lookup.
        try:
            level = EffortLevel(level)
        except ValueError:
            msg = f"Invalid effort level: {level}"
            raise ValueError(msg) from None

    if table_key in EFFORT_VALUES:
        return EFFORT_VALUES[table_key][level]

    msg = f"Invalid key: {table_key}"
    raise ValueError(msg)
34 changes: 0 additions & 34 deletions codeflash/code_utils/git_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from __future__ import annotations

import os
import shutil
import subprocess
import sys
import tempfile
import time
from functools import cache
from io import StringIO
Expand All @@ -16,7 +13,6 @@
from unidiff import PatchSet

from codeflash.cli_cmds.console import logger
from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE

if TYPE_CHECKING:
from git import Repo
Expand Down Expand Up @@ -195,36 +191,6 @@ def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *,
return True


def create_worktree_root_dir(module_root: Path) -> tuple[Path | None, Path | None]:
    """Return ``(git_root, worktree_root_dir)`` for ``module_root``.

    When ``module_root`` is not inside a git repository (or no git root is found),
    the second element is ``None`` and no temporary directory is created; otherwise
    a fresh temporary directory is created and returned as ``worktree_root_dir``.
    """
    if not check_running_in_git_repo(module_root):
        return None, None

    git_root = git_root_dir()
    if not git_root:
        return git_root, None

    return git_root, Path(tempfile.mkdtemp())


def create_git_worktrees(
    git_root: Path | None, worktree_root_dir: Path | None, module_root: Path
) -> tuple[Path | None, list[Path]]:
    """Create detached git worktrees under a new temporary directory.

    Returns ``(worktree_root, worktrees)``. When either ``git_root`` or
    ``worktree_root_dir`` is missing, nothing is created and ``(None, [])`` is
    returned. Otherwise ``N_CANDIDATES_EFFECTIVE + 1`` directories are created
    inside a new directory under ``worktree_root_dir`` and each one is registered
    with ``git worktree add -d`` (run from ``module_root``).
    """
    if not (git_root and worktree_root_dir):
        return None, []

    worktree_root = Path(tempfile.mkdtemp(dir=worktree_root_dir))

    # All target directories are created up front, before any git command runs.
    worktrees: list[Path] = []
    for _ in range(N_CANDIDATES_EFFECTIVE + 1):
        worktrees.append(Path(tempfile.mkdtemp(dir=worktree_root)))

    for target in worktrees:
        subprocess.run(["git", "worktree", "add", "-d", target], cwd=module_root, check=True)

    return worktree_root, worktrees


def remove_git_worktrees(worktree_root: Path | None, worktrees: list[Path]) -> None:
    """Remove the given git worktrees and then delete their containing directory.

    Removal is best-effort: a failing ``git worktree remove`` is logged as a warning
    and the remaining worktrees are still processed, so one bad worktree does not
    leave the others registered. After the loop, ``worktree_root`` (if given) is
    deleted with ``shutil.rmtree``.
    """
    for worktree in worktrees:
        # The try is per worktree so a single failure does not abort the rest of the cleanup.
        try:
            subprocess.run(["git", "worktree", "remove", "-f", worktree], check=True)
        except subprocess.CalledProcessError as e:
            logger.warning(f"Error removing worktrees: {e}")
    if worktree_root:
        shutil.rmtree(worktree_root)


Comment on lines -198 to -227
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

old unused methods

def get_last_commit_author_if_pr_exists(repo: Repo | None = None) -> str | None:
"""Return the author's name of the last commit in the current branch if PR_NUMBER is set.

Expand Down
2 changes: 2 additions & 0 deletions codeflash/lsp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pygls.lsp.server import LanguageServer
from pygls.protocol import LanguageServerProtocol

from codeflash.code_utils.config_consts import EffortLevel
from codeflash.either import Result
from codeflash.models.models import CodeOptimizationContext

Expand Down Expand Up @@ -37,6 +38,7 @@ def prepare_optimizer_arguments(self, config_file: Path) -> None:
args.config_file = config_file
args.no_pr = True # LSP server should not create PRs
args.worktree = True
args.effort = EffortLevel.LOW.value # low effort for high speed
self.args = args
# avoid initializing the optimizer during initialization, because it can cause an error if the api key is invalid

Expand Down
Loading
Loading