Skip to content

Commit a040940

Browse files
Merge branch 'main' of github.com:codeflash-ai/codeflash into optimization-effort
2 parents cc9316d + d2d57fe commit a040940

9 files changed

Lines changed: 186 additions & 43 deletions

File tree

codeflash/api/aiservice.py

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import platform
66
import time
7+
from itertools import count
78
from typing import TYPE_CHECKING, Any, cast
89

910
import requests
@@ -39,6 +40,11 @@ class AiServiceClient:
3940
def __init__(self) -> None:
4041
self.base_url = self.get_aiservice_base_url()
4142
self.headers = {"Authorization": f"Bearer {get_codeflash_api_key()}", "Connection": "close"}
43+
self.llm_call_counter = count(1)
44+
45+
def get_next_sequence(self) -> int:
46+
"""Get the next LLM call sequence number."""
47+
return next(self.llm_call_counter)
4248

4349
def get_aiservice_base_url(self) -> str:
4450
if os.environ.get("CODEFLASH_AIS_SERVER", default="prod").lower() == "local":
@@ -105,6 +111,7 @@ def _get_valid_candidates(
105111
optimization_id=opt["optimization_id"],
106112
source=source,
107113
parent_id=opt.get("parent_id", None),
114+
model=opt.get("model"),
108115
)
109116
)
110117
return candidates
@@ -114,7 +121,6 @@ def optimize_python_code( # noqa: D417
114121
source_code: str,
115122
dependency_code: str,
116123
trace_id: str,
117-
num_candidates: int = 10,
118124
experiment_metadata: ExperimentMetadata | None = None,
119125
*,
120126
is_async: bool = False,
@@ -126,21 +132,22 @@ def optimize_python_code( # noqa: D417
126132
- source_code (str): The python code to optimize.
127133
- dependency_code (str): The dependency code used as read-only context for the optimization
128134
- trace_id (str): Trace id of optimization run
129-
- num_candidates (int): Number of optimization variants to generate. Default is 10.
130135
- experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
136+
- is_async (bool): Whether the function being optimized is async
131137
132138
Returns
133139
-------
134140
- List[OptimizationCandidate]: A list of Optimization Candidates.
135141
136142
"""
143+
logger.info("Generating optimized candidates…")
144+
console.rule()
137145
start_time = time.perf_counter()
138146
git_repo_owner, git_repo_name = safe_get_repo_owner_and_name()
139147

140148
payload = {
141149
"source_code": source_code,
142150
"dependency_code": dependency_code,
143-
"n_candidates": num_candidates,
144151
"trace_id": trace_id,
145152
"python_version": platform.python_version(),
146153
"experiment_metadata": experiment_metadata,
@@ -149,22 +156,25 @@ def optimize_python_code( # noqa: D417
149156
"repo_owner": git_repo_owner,
150157
"repo_name": git_repo_name,
151158
"is_async": is_async,
159+
"lsp_mode": is_LSP_enabled(),
160+
"call_sequence": self.get_next_sequence(),
152161
}
162+
logger.debug(f"Sending optimize request: trace_id={trace_id}, lsp_mode={payload['lsp_mode']}")
153163

154-
logger.info("!lsp|Generating optimized candidates…")
155-
console.rule()
156164
try:
157165
response = self.make_ai_service_request("/optimize", payload=payload, timeout=60)
158166
except requests.exceptions.RequestException as e:
159167
logger.exception(f"Error generating optimized candidates: {e}")
160168
ph("cli-optimize-error-caught", {"error": str(e)})
169+
console.rule()
161170
return []
162171

163172
if response.status_code == 200:
164173
optimizations_json = response.json()["optimizations"]
165-
console.rule()
166174
end_time = time.perf_counter()
167175
logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.")
176+
logger.info(f"!lsp|Received {len(optimizations_json)} optimization candidates.")
177+
console.rule()
168178
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE)
169179
try:
170180
error = response.json()["error"]
@@ -184,21 +194,28 @@ def optimize_python_code_line_profiler( # noqa: D417
184194
num_candidates: int = 8,
185195
experiment_metadata: ExperimentMetadata | None = None,
186196
) -> list[OptimizedCandidate]:
187-
"""Optimize the given python code for performance by making a request to the Django endpoint.
197+
"""Optimize the given python code for performance using line profiler results.
188198
189199
Parameters
190200
----------
191201
- source_code (str): The python code to optimize.
192202
- dependency_code (str): The dependency code used as read-only context for the optimization
193203
- trace_id (str): Trace id of optimization run
194-
- num_candidates (int): Number of optimization variants to generate. Default is 10.
204+
- line_profiler_results (str): Line profiler output to guide optimization
195205
- experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
196206
197207
Returns
198208
-------
199209
- List[OptimizationCandidate]: A list of Optimization Candidates.
200210
201211
"""
212+
if line_profiler_results == "":
213+
logger.info("No LineProfiler results were provided, Skipping optimization.")
214+
return []
215+
216+
logger.info("Generating optimized candidates with line profiler…")
217+
console.rule()
218+
202219
payload = {
203220
"source_code": source_code,
204221
"dependency_code": dependency_code,
@@ -209,25 +226,20 @@ def optimize_python_code_line_profiler( # noqa: D417
209226
"experiment_metadata": experiment_metadata,
210227
"codeflash_version": codeflash_version,
211228
"lsp_mode": is_LSP_enabled(),
229+
"call_sequence": self.get_next_sequence(),
212230
}
213231

214-
console.rule()
215-
if line_profiler_results == "":
216-
logger.info("No LineProfiler results were provided, Skipping optimization.")
217-
console.rule()
218-
return []
219232
try:
220233
response = self.make_ai_service_request("/optimize-line-profiler", payload=payload, timeout=60)
221234
except requests.exceptions.RequestException as e:
222235
logger.exception(f"Error generating optimized candidates: {e}")
223236
ph("cli-optimize-error-caught", {"error": str(e)})
237+
console.rule()
224238
return []
225239

226240
if response.status_code == 200:
227241
optimizations_json = response.json()["optimizations"]
228-
logger.info(
229-
f"!lsp|Generated {len(optimizations_json)} candidate optimizations using line profiler information."
230-
)
242+
logger.info(f"!lsp|Received {len(optimizations_json)} line profiler optimization candidates.")
231243
console.rule()
232244
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP)
233245
try:
@@ -265,6 +277,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
265277
"trace_id": opt.trace_id,
266278
"function_references": opt.function_references,
267279
"python_version": platform.python_version(),
280+
"call_sequence": self.get_next_sequence(),
268281
}
269282
for opt in request
270283
]
@@ -399,6 +412,7 @@ def get_new_explanation( # noqa: D417
399412
"throughput_improvement": throughput_improvement,
400413
"function_references": function_references,
401414
"codeflash_version": codeflash_version,
415+
"call_sequence": self.get_next_sequence(),
402416
}
403417
logger.info("loading|Generating explanation")
404418
console.rule()
@@ -561,6 +575,7 @@ def generate_regression_tests( # noqa: D417
561575
"python_version": platform.python_version(),
562576
"codeflash_version": codeflash_version,
563577
"is_async": function_to_optimize.is_async,
578+
"call_sequence": self.get_next_sequence(),
564579
}
565580
try:
566581
response = self.make_ai_service_request("/testgen", payload=payload, timeout=90)
@@ -647,6 +662,7 @@ def get_optimization_review(
647662
"codeflash_version": codeflash_version,
648663
"calling_fn_details": calling_fn_details,
649664
"python_version": platform.python_version(),
665+
"call_sequence": self.get_next_sequence(),
650666
}
651667
console.rule()
652668
try:

codeflash/code_utils/config_consts.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@
1313
REPEAT_OPTIMIZATION_PROBABILITY = 0.1
1414
DEFAULT_IMPORTANCE_THRESHOLD = 0.001
1515

16+
# pytest loop stability
17+
# For now, we use strict thresholds (large windows and low tolerances), since this is still experimental.
18+
STABILITY_WINDOW_SIZE = 0.35 # 35% of total window
19+
STABILITY_CENTER_TOLERANCE = 0.0025 # ±0.25% around median
20+
STABILITY_SPREAD_TOLERANCE = 0.0025 # 0.25% window spread
21+
22+
# Refinement
1623
REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1) # (runtime, diff), runtime is more important than diff by a factor of 2
1724

1825
# LSP-specific

codeflash/code_utils/env_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
def check_formatter_installed(formatter_cmds: list[str], exit_on_failure: bool = True) -> bool: # noqa
2020
if not formatter_cmds or formatter_cmds[0] == "disabled":
2121
return True
22-
2322
first_cmd = formatter_cmds[0]
2423
cmd_tokens = shlex.split(first_cmd) if isinstance(first_cmd, str) else [first_cmd]
2524

codeflash/code_utils/formatter.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,13 @@ def apply_formatter_cmds(
4646
print_status: bool, # noqa
4747
exit_on_failure: bool = True, # noqa
4848
) -> tuple[Path, str, bool]:
49-
should_make_copy = False
50-
file_path = path
51-
52-
if test_dir_str:
53-
should_make_copy = True
54-
file_path = Path(test_dir_str) / "temp.py"
55-
5649
if not path.exists():
5750
msg = f"File {path} does not exist. Cannot apply formatter commands."
5851
raise FileNotFoundError(msg)
5952

60-
if should_make_copy:
53+
file_path = path
54+
if test_dir_str:
55+
file_path = Path(test_dir_str) / "temp.py"
6156
shutil.copy2(path, file_path)
6257

6358
file_token = "$file" # noqa: S105

codeflash/discovery/discover_unit_tests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,7 @@ def process_test_files(
751751

752752
tests_cache = TestsCache(project_root_path)
753753
logger.info("!lsp|Discovering tests and processing unit tests")
754+
console.rule()
754755
with test_files_progress_bar(total=len(file_to_test_map), description="Processing test files") as (
755756
progress,
756757
task_id,

codeflash/models/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class AIServiceRefinerRequest:
4646
original_line_profiler_results: str
4747
optimized_line_profiler_results: str
4848
function_references: str | None = None
49+
call_sequence: int | None = None
4950

5051

5152
class TestDiffScope(str, Enum):
@@ -464,6 +465,7 @@ class OptimizedCandidate:
464465
optimization_id: str
465466
source: OptimizedCandidateSource
466467
parent_id: str | None = None
468+
model: str | None = None # Which LLM model generated this candidate
467469

468470

469471
@dataclass(frozen=True)

codeflash/optimization/function_optimizer.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ def __init__(
143143
self.ai_service_client = ai_service_client
144144
self.executor = executor
145145
self.effort = effort
146+
self.refinement_calls_count = 0
146147

147148
# Initialize queue with initial candidates
148149
for candidate in initial_candidates:
@@ -152,6 +153,9 @@ def __init__(
152153
self.all_refinements_data = all_refinements_data
153154
self.future_all_code_repair = future_all_code_repair
154155

156+
def get_total_llm_calls(self) -> int:
157+
return self.refinement_calls_count
158+
155159
def get_next_candidate(self) -> OptimizedCandidate | None:
156160
"""Get the next candidate from the queue, handling async results as needed."""
157161
try:
@@ -196,11 +200,13 @@ def _process_refinement_results(self) -> OptimizedCandidate | None:
196200
len(self.all_refinements_data),
197201
)
198202
)
203+
refinement_call_index = 0
199204

200205
if top_n_candidates == len(self.all_refinements_data):
201206
# if we'll refine all candidates, we can skip the ranking and just refine them all
202207
for data in self.all_refinements_data:
203-
future_refinements.append(self.refine_optimizations([data])) # noqa: PERF401
208+
refinement_call_index += 1
209+
future_refinements.append(self.refine_optimizations([data]))
204210
else:
205211
diff_lens_list = []
206212
runtimes_list = []
@@ -218,9 +224,13 @@ def _process_refinement_results(self) -> OptimizedCandidate | None:
218224
top_indecies = sorted(score_dict, key=score_dict.get)[:top_n_candidates]
219225

220226
for idx in top_indecies:
227+
refinement_call_index += 1
221228
data = self.all_refinements_data[idx]
222229
future_refinements.append(self.refine_optimizations([data]))
223230

231+
# Track total refinement calls made
232+
self.refinement_calls_count = refinement_call_index
233+
224234
if future_refinements:
225235
logger.info("loading|Refining generated code for improved quality and performance...")
226236

@@ -240,6 +250,7 @@ def _process_refinement_results(self) -> OptimizedCandidate | None:
240250
logger.info(
241251
f"Added {len(refinement_response)} candidates from refinement, total candidates now: {self.candidate_len}"
242252
)
253+
console.rule()
243254
self.refinement_done = True
244255

245256
return self.get_next_candidate()
@@ -325,7 +336,7 @@ def __init__(
325336

326337
def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
327338
should_run_experiment = self.experiment_id is not None
328-
logger.debug(f"Function Trace ID: {self.function_trace_id}")
339+
logger.info(f"Function Trace ID: {self.function_trace_id}")
329340
ph("cli-optimize-function-start", {"function_trace_id": self.function_trace_id})
330341
self.cleanup_leftover_test_return_values()
331342
file_name_from_test_module_name.cache_clear()
@@ -1210,7 +1221,6 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio
12101221
func_qualname = self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root)
12111222
if func_qualname not in function_to_all_tests:
12121223
logger.info(f"Did not find any pre-existing tests for '{func_qualname}', will only use generated tests.")
1213-
console.rule()
12141224
else:
12151225
test_file_invocation_positions = defaultdict(list)
12161226
for tests_in_file in function_to_all_tests.get(func_qualname):
@@ -1350,7 +1360,8 @@ def generate_tests(
13501360
if concolic_test_str:
13511361
count_tests += 1
13521362

1353-
logger.info(f"!lsp|Generated '{count_tests}' tests for '{self.function_to_optimize.function_name}'")
1363+
logger.info(f"!lsp|Generated {count_tests} tests for '{self.function_to_optimize.function_name}'")
1364+
console.rule()
13541365

13551366
generated_tests = GeneratedTestsList(generated_tests=tests)
13561367
return Success((count_tests, generated_tests, function_to_concolic_tests, concolic_test_str))
@@ -1361,15 +1372,13 @@ def generate_optimizations(
13611372
read_only_context_code: str,
13621373
run_experiment: bool = False, # noqa: FBT001, FBT002
13631374
) -> Result[tuple[OptimizationSet, str], str]:
1364-
"""Generate optimization candidates for the function."""
1365-
n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.args.effort)
1366-
1375+
"""Generate optimization candidates for the function. Backend handles multi-model diversity."""
1376+
# n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.args.effort)
13671377
future_optimization_candidates = self.executor.submit(
13681378
self.aiservice_client.optimize_python_code,
13691379
read_writable_code.markdown,
13701380
read_only_context_code,
13711381
self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id,
1372-
n_candidates,
13731382
ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
13741383
is_async=self.function_to_optimize.is_async,
13751384
)
@@ -1392,7 +1401,6 @@ def generate_optimizations(
13921401
read_writable_code.markdown,
13931402
read_only_context_code,
13941403
self.function_trace_id[:-4] + "EXP1",
1395-
n_candidates,
13961404
ExperimentMetadata(id=self.experiment_id, group="experiment"),
13971405
is_async=self.function_to_optimize.is_async,
13981406
)
@@ -1401,14 +1409,16 @@ def generate_optimizations(
14011409
# Wait for optimization futures to complete
14021410
concurrent.futures.wait(futures)
14031411

1404-
# Retrieve results
1405-
candidates: list[OptimizedCandidate] = future_optimization_candidates.result()
1406-
logger.info(f"!lsp|Generated '{len(candidates)}' candidate optimizations.")
1412+
# Retrieve results - optimize_python_code returns list of candidates
1413+
candidates = future_optimization_candidates.result()
14071414

14081415
if not candidates:
14091416
return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}")
14101417

1411-
candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None
1418+
# Handle experiment results
1419+
candidates_experiment = None
1420+
if future_candidates_exp:
1421+
candidates_experiment = future_candidates_exp.result()
14121422
function_references = future_references.result()
14131423

14141424
return Success((OptimizationSet(control=candidates, experiment=candidates_experiment), function_references))
@@ -1895,7 +1905,6 @@ def establish_original_code_baseline(
18951905
benchmarking_results, self.function_to_optimize.function_name
18961906
)
18971907
logger.debug(f"Original async function throughput: {async_throughput} calls/second")
1898-
console.rule()
18991908

19001909
if self.args.benchmark:
19011910
replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks(
@@ -2029,6 +2038,7 @@ def run_optimized_candidate(
20292038
return self.get_results_not_matched_error()
20302039

20312040
logger.info(f"loading|Running performance tests for candidate {optimization_candidate_index}...")
2041+
console.rule()
20322042

20332043
# For async functions, instrument at definition site for performance benchmarking
20342044
if self.function_to_optimize.is_async:

0 commit comments

Comments
 (0)