Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions codeflash/api/aiservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,20 +248,18 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
"original_source_code": opt.original_source_code,
"read_only_dependency_code": opt.read_only_dependency_code,
"original_line_profiler_results": opt.original_line_profiler_results,
"original_code_runtime": opt.original_code_runtime,
"original_code_runtime": humanize_runtime(opt.original_code_runtime),
"optimized_source_code": opt.optimized_source_code,
"optimized_explanation": opt.optimized_explanation,
"optimized_line_profiler_results": opt.optimized_line_profiler_results,
"optimized_code_runtime": opt.optimized_code_runtime,
"optimized_code_runtime": humanize_runtime(opt.optimized_code_runtime),
"speedup": opt.speedup,
"trace_id": opt.trace_id,
"function_references": opt.function_references,
"python_version": platform.python_version(),
}
for opt in request
]
logger.debug(f"Refining {len(request)} optimizations…")
console.rule()
try:
response = self.make_ai_service_request("/refinement", payload=payload, timeout=120)
except requests.exceptions.RequestException as e:
Expand All @@ -271,8 +269,6 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]

if response.status_code == 200:
refined_optimizations = response.json()["refinements"]
logger.debug(f"Generated {len(refined_optimizations)} candidate refinements.")
console.rule()

refinements = self._get_valid_candidates(refined_optimizations)
return [
Expand Down
57 changes: 57 additions & 0 deletions codeflash/code_utils/code_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,63 @@ def unified_diff_strings(code1: str, code2: str, fromfile: str = "original", tof
return "".join(diff)


def choose_weights(**importance: float) -> list[float]:
    """Choose normalized weights from relative importance values.

    Each weight is the metric's importance divided by the sum of all
    importance values.

    Example:
        choose_weights(runtime=3, diff=1)
        -> [0.75, 0.25]

    Args:
        **importance: keyword args of metric=importance (relative, non-negative numbers).

    Returns:
        A list of weights in the same order as the arguments.

    Raises:
        ValueError: If any importance value is negative, or if no importance
            value is greater than zero (this includes calling with no arguments).

    """
    # A negative importance would yield a negative weight (or cancel out other
    # values in the total), which makes no sense for a ranking weight.
    negative_names = [name for name, value in importance.items() if value < 0]
    if negative_names:
        raise ValueError(f"Importance values must be >= 0, got negative values for: {', '.join(negative_names)}")

    total = sum(importance.values())
    if total == 0:
        raise ValueError("At least one importance value must be > 0")

    return [value / total for value in importance.values()]


def normalize(values: list[float]) -> list[float]:
    """Min-max normalize a list of numbers into the range [0, 1].

    The smallest input maps to 0.0 and the largest to 1.0; every other value
    is scaled linearly between them.

    Args:
        values: The numbers to rescale.

    Returns:
        A new list with one normalized value per input, in the same order.
        An empty input yields an empty list. If all inputs are equal, every
        output is 0.0.

    """
    # NOTE(review): a reviewer asked for this function to be renamed to
    # `min_max_normalize` because `normalize` is too broad. The name is kept
    # here so existing imports of `normalize` keep working.
    if not values:
        # min()/max() raise ValueError on an empty sequence.
        return []

    lowest, highest = min(values), max(values)
    if highest == lowest:
        # Avoid dividing by zero when there is no spread in the data.
        return [0.0] * len(values)
    return [(value - lowest) / (highest - lowest) for value in values]


def create_score_dictionary_from_metrics(weights: list[float], *metrics: list[float]) -> dict[int, float]:
    """Combine multiple metrics into a single weighted score dictionary.

    Each metric is a list of values (smaller = better).
    The total score for each index is the weighted sum of its values
    across all metrics:

        score[index] = Σ (value * weight)

    Args:
        weights: A list of weights, one per metric. Larger weight = more influence.
        *metrics: Lists of values (one list per metric, aligned by index).

    Returns:
        A dictionary mapping each index to its combined weighted score.
        The dictionary is empty when no metrics are given.

    Raises:
        ValueError: If the number of weights differs from the number of
            metrics, or if the metrics do not all have the same length.

    """
    if len(weights) != len(metrics):
        raise ValueError("Number of weights must match number of metrics")

    # Metrics are aligned by index; a shorter metric would silently contribute
    # 0 (the best possible value) for its missing entries and skew the ranking.
    if len({len(metric) for metric in metrics}) > 1:
        raise ValueError("All metrics must have the same number of values")

    combined: dict[int, float] = {}

    for weight, metric in zip(weights, metrics):
        for idx, value in enumerate(metric):
            combined[idx] = combined.get(idx, 0.0) + value * weight

    return combined


def diff_length(a: str, b: str) -> int:
"""Compute the length (in characters) of the unified diff between two strings.

Expand Down
5 changes: 5 additions & 0 deletions codeflash/code_utils/config_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
DEFAULT_IMPORTANCE_THRESHOLD = 0.001
N_CANDIDATES_LP = 6

# Refinement
REFINE_ALL_THRESHOLD = 2 # when valid optimizations count is 2 or less, refine all optimizations
REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1) # (runtime, diff), runtime is more important than diff by a factor of 2
TOP_N_REFINEMENTS = 0.45 # top 45% of valid optimizations (based on the weighted score) are refined
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason for this number?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nothing in particular, was thinking of making it a fixed number, maybe 3 ?
@misrasaurabh1 @KRRT7 @aseembits93


# LSP-specific
N_CANDIDATES_LSP = 3
N_TESTS_TO_GENERATE_LSP = 2
Expand Down
4 changes: 2 additions & 2 deletions codeflash/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ class AIServiceRefinerRequest:
optimization_id: str
original_source_code: str
read_only_dependency_code: str
original_code_runtime: str
original_code_runtime: int
optimized_source_code: str
optimized_explanation: str
optimized_code_runtime: str
optimized_code_runtime: int
speedup: str
trace_id: str
original_line_profiler_results: str
Expand Down
116 changes: 66 additions & 50 deletions codeflash/optimization/function_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@
replace_function_definitions_in_module,
)
from codeflash.code_utils.code_utils import (
choose_weights,
cleanup_paths,
create_rank_dictionary_compact,
create_score_dictionary_from_metrics,
diff_length,
extract_unique_errors,
file_name_from_test_module_name,
get_run_tmp_file,
module_name_from_file_path,
normalize,
restore_conftest,
unified_diff_strings,
)
Expand All @@ -45,7 +48,10 @@
N_CANDIDATES_EFFECTIVE,
N_CANDIDATES_LP_EFFECTIVE,
N_TESTS_TO_GENERATE_EFFECTIVE,
REFINE_ALL_THRESHOLD,
REFINED_CANDIDATE_RANKING_WEIGHTS,
REPEAT_OPTIMIZATION_PROBABILITY,
TOP_N_REFINEMENTS,
TOTAL_LOOPING_TIME_EFFECTIVE,
)
from codeflash.code_utils.deduplicate_code import normalize_code
Expand Down Expand Up @@ -124,19 +130,23 @@ def __init__(
self,
initial_candidates: list,
future_line_profile_results: concurrent.futures.Future,
future_all_refinements: list,
all_refinements_data: list[AIServiceRefinerRequest],
ai_service_client: AiServiceClient,
executor: concurrent.futures.ThreadPoolExecutor,
) -> None:
self.candidate_queue = queue.Queue()
self.line_profiler_done = False
self.refinement_done = False
self.candidate_len = len(initial_candidates)
self.ai_service_client = ai_service_client
self.executor = executor

# Initialize queue with initial candidates
for candidate in initial_candidates:
self.candidate_queue.put(candidate)

self.future_line_profile_results = future_line_profile_results
self.future_all_refinements = future_all_refinements
self.all_refinements_data = all_refinements_data

def get_next_candidate(self) -> OptimizedCandidate | None:
"""Get the next candidate from the queue, handling async results as needed."""
Expand Down Expand Up @@ -168,15 +178,45 @@ def _process_line_profiler_results(self) -> OptimizedCandidate | None:

return self.get_next_candidate()

def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concurrent.futures.Future:
    """Schedule a refinement call on the executor and return its future.

    Args:
        request: Forwarded unchanged as the ``request`` keyword argument of
            the AI service client's ``optimize_python_code_refinement``.

    Returns:
        The value returned by ``self.executor.submit`` for the scheduled call.

    """
    submit = self.executor.submit
    refine_call = self.ai_service_client.optimize_python_code_refinement
    return submit(refine_call, request=request)

def _process_refinement_results(self) -> OptimizedCandidate | None:
"""Process refinement results and add to queue."""
if self.future_all_refinements:
"""Process refinement results and add to queue. We generate a weighted ranking based on the runtime and diff lines and select the best (round of 45%) of valid optimizations to be refined."""
future_refinements: list[concurrent.futures.Future] = []

if len(self.all_refinements_data) <= REFINE_ALL_THRESHOLD:
for data in self.all_refinements_data:
future_refinements.append(self.refine_optimizations([data])) # noqa: PERF401
else:
diff_lens_list = []
runtimes_list = []
for c in self.all_refinements_data:
diff_lens_list.append(diff_length(c.original_source_code, c.optimized_source_code))
runtimes_list.append(c.optimized_code_runtime)

runtime_w, diff_w = REFINED_CANDIDATE_RANKING_WEIGHTS
weights = choose_weights(runtime=runtime_w, diff=diff_w)

runtime_norm = normalize(runtimes_list)
diffs_norm = normalize(diff_lens_list)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering whether min-max normalization is a good idea for these.
With this, every candidate with the minimal runtime or diff_len gets a weighted value of 0, and every maximal one gets a value of 1. It won't matter even if the difference between the min and the max is minuscule.

The problem I see is that min-max normalization gets rid of the relative scale of the runtimes or the diff lengths.

Instead of normalizing with min = minimal data point, why not try with min = 0? Diff len or runtime can only ever be as small as 0, and with this formulation we can think of the values as vectors emanating from the origin: we give the largest data point a value of 1 and the minimal one a value relative to the magnitude between 0 and the max. So if the runtime is half of the max, a score of 0.5 sounds more reasonable than 0.

This preserves a sense of scale.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice, this is definitely more accurate

# the lower the better
score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm)
top_n_candidates = int((TOP_N_REFINEMENTS * len(runtimes_list)) + 0.5)
top_indecies = sorted(score_dict, key=score_dict.get)[:top_n_candidates]

for idx in top_indecies:
data = self.all_refinements_data[idx]
future_refinements.append(self.refine_optimizations([data]))

if future_refinements:
logger.info("loading|Refining generated code for improved quality and performance...")
concurrent.futures.wait(self.future_all_refinements)

concurrent.futures.wait(future_refinements)
refinement_response = []

for future_refinement in self.future_all_refinements:
possible_refinement = future_refinement.result()
for f in future_refinements:
possible_refinement = f.result()
if len(possible_refinement) > 0:
refinement_response.append(possible_refinement[0])

Expand Down Expand Up @@ -684,15 +724,14 @@ def process_single_candidate(
original_helper_code: dict[Path, str],
file_path_to_helper_classes: dict[Path, set[str]],
eval_ctx: CandidateEvaluationContext,
future_all_refinements: list[concurrent.futures.Future],
ai_service_client: AiServiceClient,
all_refinements_data: list[AIServiceRefinerRequest],
exp_type: str,
function_references: str,
) -> BestOptimization | None:
"""Process a single optimization candidate.

Returns the BestOptimization if the candidate is successful, None otherwise.
Updates eval_ctx with results and may append to future_all_refinements.
Updates eval_ctx with results and may append to all_refinements_data.
"""
# Cleanup temp files
get_run_tmp_file(Path(f"test_return_values_{candidate_index}.bin")).unlink(missing_ok=True)
Expand Down Expand Up @@ -787,14 +826,19 @@ def process_single_candidate(

# Queue refinement for non-refined candidates
if not candidate.optimization_id.endswith("refi"):
future_all_refinements.append(
self.refine_optimizations(
valid_optimizations=[best_optimization],
original_code_baseline=original_code_baseline,
code_context=code_context,
all_refinements_data.append(
AIServiceRefinerRequest(
optimization_id=best_optimization.candidate.optimization_id,
original_source_code=code_context.read_writable_code.markdown,
read_only_dependency_code=code_context.read_only_context_code,
original_code_runtime=original_code_baseline.runtime,
optimized_source_code=best_optimization.candidate.source_code.markdown,
optimized_explanation=best_optimization.candidate.explanation,
optimized_code_runtime=best_optimization.runtime,
speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=best_optimization.runtime) * 100)}%",
trace_id=self.get_trace_id(exp_type),
ai_service_client=ai_service_client,
executor=self.executor,
original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"],
function_references=function_references,
)
)
Expand Down Expand Up @@ -830,7 +874,7 @@ def determine_best_candidate(

# Initialize evaluation context and async tasks
eval_ctx = CandidateEvaluationContext()
future_all_refinements: list[concurrent.futures.Future] = []
all_refinements_data: list[AIServiceRefinerRequest] = []
ai_service_client = self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client
assert ai_service_client is not None, "AI service client must be set for optimization"

Expand All @@ -848,7 +892,9 @@ def determine_best_candidate(
else None,
)

processor = CandidateProcessor(candidates, future_line_profile_results, future_all_refinements)
processor = CandidateProcessor(
candidates, future_line_profile_results, all_refinements_data, self.aiservice_client, self.executor
)
candidate_index = 0

# Process candidates using queue-based approach
Expand All @@ -869,8 +915,7 @@ def determine_best_candidate(
original_helper_code=original_helper_code,
file_path_to_helper_classes=file_path_to_helper_classes,
eval_ctx=eval_ctx,
future_all_refinements=future_all_refinements,
ai_service_client=ai_service_client,
all_refinements_data=all_refinements_data,
exp_type=exp_type,
function_references=function_references,
)
Expand Down Expand Up @@ -903,35 +948,6 @@ def determine_best_candidate(

return best_optimization

def refine_optimizations(
    self,
    valid_optimizations: list[BestOptimization],
    original_code_baseline: OriginalCodeBaseline,
    code_context: CodeOptimizationContext,
    trace_id: str,
    ai_service_client: AiServiceClient,
    executor: concurrent.futures.ThreadPoolExecutor,
    function_references: str | None = None,
) -> concurrent.futures.Future:
    """Build one refiner request per optimization and submit them as a single batch.

    Args:
        valid_optimizations: Optimizations to turn into refiner requests.
        original_code_baseline: Supplies the original runtime and the original
            line-profiler output (its ``"str_out"`` entry).
        code_context: Supplies the writable source markdown and the read-only
            dependency code.
        trace_id: Trace identifier copied into every request.
        ai_service_client: Client whose ``optimize_python_code_refinement`` is scheduled.
        executor: Executor the refinement call is submitted to.
        function_references: Optional value copied into every request.

    Returns:
        The future returned by ``executor.submit`` for the refinement call.

    """
    refiner_requests = []
    for optimization in valid_optimizations:
        # Values are read in the same order the request fields are declared below.
        optimization_id = optimization.candidate.optimization_id
        original_source = code_context.read_writable_code.markdown
        read_only_code = code_context.read_only_context_code
        # Runtimes are sent in humanized form.
        original_runtime = humanize_runtime(original_code_baseline.runtime)
        optimized_source = optimization.candidate.source_code.markdown
        explanation = optimization.candidate.explanation
        optimized_runtime = humanize_runtime(optimization.runtime)
        # Speedup is the performance gain as a truncated integer percentage.
        gain = performance_gain(
            original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=optimization.runtime
        )
        speedup = f"{int(gain * 100)}%"
        original_profile = original_code_baseline.line_profile_results["str_out"]
        optimized_profile = optimization.line_profiler_test_results["str_out"]

        refiner_requests.append(
            AIServiceRefinerRequest(
                optimization_id=optimization_id,
                original_source_code=original_source,
                read_only_dependency_code=read_only_code,
                original_code_runtime=original_runtime,
                optimized_source_code=optimized_source,
                optimized_explanation=explanation,
                optimized_code_runtime=optimized_runtime,
                speedup=speedup,
                trace_id=trace_id,
                original_line_profiler_results=original_profile,
                optimized_line_profiler_results=optimized_profile,
                function_references=function_references,
            )
        )

    return executor.submit(ai_service_client.optimize_python_code_refinement, request=refiner_requests)

def log_successful_optimization(
self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str
) -> None:
Expand Down
Loading