|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from enum import Enum |
| 4 | +from typing import Any, Union |
| 5 | + |
1 | 6 | MAX_TEST_RUN_ITERATIONS = 5 |
2 | 7 | INDIVIDUAL_TESTCASE_TIMEOUT = 15 |
3 | 8 | MAX_FUNCTION_TEST_SECONDS = 60 |
4 | | -N_CANDIDATES = 5 |
5 | 9 | MIN_IMPROVEMENT_THRESHOLD = 0.05 |
6 | 10 | MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput |
7 | 11 | MAX_TEST_FUNCTION_RUNS = 50 |
8 | 12 | MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms |
9 | | -N_TESTS_TO_GENERATE = 2 |
10 | 13 | TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget |
11 | 14 | COVERAGE_THRESHOLD = 60.0 |
12 | 15 | MIN_TESTCASE_PASSED_THRESHOLD = 6 |
13 | 16 | REPEAT_OPTIMIZATION_PROBABILITY = 0.1 |
14 | 17 | DEFAULT_IMPORTANCE_THRESHOLD = 0.001 |
15 | | -N_CANDIDATES_LP = 6 |
16 | 18 |
|
17 | 19 | # pytest loop stability |
18 | 20 | # For now, we use strict thresholds (large windows and low tolerances), since this is still experimental. |
|
21 | 23 | STABILITY_SPREAD_TOLERANCE = 0.0025 # 0.25% window spread |
22 | 24 |
|
23 | 25 | # Refinement |
24 | | -REFINE_ALL_THRESHOLD = 2 # when valid optimizations count is 2 or less, refine all optimizations |
25 | 26 | REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1) # (runtime, diff), runtime is more important than diff by a factor of 2 |
26 | | -TOP_N_REFINEMENTS = 0.45 # top 45% of valid optimizations (based on the weighted score) are refined |
27 | 27 |
|
28 | 28 | # LSP-specific |
29 | | -N_CANDIDATES_LSP = 3 |
30 | | -N_TESTS_TO_GENERATE_LSP = 2 |
31 | 29 | TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid an increase in performance reporting |
32 | | -N_CANDIDATES_LP_LSP = 3 |
33 | 30 |
|
34 | 31 | # setting this value to 1 will disable repair if there is at least one correct candidate |
35 | 32 | MIN_CORRECT_CANDIDATES = 2 |
36 | 33 |
|
37 | | -# Code repair |
38 | | -REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4 # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more stricted) |
39 | | -MAX_REPAIRS_PER_TRACE = 4 # maximum number of repairs we will do for each function |
40 | | - |
41 | | -# Adaptive optimization |
42 | | -# TODO (ali): make this configurable with effort arg once the PR is merged |
43 | | -ADAPTIVE_OPTIMIZATION_THRESHOLD = 2 # Max adaptive optimizations per single candidate tree (for example : optimize -> refine -> adaptive -> another adaptive). |
44 | | -# MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = 4 # maximum number of adaptive optimizations we will do for each function (this can be 2 adaptive optimizations for 2 candidates for example) |
45 | | -MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = ( |
46 | | - 0 # disable adaptive optimizations until we have this value controlled by the effort arg |
47 | | -) |
48 | | - |
49 | | -MAX_N_CANDIDATES = 5 |
50 | | -MAX_N_CANDIDATES_LP = 6 |
51 | | - |
# Detect LSP mode once at import time. If the LSP helpers cannot be imported,
# LSP mode is treated as disabled.
try:
    from codeflash.lsp.helpers import is_LSP_enabled

    _IS_LSP_ENABLED = is_LSP_enabled()
except ImportError:
    # NOTE(review): this also catches an ImportError raised while is_LSP_enabled() runs,
    # because the call sits inside the try body - confirm that is intended.
    _IS_LSP_ENABLED = False
58 | 40 |
|
59 | | -N_CANDIDATES_EFFECTIVE = min(N_CANDIDATES_LSP if _IS_LSP_ENABLED else N_CANDIDATES, MAX_N_CANDIDATES) |
60 | | -N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP) |
61 | | -N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE |
62 | 41 | TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME |
63 | 42 |
|
64 | 43 | MAX_CONTEXT_LEN_REVIEW = 1000 |
| 44 | + |
| 45 | + |
class EffortLevel(str, Enum):
    """Effort levels used to select a setting from ``EFFORT_VALUES``.

    Members mix in ``str``, so each one compares equal to its plain string value
    and can be constructed from it, e.g. ``EffortLevel("low") is EffortLevel.LOW``.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
| 50 | + |
| 51 | + |
class EffortKeys(str, Enum):
    """Names of the settings that vary with the effort level.

    Each member's value is identical to its name; the ``.value`` strings are the
    keys of ``EFFORT_VALUES`` and are what ``get_effort_value`` looks up.
    """

    N_OPTIMIZER_CANDIDATES = "N_OPTIMIZER_CANDIDATES"
    N_OPTIMIZER_LP_CANDIDATES = "N_OPTIMIZER_LP_CANDIDATES"
    N_GENERATED_TESTS = "N_GENERATED_TESTS"
    MAX_CODE_REPAIRS_PER_TRACE = "MAX_CODE_REPAIRS_PER_TRACE"
    REPAIR_UNMATCHED_PERCENTAGE_LIMIT = "REPAIR_UNMATCHED_PERCENTAGE_LIMIT"
    TOP_VALID_CANDIDATES_FOR_REFINEMENT = "TOP_VALID_CANDIDATES_FOR_REFINEMENT"
    ADAPTIVE_OPTIMIZATION_THRESHOLD = "ADAPTIVE_OPTIMIZATION_THRESHOLD"
    MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = "MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE"
| 61 | + |
| 62 | + |
# Per-setting values for each effort level.
# Outer keys are the string values of ``EffortKeys`` (always ``.value``, never the enum
# member itself, so the mapping really is ``dict[str, ...]``); each inner dict maps an
# ``EffortLevel`` to the value used at that level.
EFFORT_VALUES: dict[str, dict[EffortLevel, Any]] = {
    EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6},
    EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7},
    # we don't use effort with generated tests for now
    EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2},
    # maximum number of repairs we will do for each function
    # (when the number of valid candidates is less than MIN_CORRECT_CANDIDATES)
    EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 5},
    # if the percentage of unmatched tests is greater than this, we won't fix it
    # (lowering this value makes the repair stricter)
    # on low effort we lower the limit to 20% to be stricter (fewer repairs, less time)
    EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
        EffortLevel.LOW: 0.2,
        EffortLevel.MEDIUM: 0.3,
        EffortLevel.HIGH: 0.4,
    },
    # Top valid candidates for refinements
    EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT.value: {
        EffortLevel.LOW: 2,
        EffortLevel.MEDIUM: 3,
        EffortLevel.HIGH: 4,
    },
    # max number of adaptive optimization calls to make per single candidate tree
    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 0, EffortLevel.HIGH: 2},
    # max number of adaptive optimization calls to make per single trace
    EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
        EffortLevel.LOW: 0,
        EffortLevel.MEDIUM: 0,
        EffortLevel.HIGH: 4,
    },
}
| 88 | + |
| 89 | + |
def get_effort_value(key: Union[EffortKeys, str], effort: Union[EffortLevel, str]) -> Any:  # noqa: ANN401
    """Return the setting stored in ``EFFORT_VALUES`` for ``key`` at ``effort``.

    Args:
        key: An ``EffortKeys`` member, or the string value of one.
        effort: An ``EffortLevel`` member, or the string value of one
            (``"low"``, ``"medium"`` or ``"high"``).

    Returns:
        The value registered for that key and effort level.

    Raises:
        ValueError: If ``effort`` is not a valid effort level, or if ``key`` is
            not a valid key or has no entry in ``EFFORT_VALUES``.
    """
    # By-value lookup returns the member unchanged when one is passed in, so a single
    # conversion validates enum members, plain strings and wrong types (e.g. None) alike.
    try:
        level = EffortLevel(effort)
    except ValueError:
        msg = f"Invalid effort level: {effort}"
        raise ValueError(msg) from None

    try:
        key_str = EffortKeys(key).value
    except ValueError:
        msg = f"Invalid key: {key}"
        raise ValueError(msg) from None

    # A valid EffortKeys member may still be missing from the table.
    if key_str not in EFFORT_VALUES:
        msg = f"Invalid key: {key_str}"
        raise ValueError(msg)

    return EFFORT_VALUES[key_str][level]
0 commit comments