Merge pull request #1033 from codeflash-ai/optimize-tracer-replay

KRRT7 · web-flow · commit 6ab27102ad51 · 2026-01-09T22:14:45.000-05:00
simplify E2E replay test to reduce load in CI
diff --git a/code_to_optimize/code_directories/simple_tracer_e2e/codeflash.trace b/code_to_optimize/code_directories/simple_tracer_e2e/codeflash.trace
diff --git a/code_to_optimize/code_directories/simple_tracer_e2e/workload.py b/code_to_optimize/code_directories/simple_tracer_e2e/workload.py
@@ -1,74 +1,39 @@
 from concurrent.futures import ThreadPoolExecutor
-from time import sleep
 
 
 def funcA(number):
-    number = number if number < 1000 else 1000
+    number = number if number < 100 else 100
     k = 0
-    for i in range(number * 100):
+    for i in range(number * 10):
         k += i
-    # Simplify the for loop by using sum with a range object
     j = sum(range(number))
-
-    # Use a generator expression directly in join for more efficiency
     return " ".join(str(i) for i in range(number))
 
 
 def test_threadpool() -> None:
-    pool = ThreadPoolExecutor(max_workers=3)
-    args = list(range(10, 31, 10))
+    pool = ThreadPoolExecutor(max_workers=2)
+    args = [5, 10, 15]
     result = pool.map(funcA, args)
 
     for r in result:
         print(r)
 
 class AlexNet:
-    def __init__(self, num_classes=1000):
+    def __init__(self, num_classes=10):
         self.num_classes = num_classes
-        self.features_size = 256 * 6 * 6
 
     def forward(self, x):
-        features = self._extract_features(x)
-        
-        output = self._classify(features)
-        return output
-
-    def _extract_features(self, x):
-        result = []
-        for i in range(len(x)):
-            pass
-
-        return result
-
-    def _classify(self, features):
-        total = sum(features)
-        return [total % self.num_classes for _ in features]
-
-class SimpleModel:
-    @staticmethod
-    def predict(data):
-        result = []
-        sleep(0.1) # can be optimized away
-        for i in range(500):
-            for x in data:
-                computation = 0
-                computation += x * i ** 2
-                result.append(computation)
-        return result
-    
-    @classmethod
-    def create_default(cls):
-        return cls()
+        result = 0
+        for val in x:
+            result += val * val
+        return result % self.num_classes
 
 
 def test_models():
     model = AlexNet(num_classes=10)
     input_data = [1, 2, 3, 4, 5]
     result = model.forward(input_data)
 
-    model2 = SimpleModel.create_default()
-    prediction = model2.predict(input_data)
-
 if __name__ == "__main__":
     test_threadpool()
     test_models()
diff --git a/tests/scripts/end_to_end_test_tracer_replay.py b/tests/scripts/end_to_end_test_tracer_replay.py
@@ -10,7 +10,7 @@ def run_test(expected_improvement_pct: int) -> bool:
         min_improvement_x=0.1,
         expected_unit_tests_count=None,  # Tracer creates replay tests dynamically, skip validation
         coverage_expectations=[
-            CoverageExpectation(function_name="funcA", expected_coverage=100.0, expected_lines=[6, 7, 8, 9, 11, 14])
+            CoverageExpectation(function_name="funcA", expected_coverage=100.0, expected_lines=[5, 6, 7, 8, 9, 10])
         ],
     )
     cwd = (
diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py
@@ -262,9 +262,9 @@ def run_trace_test(cwd: pathlib.Path, config: TestConfig, expected_improvement_p
     if not functions_traced:
         logging.error("Failed to find traced functions in output")
         return False
-    if int(functions_traced.group(1)) != 13:
+    if int(functions_traced.group(1)) != 8:
         logging.error(functions_traced.groups())
-        logging.error("Expected 13 traced functions")
+        logging.error("Expected 8 traced functions")
         return False
 
     # Validate optimization results (from optimization phase)
diff --git a/tests/test_function_ranker.py b/tests/test_function_ranker.py
@@ -58,8 +58,8 @@ def test_load_function_stats(function_ranker):
     # Verify funcA specific values
     assert func_a_stats["function_name"] == "funcA"
     assert func_a_stats["call_count"] == 1
-    assert func_a_stats["own_time_ns"] == 63000
-    assert func_a_stats["cumulative_time_ns"] == 5443000
+    assert func_a_stats["own_time_ns"] == 153000
+    assert func_a_stats["cumulative_time_ns"] == 1324000
 
 
 def test_get_function_addressable_time(function_ranker, workload_functions):
@@ -71,10 +71,10 @@ def test_get_function_addressable_time(function_ranker, workload_functions):
     
     assert func_a is not None
     addressable_time = function_ranker.get_function_addressable_time(func_a)
-    
+
     # Expected addressable time: own_time + (time_in_callees / call_count)
-    # = 63000 + ((5443000 - 63000) / 1) = 5443000
-    assert addressable_time == 5443000
+    # = 153000 + ((1324000 - 153000) / 1) = 1324000
+    assert addressable_time == 1324000
 
 
 def test_rank_functions(function_ranker, workload_functions):
@@ -107,9 +107,9 @@ def test_get_function_stats_summary(function_ranker, workload_functions):
     
     assert stats is not None
     assert stats["function_name"] == "funcA"
-    assert stats["own_time_ns"] == 63000
-    assert stats["cumulative_time_ns"] == 5443000
-    assert stats["addressable_time_ns"] == 5443000
+    assert stats["own_time_ns"] == 153000
+    assert stats["cumulative_time_ns"] == 1324000
+    assert stats["addressable_time_ns"] == 1324000
 
 
 
@@ -128,40 +128,8 @@ def test_importance_calculation(function_ranker):
     
     assert func_a_stats is not None
     importance = func_a_stats["own_time_ns"] / total_program_time
-    
-    # funcA importance should be approximately 0.57% (63000/10968000)
-    assert abs(importance - 0.0057) < 0.001
+
+    # funcA importance should be approximately 1.9% (153000/7958000)
+    assert abs(importance - 0.019) < 0.01
 
 
-def test_simple_model_predict_stats(function_ranker, workload_functions):
-    # Find SimpleModel::predict function
-    predict_func = None
-    for func in workload_functions:
-        if func.function_name == "predict":
-            predict_func = func
-            break
-    
-    assert predict_func is not None
-    
-    stats = function_ranker.get_function_stats_summary(predict_func)
-    assert stats is not None
-    assert stats["function_name"] == "predict"
-    assert stats["call_count"] == 1
-    assert stats["own_time_ns"] == 2289000
-    assert stats["cumulative_time_ns"] == 4017000
-    assert stats["addressable_time_ns"] == 4017000
-    
-    # Test addressable time calculation
-    addressable_time = function_ranker.get_function_addressable_time(predict_func)
-    # Expected addressable time: own_time + (time_in_callees / call_count)
-    # = 2289000 + ((4017000 - 2289000) / 1) = 4017000
-    assert addressable_time == 4017000
-    
-    # Test importance calculation for predict function
-    total_program_time = sum(
-        s["own_time_ns"] for s in function_ranker._function_stats.values() 
-        if s.get("own_time_ns", 0) > 0
-    )
-    importance = stats["own_time_ns"] / total_program_time
-    # predict importance should be approximately 20.9% (2289000/10968000)
-    assert abs(importance - 0.209) < 0.01

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -10,7 +10,7 @@ def run_test(expected_improvement_pct: int) -> bool:` |
| `10` | `10` | `min_improvement_x=0.1,` |
| `11` | `11` | `expected_unit_tests_count=None, # Tracer creates replay tests dynamically, skip validation` |
| `12` | `12` | `coverage_expectations=[` |
| `13` |  | `- CoverageExpectation(function_name="funcA", expected_coverage=100.0, expected_lines=[6, 7, 8, 9, 11, 14])` |
|  | `13` | `+ CoverageExpectation(function_name="funcA", expected_coverage=100.0, expected_lines=[5, 6, 7, 8, 9, 10])` |
| `14` | `14` | `],` |
| `15` | `15` | `)` |
| `16` | `16` | `cwd = (` |