Skip to content

Commit 2353fb2

Browse files
misrasaurabh1 and claude committed
test: add comprehensive Java run-and-parse integration tests
Add end-to-end tests for Java test instrumentation, execution, and result parsing, covering both behavior and performance testing modes.

Key additions:
- PreciseWaiter: monotonic timing implementation with <2% variance
- 3 behavior tests: single/multiple test methods, return value validation
- 2 performance tests: timing accuracy, inner/outer loop counts
- Validation of total_passed_runtime() aggregation

Infrastructure improvements:
- Add inner_iterations parameter to benchmarking call chain
- Rename pytest_* parameters to language-agnostic names:
  - pytest_min_loops → min_outer_loops
  - pytest_max_loops → max_outer_loops
  - pytest_inner_iterations → inner_iterations
- Pass inner_iterations from tests through function_optimizer → test_runner → language_support

All tests validate timing accuracy (±2%), variance (<2% CV), and correct result grouping by test case including iteration_id.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent bdf8f84 commit 2353fb2

10 files changed

Lines changed: 830 additions & 170 deletions

codeflash/optimization/function_optimizer.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3064,8 +3064,9 @@ def run_and_parse_tests(
30643064
testing_time: float = TOTAL_LOOPING_TIME_EFFECTIVE,
30653065
*,
30663066
enable_coverage: bool = False,
3067-
pytest_min_loops: int = 5,
3068-
pytest_max_loops: int = 250,
3067+
min_outer_loops: int = 5,
3068+
max_outer_loops: int = 250,
3069+
inner_iterations: int | None = None,
30693070
code_context: CodeOptimizationContext | None = None,
30703071
line_profiler_output_file: Path | None = None,
30713072
) -> tuple[TestResults | dict, CoverageData | None]:
@@ -3101,10 +3102,11 @@ def run_and_parse_tests(
31013102
cwd=self.project_root,
31023103
test_env=test_env,
31033104
pytest_cmd=self.test_cfg.pytest_cmd,
3104-
pytest_timeout=INDIVIDUAL_TESTCASE_TIMEOUT,
3105-
pytest_target_runtime_seconds=testing_time,
3106-
pytest_min_loops=pytest_min_loops,
3107-
pytest_max_loops=pytest_max_loops,
3105+
timeout=INDIVIDUAL_TESTCASE_TIMEOUT,
3106+
target_runtime_seconds=testing_time,
3107+
min_outer_loops=min_outer_loops,
3108+
max_outer_loops=max_outer_loops,
3109+
inner_iterations=inner_iterations,
31083110
test_framework=self.test_cfg.test_framework,
31093111
js_project_root=self.test_cfg.js_project_root,
31103112
)
@@ -3368,8 +3370,8 @@ def run_concurrency_benchmark(
33683370
testing_time=5.0, # Short benchmark time
33693371
enable_coverage=False,
33703372
code_context=code_context,
3371-
pytest_min_loops=1,
3372-
pytest_max_loops=3,
3373+
min_outer_loops=1,
3374+
max_outer_loops=3,
33733375
)
33743376
except Exception as e:
33753377
logger.debug(f"Concurrency benchmark failed: {e}")

codeflash/verification/test_runner.py

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,11 @@ def run_benchmarking_tests(
345345
cwd: Path,
346346
test_framework: str,
347347
*,
348-
pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE,
349-
pytest_timeout: int | None = None,
350-
pytest_min_loops: int = 5,
351-
pytest_max_loops: int = 100_000,
348+
target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE,
349+
timeout: int | None = None,
350+
min_outer_loops: int = 5,
351+
max_outer_loops: int = 100_000,
352+
inner_iterations: int | None = None,
352353
js_project_root: Path | None = None,
353354
) -> tuple[Path, subprocess.CompletedProcess]:
354355
logger.debug(f"run_benchmarking_tests called: framework={test_framework}, num_files={len(test_paths.test_files)}")
@@ -359,26 +360,30 @@ def run_benchmarking_tests(
359360
# Use Java-specific timeout if no explicit timeout provided
360361
from codeflash.code_utils.config_consts import JAVA_TESTCASE_TIMEOUT
361362

362-
effective_timeout = pytest_timeout
363-
if test_framework in ("junit4", "junit5", "testng") and pytest_timeout is not None:
363+
effective_timeout = timeout
364+
if test_framework in ("junit4", "junit5", "testng") and timeout is not None:
364365
# For Java, use a minimum timeout to account for Maven overhead
365-
effective_timeout = max(pytest_timeout, JAVA_TESTCASE_TIMEOUT)
366-
if effective_timeout != pytest_timeout:
366+
effective_timeout = max(timeout, JAVA_TESTCASE_TIMEOUT)
367+
if effective_timeout != timeout:
367368
logger.debug(
368-
f"Increased Java test timeout from {pytest_timeout}s to {effective_timeout}s "
369+
f"Increased Java test timeout from {timeout}s to {effective_timeout}s "
369370
"to account for Maven startup overhead"
370371
)
371372

372-
return language_support.run_benchmarking_tests(
373-
test_paths=test_paths,
374-
test_env=test_env,
375-
cwd=cwd,
376-
timeout=effective_timeout,
377-
project_root=js_project_root,
378-
min_loops=pytest_min_loops,
379-
max_loops=pytest_max_loops,
380-
target_duration_seconds=pytest_target_runtime_seconds,
381-
)
373+
kwargs = {
374+
"test_paths": test_paths,
375+
"test_env": test_env,
376+
"cwd": cwd,
377+
"timeout": effective_timeout,
378+
"project_root": js_project_root,
379+
"min_loops": min_outer_loops,
380+
"max_loops": max_outer_loops,
381+
"target_duration_seconds": target_runtime_seconds,
382+
}
383+
# Pass inner_iterations if specified (for Java/JavaScript)
384+
if inner_iterations is not None:
385+
kwargs["inner_iterations"] = inner_iterations
386+
return language_support.run_benchmarking_tests(**kwargs)
382387
if is_python(): # pytest runs both pytest and unittest tests
383388
pytest_cmd_list = (
384389
shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX)
@@ -393,13 +398,13 @@ def run_benchmarking_tests(
393398
"--capture=tee-sys",
394399
"-q",
395400
"--codeflash_loops_scope=session",
396-
f"--codeflash_min_loops={pytest_min_loops}",
397-
f"--codeflash_max_loops={pytest_max_loops}",
398-
f"--codeflash_seconds={pytest_target_runtime_seconds}",
401+
f"--codeflash_min_loops={min_outer_loops}",
402+
f"--codeflash_max_loops={max_outer_loops}",
403+
f"--codeflash_seconds={target_runtime_seconds}",
399404
"--codeflash_stability_check=true",
400405
]
401-
if pytest_timeout is not None:
402-
pytest_args.append(f"--timeout={pytest_timeout}")
406+
if timeout is not None:
407+
pytest_args.append(f"--timeout={timeout}")
403408

404409
result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
405410
result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]

tests/test_async_run_and_parse_tests.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ async def test_async_sort():
118118
test_env=test_env,
119119
test_files=func_optimizer.test_files,
120120
optimization_iteration=0,
121-
pytest_min_loops=1,
122-
pytest_max_loops=1,
121+
min_outer_loops=1,
122+
max_outer_loops=1,
123123
testing_time=0.1,
124124
)
125125

@@ -244,8 +244,8 @@ async def test_async_class_sort():
244244
test_env=test_env,
245245
test_files=func_optimizer.test_files,
246246
optimization_iteration=0,
247-
pytest_min_loops=1,
248-
pytest_max_loops=1,
247+
min_outer_loops=1,
248+
max_outer_loops=1,
249249
testing_time=0.1,
250250
)
251251

@@ -369,8 +369,8 @@ async def test_async_perf():
369369
test_env=test_env,
370370
test_files=func_optimizer.test_files,
371371
optimization_iteration=0,
372-
pytest_min_loops=1,
373-
pytest_max_loops=1,
372+
min_outer_loops=1,
373+
max_outer_loops=1,
374374
testing_time=0.1,
375375
)
376376

@@ -489,8 +489,8 @@ async def async_error_function(lst):
489489
test_env=test_env,
490490
test_files=func_optimizer.test_files,
491491
optimization_iteration=0,
492-
pytest_min_loops=1,
493-
pytest_max_loops=1,
492+
min_outer_loops=1,
493+
max_outer_loops=1,
494494
testing_time=0.1,
495495
)
496496

@@ -594,8 +594,8 @@ async def test_async_multi():
594594
test_env=test_env,
595595
test_files=func_optimizer.test_files,
596596
optimization_iteration=0,
597-
pytest_min_loops=2,
598-
pytest_max_loops=5,
597+
min_outer_loops=2,
598+
max_outer_loops=5,
599599
testing_time=0.2,
600600
)
601601

@@ -714,8 +714,8 @@ async def test_async_edge_cases():
714714
test_env=test_env,
715715
test_files=func_optimizer.test_files,
716716
optimization_iteration=0,
717-
pytest_min_loops=1,
718-
pytest_max_loops=1,
717+
min_outer_loops=1,
718+
max_outer_loops=1,
719719
testing_time=0.1,
720720
)
721721

@@ -860,8 +860,8 @@ def test_sync_sort():
860860
test_env=test_env,
861861
test_files=func_optimizer.test_files,
862862
optimization_iteration=0,
863-
pytest_min_loops=1,
864-
pytest_max_loops=1,
863+
min_outer_loops=1,
864+
max_outer_loops=1,
865865
testing_time=0.1,
866866
)
867867

@@ -1035,8 +1035,8 @@ async def test_mixed_sorting():
10351035
test_env=test_env,
10361036
test_files=func_optimizer.test_files,
10371037
optimization_iteration=0,
1038-
pytest_min_loops=1,
1039-
pytest_max_loops=1,
1038+
min_outer_loops=1,
1039+
max_outer_loops=1,
10401040
testing_time=0.1,
10411041
)
10421042

tests/test_codeflash_capture.py

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -475,8 +475,8 @@ def __init__(self, x=2):
475475
test_env=test_env,
476476
test_files=func_optimizer.test_files,
477477
optimization_iteration=0,
478-
pytest_min_loops=1,
479-
pytest_max_loops=1,
478+
min_outer_loops=1,
479+
max_outer_loops=1,
480480
testing_time=0.1,
481481
)
482482
assert len(test_results) == 3
@@ -508,8 +508,8 @@ def __init__(self, x=2):
508508
test_env=test_env,
509509
test_files=func_optimizer.test_files,
510510
optimization_iteration=0,
511-
pytest_min_loops=1,
512-
pytest_max_loops=1,
511+
min_outer_loops=1,
512+
max_outer_loops=1,
513513
testing_time=0.1,
514514
)
515515
match, _ = compare_test_results(test_results, test_results2)
@@ -598,8 +598,8 @@ def __init__(self, *args, **kwargs):
598598
test_env=test_env,
599599
test_files=func_optimizer.test_files,
600600
optimization_iteration=0,
601-
pytest_min_loops=1,
602-
pytest_max_loops=1,
601+
min_outer_loops=1,
602+
max_outer_loops=1,
603603
testing_time=0.1,
604604
)
605605
assert len(test_results) == 3
@@ -632,8 +632,8 @@ def __init__(self, *args, **kwargs):
632632
test_env=test_env,
633633
test_files=func_optimizer.test_files,
634634
optimization_iteration=0,
635-
pytest_min_loops=1,
636-
pytest_max_loops=1,
635+
min_outer_loops=1,
636+
max_outer_loops=1,
637637
testing_time=0.1,
638638
)
639639

@@ -725,8 +725,8 @@ def __init__(self, x=2):
725725
test_env=test_env,
726726
test_files=func_optimizer.test_files,
727727
optimization_iteration=0,
728-
pytest_min_loops=1,
729-
pytest_max_loops=1,
728+
min_outer_loops=1,
729+
max_outer_loops=1,
730730
testing_time=0.1,
731731
)
732732

@@ -761,8 +761,8 @@ def __init__(self, x=2):
761761
test_env=test_env,
762762
test_files=func_optimizer.test_files,
763763
optimization_iteration=0,
764-
pytest_min_loops=1,
765-
pytest_max_loops=1,
764+
min_outer_loops=1,
765+
max_outer_loops=1,
766766
testing_time=0.1,
767767
)
768768

@@ -889,8 +889,8 @@ def another_helper(self):
889889
test_env=test_env,
890890
test_files=func_optimizer.test_files,
891891
optimization_iteration=0,
892-
pytest_min_loops=1,
893-
pytest_max_loops=1,
892+
min_outer_loops=1,
893+
max_outer_loops=1,
894894
testing_time=0.1,
895895
)
896896

@@ -910,8 +910,8 @@ def another_helper(self):
910910
test_env=test_env,
911911
test_files=func_optimizer.test_files,
912912
optimization_iteration=0,
913-
pytest_min_loops=1,
914-
pytest_max_loops=1,
913+
min_outer_loops=1,
914+
max_outer_loops=1,
915915
testing_time=0.1,
916916
)
917917

@@ -1049,8 +1049,8 @@ def another_helper(self):
10491049
test_env=test_env,
10501050
test_files=func_optimizer.test_files,
10511051
optimization_iteration=0,
1052-
pytest_min_loops=1,
1053-
pytest_max_loops=1,
1052+
min_outer_loops=1,
1053+
max_outer_loops=1,
10541054
testing_time=0.1,
10551055
)
10561056

@@ -1101,8 +1101,8 @@ def target_function(self):
11011101
test_env=test_env,
11021102
test_files=func_optimizer.test_files,
11031103
optimization_iteration=0,
1104-
pytest_min_loops=1,
1105-
pytest_max_loops=1,
1104+
min_outer_loops=1,
1105+
max_outer_loops=1,
11061106
testing_time=0.1,
11071107
)
11081108
# Remove instrumentation
@@ -1140,8 +1140,8 @@ def target_function(self):
11401140
test_env=test_env,
11411141
test_files=func_optimizer.test_files,
11421142
optimization_iteration=0,
1143-
pytest_min_loops=1,
1144-
pytest_max_loops=1,
1143+
min_outer_loops=1,
1144+
max_outer_loops=1,
11451145
testing_time=0.1,
11461146
)
11471147
# Remove instrumentation
@@ -1179,8 +1179,8 @@ def target_function(self):
11791179
test_env=test_env,
11801180
test_files=func_optimizer.test_files,
11811181
optimization_iteration=0,
1182-
pytest_min_loops=1,
1183-
pytest_max_loops=1,
1182+
min_outer_loops=1,
1183+
max_outer_loops=1,
11841184
testing_time=0.1,
11851185
)
11861186
# Remove instrumentation
@@ -1471,8 +1471,8 @@ def calculate_portfolio_metrics(
14711471
test_env=test_env,
14721472
test_files=func_optimizer.test_files,
14731473
optimization_iteration=0,
1474-
pytest_min_loops=1,
1475-
pytest_max_loops=1,
1474+
min_outer_loops=1,
1475+
max_outer_loops=1,
14761476
testing_time=0.1,
14771477
)
14781478

@@ -1538,8 +1538,8 @@ def risk_adjusted_return(return_val, weight):
15381538
test_env=test_env,
15391539
test_files=func_optimizer.test_files,
15401540
optimization_iteration=0,
1541-
pytest_min_loops=1,
1542-
pytest_max_loops=1,
1541+
min_outer_loops=1,
1542+
max_outer_loops=1,
15431543
testing_time=0.1,
15441544
)
15451545
# Remove instrumentation
@@ -1601,8 +1601,8 @@ def calculate_portfolio_metrics(
16011601
test_env=test_env,
16021602
test_files=func_optimizer.test_files,
16031603
optimization_iteration=0,
1604-
pytest_min_loops=1,
1605-
pytest_max_loops=1,
1604+
min_outer_loops=1,
1605+
max_outer_loops=1,
16061606
testing_time=0.1,
16071607
)
16081608
# Remove instrumentation
@@ -1687,8 +1687,8 @@ def __init__(self, x, y):
16871687
test_env=test_env,
16881688
test_files=func_optimizer.test_files,
16891689
optimization_iteration=0,
1690-
pytest_min_loops=1,
1691-
pytest_max_loops=1,
1690+
min_outer_loops=1,
1691+
max_outer_loops=1,
16921692
testing_time=0.1,
16931693
)
16941694

0 commit comments

Comments
 (0)