@@ -413,7 +413,8 @@ class TestJavaRunAndParsePerformance:
413413 """Tests that the performance instrumentation produces correct timing data.
414414
415415 Uses precise busy-wait with System.nanoTime() (monotonic clock) to achieve
416- <1% timing variance, validating measurement system accuracy.
416+ <5% timing variance, accounting for JIT warmup effects where first iterations
417+ are cold and subsequent iterations benefit from JIT optimization.
417418 """
418419
419420 PRECISE_WAITER_TEST = """package com.example;
@@ -487,7 +488,7 @@ def _instrument_and_run(self, project_root, src_dir, test_dir, test_source, test
487488 return test_results
488489
489490 def test_performance_inner_loop_count_and_timing (self , java_project ):
490- """2 outer × 2 inner = 4 results with <2 % variance and accurate 10ms timing."""
491+ """2 outer × 2 inner = 4 results with <5 % variance and accurate 10ms timing."""
491492 skip_if_maven_not_available ()
492493 project_root , src_dir , test_dir = self ._setup_precise_waiter_project (java_project )
493494
@@ -520,18 +521,18 @@ def test_performance_inner_loop_count_and_timing(self, java_project):
520521 stddev_runtime = statistics .stdev (runtimes )
521522 coefficient_of_variation = stddev_runtime / mean_runtime
522523
523- # Target: 10ms (10,000,000 ns), allow <2 % coefficient of variation
524- # (userspace busy-wait can still experience minor OS scheduling effects )
524+ # Target: 10ms (10,000,000 ns), allow <5% coefficient of variation
525+ # (accounts for JIT warmup: the first iteration runs cold, later ones are JIT-optimized)
525526 expected_ns = 10_000_000
526527 runtimes_ms = [r / 1_000_000 for r in runtimes ]
527528
528- assert coefficient_of_variation < 0.02 , (
529- f"Timing variance too high: CV={ coefficient_of_variation :.2%} (should be <2 %). "
529+ assert coefficient_of_variation < 0.05 , (
530+ f"Timing variance too high: CV={ coefficient_of_variation :.2%} (should be <5 %). "
530531 f"Runtimes: { runtimes_ms } ms (mean={ mean_runtime / 1_000_000 :.3f} ms)"
531532 )
532533
533- # Verify measured time is close to expected 10ms (allow ±2 % for measurement overhead )
534- assert expected_ns * 0.98 <= mean_runtime <= expected_ns * 1.02 , (
534+ # Verify the mean measured time is close to the expected 10ms (allow ±5% for JIT warmup)
535+ assert expected_ns * 0.95 <= mean_runtime <= expected_ns * 1.05 , (
535536 f"Mean runtime { mean_runtime / 1_000_000 :.3f} ms not close to expected 10.0ms"
536537 )
537538
@@ -554,14 +555,15 @@ def test_performance_inner_loop_count_and_timing(self, java_project):
554555 )
555556
556557 # Total should be sum of 2 minimums (one per inner iteration) ≈ 20ms
558+ # Minimums filter out JIT warmup, so use tighter ±2% tolerance
557559 expected_total_ns = 2 * expected_ns
558- assert expected_total_ns * 0.96 <= total_runtime <= expected_total_ns * 1.04 , (
560+ assert expected_total_ns * 0.98 <= total_runtime <= expected_total_ns * 1.02 , (
559561 f"total_passed_runtime { total_runtime / 1_000_000 :.3f} ms not close to expected "
560- f"{ expected_total_ns / 1_000_000 :.1f} ms (2 inner iterations × 10ms each)"
562+ f"{ expected_total_ns / 1_000_000 :.1f} ms (2 inner iterations × 10ms each, ±2% )"
561563 )
562564
563565 def test_performance_multiple_test_methods_inner_loop (self , java_project ):
564- """Two @Test methods: 2 outer × 2 inner = 8 results with <2 % variance."""
566+ """Two @Test methods: 2 outer × 2 inner = 8 results with <5 % variance."""
565567 skip_if_maven_not_available ()
566568 project_root , src_dir , test_dir = self ._setup_precise_waiter_project (java_project )
567569
@@ -612,18 +614,18 @@ def test_performance_multiple_test_methods_inner_loop(self, java_project):
612614 stddev_runtime = statistics .stdev (runtimes )
613615 coefficient_of_variation = stddev_runtime / mean_runtime
614616
615- # Target: 10ms (10,000,000 ns), allow <2 % coefficient of variation
616- # (userspace busy-wait can still experience minor OS scheduling effects )
617+ # Target: 10ms (10,000,000 ns), allow <5% coefficient of variation
618+ # (accounts for JIT warmup: the first iteration runs cold, later ones are JIT-optimized)
617619 expected_ns = 10_000_000
618620 runtimes_ms = [r / 1_000_000 for r in runtimes ]
619621
620- assert coefficient_of_variation < 0.02 , (
621- f"Timing variance too high: CV={ coefficient_of_variation :.2%} (should be <2 %). "
622+ assert coefficient_of_variation < 0.05 , (
623+ f"Timing variance too high: CV={ coefficient_of_variation :.2%} (should be <5 %). "
622624 f"Runtimes: { runtimes_ms } ms (mean={ mean_runtime / 1_000_000 :.3f} ms)"
623625 )
624626
625- # Verify measured time is close to expected 10ms (allow ±2 % for measurement overhead )
626- assert expected_ns * 0.98 <= mean_runtime <= expected_ns * 1.02 , (
627+ # Verify the mean measured time is close to the expected 10ms (allow ±5% for JIT warmup)
628+ assert expected_ns * 0.95 <= mean_runtime <= expected_ns * 1.05 , (
627629 f"Mean runtime { mean_runtime / 1_000_000 :.3f} ms not close to expected 10.0ms"
628630 )
629631
@@ -646,8 +648,9 @@ def test_performance_multiple_test_methods_inner_loop(self, java_project):
646648 )
647649
648650 # Total should be sum of 4 minimums ≈ 40ms
651+ # Minimums filter out JIT warmup, so use tighter ±2% tolerance
649652 expected_total_ns = 4 * expected_ns # 4 test cases × 10ms each
650- assert expected_total_ns * 0.96 <= total_runtime <= expected_total_ns * 1.04 , (
653+ assert expected_total_ns * 0.98 <= total_runtime <= expected_total_ns * 1.02 , (
651654 f"total_passed_runtime { total_runtime / 1_000_000 :.3f} ms not close to expected "
652- f"{ expected_total_ns / 1_000_000 :.1f} ms (2 methods × 2 inner iterations × 10ms)"
655+ f"{ expected_total_ns / 1_000_000 :.1f} ms (2 methods × 2 inner iterations × 10ms, ±2% )"
653656 )
0 commit comments