Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e4cbf95
exp
mohammedahmed18 Sep 8, 2025
01ad626
still experimenting
mohammedahmed18 Sep 9, 2025
2b01faf
Merge branch 'main' of github.com:codeflash-ai/codeflash into exp/con…
mohammedahmed18 Dec 9, 2025
ce89905
reset
mohammedahmed18 Dec 9, 2025
1f367be
dynamic tolerance
mohammedahmed18 Dec 9, 2025
0d819a8
Merge branch 'main' of github.com:codeflash-ai/codeflash into exp/con…
mohammedahmed18 Dec 11, 2025
5c4a6d9
get the duration from the pytest overridden methods
mohammedahmed18 Dec 12, 2025
ecd21d5
remove debug log
mohammedahmed18 Dec 12, 2025
30c89ce
respect the min loop count -just in case-
mohammedahmed18 Dec 12, 2025
ce2c05b
Merge branch 'main' of github.com:codeflash-ai/codeflash into exp/con…
mohammedahmed18 Dec 12, 2025
89fc939
more closer method
mohammedahmed18 Dec 15, 2025
cc94694
Merge branch 'main' of github.com:codeflash-ai/codeflash into exp/con…
mohammedahmed18 Dec 16, 2025
a67dad3
working version
mohammedahmed18 Dec 16, 2025
d52aae4
even better
mohammedahmed18 Dec 16, 2025
244f9ca
better stability algorithm
mohammedahmed18 Dec 17, 2025
a890d4f
should stop metrics
mohammedahmed18 Dec 19, 2025
3159eb6
Merge branch 'main' of github.com:codeflash-ai/codeflash into exp/con…
mohammedahmed18 Dec 22, 2025
95f22ee
better stability with sum the min of all prev loops
mohammedahmed18 Dec 22, 2025
9f311cd
Optimize should_stop
codeflash-ai[bot] Dec 22, 2025
83dff02
best summed runtime helper
mohammedahmed18 Dec 23, 2025
a8e93c7
Merge branch 'main' of github.com:codeflash-ai/codeflash into codefla…
mohammedahmed18 Dec 23, 2025
e49ba13
linting
mohammedahmed18 Dec 23, 2025
91cbc74
Merge pull request #984 from codeflash-ai/codeflash/optimize-pr967-20…
mohammedahmed18 Dec 23, 2025
0b3be3f
some enhancements from claude pr review
mohammedahmed18 Dec 23, 2025
b57fa1a
Merge branch 'exp/consistent-loop-break' of github.com:codeflash-ai/c…
mohammedahmed18 Dec 23, 2025
46701c7
window percentage
mohammedahmed18 Dec 26, 2025
74520f6
cleanup
mohammedahmed18 Dec 26, 2025
9ab06d3
revert comment
mohammedahmed18 Dec 26, 2025
f1058ea
for unit tests
mohammedahmed18 Dec 26, 2025
56cce15
Merge branch 'main' of github.com:codeflash-ai/codeflash into exp/con…
mohammedahmed18 Dec 26, 2025
8ea9231
toggle stability by arg and zero tolerance
mohammedahmed18 Dec 28, 2025
9079590
configs and cleaner impl
mohammedahmed18 Dec 29, 2025
70b7627
typo
mohammedahmed18 Dec 29, 2025
dd3707a
Merge branch 'main' of github.com:codeflash-ai/codeflash into exp/con…
mohammedahmed18 Dec 29, 2025
9cae2a1
refactor
mohammedahmed18 Dec 29, 2025
7f1818b
refactoring
mohammedahmed18 Dec 31, 2025
270af89
Merge branch 'main' into exp/consistent-loop-break
KRRT7 Dec 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions codeflash/code_utils/config_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms
N_TESTS_TO_GENERATE = 2
TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget
# Size of the sliding window of per-loop total durations that the pytest plugin
# compares to decide whether timings are consistent enough to stop looping early.
CONSISTENT_LOOP_COUNT = 3
COVERAGE_THRESHOLD = 60.0
MIN_TESTCASE_PASSED_THRESHOLD = 6
REPEAT_OPTIMIZATION_PROBABILITY = 0.1
Expand Down
1 change: 0 additions & 1 deletion codeflash/code_utils/env_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
def check_formatter_installed(formatter_cmds: list[str], exit_on_failure: bool = True) -> bool: # noqa
if not formatter_cmds or formatter_cmds[0] == "disabled":
return True

first_cmd = formatter_cmds[0]
cmd_tokens = shlex.split(first_cmd) if isinstance(first_cmd, str) else [first_cmd]

Expand Down
54 changes: 49 additions & 5 deletions codeflash/verification/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@

import contextlib
import inspect

# System Imports
import logging
import os
import platform
import re
import statistics
import sys
import time as _time_module
import warnings
from collections import deque

# System Imports
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable
from unittest import TestCase
Expand All @@ -19,6 +21,8 @@
import pytest
from pluggy import HookspecMarker

from codeflash.code_utils.config_consts import CONSISTENT_LOOP_COUNT

if TYPE_CHECKING:
from _pytest.config import Config, Parser
from _pytest.main import Session
Expand Down Expand Up @@ -268,9 +272,30 @@ def __init__(self, config: Config) -> None:
level = logging.DEBUG if config.option.verbose > 1 else logging.INFO
logging.basicConfig(level=level)
self.logger = logging.getLogger(self.name)
self.current_loop_durations_in_seconds: list[float] = []

def dynamic_tolerance(self, avg: float) -> float:
    """Return the tolerance to apply for a reference duration of ``avg`` seconds.

    The tolerance is looked up from a fixed ladder of duration bands: the
    smaller the duration, the larger the returned tolerance. Each band's upper
    bound is exclusive, so a value exactly on a boundary falls into the next
    (stricter) band.

    Args:
        avg: Reference duration in seconds.

    Returns:
        The tolerance for the band containing ``avg``; 0.03 when ``avg`` is
        not below any band's upper bound.
    """
    # (exclusive upper bound in seconds, tolerance), in ascending order.
    tolerance_bands = (
        (0.0001, 0.7),  # < 100 µs
        (0.0005, 0.5),  # < 500 µs
        (0.001, 0.4),  # < 1 ms
        (0.01, 0.2),  # < 10 ms
        (0.1, 0.1),  # < 100 ms
    )
    for upper_bound, tolerance in tolerance_bands:
        if avg < upper_bound:
            return tolerance
    # Not below any bound (0.1 s or more).
    return 0.03
Comment thread
KRRT7 marked this conversation as resolved.
Outdated

@pytest.hookimpl
def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
    """Record the duration of a passed test call for the current loop.

    Only reports whose ``when`` is ``"call"`` and whose ``outcome`` is
    ``"passed"`` are recorded; every other report is ignored.

    Args:
        report: The test report whose ``duration`` is appended to
            ``self.current_loop_durations_in_seconds``.
    """
    # Ignore any report that is not a passed "call" report.
    if report.when != "call" or report.outcome != "passed":
        return
    self.current_loop_durations_in_seconds.append(report.duration)

@hookspec(firstresult=True)
def pytest_runtestloop(self, session: Session) -> bool:
durations = deque(maxlen=CONSISTENT_LOOP_COUNT)

"""Reimplement the test loop but loop for the user defined amount of time."""
if session.testsfailed and not session.config.option.continue_on_collection_errors:
msg = "{} error{} during collection".format(session.testsfailed, "s" if session.testsfailed != 1 else "")
Expand All @@ -284,9 +309,9 @@ def pytest_runtestloop(self, session: Session) -> bool:

count: int = 0

while total_time >= SHORTEST_AMOUNT_OF_TIME: # need to run at least one for normal tests
while total_time >= SHORTEST_AMOUNT_OF_TIME:
count += 1
total_time = self._get_total_time(session)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see if total_time changes inside this loop

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it doesn't, I checked

self.current_loop_durations_in_seconds.clear()

for index, item in enumerate(session.items):
item: pytest.Item = item # noqa: PLW0127, PLW2901
Expand All @@ -304,8 +329,27 @@ def pytest_runtestloop(self, session: Session) -> bool:
raise session.Failed(session.shouldfail)
if session.shouldstop:
raise session.Interrupted(session.shouldstop)

total_duration_in_seconds = sum(self.current_loop_durations_in_seconds)

if total_duration_in_seconds > 0:
durations.append(total_duration_in_seconds)
else:
durations.clear()

# Consistency check
if len(durations) == CONSISTENT_LOOP_COUNT:
avg = statistics.median(durations)
if avg == 0:
consistent = all(d == 0 for d in durations)
else:
consistent = all(abs(d - avg) / avg <= self.dynamic_tolerance(avg) for d in durations)
if consistent:
break

if self._timed_out(session, start_time, count):
break # exit loop
break

_ORIGINAL_TIME_SLEEP(self._get_delay_time(session))
return True

Expand Down
Loading