OpenHands/tests/unit/test_control_flags.py at main · openhands-agent/OpenHands · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import pytest

from openhands.controller.state.control_flags import (
    BudgetControlFlag,
    IterationControlFlag,
)


def test_iteration_control_flag_reaches_limit_and_increases():
    flag = IterationControlFlag(limit_increase_amount=5, current_value=5, max_value=5)

    # Should be at limit
    assert flag.reached_limit() is True
    assert flag._hit_limit is True

    # Increase limit in non-headless mode
    flag.increase_limit(headless_mode=False)
    assert flag.max_value == 10  # increased by limit_increase_amount

    # After increase, we should no longer be at limit
    flag._hit_limit = False  # simulate reset
    assert flag.reached_limit() is False


def test_iteration_control_flag_does_not_increase_in_headless():
    flag = IterationControlFlag(limit_increase_amount=5, current_value=5, max_value=5)

    assert flag.reached_limit() is True
    assert flag._hit_limit is True

    # Should NOT increase max_value in headless mode
    flag.increase_limit(headless_mode=True)
    assert flag.max_value == 5


def test_iteration_control_flag_step_behavior():
    flag = IterationControlFlag(limit_increase_amount=2, current_value=0, max_value=2)

    # First step
    flag.step()
    assert flag.current_value == 1
    assert not flag.reached_limit()

    # Second step
    flag.step()
    assert flag.current_value == 2
    assert flag.reached_limit()

    # Stepping again should raise error
    with pytest.raises(RuntimeError, match='Agent reached maximum iteration'):
        flag.step()


# ----- BudgetControlFlag Tests -----


def test_budget_control_flag_reaches_limit_and_increases():
    flag = BudgetControlFlag(
        limit_increase_amount=10.0, current_value=50.0, max_value=50.0
    )

    # Should be at limit
    assert flag.reached_limit() is True
    assert flag._hit_limit is True

    # Increase budget — allowed only if _hit_limit == True
    flag.increase_limit(headless_mode=False)
    assert flag.max_value == 60.0  # current_value + limit_increase_amount

    # After increasing, _hit_limit should be reset manually in your logic
    flag._hit_limit = False
    flag.current_value = 55.0
    assert flag.reached_limit() is False


def test_budget_control_flag_does_not_increase_if_not_hit_limit():
    flag = BudgetControlFlag(
        limit_increase_amount=10.0, current_value=40.0, max_value=50.0
    )

    # Not at limit yet
    assert flag.reached_limit() is False
    assert flag._hit_limit is False

    # Try to increase — should do nothing
    old_max_value = flag.max_value
    flag.increase_limit(headless_mode=False)
    assert flag.max_value == old_max_value


def test_budget_control_flag_does_not_increase_in_headless():
    flag = BudgetControlFlag(
        limit_increase_amount=10.0, current_value=50.0, max_value=50.0
    )

    assert flag.reached_limit() is True
    assert flag._hit_limit is True

    # Increase limit in headless mode — should still increase since BudgetControlFlag ignores headless param
    flag.increase_limit(headless_mode=True)
    assert flag.max_value == 60.0


def test_budget_control_flag_step_raises_on_limit():
    flag = BudgetControlFlag(
        limit_increase_amount=5.0, current_value=55.0, max_value=50.0
    )

    # Should raise RuntimeError
    with pytest.raises(RuntimeError, match='Agent reached maximum budget'):
        flag.step()

    # After increasing limit, step should not raise
    flag.max_value = 60.0
    flag._hit_limit = False
    flag.step()  # Should not raise


def test_budget_control_flag_hit_limit_resets_after_increase():
    flag = BudgetControlFlag(
        limit_increase_amount=10.0, current_value=50.0, max_value=50.0
    )

    # Initially should hit limit
    assert flag.reached_limit() is True
    assert flag._hit_limit is True

    # Increase limit
    flag.increase_limit(headless_mode=False)

    # After increasing, _hit_limit should be reset
    assert flag._hit_limit is False

    # Should no longer report reaching limit unless value exceeds new max
    assert flag.reached_limit() is False

    # If we push current_value over new max_value:
    flag.current_value = flag.max_value + 1.0
    assert flag.reached_limit() is True