Skip to content

Commit ad02110

Browse files
codelion and claude committed
Add tests for recent features
Add comprehensive tests for recently merged PRs:
- test_llm_config_optional_params.py: Tests for optional temperature/top_p parameters (PR #385 - Anthropic model compatibility)
- test_snapshot_artifacts_limit.py: Tests for configurable max_snapshot_artifacts (PR #386)
- test_visualization_sanitization.py: Tests for -inf/+inf/NaN sanitization in visualization (PR #384)
- test_early_stopping_config.py: Tests for event-based early stopping configuration (PR #375)
- test_changes_description.py: Tests for large codebase support via changes description (PR #376)

Total tests increased from 264 to 326.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent c23b827 commit ad02110

5 files changed

Lines changed: 784 additions & 0 deletions

tests/test_changes_description.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
"""
2+
Tests for large codebase support via changes description.
3+
Programs can be represented as compact change descriptions instead of full code.
4+
"""
5+
6+
import unittest
7+
8+
from openevolve.config import Config, PromptConfig
9+
10+
11+
class TestChangesDescriptionConfigDefaults(unittest.TestCase):
    """Check the default values of the changes-description prompt options.

    Every test inspects the ``prompt`` section of a ``Config`` that was
    constructed without any arguments.
    """

    def setUp(self):
        """Build a fresh default configuration and keep its prompt section."""
        self.prompt = Config().prompt

    def test_programs_as_changes_description_default_false(self):
        """programs_as_changes_description is falsy on a default Config."""
        self.assertFalse(self.prompt.programs_as_changes_description)

    def test_system_message_changes_description_default_none(self):
        """system_message_changes_description is None on a default Config."""
        self.assertIsNone(self.prompt.system_message_changes_description)

    def test_initial_changes_description_default_empty(self):
        """initial_changes_description is the empty string on a default Config."""
        self.assertEqual(self.prompt.initial_changes_description, "")
28+
29+
30+
class TestChangesDescriptionValidation(unittest.TestCase):
    """Exercise how ``Config.from_dict`` validates the changes-description flag.

    The tests combine ``diff_based_evolution`` with
    ``prompt.programs_as_changes_description`` and check which combinations
    load and which one raises ``ValueError``.
    """

    @staticmethod
    def _settings(diff_based, as_description):
        """Return a raw config dict for the given flag combination."""
        return {
            "llm": {"primary_model": "gpt-4"},
            "diff_based_evolution": diff_based,
            "prompt": {
                "programs_as_changes_description": as_description,
            },
        }

    def test_requires_diff_based_evolution(self):
        """Enabling the flag with diff_based_evolution=False raises ValueError."""
        settings = self._settings(diff_based=False, as_description=True)
        with self.assertRaises(ValueError) as raised:
            Config.from_dict(settings)
        # The error text is expected to name the offending option.
        self.assertIn("diff_based_evolution", str(raised.exception))

    def test_works_with_diff_based_evolution_enabled(self):
        """Enabling the flag together with diff_based_evolution=True loads."""
        settings = self._settings(diff_based=True, as_description=True)
        loaded = Config.from_dict(settings)
        self.assertTrue(loaded.prompt.programs_as_changes_description)
        self.assertTrue(loaded.diff_based_evolution)

    def test_disabled_without_diff_based_evolution_is_ok(self):
        """With the flag off, diff_based_evolution=False loads without error."""
        settings = self._settings(diff_based=False, as_description=False)
        loaded = Config.from_dict(settings)
        self.assertFalse(loaded.prompt.programs_as_changes_description)
70+
71+
72+
class TestChangesDescriptionFromDict(unittest.TestCase):
    """Check that changes-description prompt options survive ``Config.from_dict``.

    All tests load a dict with ``diff_based_evolution`` set to True and
    ``programs_as_changes_description`` enabled, plus the option(s) under test.
    """

    @staticmethod
    def _load_prompt(**extra_prompt_options):
        """Load a config with the given extra prompt options; return its prompt."""
        prompt_section = {"programs_as_changes_description": True}
        prompt_section.update(extra_prompt_options)
        loaded = Config.from_dict(
            {
                "llm": {"primary_model": "gpt-4"},
                "diff_based_evolution": True,
                "prompt": prompt_section,
            }
        )
        return loaded.prompt

    def test_custom_system_message(self):
        """A custom system_message_changes_description is stored verbatim."""
        expected = "You are optimizing a large codebase."
        prompt = self._load_prompt(system_message_changes_description=expected)
        self.assertEqual(prompt.system_message_changes_description, expected)

    def test_custom_initial_description(self):
        """A custom initial_changes_description is stored verbatim."""
        expected = "Initial implementation with basic algorithm."
        prompt = self._load_prompt(initial_changes_description=expected)
        self.assertEqual(prompt.initial_changes_description, expected)

    def test_all_changes_description_options(self):
        """All three changes-description options can be set in one dict."""
        prompt = self._load_prompt(
            system_message_changes_description="Custom system message",
            initial_changes_description="Initial state description",
        )
        self.assertTrue(prompt.programs_as_changes_description)
        self.assertEqual(
            prompt.system_message_changes_description, "Custom system message"
        )
        self.assertEqual(
            prompt.initial_changes_description, "Initial state description"
        )
128+
129+
130+
class TestPromptConfigChangesDescription(unittest.TestCase):
    """Check the changes-description fields on ``PromptConfig`` directly.

    Unlike the ``Config``-level tests, these construct ``PromptConfig`` on its
    own, both with no arguments and with explicit keyword arguments.
    """

    def test_prompt_config_defaults(self):
        """A bare PromptConfig has the flag off, no system message, empty text."""
        defaults = PromptConfig()
        self.assertFalse(defaults.programs_as_changes_description)
        self.assertIsNone(defaults.system_message_changes_description)
        self.assertEqual(defaults.initial_changes_description, "")

    def test_prompt_config_custom_values(self):
        """Keyword arguments passed to PromptConfig are readable as attributes."""
        overrides = {
            "programs_as_changes_description": True,
            "system_message_changes_description": "Custom message",
            "initial_changes_description": "Initial state",
        }
        customised = PromptConfig(**overrides)
        self.assertTrue(customised.programs_as_changes_description)
        self.assertEqual(
            customised.system_message_changes_description, "Custom message"
        )
        self.assertEqual(customised.initial_changes_description, "Initial state")
150+
151+
152+
if __name__ == "__main__":
153+
unittest.main()
tests/test_early_stopping_config.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""
2+
Tests for early stopping configuration and behavior.
3+
"""
4+
5+
import unittest
6+
7+
from openevolve.config import Config
8+
9+
10+
class TestEarlyStoppingConfigDefaults(unittest.TestCase):
    """Check the early-stopping attributes of a ``Config`` built with no arguments."""

    def setUp(self):
        """Create the default configuration inspected by each test."""
        self.defaults = Config()

    def test_patience_default_is_none(self):
        """early_stopping_patience is None on a default Config."""
        self.assertIsNone(self.defaults.early_stopping_patience)

    def test_convergence_threshold_default(self):
        """convergence_threshold is 0.001 on a default Config."""
        self.assertEqual(self.defaults.convergence_threshold, 0.001)

    def test_metric_default(self):
        """early_stopping_metric is "combined_score" on a default Config."""
        self.assertEqual(self.defaults.early_stopping_metric, "combined_score")
27+
28+
29+
class TestEarlyStoppingConfigFromDict(unittest.TestCase):
    """Check that early-stopping options given in a dict reach the ``Config``.

    These tests only verify that ``Config.from_dict`` stores the supplied
    values; they do not run an evolution loop, so they say nothing about how
    the values affect stopping behaviour at run time.
    """

    @staticmethod
    def _load(**top_level_options):
        """Load a config dict made of a fixed llm section plus the given options."""
        raw = {"llm": {"primary_model": "gpt-4"}}
        raw.update(top_level_options)
        return Config.from_dict(raw)

    def test_custom_patience(self):
        """A custom early_stopping_patience is stored as given."""
        loaded = self._load(early_stopping_patience=50)
        self.assertEqual(loaded.early_stopping_patience, 50)

    def test_custom_convergence_threshold(self):
        """A custom convergence_threshold is stored as given."""
        loaded = self._load(convergence_threshold=0.01)
        self.assertEqual(loaded.convergence_threshold, 0.01)

    def test_custom_metric(self):
        """A custom early_stopping_metric is stored as given."""
        loaded = self._load(early_stopping_metric="score")
        self.assertEqual(loaded.early_stopping_metric, "score")

    def test_all_early_stopping_options(self):
        """All three early-stopping options can be supplied together."""
        loaded = self._load(
            early_stopping_patience=100,
            convergence_threshold=0.005,
            early_stopping_metric="validity",
        )
        self.assertEqual(loaded.early_stopping_patience, 100)
        self.assertEqual(loaded.convergence_threshold, 0.005)
        self.assertEqual(loaded.early_stopping_metric, "validity")

    def test_zero_patience_disables_early_stopping(self):
        """A patience of 0 loads and is stored as 0.

        Only the stored value is asserted here; the disabling effect named in
        the test title is not exercised by this test.
        """
        loaded = self._load(early_stopping_patience=0)
        self.assertEqual(loaded.early_stopping_patience, 0)

    def test_negative_patience_allowed(self):
        """A negative patience loads without error and is stored unchanged."""
        # Loading must not raise; no validation of the sign is expected here.
        loaded = self._load(early_stopping_patience=-1)
        self.assertEqual(loaded.early_stopping_patience, -1)
90+
91+
92+
class TestEarlyStoppingWithYaml(unittest.TestCase):
    """Check that early-stopping settings appear in the serialised config.

    Despite the class name, the test works on the dict returned by
    ``Config.to_dict()``; no YAML text is parsed or produced here.
    """

    def test_config_to_dict_includes_early_stopping(self):
        """to_dict() output contains all three early-stopping keys."""
        serialised = Config().to_dict()

        expected_keys = (
            "early_stopping_patience",
            "convergence_threshold",
            "early_stopping_metric",
        )
        # Plain loop (no subTest) so the first missing key fails the test.
        for key in expected_keys:
            self.assertIn(key, serialised)
103+
104+
105+
if __name__ == "__main__":
106+
unittest.main()
tests/test_llm_config_optional_params.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""
2+
Tests for optional LLM parameters (temperature, top_p).
3+
Ensures Anthropic model compatibility where both params cannot be specified together.
4+
"""
5+
6+
import unittest
7+
8+
from openevolve.config import Config, LLMConfig, LLMModelConfig
9+
10+
11+
class TestOptionalTemperatureTopP(unittest.TestCase):
    """Check defaults and accepted values for ``temperature`` and ``top_p``.

    Covers both the shared ``LLMConfig`` and the per-model ``LLMModelConfig``,
    each constructed directly rather than through ``Config.from_dict``.
    """

    def test_llm_config_temperature_default(self):
        """LLMConfig() has temperature 0.7."""
        shared = LLMConfig()
        self.assertEqual(shared.temperature, 0.7)

    def test_llm_config_top_p_default_is_none(self):
        """LLMConfig() has top_p None (left unset for Anthropic compatibility)."""
        shared = LLMConfig()
        self.assertIsNone(shared.top_p)

    def test_model_config_temperature_none_by_default(self):
        """LLMModelConfig() has temperature None."""
        per_model = LLMModelConfig()
        self.assertIsNone(per_model.temperature)

    def test_model_config_top_p_none_by_default(self):
        """LLMModelConfig() has top_p None."""
        per_model = LLMModelConfig()
        self.assertIsNone(per_model.top_p)

    def test_type_annotation_allows_none(self):
        """Passing None explicitly for both parameters is accepted and kept."""
        sampling = {"temperature": None, "top_p": None}
        per_model = LLMModelConfig(**sampling)
        self.assertIsNone(per_model.temperature)
        self.assertIsNone(per_model.top_p)

    def test_type_annotation_allows_float(self):
        """Passing floats for both parameters is accepted and kept."""
        sampling = {"temperature": 0.5, "top_p": 0.9}
        per_model = LLMModelConfig(**sampling)
        self.assertEqual(per_model.temperature, 0.5)
        self.assertEqual(per_model.top_p, 0.9)
45+
46+
47+
class TestConfigFromDictWithOptionalParams(unittest.TestCase):
    """Check how ``Config.from_dict`` handles optional temperature/top_p values.

    Each test supplies only an ``llm`` section and inspects ``config.llm``
    (or the models hanging off it) after loading.
    """

    @staticmethod
    def _load(**llm_section):
        """Return the Config loaded from a dict whose only key is ``llm``."""
        return Config.from_dict({"llm": llm_section})

    def test_config_with_null_temperature_uses_default(self):
        """An explicit null temperature falls back to the 0.7 default."""
        config = self._load(
            primary_model="claude-sonnet",
            api_base="https://api.anthropic.com/v1",
            temperature=None,
        )
        # None is stripped, so default 0.7 is used
        self.assertEqual(config.llm.temperature, 0.7)

    def test_config_with_null_top_p(self):
        """An explicit null top_p leaves top_p as None."""
        config = self._load(primary_model="gpt-4", top_p=None)
        self.assertIsNone(config.llm.top_p)

    def test_config_with_only_temperature(self):
        """Only temperature set (typical for Anthropic): top_p stays None."""
        config = self._load(primary_model="claude-sonnet", temperature=0.9)
        self.assertEqual(config.llm.temperature, 0.9)
        self.assertIsNone(config.llm.top_p)

    def test_config_with_only_top_p(self):
        """A top_p value is kept when temperature is given as null."""
        config = self._load(primary_model="gpt-4", temperature=None, top_p=0.95)
        self.assertEqual(config.llm.top_p, 0.95)

    def test_config_with_both_params(self):
        """Both parameters set (OpenAI compatible): both values are kept."""
        config = self._load(primary_model="gpt-4", temperature=0.8, top_p=0.9)
        self.assertEqual(config.llm.temperature, 0.8)
        self.assertEqual(config.llm.top_p, 0.9)

    def test_models_inherit_optional_params(self):
        """Every model under config.llm carries the shared temperature.

        The per-model assertion runs inside a loop, so an empty model list
        would let the test pass without checking anything. The list is
        therefore required to be non-empty first.
        NOTE(review): this assumes setting ``primary_model`` populates
        ``config.llm.models`` -- confirm against ``LLMConfig``.
        """
        config = self._load(primary_model="gpt-4", temperature=0.5, top_p=None)
        models = config.llm.models
        self.assertTrue(
            models,
            "config.llm.models is empty; the inheritance check would be vacuous",
        )
        # Check that models inherited the temperature
        for index, model in enumerate(models):
            with self.subTest(model_index=index):
                self.assertEqual(model.temperature, 0.5)
124+
125+
126+
if __name__ == "__main__":
127+
unittest.main()

0 commit comments

Comments
 (0)