"""Test that eval_protocol imports work correctly and expose the expected public API."""
import importlib
import sys
from unittest.mock import patch
import pytest
class TestRewardProtocolImports:
    """Check that the public API of ``eval_protocol`` is importable and usable.

    Each name is imported once and checked directly. Comparing the package
    (or one of its members) against an alias of itself can never fail, so no
    such self-comparisons are made here.
    """

    def test_basic_imports(self):
        """Test that the package can be imported successfully."""
        import eval_protocol

        assert eval_protocol is not None

    def test_version_consistency(self):
        """Test that the package exposes a ``__version__`` attribute."""
        import eval_protocol

        assert hasattr(eval_protocol, "__version__")

    def test_all_exports_consistency(self):
        """Test that the package declares its public names in ``__all__``."""
        import eval_protocol

        assert hasattr(eval_protocol, "__all__")

    def test_core_classes_available(self):
        """Test that the core classes are importable from the package root."""
        from eval_protocol import EvaluateResult, Message, MetricResult, RewardFunction

        # A missing name already fails at the import above; the loop guards
        # against a name that is exported but bound to None.
        for core_class in (EvaluateResult, Message, MetricResult, RewardFunction):
            assert core_class is not None

    def test_functions_available(self):
        """Test that the core functions are importable from the package root."""
        from eval_protocol import load_jsonl, make, reward_function, rollout, test_mcp

        # Same reasoning as for the core classes: import errors surface above,
        # this only rejects names bound to None.
        for exported in (reward_function, load_jsonl, make, rollout, test_mcp):
            assert exported is not None

    def test_submodules_available(self):
        """Test that key submodules are reachable as package attributes."""
        import eval_protocol

        # Test a few key submodules
        for submodule in ("models", "auth", "config", "rewards", "mcp"):
            assert hasattr(eval_protocol, submodule), f"eval_protocol.{submodule} is missing"

    def test_star_import_works(self):
        """Test that ``from eval_protocol import *`` exposes the key names."""
        # Run the star import in a fresh namespace so it cannot clash with
        # names already defined in this module.
        namespace = {}
        exec("from eval_protocol import *", namespace)

        # exec() inserts ``__builtins__`` into the namespace; ignore it and
        # any other dunder entries.
        imported_names = {name for name in namespace if not name.startswith("__")}

        # Test that key items are available
        for item in ("RewardFunction", "Message", "reward_function", "load_jsonl"):
            assert item in imported_names

    def test_reward_function_decorator_works(self):
        """Test that a function wrapped with ``@reward_function`` can be called."""
        from eval_protocol import EvaluateResult, reward_function

        @reward_function
        def length_reward(response: str, **kwargs) -> EvaluateResult:
            score = len(response) / 10.0
            return EvaluateResult(
                score=score,
                reason=f"Score based on response length: {len(response)} characters",
                is_score_valid=True,
            )

        test_input = "Hello, world!"
        result = length_reward(test_input)

        assert isinstance(result, EvaluateResult)
        assert result.score == len(test_input) / 10.0

    def test_message_class_works(self):
        """Test that ``Message`` instances keep the fields they were built with."""
        from eval_protocol import Message

        msg_data = {"role": "user", "content": "Hello"}
        msg = Message(**msg_data)

        assert msg.role == msg_data["role"]
        assert msg.content == msg_data["content"]

    def test_console_scripts_in_setup(self):
        """Test that the console scripts are declared in pyproject.toml."""
        from pathlib import Path

        # Read the file as plain text; pyproject.toml sits one directory
        # above this test file.
        pyproject_path = Path(__file__).resolve().parent.parent / "pyproject.toml"
        pyproject_content = pyproject_path.read_text(encoding="utf-8")

        # Check for console scripts in the file content
        expected_scripts = [
            'fireworks-reward = "eval_protocol.cli:main"',
            'eval-protocol = "eval_protocol.cli:main"',
        ]
        for script in expected_scripts:
            assert script in pyproject_content, f"Console script '{script}' not found in pyproject.toml"

    def test_package_structure_in_setup(self):
        """Test that setuptools discovers the package and its subpackages."""
        from pathlib import Path

        from setuptools import find_packages

        # Anchor discovery one level above this file (where pyproject.toml is
        # read from) so the result does not depend on the directory pytest
        # was started from.
        # NOTE(review): assumes the eval_protocol package directory sits next
        # to pyproject.toml - confirm against the repository layout.
        repo_root = Path(__file__).resolve().parent.parent
        packages = find_packages(where=str(repo_root), include=["eval_protocol*"])

        # Should include the main package
        assert "eval_protocol" in packages
        # Should include subpackages
        assert any(pkg.startswith("eval_protocol.") for pkg in packages)

    def test_deep_import_consistency(self):
        """Test that names are importable from submodules, with a root fallback."""
        try:
            # Test importing from submodules
            from eval_protocol.models import Message
        except ImportError:
            # If submodule imports don't work, that's expected in some install
            # scenarios; fall back to the package root.
            from eval_protocol import Message
        assert Message is not None

        try:
            # Test another submodule - use a function that actually exists
            from eval_protocol.auth import get_fireworks_account_id
        except ImportError:
            # If submodule imports don't work, verify the submodule is at
            # least reachable from the package root.
            from eval_protocol import auth

            assert auth is not None
        else:
            assert callable(get_fireworks_account_id)
class TestRewardProtocolFunctionality:
    """Exercise reward functions, messages and utilities exposed by eval_protocol."""

    def test_reward_function_creation(self):
        """A function wrapped with @reward_function returns the result it builds."""
        from eval_protocol import EvaluateResult, reward_function

        @reward_function
        def simple_reward(response: str, **kwargs) -> EvaluateResult:
            """Simple reward based on response length."""
            length = len(response)
            return EvaluateResult(
                score=float(length),
                reason=f"Score based on response length: {length} characters",
                is_score_valid=True,
            )

        # "Hello" has five characters, so the score is 5.0
        outcome = simple_reward("Hello")

        assert isinstance(outcome, EvaluateResult)
        assert outcome.score == 5.0
        assert outcome.is_score_valid is True
        assert "5 characters" in outcome.reason

        # The decorator must hand back something callable
        assert callable(simple_reward)

    def test_message_creation(self):
        """A Message keeps the role and content it was constructed with."""
        from eval_protocol import Message

        message = Message(role="user", content="Test message")

        assert message.role == "user"
        assert message.content == "Test message"

    def test_message_preserves_token_ids(self):
        """Token IDs given to a Message appear unchanged in its model_dump()."""
        from eval_protocol import Message

        message = Message(role="assistant", content="Hi", token_ids=[1, 2], logprobs=[-0.1, -0.2])
        dumped = message.model_dump()

        assert dumped["token_ids"] == [1, 2]

    def test_message_rejects_misaligned_float_logprobs(self):
        """Two token IDs with a single flat logprob raise a ValidationError."""
        import pytest
        from pydantic import ValidationError

        from eval_protocol import Message

        with pytest.raises(ValidationError):
            Message(role="assistant", content="Hi", token_ids=[1, 2], logprobs=[-0.1])

    def test_utility_functions(self):
        """The utility helpers imported from eval_protocol are callable."""
        from eval_protocol import create_llm_resource, load_jsonl

        # These should be callable
        for helper in (load_jsonl, create_llm_resource):
            assert callable(helper)
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; pass it to sys.exit() so
    # running this file as a script reports test failures to the caller.
    sys.exit(pytest.main([__file__, "-v"]))