Skip to content

Commit 9766c5d

Browse files
committed
reward
1 parent 23ba2b3 commit 9766c5d

2 files changed

Lines changed: 14 additions & 11 deletions

File tree

eval_protocol/models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,14 @@ class ExecutionMetadata(BaseModel):
776776
description="Processing duration in seconds for an entire experiment. Note that includes time it took for retries.",
777777
)
778778

779+
# Generic bag for integration-specific metadata.
780+
# Examples:
781+
# - OpenEnvRolloutProcessor: per-step rewards, token IDs for GRPO / TRL
782+
extra: Optional[Dict[str, Any]] = Field(
783+
default=None,
784+
description="Arbitrary execution metadata for integrations (step rewards, token IDs, debug info, etc.).",
785+
)
786+
779787

780788
class EvaluationRow(BaseModel):
781789
"""

tests/pytest/test_openenv_echo_hub.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,10 @@
88
from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor
99
import pytest
1010

11-
try:
12-
# Preferred import when using the monolithic `openenv` package
13-
from openenv.envs.echo_env import EchoEnv # type: ignore
1411

15-
_HAS_ECHO = True
16-
except Exception:
17-
_HAS_ECHO = False
12+
# EchoEnv is imported from the top-level `envs` package (no `openenv.` prefix)
13+
from envs.echo_env import EchoEnv # type: ignore
14+
1815

1916
# Skip these integration-heavy tests on CI runners by default
2017
pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI")
@@ -43,7 +40,7 @@ def action_parser(response_text: str):
4340
Convert raw model response to EchoAction.
4441
"""
4542
try:
46-
from openenv.envs.echo_env import EchoAction # type: ignore
43+
from envs.echo_env import EchoAction # type: ignore
4744
except Exception:
4845
pytest.skip("OpenEnv (openenv.envs.echo_env) is not installed; skipping Echo hub test.")
4946
raise
@@ -91,17 +88,14 @@ def action_parser(response_text: str):
9188
timeout_ms=5000,
9289
num_generations=1,
9390
)
94-
if _HAS_ECHO
95-
else None
9691
),
9792
)
9893
def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow:
9994
"""
10095
Smoke test for Echo env via Hugging Face Hub (registry.hf.space/openenv-echo-env).
10196
Extracts env rewards (from rollout policy extras) and sets evaluation_result.
10297
"""
103-
if not _HAS_ECHO:
104-
pytest.skip("OpenEnv (openenv.envs.echo_env) is not installed; skipping Echo hub test.")
98+
10599
# Try to read rewards/usage left in execution metadata extra.
106100
total_reward = 0.0
107101
try:
@@ -110,6 +104,7 @@ def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow:
110104
if isinstance(extra, dict):
111105
raw = extra.get("step_rewards") or []
112106
step_rewards = [float(r) for r in raw]
107+
print(f"Step rewards: {step_rewards}")
113108
total_reward = float(sum(step_rewards)) if step_rewards else 0.0
114109
except Exception:
115110
total_reward = 0.0

0 commit comments

Comments
 (0)