Skip to content

Commit 2a3d4bc

Browse files
committed
Emit structured inference stdout for validator
1 parent 542b458 commit 2a3d4bc

File tree

2 files changed

+54
-7
lines changed

2 files changed

+54
-7
lines changed

inference.py

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import asyncio
66
import os
7+
from typing import Any
78

89
try:
910
from code_review_env import CodeReviewAction, CodeReviewEnv, ReviewFinding
@@ -15,15 +16,26 @@
1516
DEFAULT_BASE_URL = "https://rohan556-openenv-code-review-arena.hf.space"
1617

1718

19+
def emit_block(tag: str, **fields: Any) -> None:
    """Print a single structured stdout line for the hackathon validator.

    The line has the form ``[TAG] key=value key=value ...`` with the fields
    rendered in the order they were passed. Output is flushed immediately.

    Args:
        tag: Label placed inside square brackets at the start of the line.
        **fields: Key/value pairs; each value is rendered with its default
            string formatting.
    """

    pairs = []
    for key, value in fields.items():
        # A value whose text contains a line break would split the record
        # across several stdout lines; fold the breaks into spaces so each
        # call always yields exactly one line.
        rendered = " ".join(f"{value}".splitlines())
        pairs.append(f"{key}={rendered}")
    serialized = " ".join(pairs)

    # Without fields, emit just the tag rather than "[TAG] " with a dangling
    # trailing space.
    line = f"[{tag}] {serialized}" if serialized else f"[{tag}]"
    print(line, flush=True)
24+
25+
1826
async def main() -> None:
1927
base_url = os.getenv("CODE_REVIEW_ENV_URL", DEFAULT_BASE_URL)
2028

2129
async with CodeReviewEnv(base_url=base_url) as env:
2230
result = await env.reset(task_id="sql_injection_report_filters")
23-
print(f"task={result.observation.task_id}")
24-
print(f"pr={result.observation.pr_title}")
31+
emit_block(
32+
"START",
33+
task=result.observation.task_id,
34+
difficulty=result.observation.difficulty,
35+
repo=result.observation.repo_name,
36+
)
2537

26-
await env.step(
38+
inspection = await env.step(
2739
CodeReviewAction(
2840
action_type="inspect_file",
2941
file_path="analytics/reporting.py",
@@ -32,6 +44,14 @@ async def main() -> None:
3244
end_line=80,
3345
)
3446
)
47+
emit_block(
48+
"STEP",
49+
step=1,
50+
action="inspect_file",
51+
reward=inspection.reward,
52+
done=inspection.done,
53+
phase=inspection.observation.phase,
54+
)
3555

3656
graded = await env.step(
3757
CodeReviewAction(
@@ -54,14 +74,27 @@ async def main() -> None:
5474
],
5575
)
5676
)
77+
emit_block(
78+
"STEP",
79+
step=2,
80+
action="submit_review",
81+
reward=graded.reward,
82+
done=graded.done,
83+
phase=graded.observation.phase,
84+
)
5785

5886
scorecard = graded.observation.scorecard
59-
print(f"done={graded.done}")
6087
if scorecard is None:
6188
raise RuntimeError("Expected a scorecard after submit_review")
62-
print(f"score={scorecard.overall_score}")
63-
print(f"grade_band={scorecard.grade_band}")
64-
print(scorecard.summary)
89+
emit_block(
90+
"END",
91+
task=result.observation.task_id,
92+
score=scorecard.overall_score,
93+
steps=2,
94+
grade=scorecard.grade_band,
95+
matched=scorecard.matched_findings,
96+
expected=scorecard.expected_findings,
97+
)
6598

6699

67100
if __name__ == "__main__":

tests/test_environment.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from fastapi.testclient import TestClient
22

3+
from inference import emit_block
34
from code_review_env.models import CodeReviewAction, ReviewFinding
45
from code_review_env.server.app import app
56
from code_review_env.server.code_review_environment import CodeReviewEnvironment
@@ -70,3 +71,16 @@ def test_fastapi_endpoints_expose_openenv_contract():
7071
assert tasks.status_code == 200
7172
task_items = tasks.json()
7273
assert any(item["id"] == "sql_injection_report_filters" for item in task_items)
74+
75+
76+
def test_inference_emits_structured_stdout(capsys):
    """Check that each ``emit_block`` call writes one ``[TAG] key=value`` line."""
    # Each entry pairs the tag with the keyword fields passed to emit_block;
    # dict ordering keeps the fields in the order they appear on the line.
    blocks = [
        ("START", {"task": "sql_injection_report_filters", "difficulty": "medium"}),
        ("STEP", {"step": 1, "reward": -0.005, "done": False}),
        ("END", {"task": "sql_injection_report_filters", "score": 0.9355, "steps": 2}),
    ]
    for tag, fields in blocks:
        emit_block(tag, **fields)

    expected_lines = [
        "[START] task=sql_injection_report_filters difficulty=medium",
        "[STEP] step=1 reward=-0.005 done=False",
        "[END] task=sql_injection_report_filters score=0.9355 steps=2",
    ]
    captured_stdout = capsys.readouterr().out
    assert captured_stdout.strip().splitlines() == expected_lines

0 commit comments

Comments
 (0)