@@ -156,7 +156,7 @@ async def _judge_with_retries() -> JudgeResponse:
156156 )
157157
158158async def compute_reward (
159- vllm_client : AsyncOpenAI ,
159+ gpt_oss_client : AsyncOpenAI ,
160160 gpt_oss_tokenizer : PreTrainedTokenizerBase ,
161161 behavior_id : str , # unique identifier for the problem
162162 policy_output : str ,
@@ -207,7 +207,7 @@ async def compute_response_score(response: str) -> JudgeResponse:
207207 async def sample_proposal () -> tuple [str , str ]:
208208 prompt = PROPOSAL_TEMPLATE_V1 .format (user_prompt = policy_output )
209209
210- response = await vllm_client .completions .create (
210+ response = await gpt_oss_client .completions .create (
211211 model = "openai/gpt-oss-20b" ,
212212 prompt = prompt ,
213213 temperature = 1.0 ,
@@ -234,7 +234,7 @@ async def sample_proposal() -> tuple[str, str]:
234234 before_sleep = before_sleep_log (logger , logging .WARNING ),
235235 )
236236 async def sample_thinking_tokens () -> tuple [str , str ]:
237- response = await vllm_client .completions .create (
237+ response = await gpt_oss_client .completions .create (
238238 model = "openai/gpt-oss-20b" ,
239239 prompt = gpt_oss_tokenizer .apply_chat_template (
240240 [
@@ -288,7 +288,7 @@ async def get_normal_logprobs(prefix: str | None, suffix: str | None):
288288 if prefix is not None and suffix is not None :
289289 # this is logp(proposal | prompt, CoT-sampled)
290290 return await get_token_logprobs (
291- vllm_client ,
291+ gpt_oss_client ,
292292 tokenizer = gpt_oss_tokenizer ,
293293 model = target_model ,
294294 input_token_ids = cast (
@@ -301,7 +301,7 @@ async def get_normal_logprobs(prefix: str | None, suffix: str | None):
301301 # this is logp(proposal | prompt, CoT-fallback).
302302 # it's probably worse than above but is useful as a backup.
303303 return await get_token_logprobs (
304- vllm_client ,
304+ gpt_oss_client ,
305305 tokenizer = gpt_oss_tokenizer ,
306306 model = target_model ,
307307 input_token_ids = cast (
@@ -323,7 +323,7 @@ async def get_steered_logprobs():
323323 # compute logq(proposal-response | prompt)
324324 # this is logq(proposal-response | prompt, proposal-CoT)
325325 return await get_token_logprobs (
326- vllm_client ,
326+ gpt_oss_client ,
327327 tokenizer = gpt_oss_tokenizer ,
328328 model = target_model ,
329329 input_token_ids = cast (
0 commit comments