Skip to content

Commit f6a5885

Browse files
committed
[Move DISCO queue to core]:
- Update DefaultMMLUBenchmark.run_agents to pass type checks.
1 parent bf4abbb commit f6a5885

1 file changed

Lines changed: 8 additions & 8 deletions

File tree

maseval/benchmark/mmlu/mmlu.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -516,17 +516,18 @@ def run_agents(
516516
otherwise delegates to ``HuggingFaceModelScorer.loglikelihood_choices()``
517517
which automatically picks single-token or multi-token scoring.
518518
"""
519-
prompt = environment.get_prompt()
520-
choices = environment.state["choices"]
519+
mmlu_env = cast(MMLUEnvironment, environment)
520+
prompt = mmlu_env.get_prompt()
521+
choices = mmlu_env.state["choices"]
521522
doc_id = task.metadata["doc_id"]
523+
agent = cast(_ScorerBackedAdapter, agents[0])
522524

523525
if hasattr(self, "_precomputed_logprobs") and doc_id in self._precomputed_logprobs:
524526
logprobs = self._precomputed_logprobs[doc_id]
525527
best_idx = logprobs.index(max(logprobs))
526528
answer = choices[best_idx]
527-
environment.state["logprobs"] = logprobs
528-
environment.state["predicted_idx"] = best_idx
529-
agent = agents[0]
529+
mmlu_env.state["logprobs"] = logprobs
530+
mmlu_env.state["predicted_idx"] = best_idx
530531
agent._messages.append({"role": "user", "content": prompt})
531532
agent._messages.append({"role": "assistant", "content": answer, "logprobs": logprobs})
532533
return answer
@@ -535,10 +536,9 @@ def run_agents(
535536

536537
best_idx = logprobs.index(max(logprobs))
537538
answer = choices[best_idx]
538-
environment.state["logprobs"] = logprobs
539-
environment.state["predicted_idx"] = best_idx
539+
mmlu_env.state["logprobs"] = logprobs
540+
mmlu_env.state["predicted_idx"] = best_idx
540541

541-
agent = agents[0]
542542
agent._messages.append({"role": "user", "content": prompt})
543543
agent._messages.append({"role": "assistant", "content": answer, "logprobs": logprobs})
544544
return answer

0 commit comments

Comments (0)