Skip to content

Commit d8ae9e3

Browse files
author
root
committed
construct the framework of agent-v1
1 parent 41ac050 commit d8ae9e3

2 files changed

Lines changed: 32 additions & 5 deletions

File tree

agent_runner.py

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,33 @@ def run_agent_eval(
114114

115115
results: List[Tuple[int, Dict[str, Any], str]] = []
116116
tasks = list(range(len(dataset)))
117+
tasks_to_run = tasks
118+
if reuse:
119+
tasks_to_run = []
120+
for idx in tasks:
121+
if do_eval:
122+
eval_cached = store.load_eval(idx)
123+
if eval_cached is not None:
124+
cached_score = eval_cached.get("score", eval_cached)
125+
cached_final = eval_cached.get("final_answer", "")
126+
if not cached_final:
127+
traj = store.load_traj(idx)
128+
if traj is not None:
129+
cached_final = traj.get("final_answer", "")
130+
results.append((idx, cached_score, cached_final))
131+
continue
132+
tasks_to_run.append(idx)
133+
continue
134+
135+
if do_infer:
136+
traj = store.load_traj(idx)
137+
if traj and traj.get("success"):
138+
results.append((idx, {}, traj.get("final_answer", "")))
139+
else:
140+
tasks_to_run.append(idx)
141+
else:
142+
tasks_to_run.append(idx)
143+
117144
if nproc > 1:
118145
with ThreadPoolExecutor(max_workers=nproc) as executor:
119146
futures = [
@@ -128,15 +155,15 @@ def run_agent_eval(
128155
do_infer,
129156
do_eval,
130157
)
131-
for idx in tasks
158+
for idx in tasks_to_run
132159
]
133-
with tqdm(total=len(tasks), desc="Agent Eval", unit="sample") as pbar:
160+
with tqdm(total=len(tasks_to_run), desc="Agent Eval", unit="sample") as pbar:
134161
for fut in as_completed(futures):
135162
results.append(fut.result())
136163
pbar.update(1)
137164
else:
138-
with tqdm(total=len(tasks), desc="Agent Eval", unit="sample") as pbar:
139-
for idx in tasks:
165+
with tqdm(total=len(tasks_to_run), desc="Agent Eval", unit="sample") as pbar:
166+
for idx in tasks_to_run:
140167
results.append(
141168
_run_one_sample(
142169
idx, agent, dataset, store, judge_kwargs, reuse, do_infer, do_eval

scieval/agents/smolagents.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def __init__(
104104
):
105105
super().__init__(name=self.name, model_version=model_version)
106106
self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
107-
self.api_base = api_base or os.environ.get("OPENAI_BASE_URL", "")
107+
self.api_base = api_base or os.environ.get("OPENAI_API_BASE", "")
108108
self.model_version = model_version or os.environ.get("MODEL_ID", "o3")
109109

110110
def run(self, sample: EvalSample) -> EvalResult:

0 commit comments

Comments
 (0)