diff --git a/packages/tasks/src/eval.ts b/packages/tasks/src/eval.ts index f72349cf8c..2860356a4d 100644 --- a/packages/tasks/src/eval.ts +++ b/packages/tasks/src/eval.ts @@ -101,6 +101,12 @@ export const EVALUATION_FRAMEWORKS = { "CLAW-Eval is an evaluation framework for assessing LLMs as autonomous agents across 300 human-verified tasks covering communication, finance, and productivity domains.", url: "https://github.com/claw-eval/claw-eval", }, + researchclawbench: { + name: "researchclawbench", + description: + "ResearchClawBench is a benchmark for evaluating AI agents on end-to-end scientific research tasks, from reading data and related work to producing code, figures, and publication-style reports.", + url: "https://github.com/InternScience/ResearchClawBench", + }, pbench: { name: "pbench", description: