66LumiOpen/MATH-500_mt
77
88abstract:
9- Multilingual translations of the MATH-500 benchmark, a subset of 500 problems
10- from the MATH benchmark that OpenAI created in their Let's Verify Step by Step
11- paper. Currently contains Finnish translations produced with Claude Opus 4.5.
9+ Multilingual MATH-500 benchmark: a subset of 500 problems from the MATH
10+ benchmark that OpenAI created for their "Let's Verify Step by Step" paper.
11+ Contains the original English problems and Finnish translations produced
12+ with Claude Opus 4.5. Supports a configurable scorer model via environment variables.
1213
1314languages:
14- finnish
15+ english, finnish
1516
1617tags:
1718math, reasoning, multilingual
3334
3435
3536MATH_QUERY_TEMPLATES = {
37+ "en" : """
38+ Solve the following problem. The final line of your response MUST be of the following format:
39+ "ANSWER: $ANSWER" (without quotes) where $ANSWER is the final answer. Think step by step before answering.
40+
41+ {prompt}
42+ """ .strip (),
3643 "fi" : """
3744Ratkaise seuraava tehtävä. Vastauksesi viimeisen rivin TÄYTYY olla seuraavassa muodossa:
3845"ANSWER: $ANSWER" (ilman lainausmerkkejä), jossa $ANSWER on lopullinen vastaus. Ajattele vaiheittain ennen vastaamista.
@@ -84,6 +91,25 @@ def record_to_sample(record):
8491 return Sample (input = query , target = target )
8592
8693
# English MATH-500 task ("mmath500:en"): reads the "test" split of the
# "default" subset of HuggingFaceH4/MATH-500 and prompts with the "en" entry
# of MATH_QUERY_TEMPLATES.
94+ mmath500_en = LightevalTaskConfig (
95+ name = "mmath500:en" ,
96+ prompt_function = _mmath500_prompt_fn ("en" ),
97+ hf_repo = "HuggingFaceH4/MATH-500" ,
98+ hf_subset = "default" ,
99+ hf_avail_splits = ["test" ],
100+ evaluation_splits = ["test" ],
# No few-shot split or selection strategy is configured for this task.
101+ few_shots_split = None ,
102+ few_shots_select = None ,
# NOTE(review): presumably a generation budget in tokens — confirm against lighteval.
103+ generation_size = 32768 ,
104+ metrics = [
# pass@k configured with k=1 and n=1.
105+ Metrics .pass_at_k_math (sample_params = {"k" : 1 , "n" : 1 }),
106+ ],
107+ version = 1 ,
108+ sample_fields = record_to_sample ,
# Solver chain: apply the English query template, then generate with cache=True.
109+ solver = [prompt_template (MATH_QUERY_TEMPLATES ["en" ]), generate (cache = True )],
# The grading model is whatever _get_scorer_model() returns when this statement runs.
110+ scorer = model_graded_fact (model = _get_scorer_model ()),
111+ )
112+
87113mmath500_fi = LightevalTaskConfig (
88114 name = "mmath500:fi" ,
89115 prompt_function = _mmath500_prompt_fn ("fi" ),
@@ -104,5 +130,6 @@ def record_to_sample(record):
104130)
105131
# List collecting the task configurations defined in this file:
# the English task first, then the Finnish one.
106132TASKS_TABLE = [
133+ mmath500_en ,
107134 mmath500_fi ,
108135]
0 commit comments