Skip to content

Commit 4311b36

Browse files
author
Daniel Zautner
committed
Translate maime prompt templates to target languages (fi, da)
Previously the instruction prompts were in English for every language variant. Finnish tasks now get a Finnish prompt and Danish tasks get a Danish prompt. The `$\boxed{ANSWER}$` answer format and the `ANSWER` placeholder are left untranslated for scorer compatibility.
1 parent 3a93167 commit 4311b36

1 file changed

Lines changed: 38 additions & 24 deletions

File tree

  • src/lighteval/tasks/multilingual/tasks

src/lighteval/tasks/multilingual/tasks/maime.py

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,39 +33,53 @@
3333
from lighteval.tasks.requests import Doc
3434

3535

36-
# Prompt template adapted from AIME task
37-
# Note: Uses English instructions for consistency with AIME
38-
MATH_PROMPT_TEMPLATE = dedent("""
39-
Solve the following math problem efficiently and clearly.
40-
The last line of your response should be of the following format:
41-
'Therefore, the final answer is: $\\boxed{{ANSWER}}$. I hope it is correct'
42-
(without quotes) where ANSWER is just the final number or expression
43-
that solves the problem. Think step by step before answering.
36+
# Per-language prompt templates
37+
MATH_PROMPT_TEMPLATES = {
38+
"fi": dedent("""
39+
Ratkaise seuraava matemaattinen tehtävä tehokkaasti ja selkeästi.
40+
Vastauksesi viimeisen rivin tulee olla seuraavassa muodossa:
41+
'Näin ollen lopullinen vastaus on: $\\boxed{{ANSWER}}$. Toivottavasti se on oikein'
42+
(ilman lainausmerkkejä), jossa ANSWER on pelkästään lopullinen luku tai lauseke,
43+
joka ratkaisee tehtävän. Ajattele vaihe vaiheelta ennen vastaamista.
4444
4545
{prompt}
46-
""")
46+
"""),
47+
"da": dedent("""
48+
Løs følgende matematiske problem effektivt og tydeligt.
49+
Den sidste linje i dit svar skal være i følgende format:
50+
'Derfor er det endelige svar: $\\boxed{{ANSWER}}$. Jeg håber, det er korrekt'
51+
(uden anførselstegn), hvor ANSWER kun er det endelige tal eller udtryk,
52+
der løser problemet. Tænk trin for trin, før du svarer.
4753
54+
{prompt}
55+
"""),
56+
}
4857

4958

5059
def record_to_sample(record):
    """Build a Sample from one dataset record.

    The record's "question" field becomes the sample input and its
    "solution" field becomes the sample target.
    """
    question = record["question"]
    solution = record["solution"]
    return Sample(input=question, target=solution)
5261

5362

54-
def maime_prompt(line, task_name: str = None):
55-
return Doc(
56-
task_name=task_name,
57-
query=MATH_PROMPT_TEMPLATE.format(prompt=line["question"]),
58-
choices=[line["solution"]],
59-
gold_index=0,
60-
)
63+
def _maime_prompt_fn(lang: str):
    """Create the prompt function for one mAIME language variant.

    Args:
        lang: Key into MATH_PROMPT_TEMPLATES selecting the instruction
            template to use.

    Returns:
        A ``maime_prompt(line, task_name=None)`` callable that builds a Doc
        for a single dataset row.

    Raises:
        KeyError: If ``lang`` has no entry in MATH_PROMPT_TEMPLATES.
    """
    # Resolve the template once, when the factory is called, so an unknown
    # language fails immediately instead of on the first formatted row.
    localized_template = MATH_PROMPT_TEMPLATES[lang]

    def maime_prompt(line, task_name: str = None):
        """Wrap the row's question in the localized instruction template."""
        query_text = localized_template.format(prompt=line["question"])
        gold_choices = [line["solution"]]
        return Doc(
            task_name=task_name,
            query=query_text,
            choices=gold_choices,
            gold_index=0,
        )

    return maime_prompt
6175

6276

6377
# Danish tasks
6478
maime25_da = LightevalTaskConfig(
6579
name="maime25:da",
66-
prompt_function=maime_prompt,
80+
prompt_function=_maime_prompt_fn("da"),
6781
sample_fields=record_to_sample,
68-
solver=[prompt_template(MATH_PROMPT_TEMPLATE), generate(cache=True)],
82+
solver=[prompt_template(MATH_PROMPT_TEMPLATES["da"]), generate(cache=True)],
6983
scorer=math_scorer(),
7084
hf_repo="LumiOpen/mAIME2025",
7185
hf_subset="da_combined",
@@ -83,7 +97,7 @@ def maime_prompt(line, task_name: str = None):
8397

8498
maime25_da_avg = LightevalTaskConfig(
8599
name="maime25_avg:da",
86-
prompt_function=maime_prompt,
100+
prompt_function=_maime_prompt_fn("da"),
87101
sample_fields=record_to_sample,
88102
hf_repo="LumiOpen/mAIME2025",
89103
hf_subset="da_combined",
@@ -98,7 +112,7 @@ def maime_prompt(line, task_name: str = None):
98112

99113
maime25_da_gpassk = LightevalTaskConfig(
100114
name="maime25_gpassk:da",
101-
prompt_function=maime_prompt,
115+
prompt_function=_maime_prompt_fn("da"),
102116
sample_fields=record_to_sample,
103117
hf_repo="LumiOpen/mAIME2025",
104118
hf_subset="da_combined",
@@ -114,9 +128,9 @@ def maime_prompt(line, task_name: str = None):
114128
# Finnish tasks
115129
maime25_fi = LightevalTaskConfig(
116130
name="maime25:fi",
117-
prompt_function=maime_prompt,
131+
prompt_function=_maime_prompt_fn("fi"),
118132
sample_fields=record_to_sample,
119-
solver=[prompt_template(MATH_PROMPT_TEMPLATE), generate(cache=True)],
133+
solver=[prompt_template(MATH_PROMPT_TEMPLATES["fi"]), generate(cache=True)],
120134
scorer=math_scorer(),
121135
hf_repo="LumiOpen/mAIME2025",
122136
hf_subset="fi_combined",
@@ -134,7 +148,7 @@ def maime_prompt(line, task_name: str = None):
134148

135149
maime25_fi_avg = LightevalTaskConfig(
136150
name="maime25_avg:fi",
137-
prompt_function=maime_prompt,
151+
prompt_function=_maime_prompt_fn("fi"),
138152
sample_fields=record_to_sample,
139153
hf_repo="LumiOpen/mAIME2025",
140154
hf_subset="fi_combined",
@@ -149,7 +163,7 @@ def maime_prompt(line, task_name: str = None):
149163

150164
maime25_fi_gpassk = LightevalTaskConfig(
151165
name="maime25_gpassk:fi",
152-
prompt_function=maime_prompt,
166+
prompt_function=_maime_prompt_fn("fi"),
153167
sample_fields=record_to_sample,
154168
hf_repo="LumiOpen/mAIME2025",
155169
hf_subset="fi_combined",

0 commit comments

Comments
 (0)