Skip to content

Commit 009680b

Browse files
author
Daniel Zautner
committed
Translate maime prompt templates to target languages (fi, da)
Previously, the instruction prompts were in English for every language variant. Finnish tasks now get Finnish prompts and Danish tasks get Danish prompts. The boxed answer-format tokens (the `$\boxed{ANSWER}$` placeholder) are kept as-is, untranslated, for scorer compatibility.
1 parent bf22a5c commit 009680b

1 file changed

Lines changed: 38 additions & 24 deletions

File tree

  • src/lighteval/tasks/multilingual/tasks

src/lighteval/tasks/multilingual/tasks/maime.py

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,38 +33,52 @@
3333
from lighteval.tasks.requests import Doc
3434

3535

36-
# Prompt template adapted from AIME task
37-
# Note: Uses English instructions for consistency with AIME
38-
MATH_PROMPT_TEMPLATE = dedent("""
39-
Solve the following math problem efficiently and clearly.
40-
The last line of your response should be of the following format:
41-
'Therefore, the final answer is: $\\boxed{{ANSWER}}$. I hope it is correct'
42-
(without quotes) where ANSWER is just the final number or expression
43-
that solves the problem. Think step by step before answering.
36+
# Per-language prompt templates
37+
MATH_PROMPT_TEMPLATES = {
38+
"fi": dedent("""
39+
Ratkaise seuraava matemaattinen tehtävä tehokkaasti ja selkeästi.
40+
Vastauksesi viimeisen rivin tulee olla seuraavassa muodossa:
41+
'Näin ollen lopullinen vastaus on: $\\boxed{{ANSWER}}$. Toivottavasti se on oikein'
42+
(ilman lainausmerkkejä), jossa ANSWER on pelkästään lopullinen luku tai lauseke,
43+
joka ratkaisee tehtävän. Ajattele vaihe vaiheelta ennen vastaamista.
4444
4545
{prompt}
46-
""")
46+
"""),
47+
"da": dedent("""
48+
Løs følgende matematiske problem effektivt og tydeligt.
49+
Den sidste linje i dit svar skal være i følgende format:
50+
'Derfor er det endelige svar: $\\boxed{{ANSWER}}$. Jeg håber, det er korrekt'
51+
(uden anførselstegn), hvor ANSWER kun er det endelige tal eller udtryk,
52+
der løser problemet. Tænk trin for trin, før du svarer.
4753
54+
{prompt}
55+
"""),
56+
}
4857

4958
def record_to_sample(record):
5059
return Sample(input=record["question"], target=record["solution"])
5160

5261

53-
def maime_prompt(line, task_name: str = None):
54-
return Doc(
55-
task_name=task_name,
56-
query=MATH_PROMPT_TEMPLATE.format(prompt=line["question"]),
57-
choices=[line["solution"]],
58-
gold_index=0,
59-
)
62+
def _maime_prompt_fn(lang: str):
63+
template = MATH_PROMPT_TEMPLATES[lang]
64+
65+
def maime_prompt(line, task_name: str = None):
66+
return Doc(
67+
task_name=task_name,
68+
query=template.format(prompt=line["question"]),
69+
choices=[line["solution"]],
70+
gold_index=0,
71+
)
72+
73+
return maime_prompt
6074

6175

6276
# Danish tasks
6377
maime25_da = LightevalTaskConfig(
6478
name="maime25:da",
65-
prompt_function=maime_prompt,
79+
prompt_function=_maime_prompt_fn("da"),
6680
sample_fields=record_to_sample,
67-
solver=[prompt_template(MATH_PROMPT_TEMPLATE), generate(cache=True)],
81+
solver=[prompt_template(MATH_PROMPT_TEMPLATES["da"]), generate(cache=True)],
6882
scorer=math_scorer(),
6983
hf_repo="LumiOpen/mAIME2025",
7084
hf_subset="da_combined",
@@ -82,7 +96,7 @@ def maime_prompt(line, task_name: str = None):
8296

8397
maime25_da_avg = LightevalTaskConfig(
8498
name="maime25_avg:da",
85-
prompt_function=maime_prompt,
99+
prompt_function=_maime_prompt_fn("da"),
86100
sample_fields=record_to_sample,
87101
hf_repo="LumiOpen/mAIME2025",
88102
hf_subset="da_combined",
@@ -97,7 +111,7 @@ def maime_prompt(line, task_name: str = None):
97111

98112
maime25_da_gpassk = LightevalTaskConfig(
99113
name="maime25_gpassk:da",
100-
prompt_function=maime_prompt,
114+
prompt_function=_maime_prompt_fn("da"),
101115
sample_fields=record_to_sample,
102116
hf_repo="LumiOpen/mAIME2025",
103117
hf_subset="da_combined",
@@ -113,9 +127,9 @@ def maime_prompt(line, task_name: str = None):
113127
# Finnish tasks
114128
maime25_fi = LightevalTaskConfig(
115129
name="maime25:fi",
116-
prompt_function=maime_prompt,
130+
prompt_function=_maime_prompt_fn("fi"),
117131
sample_fields=record_to_sample,
118-
solver=[prompt_template(MATH_PROMPT_TEMPLATE), generate(cache=True)],
132+
solver=[prompt_template(MATH_PROMPT_TEMPLATES["fi"]), generate(cache=True)],
119133
scorer=math_scorer(),
120134
hf_repo="LumiOpen/mAIME2025",
121135
hf_subset="fi_combined",
@@ -133,7 +147,7 @@ def maime_prompt(line, task_name: str = None):
133147

134148
maime25_fi_avg = LightevalTaskConfig(
135149
name="maime25_avg:fi",
136-
prompt_function=maime_prompt,
150+
prompt_function=_maime_prompt_fn("fi"),
137151
sample_fields=record_to_sample,
138152
hf_repo="LumiOpen/mAIME2025",
139153
hf_subset="fi_combined",
@@ -148,7 +162,7 @@ def maime_prompt(line, task_name: str = None):
148162

149163
maime25_fi_gpassk = LightevalTaskConfig(
150164
name="maime25_gpassk:fi",
151-
prompt_function=maime_prompt,
165+
prompt_function=_maime_prompt_fn("fi"),
152166
sample_fields=record_to_sample,
153167
hf_repo="LumiOpen/mAIME2025",
154168
hf_subset="fi_combined",

0 commit comments

Comments
 (0)