Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 37 additions & 24 deletions src/lighteval/tasks/multilingual/tasks/maime.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,38 +33,51 @@
from lighteval.tasks.requests import Doc


# Prompt template adapted from AIME task
# Note: Uses English instructions for consistency with AIME
MATH_PROMPT_TEMPLATE = dedent("""
Solve the following math problem efficiently and clearly.
The last line of your response should be of the following format:
'Therefore, the final answer is: $\\boxed{{ANSWER}}$. I hope it is correct'
(without quotes) where ANSWER is just the final number or expression
that solves the problem. Think step by step before answering.
# Per-language prompt templates
MATH_PROMPT_TEMPLATES = {
"fi": dedent("""
Ratkaise seuraava matemaattinen tehtävä tehokkaasti ja selkeästi.
Vastauksesi viimeisen rivin tulee olla seuraavassa muodossa:
'Näin ollen lopullinen vastaus on: $\\boxed{{ANSWER}}$. Toivottavasti se on oikein'
(ilman lainausmerkkejä), jossa ANSWER on pelkästään lopullinen luku tai lauseke,
joka ratkaisee tehtävän. Ajattele vaiheittain ennen vastaamista.

{prompt}
""")
"""),
"da": dedent("""
Løs følgende matematiske problem korrekt og effektivt.
Den sidste linje i dit svar skal være i følgende format:
'Derfor er det endelige svar: $\\boxed{{ANSWER}}$. Jeg håber, det er korrekt'
(uden anførselstegn), hvor ANSWER kun er den endelige løsning. Tænk trin for trin, før du svarer.

{prompt}
"""),
}

def record_to_sample(record):
    """Convert one dataset record into a ``Sample``.

    Args:
        record: Mapping that provides the ``"question"`` and ``"solution"``
            keys.

    Returns:
        A ``Sample`` whose ``input`` is the record's question and whose
        ``target`` is the record's solution.

    Raises:
        KeyError: If either key is missing from ``record``.
    """
    question_text = record["question"]
    reference_solution = record["solution"]
    return Sample(input=question_text, target=reference_solution)


def maime_prompt(line, task_name: str = None):
    """Build the ``Doc`` for one problem using the shared prompt template.

    Args:
        line: Mapping that provides the ``"question"`` and ``"solution"``
            keys.
        task_name: Task name forwarded unchanged to the ``Doc``.

    Returns:
        A ``Doc`` whose query is ``MATH_PROMPT_TEMPLATE`` formatted with the
        question, and whose only choice (gold index 0) is the solution.
    """
    rendered_query = MATH_PROMPT_TEMPLATE.format(prompt=line["question"])
    gold_choices = [line["solution"]]
    return Doc(
        task_name=task_name,
        query=rendered_query,
        choices=gold_choices,
        gold_index=0,
    )
def _maime_prompt_fn(lang: str):
    """Create the prompt function bound to one language's template.

    Args:
        lang: Key into ``MATH_PROMPT_TEMPLATES``.

    Returns:
        A ``maime_prompt(line, task_name=None)`` callable. It formats the
        selected template with ``line["question"]`` and returns a ``Doc``
        whose only choice (gold index 0) is ``line["solution"]``.

    Raises:
        KeyError: If ``lang`` has no entry in ``MATH_PROMPT_TEMPLATES``.
    """
    # Look the template up once, here, so an unknown language fails when the
    # factory is called rather than on the first formatted sample.
    localized_template = MATH_PROMPT_TEMPLATES[lang]

    def maime_prompt(line, task_name: str = None):
        rendered_query = localized_template.format(prompt=line["question"])
        gold_choices = [line["solution"]]
        return Doc(
            task_name=task_name,
            query=rendered_query,
            choices=gold_choices,
            gold_index=0,
        )

    return maime_prompt


# Danish tasks
maime25_da = LightevalTaskConfig(
name="maime25:da",
prompt_function=maime_prompt,
prompt_function=_maime_prompt_fn("da"),
sample_fields=record_to_sample,
solver=[prompt_template(MATH_PROMPT_TEMPLATE), generate(cache=True)],
solver=[prompt_template(MATH_PROMPT_TEMPLATES["da"]), generate(cache=True)],
scorer=math_scorer(),
hf_repo="LumiOpen/mAIME2025",
hf_subset="da_combined",
Expand All @@ -82,7 +95,7 @@ def maime_prompt(line, task_name: str = None):

maime25_da_avg = LightevalTaskConfig(
name="maime25_avg:da",
prompt_function=maime_prompt,
prompt_function=_maime_prompt_fn("da"),
sample_fields=record_to_sample,
hf_repo="LumiOpen/mAIME2025",
hf_subset="da_combined",
Expand All @@ -97,7 +110,7 @@ def maime_prompt(line, task_name: str = None):

maime25_da_gpassk = LightevalTaskConfig(
name="maime25_gpassk:da",
prompt_function=maime_prompt,
prompt_function=_maime_prompt_fn("da"),
sample_fields=record_to_sample,
hf_repo="LumiOpen/mAIME2025",
hf_subset="da_combined",
Expand All @@ -113,9 +126,9 @@ def maime_prompt(line, task_name: str = None):
# Finnish tasks
maime25_fi = LightevalTaskConfig(
name="maime25:fi",
prompt_function=maime_prompt,
prompt_function=_maime_prompt_fn("fi"),
sample_fields=record_to_sample,
solver=[prompt_template(MATH_PROMPT_TEMPLATE), generate(cache=True)],
solver=[prompt_template(MATH_PROMPT_TEMPLATES["fi"]), generate(cache=True)],
scorer=math_scorer(),
hf_repo="LumiOpen/mAIME2025",
hf_subset="fi_combined",
Expand All @@ -133,7 +146,7 @@ def maime_prompt(line, task_name: str = None):

maime25_fi_avg = LightevalTaskConfig(
name="maime25_avg:fi",
prompt_function=maime_prompt,
prompt_function=_maime_prompt_fn("fi"),
sample_fields=record_to_sample,
hf_repo="LumiOpen/mAIME2025",
hf_subset="fi_combined",
Expand All @@ -148,7 +161,7 @@ def maime_prompt(line, task_name: str = None):

maime25_fi_gpassk = LightevalTaskConfig(
name="maime25_gpassk:fi",
prompt_function=maime_prompt,
prompt_function=_maime_prompt_fn("fi"),
sample_fields=record_to_sample,
hf_repo="LumiOpen/mAIME2025",
hf_subset="fi_combined",
Expand Down
Loading