Skip to content

Commit f9fb253

Browse files
committed
modify process_data to generate separate user/system parts in prompts
1 parent 086c50d commit f9fb253

1 file changed

Lines changed: 24 additions & 23 deletions

File tree

src/maxtext/trainers/post_train/rl/utils_rl.py

Lines changed: 24 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -527,27 +527,28 @@ def _to_str(val):
527527
answer = extract_hash_answer(answer)
528528

529529
return {
530-
# passed to model forward pass
531-
"prompts": model_tokenizer.apply_chat_template(
532-
[
533-
{
534-
"role": "user",
535-
"content": template_config["TEMPLATE"].format(
536-
system_prompt=template_config["SYSTEM_PROMPT"].format(
537-
reasoning_start_token=tmvp_config.reasoning_start_token,
538-
reasoning_end_token=tmvp_config.reasoning_end_token,
539-
solution_start_token=tmvp_config.solution_start_token,
540-
solution_end_token=tmvp_config.solution_end_token,
541-
),
542-
question=question,
543-
),
544-
},
545-
],
546-
tokenize=False,
547-
add_generation_prompt=True,
548-
),
549-
# passed to reward functions
550-
"question": question,
551-
# passed to reward functions
552-
"answer": answer,
530+
# passed to model forward pass
531+
"prompts": model_tokenizer.apply_chat_template(
532+
[
533+
{
534+
"role": "system",
535+
"content": template_config["SYSTEM_PROMPT"].format(
536+
reasoning_start_token=tmvp_config.reasoning_start_token,
537+
reasoning_end_token=tmvp_config.reasoning_end_token,
538+
solution_start_token=tmvp_config.solution_start_token,
539+
solution_end_token=tmvp_config.solution_end_token,
540+
),
541+
},
542+
{
543+
"role": "user",
544+
"content": question,
545+
}
546+
],
547+
tokenize=False,
548+
add_generation_prompt=True,
549+
),
550+
# passed to reward functions
551+
"question": question,
552+
# passed to reward functions
553+
"answer": answer,
553554
}

0 commit comments

Comments
 (0)