@@ -527,27 +527,28 @@ def _to_str(val):
527527 answer = extract_hash_answer (answer )
528528
529529 return {
530- # passed to model forward pass
531- "prompts" : model_tokenizer .apply_chat_template (
532- [
533- {
534- "role" : "user" ,
535- "content" : template_config ["TEMPLATE" ].format (
536- system_prompt = template_config ["SYSTEM_PROMPT" ].format (
537- reasoning_start_token = tmvp_config .reasoning_start_token ,
538- reasoning_end_token = tmvp_config .reasoning_end_token ,
539- solution_start_token = tmvp_config .solution_start_token ,
540- solution_end_token = tmvp_config .solution_end_token ,
541- ),
542- question = question ,
543- ),
544- },
545- ],
546- tokenize = False ,
547- add_generation_prompt = True ,
548- ),
549- # passed to reward functions
550- "question" : question ,
551- # passed to reward functions
552- "answer" : answer ,
530+ # passed to model forward pass
531+ "prompts" : model_tokenizer .apply_chat_template (
532+ [
533+ {
534+ "role" : "system" ,
535+ "content" : template_config ["SYSTEM_PROMPT" ].format (
536+ reasoning_start_token = tmvp_config .reasoning_start_token ,
537+ reasoning_end_token = tmvp_config .reasoning_end_token ,
538+ solution_start_token = tmvp_config .solution_start_token ,
539+ solution_end_token = tmvp_config .solution_end_token ,
540+ ),
541+ },
542+ {
543+ "role" : "user" ,
544+ "content" : question ,
545+ }
546+ ],
547+ tokenize = False ,
548+ add_generation_prompt = True ,
549+ ),
550+ # passed to reward functions
551+ "question" : question ,
552+ # passed to reward functions
553+ "answer" : answer ,
553554 }
0 commit comments