Add tokenizer_chat_template_path to valid config inputs (and add the chat_template default to base.yml)

dasoto · dasoto · commit ab3b93546aa6 · 2026-06-02T20:37:14.000Z
diff --git a/src/maxtext/configs/base.yml b/src/maxtext/configs/base.yml
@@ -645,6 +645,8 @@ tokenizer_path: ""
 tokenizer_type: "sentencepiece" # Currently supporting: "tiktoken", "sentencepiece", "huggingface"
 use_chat_template: false
 chat_template_path: "" # path to chat template json file
+chat_template: "" # Chat template to use with HF tokenizers. It should be a valid Jinja2-formatted template.
+tokenizer_chat_template_path: "" # Path to a chat template file to load into the tokenizer when the tokenizer does not already have a chat template.
 tokenize_train_data: true  # false if the dataset is pre-tokenized
 tokenize_eval_data: true  # false if the dataset is pre-tokenized
 add_bos: true
diff --git a/src/maxtext/configs/types.py b/src/maxtext/configs/types.py
@@ -1081,6 +1081,10 @@ class Tokenizer(BaseModel):
       "",
       description="Chat template to use with HF tokenizers. It should be a valid Jinja2-formatted template.",
   )
+  tokenizer_chat_template_path: str = Field(
+      "",
+      description="Path to a chat template file to be loaded into the tokenizer if missing.",
+  )
   tokenize_train_data: bool = Field(True, description="If False, assumes the training dataset is pre-tokenized.")
   tokenize_eval_data: bool = Field(True, description="If False, assumes the evaluation dataset is pre-tokenized.")
   add_bos: bool = Field(True, description="Whether to add a beginning-of-sentence token.")