Skip to content

Commit 2e7d5f8

Browse files
feat: allow specifying the chat template as base64 to avoid weird escaping and templating issues
Signed-off-by: Harikrishnan Balagopal <harikrishmenon@gmail.com>
1 parent a84b716 commit 2e7d5f8

1 file changed

Lines changed: 13 additions & 0 deletions

File tree

tuning/data/data_config.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
# Standard
16+
from base64 import b64decode
1617
from dataclasses import dataclass
1718
from typing import Dict, List, Optional
1819
import logging
@@ -153,6 +154,18 @@ def _validate_dataprocessor_config(dataprocessor_config) -> DataPreProcessorConf
153154
chat_template = kwargs["chat_template"]
154155
assert isinstance(chat_template, str), "chat_template should be a string"
155156
c.chat_template = chat_template
157+
elif "chat_template_base64" in kwargs:
158+
chat_template_base64 = kwargs["chat_template_base64"]
159+
assert isinstance(
160+
chat_template_base64, str
161+
), "chat_template_base64 should be a string"
162+
logger.warning(
163+
"You are using the 'chat_template_base64' field. "
164+
+ "Please use the 'chat_template' field instead for better readability."
165+
)
166+
chat_template_bytes = b64decode(chat_template_base64)
167+
chat_template = chat_template_bytes.decode("utf-8")
168+
c.chat_template = chat_template
156169
return c
157170

158171

0 commit comments

Comments
 (0)