Skip to content

Commit 87716f0

Browse files
feat: allow specifying the chat template as base64 to avoid weird escaping and templating issues
Signed-off-by: Harikrishnan Balagopal <harikrishmenon@gmail.com>
1 parent ebe35a3 commit 87716f0

1 file changed

Lines changed: 13 additions & 0 deletions

File tree

tuning/data/data_config.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
# Standard
16+
from base64 import b64decode
1617
from dataclasses import dataclass
1718
from typing import Dict, List, Optional
1819
import logging
@@ -152,6 +153,18 @@ def _validate_dataprocessor_config(dataprocessor_config) -> DataPreProcessorConf
152153
chat_template = kwargs["chat_template"]
153154
assert isinstance(chat_template, str), "chat_template should be a string"
154155
c.chat_template = chat_template
156+
elif "chat_template_base64" in kwargs:
157+
chat_template_base64 = kwargs["chat_template_base64"]
158+
assert isinstance(
159+
chat_template_base64, str
160+
), "chat_template_base64 should be a string"
161+
logger.warning(
162+
"You are using the 'chat_template_base64' field. "
163+
+ "Please use the 'chat_template' field instead for better readability."
164+
)
165+
chat_template_bytes = b64decode(chat_template_base64)
166+
chat_template = chat_template_bytes.decode("utf-8")
167+
c.chat_template = chat_template
155168
return c
156169

157170

0 commit comments

Comments
 (0)