Skip to content

Commit ac4bd26

Browse files
dushyantbehl authored and ChanderG committed
Apply unescape on all fields
Signed-off-by: Dushyant Behl <dushyantbehl@in.ibm.com>
1 parent bba695c commit ac4bd26

3 files changed

Lines changed: 12 additions & 2 deletions

File tree

tuning/config/configs.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,17 @@ class DataArguments:
131131
},
132132
)
133133

134+
def __post_init__(self):
135+
def unescape(s):
136+
if s is not None and isinstance(s, str):
137+
return s.encode("utf-8").decode("unicode_escape")
138+
return s
139+
140+
self.chat_template = unescape(self.chat_template)
141+
self.data_formatter_template = unescape(self.data_formatter_template)
142+
self.response_template = unescape(self.response_template)
143+
self.instruction_template = unescape(self.instruction_template)
144+
134145

135146
@dataclass
136147
class TrainingArguments(transformers.TrainingArguments):

tuning/data/data_preprocessing_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,6 @@ def get_data_collator(
6969
# otherwise template is not found. We will create issue to clean this out after we discuss
7070
# data formats and collators we will support.
7171
if response_template:
72-
response_template = response_template.encode('utf-8').decode('unicode_escape')
7372
response_template_ids = tokenizer.encode(
7473
response_template, add_special_tokens=False
7574
)[2:]

tuning/data/setup_dataprocessor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ def _get_dataset_formatting_handlers(data_args, packing, is_padding_free=False):
173173
arguments={"fn_kwargs": fn_kwargs, "batched": False},
174174
)
175175
else:
176-
fn_kwargs["template"] = data_args.data_formatter_template.encode('utf-8').decode('unicode_escape')
176+
fn_kwargs["template"] = data_args.data_formatter_template
177177
handler = DataHandlerConfig(
178178
"apply_custom_data_formatting_template",
179179
arguments={"fn_kwargs": fn_kwargs, "batched": False},

0 commit comments

Comments
 (0)