File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -341,6 +341,7 @@ def train(
341341 max_seq_length ,
342342 data_args .tokens_field ,
343343 data_args .instruction_template ,
344+ attention_and_distributed_packing_config .padding_free ,
344345 )
345346
346347 if framework is not None and framework .requires_agumentation :
Original file line number Diff line number Diff line change @@ -167,6 +167,7 @@ def get_data_collator(
167167 max_seq_length : int ,
168168 tokens_field : str = True ,
169169 instruction_template : Optional [str ] = None ,
170+ padding_free : str = None ,
170171) -> Callable :
171172 """Create and return the appropriate collator type based on the configuration for packing,
172173 response_template, and dataset_text_field.
@@ -186,6 +187,8 @@ def get_data_collator(
186187 feature having tokens
187188 instruction_template: Optional[str]
188189 start of user answer.
190+ padding_free: str
191+ padding free method
189192
190193 Returns:
191194 Callable
@@ -240,6 +243,12 @@ def get_data_collator(
240243 return DataCollatorForSeq2Seq (
241244 tokenizer = tokenizer , padding = True , max_length = max_seq_length
242245 )
246+ if padding_free :
247+ # when packing is false, padding_free is set, and no response template is used,
248+ # then it's a pretraining scenario.
249+ return DataCollatorForSeq2Seq (
250+ tokenizer = tokenizer , padding = False , max_length = max_seq_length
251+ )
243252 raise ValueError (
244253 "Could not pick a data collator. Please refer to supported data formats"
245254 )
You can’t perform that action at this time.
0 commit comments