Skip to content

Commit c23cc9b

Browse files
authored
fix: set model max length to either passed in or tokenizer value (#17)
1 parent f28e0c0 commit c23cc9b

1 file changed

Lines changed: 3 additions & 1 deletion

File tree

tuning/sft_trainer.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,8 +93,10 @@ def train(
9393
"pad_token": "<pad>",
9494
})
9595

96-
model_max_length = tokenizer.model_max_length
96+
model_max_length = min(train_args.model_max_length, tokenizer.model_max_length)
9797
logger.info(f"Model max length {model_max_length}")
98+
if train_args.model_max_length > tokenizer.model_max_length:
99+
logger.warning(f"model_max_length {model_max_length} exceeds tokenizer.model_max_length {tokenizer.model_max_length}, using tokenizer.model_max_length {tokenizer.model_max_length}")
98100

99101
# TODO: we need to change this, perhaps follow what open instruct does?
100102
special_tokens_dict = dict()

0 commit comments

Comments
 (0)