Skip to content

Commit b2fc665

Browse files
committed
Fix PretrainedFromHF tokenizer with T5 training
1 parent 09a35f5 commit b2fc665

1 file changed

Lines changed: 12 additions & 0 deletions

File tree

megatron/tokenizer/tokenizer.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -388,6 +388,18 @@ def eos(self):
388388
candidate = self.tokenizer.eos_token_id
389389
return self._check_token_candidate(candidate)
390390

391+
@property
def bos_token_id(self):
    """Id of the beginning of sentence token in the vocabulary.

    Reads ``bos_token_id`` from the wrapped ``self.tokenizer`` and returns
    whatever ``self._check_token_candidate`` yields for that value.
    """
    candidate = self.tokenizer.bos_token_id
    # NOTE(review): `_check_token_candidate` is defined outside this view;
    # presumably it rejects a missing (None) id -- confirm against its
    # definition.
    return self._check_token_candidate(candidate)
396+
397+
@property
def eos_token_id(self):
    """Id of the end of sentence token in the vocabulary.

    Reads ``eos_token_id`` from the wrapped ``self.tokenizer`` and returns
    whatever ``self._check_token_candidate`` yields for that value.
    """
    candidate = self.tokenizer.eos_token_id
    # NOTE(review): `_check_token_candidate` is defined outside this view;
    # presumably it rejects a missing (None) id -- confirm against its
    # definition.
    return self._check_token_candidate(candidate)
402+
391403
@property
392404
def additional_special_tokens_ids(self):
393405
""" All the additional special tokens you may want to use (list of strings)."""

0 commit comments

Comments
 (0)