File tree (expand / collapse)
Original file line number | Diff line number | Diff line change
@@ -10912,14 +10912,14 @@ def set_vocab(self):
10912 10912 vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
10913 10913 self.hparams["vocab_size"] = vocab_size
10914 10914
10915- assert max(tokenizer.vocab.values()) < vocab_size
10915+ assert max(tokenizer.vocab.values()) < vocab_size # ty: ignore[unresolved-attribute]
10916 10916
10917 10917 tokpre = self.get_vocab_base_pre(tokenizer)
10918 10918
10919- reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
10920- added_vocab = tokenizer.get_added_vocab()
10919+ reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()} # ty: ignore[unresolved-attribute]
10920+ added_vocab = tokenizer.get_added_vocab() # ty: ignore[unresolved-attribute]
10921 10921
10922- added_tokens_decoder = tokenizer.added_tokens_decoder
10922+ added_tokens_decoder = tokenizer.added_tokens_decoder # ty: ignore[unresolved-attribute]
10923 10923
10924 10924 for i in range(vocab_size):
10925 10925 if i not in reverse_vocab:
@@ -10930,7 +10930,7 @@ def set_vocab(self):
10930 10930 if token in added_vocab:
10931 10931 if not added_tokens_decoder[i].normalized:
10932 10932 previous_token = token
10933- token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
10933+ token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) # ty: ignore[unresolved-attribute, invalid-assignment]
10934 10934 if previous_token != token:
10935 10935 logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
10936 10936
You can’t perform that action at this time.
0 commit comments