Skip to content

Commit 883196d

Browse files
CISCArberSephirotheca
authored and committed
convert : fix (ignore for now) typings errors (ggml-org#22002)
1 parent fed737b commit 883196d

1 file changed

Lines changed: 5 additions & 5 deletions

File tree

convert_hf_to_gguf.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10912,14 +10912,14 @@ def set_vocab(self):
1091210912
vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
1091310913
self.hparams["vocab_size"] = vocab_size
1091410914

10915-
assert max(tokenizer.vocab.values()) < vocab_size
10915+
assert max(tokenizer.vocab.values()) < vocab_size # ty: ignore[unresolved-attribute]
1091610916

1091710917
tokpre = self.get_vocab_base_pre(tokenizer)
1091810918

10919-
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
10920-
added_vocab = tokenizer.get_added_vocab()
10919+
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()} # ty: ignore[unresolved-attribute]
10920+
added_vocab = tokenizer.get_added_vocab() # ty: ignore[unresolved-attribute]
1092110921

10922-
added_tokens_decoder = tokenizer.added_tokens_decoder
10922+
added_tokens_decoder = tokenizer.added_tokens_decoder # ty: ignore[unresolved-attribute]
1092310923

1092410924
for i in range(vocab_size):
1092510925
if i not in reverse_vocab:
@@ -10930,7 +10930,7 @@ def set_vocab(self):
1093010930
if token in added_vocab:
1093110931
if not added_tokens_decoder[i].normalized:
1093210932
previous_token = token
10933-
token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
10933+
token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) # ty: ignore[unresolved-attribute, invalid-assignment]
1093410934
if previous_token != token:
1093510935
logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
1093610936

0 commit comments

Comments
 (0)