We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 7949845 + e1fe3c7; commit c5393ad (full SHA: c5393ad)
1 file changed
model2vec/distill/distillation.py
@@ -65,7 +65,7 @@ def distill(
65
# Get the ids of the unused token.
66
wrong_token_ids = [vocab[token] for token in wrong_tokens]
67
# Remove the unused tokens from the tokenizer.
68
- new_tokenizer = remove_tokens(original_tokenizer, wrong_tokens)
+ new_tokenizer = remove_tokens(original_tokenizer.backend_tokenizer, wrong_tokens)
69
# Remove the embeddings of the unused tokens.
70
embeddings = np.delete(embeddings, wrong_token_ids, axis=0)
71
logger.info(f"Removed {len(wrong_tokens)} unused tokens from the tokenizer and embeddings.")
0 commit comments