Skip to content

Commit c5393ad

Browse files
authored
Merge pull request #40 from MinishLab/fix_tokenizer_bug
Fix bug where we accidentally passed a PreTrainedTokenizer to remove_tokens instead of its backend tokenizer
2 parents 7949845 + e1fe3c7 commit c5393ad

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

model2vec/distill/distillation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def distill(
65 65   # Get the ids of the unused token.
66 66   wrong_token_ids = [vocab[token] for token in wrong_tokens]
67 67   # Remove the unused tokens from the tokenizer.
68    - new_tokenizer = remove_tokens(original_tokenizer, wrong_tokens)
   68 + new_tokenizer = remove_tokens(original_tokenizer.backend_tokenizer, wrong_tokens)
69 69   # Remove the embeddings of the unused tokens.
70 70   embeddings = np.delete(embeddings, wrong_token_ids, axis=0)
71 71   logger.info(f"Removed {len(wrong_tokens)} unused tokens from the tokenizer and embeddings.")

0 commit comments

Comments
 (0)