diff --git a/chapters/en/chapter3/2.mdx b/chapters/en/chapter3/2.mdx index 232441b84..48d389075 100644 --- a/chapters/en/chapter3/2.mdx +++ b/chapters/en/chapter3/2.mdx @@ -120,8 +120,8 @@ from transformers import AutoTokenizer checkpoint = "bert-base-uncased" tokenizer = AutoTokenizer.from_pretrained(checkpoint) -tokenized_sentences_1 = tokenizer(raw_datasets["train"]["sentence1"]) -tokenized_sentences_2 = tokenizer(raw_datasets["train"]["sentence2"]) +tokenized_sentences_1 = tokenizer(list(raw_datasets["train"]["sentence1"])) +tokenized_sentences_2 = tokenizer(list(raw_datasets["train"]["sentence2"])) ``` > [!TIP] @@ -180,8 +180,8 @@ Now that we have seen how our tokenizer can deal with one pair of sentences, we ```py tokenized_dataset = tokenizer( - raw_datasets["train"]["sentence1"], - raw_datasets["train"]["sentence2"], + list(raw_datasets["train"]["sentence1"]), + list(raw_datasets["train"]["sentence2"]), padding=True, truncation=True, )