Commit 72dc048

Revert "feat: upgrade to transformers v5 (#659)" (#674)
This reverts commit 09810e3.
1 parent 84d9bc7 commit 72dc048

12 files changed

Lines changed: 45 additions & 317 deletions

.pylintrc

Lines changed: 1 addition & 4 deletions
@@ -447,10 +447,7 @@ disable=raw-checker-failed,
         duplicate-code,
         unbalanced-tuple-unpacking,
         unspecified-encoding,
-        too-many-lines,
-        no-name-in-module,
-        unexpected-keyword-arg,
-        unused-argument
+        too-many-lines

 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
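Note: the extra suppressions (no-name-in-module, unexpected-keyword-arg, unused-argument) were presumably added in the v5 upgrade to tolerate renamed modules, changed call signatures, and compatibility shims; the revert re-enables those checkers. A hypothetical snippet, not from this repo, showing the kind of code they flag again:

    # Hypothetical example: code the re-enabled unused-argument checker flags.
    def save_model(model, output_dir, legacy_flag):
        # pylint W0613 (unused-argument): `legacy_flag` is accepted but never used.
        model.save_pretrained(output_dir)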

build/accelerate_launch.py

Lines changed: 1 addition & 6 deletions
@@ -110,19 +110,14 @@ def main():
         # message to termination log.
         logging.error(traceback.format_exc())
         # The exit code that sft_trainer.py threw is captured in e.returncode
+
         return_code = e.returncode
         if return_code not in [INTERNAL_ERROR_EXIT_CODE, USER_ERROR_EXIT_CODE]:
             return_code = INTERNAL_ERROR_EXIT_CODE
         write_termination_log(f"Unhandled exception during training. {e}")
         sys.exit(return_code)
     except Exception as e: # pylint: disable=broad-except
         logging.error(traceback.format_exc())
-        # v5: torch.distributed raises ChildFailedError with per-rank exit codes
-        # Check if the root cause was a user error
-        if hasattr(e, "failures"):
-            root_codes = [f.exitcode for f in e.failures.values()]
-            if any(c == USER_ERROR_EXIT_CODE for c in root_codes):
-                sys.exit(USER_ERROR_EXIT_CODE)
         write_termination_log(f"Unhandled exception during training. {e}")
         sys.exit(INTERNAL_ERROR_EXIT_CODE)
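The deleted block was the v5-era handling for torchrun failures: when a worker process dies, torch's elastic launcher raises ChildFailedError (from torch.distributed.elastic.multiprocessing.errors), whose failures dict maps global ranks to ProcessFailure records carrying each child's exit code. A minimal standalone sketch of that classification logic, assuming this repo's exit-code constants (the values shown are illustrative):

    # Sketch of the reverted v5-era error classification. Constant names mirror
    # build/accelerate_launch.py; their values here are assumptions.
    USER_ERROR_EXIT_CODE = 1
    INTERNAL_ERROR_EXIT_CODE = 203

    def exit_code_for(e: Exception) -> int:
        # Duck-typed test: only torch's ChildFailedError carries a `failures`
        # dict of rank -> ProcessFailure, each with an `exitcode` attribute.
        if hasattr(e, "failures"):
            if any(f.exitcode == USER_ERROR_EXIT_CODE for f in e.failures.values()):
                return USER_ERROR_EXIT_CODE  # surface user errors as user errors
        return INTERNAL_ERROR_EXIT_CODE

After the revert, any such failure is reported as an internal error again, matching the launcher's 4.x behavior.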

pyproject.toml

Lines changed: 7 additions & 8 deletions
@@ -28,19 +28,18 @@ classifiers=[
 dependencies = [
     "numpy>=1.26.4,<2.2.0",
     "accelerate>=1.9.0,<2.0.0",
-    "transformers>=5.2.0,<5.3.0",
-    "torch>2.7.0,<=2.9.0",
-    "torchvision<=0.24.0",
+    "transformers>=4.55.0,<=4.55.4",
+    "torch>2.7.0,<2.9.0",
+    "torchvision<0.24",
     "sentencepiece>=0.1.99,<0.3",
-    "tokenizers<=0.23.0",
+    "tokenizers<=0.22",
     "tqdm>=4.66.2,<5.0",
-    "trl>=0.27.0,<0.29.0",
-    "peft>=0.18.1,<0.19.0",
+    "trl>=0.19.1,<0.20.0",
+    "peft>=0.18.0,< 0.19.0",
     "datasets>=4.0.0,<5.0.0",
     "simpleeval>=0.9.13,<2.0",
     "pillow>=12.1.1",
-    "kernels>=0.12.1,<0.13.0",
-    "huggingface_hub>=1.3.0,<1.4.0",
+    "kernels<=0.9.0",
 ]

 [project.optional-dependencies]
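This hunk moves the whole HF stack back in lockstep: transformers returns to the 4.55 line, with tokenizers, trl, and kernels pinned to versions that line supports, and the explicit huggingface_hub 1.x pin disappears (it becomes a transitive dependency again). A small sketch, standard library only, for checking an environment against the reverted pins:

    # Sanity-check sketch: do installed versions match the reverted pins?
    from importlib.metadata import version, PackageNotFoundError

    for pkg, expected_prefix in [
        ("transformers", "4.55."),  # reverted from the 5.2.x line
        ("trl", "0.19."),           # reverted from 0.27/0.28
        ("peft", "0.18."),
    ]:
        try:
            v = version(pkg)
            status = "ok" if v.startswith(expected_prefix) else f"unexpected ({v})"
        except PackageNotFoundError:
            status = "not installed"
        print(f"{pkg}: {status}")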

tests/build/test_launch_script.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@
     "warmup_ratio": 0.03,
     "lr_scheduler_type": "cosine",
     "logging_steps": 1,
-    "include_num_input_tokens_seen": True,
+    "include_tokens_per_second": True,
     "packing": False,
     "response_template": "\n### Label:",
     "dataset_text_field": "output",

tests/data/test_data_preprocessing.py

Lines changed: 2 additions & 1 deletion
@@ -22,6 +22,7 @@
 from datasets import Dataset, DatasetDict, IterableDataset
 from PIL import Image
 from transformers import AutoProcessor, AutoTokenizer, DataCollatorForSeq2Seq
+from trl import DataCollatorForCompletionOnlyLM
 import datasets
 import numpy as np
 import pyarrow
@@ -68,7 +69,7 @@
 # Local
 from tuning.config import configs
 from tuning.config.acceleration_configs import AttentionAndDistributedPackingConfig
-from tuning.data.collators import DataCollatorForCompletionOnlyLM, VisionDataCollator
+from tuning.data.collators import VisionDataCollator
 from tuning.data.data_config import (
     DataHandlerConfig,
     DataPreProcessorConfig,
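This hunk undoes the vendoring of DataCollatorForCompletionOnlyLM: the v5 branch carried its own copy in tuning.data.collators (newer trl releases removed the class), and with trl pinned back below 0.20 the test imports it from trl again. A minimal construction sketch with a placeholder tokenizer:

    from transformers import AutoTokenizer
    from trl import DataCollatorForCompletionOnlyLM

    # Placeholder tokenizer; the repo's tests use their own fixtures.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token

    # Masks every label before the response template with -100, so the LM loss
    # is computed only on the completion text after "\n### Label:".
    collator = DataCollatorForCompletionOnlyLM(
        response_template="\n### Label:",  # same template as the test fixtures
        tokenizer=tokenizer,
    )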

tests/test_sft_trainer.py

Lines changed: 2 additions & 2 deletions
@@ -124,7 +124,7 @@
     warmup_ratio=0.03,
     lr_scheduler_type="cosine",
     logging_steps=1,
-    include_num_input_tokens_seen=True,
+    include_tokens_per_second=True,
     packing=False,
     max_seq_length=4096,
     save_strategy="epoch",
@@ -140,7 +140,7 @@
     warmup_ratio=0.03,
     lr_scheduler_type="cosine",
     logging_steps=1,
-    include_num_input_tokens_seen=True,
+    include_tokens_per_second=True,
     packing=False,
     max_seq_length=4096,
     save_strategy="epoch",

tests/utils/test_embedding_resize.py

Lines changed: 12 additions & 11 deletions
@@ -20,9 +20,11 @@

 # Third Party
 from transformers import (
-    AutoModelForImageTextToText, # AutoModelForVision2Seq was renamed to this in transformers v5
+    AutoModelForCausalLM,
+    AutoModelForVision2Seq,
+    AutoProcessor,
+    AutoTokenizer,
 )
-from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer
 import torch

 # First Party
@@ -126,17 +128,16 @@ def test_special_tokens_before_and_after():
     model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

     input_tokenizer_len = len(tokenizer.get_vocab())
-    addn_spl_tokens_before = list(tokenizer.extra_special_tokens)
+    addn_spl_tokens_before = tokenizer.special_tokens_map.get(
+        "additional_special_tokens"
+    )
     assert (
         len(addn_spl_tokens_before) > 0
     ), "this test needs tokenizer special tokens to not be empty before testing"

     special_tokens_dict = {"sep_token": "<SEP>", "pad_token": "<PAD>"}
     addn_spl_tokens_added = ["<NotSeenTokenA>", "<NotSeenTokenB>", "<NotSeenTokenC>"]
-    # for transformers v5: merge existing extra_special_tokens with new ones to prevent replacement
-    special_tokens_dict["additional_special_tokens"] = (
-        list(tokenizer.extra_special_tokens) + addn_spl_tokens_added
-    )
+    special_tokens_dict["additional_special_tokens"] = addn_spl_tokens_added

     resize_result = tokenizer_and_embedding_resize(
         special_tokens_dict=special_tokens_dict,
@@ -149,7 +150,9 @@ def test_special_tokens_before_and_after():
     addn_spl_tokens_before.extend(addn_spl_tokens_added)
     expected_addn_special_tokens = addn_spl_tokens_before
     expected_embedding_size = input_tokenizer_len + len(addn_spl_tokens_added) + 2
-    addn_spl_tokens_after = list(tokenizer.extra_special_tokens)
+    addn_spl_tokens_after = tokenizer.special_tokens_map.get(
+        "additional_special_tokens"
+    )

     assert "<SEP>" in tokenizer.get_vocab()
     assert "<PAD>" in tokenizer.get_vocab()
@@ -209,9 +212,7 @@


 def test_resize_llama_vision_model():
-    model = AutoModelForImageTextToText.from_pretrained(
-        TINY_LLAMA_VISION_MODEL_NAME
-    ) # AutoModelForVision2Seq was renamed to AutoModelForImageTextToText in transformers v5
+    model = AutoModelForVision2Seq.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME)
     processor = AutoProcessor.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME)
     tokenizer = processor.tokenizer

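Two 4.x surfaces come back here: additional special tokens are read via tokenizer.special_tokens_map rather than v5's extra_special_tokens attribute, and the vision model loads through AutoModelForVision2Seq (renamed AutoModelForImageTextToText in v5). A sketch of the add-then-resize flow the test exercises, with a placeholder model; the repo's tokenizer_and_embedding_resize helper wraps this flow with its own merge and padding logic:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder model/tokenizer, not the repo's fixtures.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    before = tokenizer.special_tokens_map.get("additional_special_tokens", [])
    tokenizer.add_special_tokens(
        # Merging keeps earlier extras under the default replace behavior.
        {"additional_special_tokens": before + ["<NotSeenTokenA>"]}
    )
    # Grow the embedding matrix to cover the new vocab entries; padding the
    # size to a multiple of 8 keeps it friendly to tensor cores.
    model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
    assert "<NotSeenTokenA>" in tokenizer.get_vocab()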
