Skip to content

Commit 4f482f1

Browse files
committed
linting and formatting
Signed-off-by: Yash Mehan <yashmehan@gmail.com>
1 parent eb7b243 commit 4f482f1

5 files changed

Lines changed: 28 additions & 24 deletions

File tree

build/accelerate_launch.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ def main():
120120
logging.error(traceback.format_exc())
121121
# v5: torch.distributed raises ChildFailedError with per-rank exit codes
122122
# Check if the root cause was a user error
123-
if hasattr(e, 'failures'):
123+
if hasattr(e, "failures"):
124124
root_codes = [f.exitcode for f in e.failures.values()]
125125
if any(c == USER_ERROR_EXIT_CODE for c in root_codes):
126126
sys.exit(USER_ERROR_EXIT_CODE)

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,9 @@ classifiers=[
2828
dependencies = [
2929
"numpy>=1.26.4,<2.2.0",
3030
"accelerate>=1.9.0,<2.0.0",
31-
"transformers>=5.2.0,<=5.3.0",
32-
"torch>=2.10.0,<2.11.0",
33-
"torchvision<=0.25.1",
31+
"transformers>=5.2.0,<5.3.0",
32+
"torch>2.7.0,<=2.9.0",
33+
"torchvision<=0.24.0",
3434
"sentencepiece>=0.1.99,<0.3",
3535
"tokenizers<=0.23.0",
3636
"tqdm>=4.66.2,<5.0",
@@ -40,7 +40,7 @@ dependencies = [
4040
"simpleeval>=0.9.13,<2.0",
4141
"pillow>=12.1.1",
4242
"kernels>=0.12.1,<0.13.0",
43-
"huggingface_hub>=1.3.0,<1.4.0"
43+
"huggingface_hub>=1.3.0,<1.4.0",
4444
]
4545

4646
[project.optional-dependencies]

tests/utils/test_embedding_resize.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,9 @@
2020

2121
# Third Party
2222
from transformers import (
23-
AutoModelForCausalLM,
24-
AutoModelForImageTextToText, #AutoModelForVision2Seq was renamed to this in transformers v5
25-
AutoProcessor,
26-
AutoTokenizer,
23+
AutoModelForImageTextToText, # AutoModelForVision2Seq was renamed to this in transformers v5
2724
)
25+
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer
2826
import torch
2927

3028
# First Party
@@ -136,8 +134,9 @@ def test_special_tokens_before_and_after():
136134
special_tokens_dict = {"sep_token": "<SEP>", "pad_token": "<PAD>"}
137135
addn_spl_tokens_added = ["<NotSeenTokenA>", "<NotSeenTokenB>", "<NotSeenTokenC>"]
138136
# for transformers v5: merge existing extra_special_tokens with new ones to prevent replacement
139-
special_tokens_dict["additional_special_tokens"] = list(tokenizer.extra_special_tokens) + addn_spl_tokens_added
140-
137+
special_tokens_dict["additional_special_tokens"] = (
138+
list(tokenizer.extra_special_tokens) + addn_spl_tokens_added
139+
)
141140

142141
resize_result = tokenizer_and_embedding_resize(
143142
special_tokens_dict=special_tokens_dict,
@@ -210,7 +209,9 @@ def test_resize_with_multiple_of():
210209

211210

212211
def test_resize_llama_vision_model():
213-
model = AutoModelForImageTextToText.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME) # AutoModelForVision2Seq was renamed to AutoModelForImageTextToText in transformers v5
212+
model = AutoModelForImageTextToText.from_pretrained(
213+
TINY_LLAMA_VISION_MODEL_NAME
214+
) # AutoModelForVision2Seq was renamed to AutoModelForImageTextToText in transformers v5
214215
processor = AutoProcessor.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME)
215216
tokenizer = processor.tokenizer
216217

tuning/data/tokenizer_utils.py

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -45,31 +45,32 @@ def get_special_tokens_dict(
4545
special_tokens_dict = {}
4646
if not tokenizer_name_or_path:
4747
llama_classes = tuple(
48-
cls for cls in [
48+
cls
49+
for cls in [
4950
getattr(transformers, "LlamaTokenizer", None),
5051
getattr(transformers, "LlamaTokenizerFast", None),
51-
] if cls is not None
52+
]
53+
if cls is not None
5254
)
5355
is_llama_tokenizer = (
54-
(bool(llama_classes) and isinstance(tokenizer, llama_classes))
55-
or "llama" in (getattr(tokenizer, "name_or_path", "") or "").lower()
56-
)
56+
bool(llama_classes) and isinstance(tokenizer, llama_classes)
57+
) or "llama" in (getattr(tokenizer, "name_or_path", "") or "").lower()
5758

5859
gpt_neox_classes = tuple(
59-
cls for cls in [
60+
cls
61+
for cls in [
6062
getattr(transformers, "GPTNeoXTokenizerFast", None),
6163
getattr(transformers, "GPTNeoXTokenizer", None),
62-
] if cls is not None
64+
]
65+
if cls is not None
6366
)
6467

6568
if is_llama_tokenizer:
6669
special_tokens_dict["bos_token"] = "<s>"
6770
special_tokens_dict["eos_token"] = "</s>"
6871
special_tokens_dict["unk_token"] = "<unk>"
6972
special_tokens_dict["pad_token"] = "<pad>"
70-
elif isinstance(
71-
tokenizer, (transformers.GPT2Tokenizer, *gpt_neox_classes)
72-
):
73+
elif isinstance(tokenizer, (transformers.GPT2Tokenizer, *gpt_neox_classes)):
7374
special_tokens_dict["pad_token"] = "<pad>"
7475

7576
# Add special tokens only when a custom tokenizer is not passed
@@ -117,7 +118,7 @@ def tokenizer_and_embedding_resize(
117118
dict: Metadata on number of added tokens.
118119
"""
119120
num_new_tokens = tokenizer.add_special_tokens(
120-
special_tokens_dict=special_tokens_dict,
121+
special_tokens_dict=special_tokens_dict,
121122
# replace_additional_special_tokens=False
122123
)
123124
embedding_size = int(multiple_of * math.ceil(len(tokenizer) / multiple_of))

tuning/sft_trainer.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,11 @@
2828
from peft import LoraConfig
2929
from peft.utils.other import fsdp_auto_wrap_policy
3030
from torch.cuda import OutOfMemoryError
31+
from transformers import (
32+
AutoModelForImageTextToText, # AutoModelForVision2Seq was renamed in transformers v5
33+
)
3134
from transformers import (
3235
AutoModelForCausalLM,
33-
AutoModelForImageTextToText, # AutoModelForVision2Seq was renamed in transformers v5
3436
AutoProcessor,
3537
AutoTokenizer,
3638
TrainerCallback,

0 commit comments

Comments
 (0)