Fix 7 test failures: compatibility bugs and missing skip markers

yanjunqiAz · yanjunqiAz · commit 95cf68ea5859 · 2026-04-16T15:36:48.000-04:00
Code fixes:
- Replace removed transformers.optimization.AdamW with torch.optim.AdamW
  in trainer.py (removed in transformers>=4.x)
- Use AutoTokenizer/AutoModelForMaskedLM instead of BertTokenizer/BertForMaskedLM
  in ChineseWordSwapMaskedLM, since xlm-roberta-base requires its own tokenizer
- Fix hardcoded CUDA device in ChineseWordSwapMaskedLM to auto-detect device

Test fixes:
- Update stale expected output for list_augmentation_recipes to include
  BackTranscriptionAugmenter
- Skip tests requiring tensorflow_hub when it is not installed: pytest.skip
  in the attack tests (interactive_mode, adv_metrics) and
  pytest.mark.skipif on the train test
- Add pytest.mark.skipif for test_embedding_gensim when gensim is not installed
- Replace deprecated gensim Word2VecKeyedVectors API with KeyedVectors
diff --git a/tests/sample_outputs/list_augmentation_recipes.txt b/tests/sample_outputs/list_augmentation_recipes.txt
@@ -1,4 +1,5 @@
 [94mback_trans[0m (textattack.augmentation.BackTranslationAugmenter)
+[94mback_transcription[0m (textattack.augmentation.BackTranscriptionAugmenter)
 [94mcharswap[0m (textattack.augmentation.CharSwapAugmenter)
 [94mchecklist[0m (textattack.augmentation.CheckListAugmenter)
 [94mclare[0m (textattack.augmentation.CLAREAugmenter)
diff --git a/tests/test_command_line/test_attack.py b/tests/test_command_line/test_attack.py
@@ -1,9 +1,12 @@
+import importlib
 import pdb
 import re
 
 from helpers import run_command_and_get_result
 import pytest
 
+_tensorflow_hub_available = importlib.util.find_spec("tensorflow_hub") is not None
+
 DEBUG = False
 """Attack command-line tests in the format (name, args, sample_output_file)"""
 
@@ -171,6 +174,9 @@
 @pytest.mark.slow
 def test_command_line_attack(name, command, sample_output_file):
     """Runs attack tests and compares their outputs to a reference file."""
+    _tf_hub_tests = {"interactive_mode", "attack_from_transformers_adv_metrics", "run_attack_hotflip_lstm_mr_4_adv_metrics"}
+    if name in _tf_hub_tests and not _tensorflow_hub_available:
+        pytest.skip("tensorflow_hub is not installed")
     # read in file and create regex
     desired_output = open(sample_output_file, "r").read().strip()
     print("desired_output.encoded =>", desired_output.encode())
diff --git a/tests/test_command_line/test_train.py b/tests/test_command_line/test_train.py
@@ -1,9 +1,14 @@
+import importlib
 import os
 import re
 
 from helpers import run_command_and_get_result
+import pytest
 
+_tensorflow_hub_available = importlib.util.find_spec("tensorflow_hub") is not None
 
+
+@pytest.mark.skipif(not _tensorflow_hub_available, reason="tensorflow_hub is not installed")
 def test_train_tiny():
     command = "textattack train --model distilbert-base-uncased --attack textfooler --dataset rotten_tomatoes --model-max-length 64  --num-epochs 1 --num-clean-epochs 0 --num-train-adv-examples 2"
 
diff --git a/tests/test_word_embedding.py b/tests/test_word_embedding.py
@@ -1,10 +1,13 @@
+import importlib
 import os
 
 import numpy as np
 import pytest
 
 from textattack.shared import GensimWordEmbedding, WordEmbedding
 
+_gensim_available = importlib.util.find_spec("gensim") is not None
+
 
 def test_embedding_paragramcf():
     word_embedding = WordEmbedding.counterfitted_GLOVE_embedding()
@@ -13,6 +16,7 @@ def test_embedding_paragramcf():
     assert word_embedding[10**9] is None
 
 
+@pytest.mark.skipif(not _gensim_available, reason="gensim is not installed")
 def test_embedding_gensim():
     # download a trained word2vec model
     from textattack.shared.utils import LazyLoader
@@ -30,10 +34,9 @@ def test_embedding_gensim():
     )
     f.close()
 
-    gensim = LazyLoader("gensim", globals(), "gensim")
-    keyed_vectors = (
-        gensim.models.keyedvectors.Word2VecKeyedVectors.load_word2vec_format(path)
-    )
+    from gensim.models import KeyedVectors
+
+    keyed_vectors = KeyedVectors.load_word2vec_format(path)
     word_embedding = GensimWordEmbedding(keyed_vectors)
     assert pytest.approx(word_embedding[0][0]) == 1
     assert pytest.approx(word_embedding["bye-bye"][0]) == -1 / np.sqrt(2)
diff --git a/textattack/trainer.py b/textattack/trainer.py
@@ -361,7 +361,7 @@ def get_optimizer_and_scheduler(self, model, num_training_steps):
                 },
             ]
 
-            optimizer = transformers.optimization.AdamW(
+            optimizer = torch.optim.AdamW(
                 optimizer_grouped_parameters, lr=self.training_args.learning_rate
             )
             if isinstance(self.training_args.num_warmup_steps, float):
diff --git a/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_masked.py b/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_masked.py
@@ -13,11 +13,13 @@ class ChineseWordSwapMaskedLM(WordSwap):
     model."""
 
     def __init__(self, task="fill-mask", model="xlm-roberta-base", **kwargs):
-        from transformers import BertForMaskedLM, BertTokenizer
+        from transformers import AutoModelForMaskedLM, AutoTokenizer
 
-        self.tt = BertTokenizer.from_pretrained(model)
-        self.mm = BertForMaskedLM.from_pretrained(model)
-        self.mm.to("cuda")
+        self.tt = AutoTokenizer.from_pretrained(model)
+        self.mm = AutoModelForMaskedLM.from_pretrained(model)
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.mm.to(device)
+        self._device = device
         super().__init__(**kwargs)
 
     def get_replacement_words(self, current_text, indice_to_modify):
@@ -26,7 +28,7 @@ def get_replacement_words(self, current_text, indice_to_modify):
         )  # 修改前<mask>，xlmrberta的模型
         tokens = self.tt.tokenize(masked_text.text)
         input_ids = self.tt.convert_tokens_to_ids(tokens)
-        input_tensor = torch.tensor([input_ids]).to("cuda")
+        input_tensor = torch.tensor([input_ids]).to(self._device)
         with torch.no_grad():
             outputs = self.mm(input_tensor)
             predictions = outputs.logits

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`[94mback_trans[0m (textattack.augmentation.BackTranslationAugmenter)`
	`2`	`+[94mback_transcription[0m (textattack.augmentation.BackTranscriptionAugmenter)`
`2`	`3`	`[94mcharswap[0m (textattack.augmentation.CharSwapAugmenter)`
`3`	`4`	`[94mchecklist[0m (textattack.augmentation.CheckListAugmenter)`
`4`	`5`	`[94mclare[0m (textattack.augmentation.CLAREAugmenter)`
Original file line number	Diff line number	Diff line change
`@@ -361,7 +361,7 @@ def get_optimizer_and_scheduler(self, model, num_training_steps):`
`361`	`361`	`},`
`362`	`362`	`]`
`363`	`363`
`364`		`- optimizer = transformers.optimization.AdamW(`
	`364`	`+ optimizer = torch.optim.AdamW(`
`365`	`365`	`optimizer_grouped_parameters, lr=self.training_args.learning_rate`
`366`	`366`	`)`
`367`	`367`	`if isinstance(self.training_args.num_warmup_steps, float):`