- `deepfool(model, x, y, num_steps=10)`: DeepFool attack.
- `jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10)`: Jacobian-based Saliency Map Attack (JSMA).
- `spsa(model, x, y, epsilon=0.01, num_steps=10)`: Simultaneous Perturbation Stochastic Approximation (SPSA) attack.
- `mim(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10, decay_factor=1.0)`: Momentum Iterative Method (MIM) attack.
- `ead(model, x, y, epsilon=0.01, beta=0.01, num_steps=10, alpha=0.01)`: Elastic Net Attack (EAD).
- `word_swap(text, swap_dict=None)`: Simple word swap attack for text.
- `char_swap(text, swap_prob=0.1)`: Simple character swap attack for text.
"""
1317
1418import numpy as np
@@ -27,6 +31,9 @@ def fgsm(model, x, y, epsilon=0.01):
2731 Returns:
2832 adversarial_example (numpy.ndarray): The perturbed input example.
2933 """
34+ x = tf .cast (x , tf .float32 )
35+ y = tf .cast (y , tf .float32 )
36+
3037 # Determine the loss function based on the number of classes
3138 if y .shape [- 1 ] == 1 or len (y .shape ) == 1 :
3239 loss_object = tf .keras .losses .BinaryCrossentropy ()
@@ -45,6 +52,139 @@ def fgsm(model, x, y, epsilon=0.01):
4552 adversarial_example = x + perturbation
4653 return adversarial_example .numpy ()
4754
def mim(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10, decay_factor=1.0):
    """
    Momentum Iterative Method (MIM) attack.

    Accumulates an L1-normalized gradient into a momentum term and takes
    signed steps along it, keeping the result inside both the [0, 1]
    value range and the L-infinity epsilon ball around the input.

    Parameters:
        model (tensorflow.keras.Model): The target model to attack.
        x (numpy.ndarray): The input example to attack.
        y (numpy.ndarray): The true labels of the input example.
        epsilon (float): The maximum magnitude of the perturbation (default: 0.01).
        alpha (float): The step size for each iteration (default: 0.01).
        num_steps (int): The number of MIM iterations (default: 10).
        decay_factor (float): The decay factor for momentum (default: 1.0).

    Returns:
        adversarial_example (numpy.ndarray): The perturbed input example.
    """
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    adversarial_example = tf.identity(x)
    momentum = tf.zeros_like(x)

    # Determine the loss function based on the number of classes
    if y.shape[-1] == 1 or len(y.shape) == 1:
        loss_object = tf.keras.losses.BinaryCrossentropy()
    else:
        loss_object = tf.keras.losses.CategoricalCrossentropy()

    # Fix: normalize each example's gradient independently (reduce over the
    # non-batch axes only) so one example's large gradient cannot rescale
    # the momentum of the rest of the batch. For rank-1 input (no batch
    # axis) fall back to a global reduction, preserving the old behavior.
    reduce_axes = list(range(1, len(x.shape))) or None

    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            tape.watch(adversarial_example)
            prediction = model(adversarial_example)
            loss = loss_object(y, prediction)

        gradient = tape.gradient(loss, adversarial_example)
        # L1-normalize the gradient; the small constant guards against
        # division by zero on flat loss surfaces.
        grad_l1 = tf.reduce_sum(
            tf.abs(gradient),
            axis=reduce_axes,
            keepdims=reduce_axes is not None,
        )
        gradient = gradient / (grad_l1 + 1e-8)

        momentum = decay_factor * momentum + gradient

        perturbation = alpha * tf.sign(momentum)
        # Keep pixel values valid, then project back into the epsilon ball.
        adversarial_example = tf.clip_by_value(adversarial_example + perturbation, 0, 1)
        adversarial_example = tf.clip_by_value(adversarial_example, x - epsilon, x + epsilon)

    return adversarial_example.numpy()
100+
def ead(model, x, y, epsilon=0.01, beta=0.01, num_steps=10, alpha=0.01):
    """
    Elastic Net Attack (EAD) attack.

    Takes signed gradient steps and applies an L1 proximal operator
    (soft-thresholding) to the accumulated perturbation each iteration,
    then projects back into the valid value range and the epsilon ball.

    Parameters:
        model (tensorflow.keras.Model): The target model to attack.
        x (numpy.ndarray): The input example to attack.
        y (numpy.ndarray): The true labels of the input example.
        epsilon (float): The maximum magnitude of the perturbation (default: 0.01).
        beta (float): The L1 regularization parameter (default: 0.01).
        num_steps (int): The number of EAD iterations (default: 10).
        alpha (float): The step size for each iteration (default: 0.01).

    Returns:
        adversarial_example (numpy.ndarray): The perturbed input example.
    """
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    adv = tf.identity(x)

    # Binary targets (single logit or rank-1 labels) use binary
    # cross-entropy; anything else is treated as one-hot multiclass.
    is_binary = y.shape[-1] == 1 or len(y.shape) == 1
    loss_object = (
        tf.keras.losses.BinaryCrossentropy()
        if is_binary
        else tf.keras.losses.CategoricalCrossentropy()
    )

    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            tape.watch(adv)
            loss = loss_object(y, model(adv))

        grad = tape.gradient(loss, adv)

        # Signed gradient ascent step.
        candidate = adv + alpha * tf.sign(grad)

        # Soft-threshold the total perturbation (L1 proximal operator):
        # shrink every component toward zero by beta, zeroing small ones.
        delta = candidate - x
        adv = x + tf.sign(delta) * tf.maximum(tf.abs(delta) - beta, 0)

        # Keep values valid and stay within the epsilon ball around x.
        adv = tf.clip_by_value(adv, 0, 1)
        adv = tf.clip_by_value(adv, x - epsilon, x + epsilon)

    return adv.numpy()
146+
def word_swap(text, swap_dict=None):
    """
    Simple word swap attack for text.

    Splits the text on whitespace and replaces every word found in
    ``swap_dict`` with its substitute; all other words pass through.

    Parameters:
        text (str): The input text.
        swap_dict (dict): Dictionary of words and their substitutes.

    Returns:
        perturbed_text (str): The text with swapped words.
    """
    # No substitution table means there is nothing to do.
    if swap_dict is None:
        return text

    return " ".join(swap_dict.get(word, word) for word in text.split())
167+
def char_swap(text, swap_prob=0.1):
    """
    Simple character swap attack for text.

    For each whitespace-separated word longer than one character, with
    probability ``swap_prob`` swaps one randomly chosen pair of adjacent
    characters in that word.

    Parameters:
        text (str): The input text.
        swap_prob (float): The probability of swapping a character in a word (default: 0.1).

    Returns:
        perturbed_text (str): The text with swapped characters.
    """
    perturbed = []
    for word in text.split():
        # Single-character words draw no randomness and pass through
        # unchanged (the short-circuit keeps the RNG call order stable).
        if len(word) > 1 and np.random.rand() < swap_prob:
            chars = list(word)
            pos = np.random.randint(0, len(chars) - 1)
            chars[pos], chars[pos + 1] = chars[pos + 1], chars[pos]
            word = "".join(chars)
        perturbed.append(word)
    return " ".join(perturbed)
187+
48188def pgd (model , x , y , epsilon = 0.01 , alpha = 0.01 , num_steps = 10 ):
49189 """
50190 Projected Gradient Descent (PGD) attack.
@@ -60,6 +200,8 @@ def pgd(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10):
60200 Returns:
61201 adversarial_example (numpy.ndarray): The perturbed input example.
62202 """
203+ x = tf .cast (x , tf .float32 )
204+ y = tf .cast (y , tf .float32 )
63205 adversarial_example = tf .identity (x )
64206
65207 for _ in range (num_steps ):
@@ -90,6 +232,8 @@ def bim(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10):
90232 Returns:
91233 adversarial_example (numpy.ndarray): The perturbed input example.
92234 """
235+ x = tf .cast (x , tf .float32 )
236+ y = tf .cast (y , tf .float32 )
93237 adversarial_example = tf .identity (x )
94238
95239 for _ in range (num_steps ):
@@ -122,6 +266,9 @@ def cw(model, x, y, epsilon=0.01, c=1, kappa=0, num_steps=10, alpha=0.01):
122266 Returns:
123267 adversarial_example (numpy.ndarray): The perturbed input example.
124268 """
269+ x = tf .cast (x , tf .float32 )
270+ y = tf .cast (y , tf .float32 )
271+
125272 # Define the loss function
126273 def loss_function (x , y , model , c , kappa ):
127274 prediction = model (x )
@@ -157,6 +304,8 @@ def deepfool(model, x, y, num_steps=10):
157304 Returns:
158305 adversarial_example (numpy.ndarray): The perturbed input example.
159306 """
307+ x = tf .cast (x , tf .float32 )
308+ y = tf .cast (y , tf .float32 )
160309 # Initialize the adversarial example
161310 adversarial_example = tf .identity (x )
162311
@@ -188,6 +337,8 @@ def jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10):
188337 Returns:
189338 adversarial_example (numpy.ndarray): The perturbed input example.
190339 """
340+ x = tf .cast (x , tf .float32 )
341+ y = tf .cast (y , tf .float32 )
191342 # Initialize the adversarial example
192343 adversarial_example = tf .identity (x )
193344
0 commit comments