Skip to content

Commit 9a98ea1

Browse files
authored
Merge pull request #2 from Infinitode/deepdefend-update-0.1.5-4621323909674233512
DeepDefend 0.1.5 Update: New Techniques and Logic Fixes
2 parents ae957d6 + a0842e7 commit 9a98ea1

File tree

3 files changed

+575
-60
lines changed

3 files changed

+575
-60
lines changed

deepdefend/attacks.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
- `deepfool(model, x, y, num_steps=10)`: DeepFool attack.
1010
- `jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10)`: Jacobian-based Saliency Map Attack (JSMA).
1111
- `spsa(model, x, y, epsilon=0.01, num_steps=10)`: Simultaneous Perturbation Stochastic Approximation (SPSA) attack.
12+
- `mim(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10, decay_factor=1.0)`: Momentum Iterative Method (MIM) attack.
13+
- `ead(model, x, y, epsilon=0.01, beta=0.01, num_steps=10, alpha=0.01)`: Elastic Net Attack (EAD).
14+
- `word_swap(text, swap_dict=None)`: Simple word swap attack for text.
15+
- `char_swap(text, swap_prob=0.1)`: Simple character swap attack for text.
1216
"""
1317

1418
import numpy as np
@@ -27,6 +31,9 @@ def fgsm(model, x, y, epsilon=0.01):
2731
Returns:
2832
adversarial_example (numpy.ndarray): The perturbed input example.
2933
"""
34+
x = tf.cast(x, tf.float32)
35+
y = tf.cast(y, tf.float32)
36+
3037
# Determine the loss function based on the number of classes
3138
if y.shape[-1] == 1 or len(y.shape) == 1:
3239
loss_object = tf.keras.losses.BinaryCrossentropy()
@@ -45,6 +52,139 @@ def fgsm(model, x, y, epsilon=0.01):
4552
adversarial_example = x + perturbation
4653
return adversarial_example.numpy()
4754

55+
def mim(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10, decay_factor=1.0):
    """
    Momentum Iterative Method (MIM) attack.

    Repeatedly steps the input along the sign of a momentum-accumulated,
    L1-normalized loss gradient, keeping the result inside the valid
    pixel range [0, 1] and an L-infinity ball of radius epsilon around
    the original input.

    Parameters:
        model (tensorflow.keras.Model): The target model to attack.
        x (numpy.ndarray): The input example to attack.
        y (numpy.ndarray): The true labels of the input example.
        epsilon (float): The maximum magnitude of the perturbation (default: 0.01).
        alpha (float): The step size for each iteration (default: 0.01).
        num_steps (int): The number of MIM iterations (default: 10).
        decay_factor (float): The decay factor for momentum (default: 1.0).

    Returns:
        adversarial_example (numpy.ndarray): The perturbed input example.
    """
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)

    # A single-column or 1-D label tensor is treated as a binary task;
    # anything else as one-hot multi-class.
    if y.shape[-1] == 1 or len(y.shape) == 1:
        loss_object = tf.keras.losses.BinaryCrossentropy()
    else:
        loss_object = tf.keras.losses.CategoricalCrossentropy()

    adv = tf.identity(x)
    momentum = tf.zeros_like(x)

    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            tape.watch(adv)
            loss = loss_object(y, model(adv))

        grad = tape.gradient(loss, adv)
        # Normalize by total L1 mass; the small epsilon guards against a
        # zero gradient.
        grad = grad / (tf.reduce_sum(tf.abs(grad)) + 1e-8)

        momentum = decay_factor * momentum + grad

        # Signed momentum step, then project back into the valid range
        # and the epsilon-ball around the clean input.
        adv = tf.clip_by_value(adv + alpha * tf.sign(momentum), 0, 1)
        adv = tf.clip_by_value(adv, x - epsilon, x + epsilon)

    return adv.numpy()
100+
101+
def ead(model, x, y, epsilon=0.01, beta=0.01, num_steps=10, alpha=0.01):
    """
    Elastic Net Attack (EAD).

    Performs gradient-sign ascent on the loss, then applies the proximal
    operator of an L1 penalty (soft thresholding with weight beta) to the
    total perturbation, and finally projects the result into the valid
    pixel range [0, 1] and an L-infinity ball of radius epsilon.

    Parameters:
        model (tensorflow.keras.Model): The target model to attack.
        x (numpy.ndarray): The input example to attack.
        y (numpy.ndarray): The true labels of the input example.
        epsilon (float): The maximum magnitude of the perturbation (default: 0.01).
        beta (float): The L1 regularization parameter (default: 0.01).
        num_steps (int): The number of EAD iterations (default: 10).
        alpha (float): The step size for each iteration (default: 0.01).

    Returns:
        adversarial_example (numpy.ndarray): The perturbed input example.
    """
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    adv = tf.identity(x)

    # A single-column or 1-D label tensor is treated as a binary task;
    # anything else as one-hot multi-class.
    if y.shape[-1] == 1 or len(y.shape) == 1:
        loss_object = tf.keras.losses.BinaryCrossentropy()
    else:
        loss_object = tf.keras.losses.CategoricalCrossentropy()

    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            tape.watch(adv)
            loss = loss_object(y, model(adv))

        grad = tape.gradient(loss, adv)

        # Gradient-sign ascent step.
        stepped = adv + alpha * tf.sign(grad)

        # Soft-threshold the accumulated perturbation: shrink every
        # component of (stepped - x) toward zero by beta.
        delta = stepped - x
        adv = x + tf.sign(delta) * tf.maximum(tf.abs(delta) - beta, 0)

        # Keep the example in the valid range and the epsilon-ball.
        adv = tf.clip_by_value(adv, 0, 1)
        adv = tf.clip_by_value(adv, x - epsilon, x + epsilon)

    return adv.numpy()
146+
147+
def word_swap(text, swap_dict=None):
    """
    Simple word swap attack for text.

    Splits the text on whitespace and replaces every token that appears
    as a key in swap_dict with its mapped substitute; tokens not in the
    dictionary pass through unchanged.

    Parameters:
        text (str): The input text.
        swap_dict (dict): Dictionary of words and their substitutes.

    Returns:
        perturbed_text (str): The text with swapped words.
    """
    # Without a substitution table there is nothing to perturb.
    if swap_dict is None:
        return text

    return " ".join(swap_dict.get(word, word) for word in text.split())
167+
168+
def char_swap(text, swap_prob=0.1):
    """
    Simple character swap attack for text.

    With probability swap_prob per word (words of length 1 are left
    alone), transposes one randomly chosen pair of adjacent characters
    inside that word. Uses numpy's global RNG, so results are
    reproducible under np.random.seed.

    Parameters:
        text (str): The input text.
        swap_prob (float): The probability of swapping a character in a word (default: 0.1).

    Returns:
        perturbed_text (str): The text with swapped characters.
    """
    perturbed = []
    for word in text.split():
        # Short-circuit keeps the RNG untouched for 1-character words.
        if len(word) > 1 and np.random.rand() < swap_prob:
            chars = list(word)
            # randint's upper bound is exclusive, so pos+1 stays in range.
            pos = np.random.randint(0, len(chars) - 1)
            chars[pos], chars[pos + 1] = chars[pos + 1], chars[pos]
            word = "".join(chars)
        perturbed.append(word)
    return " ".join(perturbed)
187+
48188
def pgd(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10):
49189
"""
50190
Projected Gradient Descent (PGD) attack.
@@ -60,6 +200,8 @@ def pgd(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10):
60200
Returns:
61201
adversarial_example (numpy.ndarray): The perturbed input example.
62202
"""
203+
x = tf.cast(x, tf.float32)
204+
y = tf.cast(y, tf.float32)
63205
adversarial_example = tf.identity(x)
64206

65207
for _ in range(num_steps):
@@ -90,6 +232,8 @@ def bim(model, x, y, epsilon=0.01, alpha=0.01, num_steps=10):
90232
Returns:
91233
adversarial_example (numpy.ndarray): The perturbed input example.
92234
"""
235+
x = tf.cast(x, tf.float32)
236+
y = tf.cast(y, tf.float32)
93237
adversarial_example = tf.identity(x)
94238

95239
for _ in range(num_steps):
@@ -122,6 +266,9 @@ def cw(model, x, y, epsilon=0.01, c=1, kappa=0, num_steps=10, alpha=0.01):
122266
Returns:
123267
adversarial_example (numpy.ndarray): The perturbed input example.
124268
"""
269+
x = tf.cast(x, tf.float32)
270+
y = tf.cast(y, tf.float32)
271+
125272
# Define the loss function
126273
def loss_function(x, y, model, c, kappa):
127274
prediction = model(x)
@@ -157,6 +304,8 @@ def deepfool(model, x, y, num_steps=10):
157304
Returns:
158305
adversarial_example (numpy.ndarray): The perturbed input example.
159306
"""
307+
x = tf.cast(x, tf.float32)
308+
y = tf.cast(y, tf.float32)
160309
# Initialize the adversarial example
161310
adversarial_example = tf.identity(x)
162311

@@ -188,6 +337,8 @@ def jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10):
188337
Returns:
189338
adversarial_example (numpy.ndarray): The perturbed input example.
190339
"""
340+
x = tf.cast(x, tf.float32)
341+
y = tf.cast(y, tf.float32)
191342
# Initialize the adversarial example
192343
adversarial_example = tf.identity(x)
193344

0 commit comments

Comments
 (0)