-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathTimer_Trans_DE.py
More file actions
101 lines (80 loc) · 2.84 KB
/
Timer_Trans_DE.py
File metadata and controls
101 lines (80 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import pandas as pd
import torch
import numpy as np
from transformers import pipeline
import time
from transformers import MarianTokenizer, MarianMTModel
from typing import List
## Load the attack CSV produced by the attack pipeline.
print("Enter attack csv file name")
path = input()
ds1 = pd.read_csv(path)

## Translation pipeline: English -> German (forward direction).
src = 'en'  # source language
trg = 'de'  # target language
mname = f'Helsinki-NLP/opus-mt-{src}-{trg}'
tok = MarianTokenizer.from_pretrained(mname)
model = MarianMTModel.from_pretrained(mname)
def en_de(txt, mt_model=model, mt_tok=tok):
    """Translate one English string to German and return the translation.

    The Marian model and tokenizer are bound as default arguments at
    definition time: later in this script the globals ``model`` and
    ``tokenizer`` are reassigned to the classifier, which previously
    broke this function when it was finally called inside the timing
    loop (the classifier has no seq2seq ``generate`` semantics).
    """
    # Tokenizer __call__ replaces the deprecated prepare_seq2seq_batch;
    # no target texts are needed for inference.
    batch = mt_tok([txt], return_tensors="pt")
    gen = mt_model.generate(**batch)
    words: List[str] = mt_tok.batch_decode(gen, skip_special_tokens=True)
    return words[0]
## Translation pipeline: German -> English (back direction).
model_name = f'Helsinki-NLP/opus-mt-{trg}-{src}'
model1 = MarianMTModel.from_pretrained(model_name)
tokenizer = MarianTokenizer.from_pretrained(model_name)
def de_en(txtx, mt_model=model1, mt_tok=tokenizer):
    """Translate one German string back to English.

    Returns a one-element list of strings (callers rely on the list
    wrapper).  The de->en tokenizer is bound as a default argument at
    definition time because the global ``tokenizer`` is reassigned to
    the classifier tokenizer later in this script, which previously
    made this function tokenize with the wrong vocabulary.
    """
    # Tokenizer __call__ replaces the deprecated prepare_seq2seq_batch.
    batch = mt_tok([txtx], return_tensors="pt")
    translated = mt_model.generate(**batch)
    return [mt_tok.decode(t, skip_special_tokens=True) for t in translated]
from transformers import AutoTokenizer, AutoModelForSequenceClassification
## Classifier used to score both the back-translated and the raw texts.
## NOTE(review): these assignments shadow the translation `tokenizer`
## and `model` globals defined above.
tok_name = input("Enter Tokenizer name (corr. to the classifier model) from Hugging Face")
tokenizer = AutoTokenizer.from_pretrained(tok_name)
model_name = input("Enter classifier model name from Hugging Face")
# BUG FIX: load the model the user named; the original passed the
# literal string "model_name" to from_pretrained.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
time_tran = []  # per-example wall time for the translation defense
time_cls = []   # per-example wall time for classification alone
list1 = ds1.perturbed_text
list2 = []      # en -> de -> en round-trips of the first 20 texts
# Round-trip the first 20 perturbed examples through German, timing
# each translation pair with wall-clock time.
for a, x in enumerate(list1[:20]):
    cnta = time.time()
    w1 = en_de(x)
    list2.append(de_en(w1))
    cntb = time.time()
    time_tran.append(cntb - cnta)
    print(f"Example number {a}")
# Collect the attack CSV's recorded outputs alongside the translations.
translated = list2  # back-translated texts (each a one-element list)
perturbed = list1   # raw perturbed texts from the CSV
initial_prob = list(ds1.original_output)
initial_score = list(ds1.original_score)
perturbed_prob = list(ds1.perturbed_output)
perturbed_score = list(ds1.perturbed_score)
final_prob = []   # predicted class per classified example
final_score = []  # softmax probability of that predicted class
# Classify the back-translated texts and add the classification time to
# each example's translation time, so time_tran holds the full cost of
# the "translate then classify" defense.
a = 0
for y in translated:
    # BUG FIX: the original called time.timeit(), which does not exist
    # in the time module (AttributeError); wall-clock time.time() is used.
    cnta = time.time()
    # NOTE(review): y is the one-element list returned by de_en, so
    # encode_plus treats it as pre-tokenized input — confirm intended.
    # truncation=True makes the max_length=512 cap actually apply.
    txf1 = tokenizer.encode_plus(y, return_tensors="pt", max_length=512, truncation=True)
    txf2 = model(**txf1)[0]
    results = torch.softmax(txf2, dim=1).tolist()[0]
    best = np.argmax(results)  # hoisted: computed once per example
    final_prob.append(best)
    final_score.append(results[best])
    cntb = time.time()
    # BUG FIX: the original accumulated into the undefined name
    # time_sum[a] (NameError); the running total belongs in time_tran.
    time_tran[a] = time_tran[a] + (cntb - cnta)
    print(f"Iteration number={a}")
    a += 1
# Classify the raw perturbed texts (no defense), timing classification
# alone for the baseline comparison.
a = 0
for y in perturbed[:20]:
    # BUG FIX: the original called time.timeit(), which does not exist
    # in the time module (AttributeError); wall-clock time.time() is used.
    cnta = time.time()
    # truncation=True makes the max_length=512 cap actually apply.
    txf1 = tokenizer.encode_plus(y, return_tensors="pt", max_length=512, truncation=True)
    txf2 = model(**txf1)[0]
    results = torch.softmax(txf2, dim=1).tolist()[0]
    best = np.argmax(results)  # hoisted: computed once per example
    final_prob.append(best)
    final_score.append(results[best])
    cntb = time.time()
    time_cls.append(cntb - cnta)
    print(f"Iteration number={a}")
    a += 1
# Report the mean per-example cost with and without the defense.
avg_plain = np.mean(time_cls)
avg_defended = np.mean(time_tran)
print(f"Average time w/o defense={avg_plain}")
print(f"Average time w defense_German={avg_defended}")