-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathTimer_Trans_FR.py
More file actions
100 lines (79 loc) · 2.81 KB
/
Timer_Trans_FR.py
File metadata and controls
100 lines (79 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import pandas as pd
import torch
import numpy as np
from transformers import pipeline
import time
from transformers import MarianTokenizer, MarianMTModel
from typing import List
# --- Interactive setup: load the attack-results CSV and the en->fr model ---
print("Enter attack csv file name")
path=input()
# Expected to be a TextAttack-style results CSV with columns perturbed_text,
# original_output/original_score, perturbed_output/perturbed_score
# (all read further down in this script).
ds1=pd.read_csv(path)
src = 'en' # source language
trg = 'fr' # target language
mname = f'Helsinki-NLP/opus-mt-{src}-{trg}'
# NOTE(review): the global names `model` (and later `tokenizer`) are rebound
# by the classifier-loading section below before the translation helpers run;
# any function reading these globals late will see the wrong objects.
model = MarianMTModel.from_pretrained(mname)
tok = MarianTokenizer.from_pretrained(mname)
def en_fr(txt, _model=model, _tok=tok):
    """Translate an English string to French with the en->fr MarianMT pair.

    Parameters
    ----------
    txt : str
        English source sentence.
    _model, _tok :
        Private defaults bound at *definition* time to the en->fr model and
        tokenizer. BUG FIX: the original read the globals ``model``/``tok``
        at call time, but the classifier section later rebinds ``model`` to
        a sequence-classification model before this function is first
        called, so translation would have used the wrong model.

    Returns
    -------
    str
        The French translation of ``txt``.
    """
    # BUG FIX: prepare_seq2seq_batch is deprecated and removed in recent
    # transformers releases; calling the tokenizer directly is the
    # supported replacement and returns the same encoded batch.
    batch = _tok([txt], return_tensors="pt", padding=True, truncation=True)
    gen = _model.generate(**batch)
    words: List[str] = _tok.batch_decode(gen, skip_special_tokens=True)
    return words[0]
# Load the reverse (fr->en) MarianMT pair used for the round-trip defense.
model_name = f'Helsinki-NLP/opus-mt-{trg}-{src}'
# NOTE(review): `tokenizer` and `model_name` are rebound below by the
# classifier-loading section before fr_en() first runs at call time;
# confirm fr_en binds its model/tokenizer at definition time.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model1 = MarianMTModel.from_pretrained(model_name)
def fr_en(txtx, _model=model1, _tok=tokenizer):
    """Translate a French string back to English (round-trip second leg).

    Parameters
    ----------
    txtx : str
        French source sentence.
    _model, _tok :
        Private defaults bound at *definition* time to the fr->en model and
        tokenizer. BUG FIX: the original read the global ``tokenizer`` at
        call time, but the classifier section rebinds ``tokenizer`` before
        this function is first called, so decoding would have used the
        classifier's tokenizer.

    Returns
    -------
    str
        The English translation of ``txtx``.
    """
    # BUG FIX: prepare_seq2seq_batch is deprecated/removed; call the
    # tokenizer directly instead.
    batch = _tok([txtx], return_tensors="pt", padding=True, truncation=True)
    translated = _model.generate(**batch)
    tgt = [_tok.decode(t, skip_special_tokens=True) for t in translated]
    # BUG FIX: return the single decoded string rather than a one-element
    # list, matching en_fr() and what the classifier's encode_plus expects.
    return tgt[0]
# --- Classifier setup -------------------------------------------------------
from transformers import AutoTokenizer, AutoModelForSequenceClassification
tok_name = input("Enter Tokenizer name (corr. to the classifier model) from Hugging Face")
# NOTE(review): this rebinds the global names `tokenizer` and `model`, which
# the translation helpers above also reference — confirm those helpers bind
# their MarianMT objects at definition time, or rename these variables.
tokenizer = AutoTokenizer.from_pretrained(tok_name)
model_name = input("Enter classifier model name from Hugging Face")
# BUG FIX: the original passed the literal string "model_name" to
# from_pretrained instead of the user-supplied variable, so the entered
# model name was silently ignored (and loading would fail).
model = AutoModelForSequenceClassification.from_pretrained(model_name)
time_tran=[]  # per-example time with the translation defense (round-trip + classify)
time_cls=[]   # per-example time without defense (classification only)
list1=ds1.perturbed_text  # adversarially perturbed inputs from the attack CSV
list2=[]                  # round-trip (en->fr->en) translations
a=0
# Round-trip translate the first 20 perturbed examples, timing each one.
# Idiom fix: enumerate replaces the manual `a` counter.
for a, x in enumerate(list1[:20]):
    cnta = time.time()
    w1 = en_fr(x)            # en -> fr
    list2.append(fr_en(w1))  # fr -> en (round trip back to English)
    cntb = time.time()
    time_tran.append(cntb - cnta)
    print(f"Example number {a}")
# Gather the round-trip texts and the attack CSV's recorded labels/scores.
translated = list2
perturbed = list1
initial_prob = ds1.original_output.tolist()    # clean-input predicted labels
initial_score = ds1.original_score.tolist()    # clean-input confidences
perturbed_prob = ds1.perturbed_output.tolist() # attacked-input predicted labels
perturbed_score = ds1.perturbed_score.tolist() # attacked-input confidences
final_prob, final_score = [], []               # filled by the loops below
a = 0
# Classify each round-tripped (defended) example, adding its classification
# time onto the translation time recorded for it in the previous loop.
for a, y in enumerate(translated):
    # BUG FIX: time.timeit() does not exist (AttributeError); use time.time()
    # as the other timing loop in this script does.
    cnta = time.time()
    # BUG FIX: truncation=True is required for max_length=512 to actually
    # cap the encoded input; without it max_length is ignored.
    txf1 = tokenizer.encode_plus(y, return_tensors="pt", max_length=512, truncation=True)
    txf2 = model(**txf1)[0]
    results = torch.softmax(txf2, dim=1).tolist()[0]
    pred = int(np.argmax(results))  # hoist: argmax computed once, used twice
    final_prob.append(pred)
    final_score.append(results[pred])
    cntb = time.time()
    # BUG FIX: the original wrote `time_sum[a]` which is undefined
    # (NameError); accumulate into time_tran so each entry holds the total
    # defended latency (translation + classification).
    time_tran[a] = time_tran[a] + (cntb - cnta)
    print(f"Iteration number={a}")
# Classify the raw perturbed examples (no defense), timing classification only.
for a, y in enumerate(perturbed[:20]):
    # BUG FIX: time.timeit() does not exist; use time.time().
    cnta = time.time()
    # BUG FIX: truncation=True is required for max_length to take effect.
    txf1 = tokenizer.encode_plus(y, return_tensors="pt", max_length=512, truncation=True)
    txf2 = model(**txf1)[0]
    results = torch.softmax(txf2, dim=1).tolist()[0]
    pred = int(np.argmax(results))  # argmax computed once, used twice
    final_prob.append(pred)
    final_score.append(results[pred])
    cntb = time.time()
    time_cls.append(cntb - cnta)
    print(f"Iteration number={a}")
# Report the mean per-example latency with and without the translation defense.
avg_without_defense = np.mean(time_cls)
avg_with_defense = np.mean(time_tran)
print(f"Average time w/o defense={avg_without_defense}")
print(f"Average time w defense_French={avg_with_defense}")