1+ import glob
2+ import os
13import sys
24
35import torch
46from sentence_transformers import SentenceTransformer
7+
8+ from data import test_texts , test_labels
59from vulnscan import SimpleNN
6- import glob
7- import os
10+
811# ---------------- INIT ----------------
912NAME = "Model_Sense.4n1"
1013ROUND = 5
@@ -36,16 +39,17 @@ def load_embeddings(folder_path, pattern):
3639
3740# Load all train/test/val embeddings
3841train_embeddings , train_labels = load_embeddings (cache_dir , "train_*.pt" )
39- test_embeddings , test_labels = load_embeddings (cache_dir , "test_*.pt" )
42+ test_embeddings , embed_test_labels = load_embeddings (cache_dir , "test_*.pt" )
4043val_embeddings , val_labels = load_embeddings (cache_dir , "validation_*.pt" )
4144
4245# Initialize model
4346input_dim = train_embeddings .shape [1 ]
4447model = SimpleNN (input_dim = input_dim ).to (device )
45- model . load_state_dict ( torch .load (
48+ state = torch .load (
4649 f"../cache/{ NAME } /round_{ ROUND } /{ NAME } _round{ ROUND } .pth" ,
4750 map_location = "cpu"
48- ))
51+ )
52+ model .load_state_dict (state ["model_state_dict" ])
4953model .eval ()
5054
5155# Load SentenceTransformer
@@ -60,119 +64,14 @@ def load_embeddings(folder_path, pattern):
6064print ("Sample predictions vs true labels (first 50%):" )
6165sample_size = len (preds ) // 2
6266for i in range (sample_size ):
63- print (f" Pred={ preds [i ].item ():.3f} | Label={ test_labels [i ].item ()} " )
67+ print (f" Pred={ preds [i ].item ():.3f} | Label={ embed_test_labels [i ].item ()} " )
6468# Calculate accuracy
65- pred_labels = (preds >= 0.5 ).long ().cpu ()
66- accuracy = (pred_labels == test_labels ).sum ().item () / len (test_labels )
67- print (f"\n Accuracy on stored embeddings: { accuracy * 100 :.2f} %" )
69+ pred_labels = (preds >= 0.5 ).long ().view (- 1 ).cpu ()
70+ true_labels = embed_test_labels .view (- 1 ).cpu ()
71+ accuracy = (pred_labels == true_labels ).float ().mean ().item ()
72+ print (f"Accuracy on stored embeddings: { accuracy * 100 :.2f} %" )
6873
6974# ---------------- 2. TEST ON NATURAL EXAMPLES ----------------
70- sensitive_texts = [
71- "My SSN is 123-45-6789" ,
72- "Here is my credit card number: 4111 1111 1111 1111" ,
73- "Password: hunter2" ,
74- "My email is johndoe@gmail.com" ,
75- "Bank account number: 987654321" ,
76- "Private key: -----BEGIN RSA PRIVATE KEY-----" ,
77- "The system IP is 192.168.1.1" ,
78- "Contact me at +1-202-555-0147" ,
79- "Visa card exp 12/25 CVV 123" ,
80- "Secret API key: sk_test_4eC39HqLyjWDarjtT1zdp7dc" ,
81- "My passport number is X1234567" ,
82- "Driver's license ID: D12345678" ,
83- "Social security info: 987-65-4321" ,
84- "Encrypted key: 6f1e9a2c3d4b5e6f7a8b" ,
85- "Database password: P@ssw0rd123" ,
86- "Credit card CVV: 321" ,
87- "Bank routing number: 021000021" ,
88- "Private SSH key: -----BEGIN OPENSSH PRIVATE KEY-----" ,
89- "My personal address: 123 Main St, Springfield" ,
90- "Medical record ID: MR123456" ,
91- "Tax ID: 123-45-6789" ,
92- "Company login password: Admin@2025" ,
93- "Phone PIN: 4321" ,
94- "Wi-Fi password: mysecretwifi" ,
95- "API token: abc123def456" ,
96- "OAuth secret: 987zyx654" ,
97- "PIN code: 2468" ,
98- "Student ID: S123456789" ,
99- "Mother's maiden name: Smith" ,
100- "Credit card expiration: 10/27" ,
101- "Bank account PIN: 5678" ,
102- "Encrypted password hash: $2b$12$abcd1234..." ,
103- "My vehicle VIN: 1HGCM82633A004352" ,
104- "Passport expiration: 09/2030" ,
105- "Private email password: qwerty123" ,
106- "Debit card number: 5500 0000 0000 0004" ,
107- "Bank security code: 789" ,
108- "My date of birth: 1990-01-01" ,
109- "Secret question answer: Blue" ,
110- "Corporate VPN password: VPN@1234" ,
111- "My home phone: +1-555-123-4567" ,
112- "Bank card CVV2: 456" ,
113- "Employee SSN: 234-56-7890" ,
114- "Private notes: Login credentials" ,
115- "Access key: AKIAIOSFODNN7EXAMPLE" ,
116- "Credit card PIN: 1357" ,
117- "Encrypted token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." ,
118- "My secondary email password: pass1234" ,
119- "Personal tax record: TR987654321" ,
120- ]
121- nonsensitive_texts = [
122- "The sky is blue today" ,
123- "I had pasta for lunch" ,
124- "Cats are very playful animals" ,
125- "The capital of France is Paris" ,
126- "I enjoy playing video games" ,
127- "Tomorrow will be sunny" ,
128- "Mount Everest is the tallest mountain" ,
129- "Water boils at 100 degrees Celsius" ,
130- "I love listening to music" ,
131- "Python is a programming language" ,
132- "I walked in the park yesterday" ,
133- "Coffee tastes great in the morning" ,
134- "The train arrives at 9 AM" ,
135- "My favorite color is green" ,
136- "Birds are singing outside" ,
137- "The museum opens at 10 AM" ,
138- "I like reading science fiction novels" ,
139- "The ocean waves are calming" ,
140- "I bought a new notebook" ,
141- "Chocolate ice cream is delicious" ,
142- "The book is on the table" ,
143- "I enjoy jogging in the evening" ,
144- "The concert was amazing" ,
145- "I prefer tea over coffee" ,
146- "Clouds are moving quickly" ,
147- "The cat is sleeping on the sofa" ,
148- "I painted a landscape yesterday" ,
149- "The sun sets in the west" ,
150- "I visited the mountains last summer" ,
151- "Reading improves vocabulary" ,
152- "The flowers bloom in spring" ,
153- "I wrote a poem today" ,
154- "Dogs are loyal pets" ,
155- "The bakery smells wonderful" ,
156- "I attended a workshop on AI" ,
157- "The painting has vibrant colors" ,
158- "I love hiking in the forest" ,
159- "The classroom is very bright" ,
160- "I learned a new recipe today" ,
161- "The movie was entertaining" ,
162- "I played chess with a friend" ,
163- "The library has many books" ,
164- "I enjoy listening to jazz music" ,
165- "The playground is full of kids" ,
166- "I watched a documentary yesterday" ,
167- "The stars are shining tonight" ,
168- "I planted a tree in the backyard" ,
169- "The festival was fun and lively" ,
170- "I took a photography course" ,
171- "The coffee shop is near my office" ,
172- ]
173- test_texts = sensitive_texts + nonsensitive_texts
174- test_labels = [1 ] * len (sensitive_texts ) + [0 ] * len (nonsensitive_texts )
175-
17675# Encode with SentenceTransformer
17776with torch .no_grad ():
17877 test_embs = embed_model .encode (test_texts , convert_to_tensor = True , device = device )
0 commit comments