Skip to content

Commit cc9965b

Browse files
Refactor data handling and output paths in Study_Models and Test_Model scripts
1 parent: c01a814 · commit: cc9965b

File tree

3 files changed: 35 additions & 24 deletions

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -207,4 +207,4 @@ marimo/_lsp/
207207
__marimo__/
208208
/models/
209209
/cache/
210-
/tools/data/
210+
/data/

tools/Study_Models.py

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,12 @@
1313
from torchviz import make_dot
1414

1515
# ----------------- Setup -----------------
16-
DATA_DIR = "data"
17-
os.makedirs(DATA_DIR, exist_ok=True)
18-
EMBEDDINGS_DIR = "../cache/Model_Sense.4n1/round_7/embeddings" # your embeddings directory
19-
MODEL_PATH = "../cache/Model_Sense.4n1/round_7/Model_Sense.4n1_round7.pth" # path to saved SimpleNN state_dict
20-
16+
NAME = "Model_Sense.4n1"
17+
ROUND = 5
18+
OUTPUT_DIR = f"../{NAME}_Data_Visualization"
19+
os.makedirs(OUTPUT_DIR, exist_ok=True)
20+
EMBEDDINGS_DIR = f"../cache/{NAME}/round_{ROUND}/embeddings" # your embeddings directory
21+
MODEL_PATH = f"../cache/{NAME}/round_{ROUND}/{NAME}_round{ROUND}.pth" # path to saved SimpleNN state_dict
2122
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
2223

2324

@@ -79,7 +80,7 @@ def visualize_weight_distribution(model_, filename="Weight_Distribution.png"):
7980
plt.title("Weight Distribution - First Layer")
8081
plt.xlabel("Weight Value")
8182
plt.ylabel("Frequency")
82-
plt.savefig(os.path.join(DATA_DIR, filename))
83+
plt.savefig(os.path.join(OUTPUT_DIR, filename))
8384
plt.close()
8485

8586

@@ -98,7 +99,7 @@ def hook_fn(module_, input_, output):
9899
plt.title("Activation Values - First Layer")
99100
plt.xlabel("Neuron Index")
100101
plt.ylabel("Activation Value")
101-
plt.savefig(os.path.join(DATA_DIR, filename))
102+
plt.savefig(os.path.join(OUTPUT_DIR, filename))
102103
plt.close()
103104

104105

@@ -151,7 +152,7 @@ def visualize_tsne_custom(model_, embedder_, texts_, labels_, filename="Visualiz
151152
plt.xlabel("t-SNE Dim 1")
152153
plt.ylabel("t-SNE Dim 2")
153154
plt.title("t-SNE of Custom Real-World Samples")
154-
plt.savefig(os.path.join(DATA_DIR, filename))
155+
plt.savefig(os.path.join(OUTPUT_DIR, filename))
155156
plt.close()
156157

157158

@@ -184,7 +185,7 @@ def visualize_tsne(model_, dataloader_, filename="Visualize_tSNE.png", use_penul
184185
plt.xlabel("t-SNE Dim 1")
185186
plt.ylabel("t-SNE Dim 2")
186187
plt.title("t-SNE Visualization of Features")
187-
plt.savefig(os.path.join(DATA_DIR, filename))
188+
plt.savefig(os.path.join(OUTPUT_DIR, filename))
188189
plt.close()
189190

190191

@@ -198,7 +199,7 @@ def visualize_feature_importance(input_dim_, filename="Feature_Importance.svg"):
198199
plt.xlabel("Features")
199200
plt.ylabel("Importance")
200201
plt.xticks(rotation=45)
201-
plt.savefig(os.path.join(DATA_DIR, filename))
202+
plt.savefig(os.path.join(OUTPUT_DIR, filename))
202203
plt.close()
203204

204205

@@ -230,11 +231,11 @@ def plot_loss_landscape_3d(model_, dataloader_, criterion_, grid_size=30, epsilo
230231
X_grid, Y_grid = np.meshgrid(x, y)
231232
fig = go.Figure(data=[go.Surface(z=loss_values, x=X_grid, y=Y_grid, colorscale="Viridis")])
232233
fig.update_layout(title="Loss Landscape", scene=dict(xaxis_title="u", yaxis_title="v", zaxis_title="Loss"))
233-
fig.write_html(os.path.join(DATA_DIR, filename))
234+
fig.write_html(os.path.join(OUTPUT_DIR, filename))
234235

235236

236237
def save_model_state_dict(model_, filename="Model_State_Dict.txt"):
237-
with open(os.path.join(DATA_DIR, filename), "w") as f:
238+
with open(os.path.join(OUTPUT_DIR, filename), "w") as f:
238239
for name, tensor in model_.state_dict().items():
239240
f.write(f"{name}: {tensor.size()}\n")
240241

@@ -243,7 +244,7 @@ def generate_model_visualization(model_, input_dim_, filename="Model_Visualizati
243244
dummy_input = torch.randn(1, input_dim_).to(DEVICE)
244245
dot = make_dot(model_(dummy_input), params=dict(model_.named_parameters()))
245246
dot.format = "png"
246-
dot.render(filename=os.path.join(DATA_DIR, filename), format="png")
247+
dot.render(filename=os.path.join(OUTPUT_DIR, filename), format="png")
247248

248249

249250
def save_graph(model_, filename="Neural_Network_Nodes_Graph.gexf"):
@@ -255,11 +256,11 @@ def save_graph(model_, filename="Neural_Network_Nodes_Graph.gexf"):
255256
rows, cols = np.where(np.abs(W) > threshold)
256257
for r, c in zip(rows, cols):
257258
G.add_edge(f"{name}_in_{c}", f"{name}_out_{r}", weight=W[r, c])
258-
nx.write_gexf(G, os.path.join(DATA_DIR, filename))
259+
nx.write_gexf(G, os.path.join(OUTPUT_DIR, filename))
259260

260261

261262
def save_model_summary(model_, filename="Model_Summary.txt"):
262-
with open(os.path.join(DATA_DIR, filename), "w") as f:
263+
with open(os.path.join(OUTPUT_DIR, filename), "w") as f:
263264
f.write(str(model_))
264265

265266

load_and_test.py renamed to tools/Test_Model.py

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,13 @@
33
from vulnscan import SimpleNN
44
import glob
55
import os
6+
# ---------------- INIT ----------------
7+
NAME = "Model_Sense.4n1"
8+
ROUND = 5
69

710
# ---------------- LOAD MODEL + EMBEDDER ----------------
811
device = "cuda" if torch.cuda.is_available() else "cpu"
12+
print("Using device:", device)
913

1014

1115
def load_embeddings(folder_path, pattern):
@@ -23,7 +27,7 @@ def load_embeddings(folder_path, pattern):
2327

2428

2529
# Example paths
26-
cache_dir = "cache/Model_Sense.4n1/round_7/embeddings"
30+
cache_dir = f"cache/{NAME}/round_{ROUND}/embeddings"
2731

2832
# Load all train/test/val embeddings
2933
train_embeddings, train_labels = load_embeddings(cache_dir, "train_*.pt")
@@ -34,7 +38,7 @@ def load_embeddings(folder_path, pattern):
3438
input_dim = train_embeddings.shape[1]
3539
model = SimpleNN(input_dim=input_dim).to(device)
3640
model.load_state_dict(torch.load(
37-
"cache/Model_Sense.4n1/round_7/Model_Sense.4n1_round7.pth",
41+
f"cache/{NAME}/round_{ROUND}/{NAME}_round{ROUND}.pth",
3842
map_location="cpu"
3943
))
4044
model.eval()
@@ -47,10 +51,15 @@ def load_embeddings(folder_path, pattern):
4751
outputs = model(test_embeddings.to(device))
4852
preds = torch.sigmoid(outputs).squeeze()
4953

50-
print("\n=== TEST 1: Stored Embeddings ===")
51-
print("Sample predictions vs true labels (first 10):")
52-
for i in range(10):
53-
print(f"Pred={preds[i].item():.3f} | Label={test_labels[i].item()}")
54+
print("\n=== TEST 1: Stored Embeddings (50% sample) ===")
55+
print("Sample predictions vs true labels (first 50%):")
56+
sample_size = len(preds) // 2
57+
for i in range(sample_size):
58+
print(f" Pred={preds[i].item():.3f} | Label={test_labels[i].item()}")
59+
# Calculate accuracy
60+
pred_labels = (preds >= 0.5).long().cpu()
61+
accuracy = (pred_labels == test_labels).sum().item() / len(test_labels)
62+
print(f"\nAccuracy on stored embeddings: {accuracy * 100:.2f}%")
5463

5564
# ---------------- 2. TEST ON NATURAL EXAMPLES ----------------
5665
sensitive_texts = [
@@ -165,12 +174,13 @@ def load_embeddings(folder_path, pattern):
165174
outputs = model(test_embs)
166175
preds = torch.sigmoid(outputs).squeeze()
167176

168-
print("\n=== TEST 2: Natural Examples ===")
177+
print("\n\n=== TEST 2: Natural Examples ===")
178+
print("Real-world samples predictions vs true labels:")
169179
correct = 0
170180
for i, (text, pred, label) in enumerate(zip(test_texts, preds, test_labels)):
171181
decision = 1 if pred.item() >= 0.5 else 0
172182
if decision == label:
173183
correct += 1
174-
print(f"[{i + 1}] Pred={pred.item():.3f} | Label={label} | Text={text[:50]}...")
184+
print(f" [{i + 1}] Pred={pred.item():.3f} | Label={label} | Text={text[:50]}...")
175185

176186
print(f"\nAccuracy on natural 100 samples: {correct}/{len(test_labels)} = {correct / len(test_labels) * 100:.2f}%")

0 commit comments

Comments (0)