# Source: commit bafebf7 (1 parent: 485b0c7) - "Create SpamShield.py"
# 1 file changed: 338 additions, 0 deletions.
1+
"""
SpamShield v3.1 - SMS Spam Classifier

- AI-powered SMS spam detection
- Auto-downloads the SMSSpamCollection dataset if it is missing
- Efficient batch classification from CSV/TXT files
"""
7+
8+
import os, sys, threading, csv
9+
import tkinter as tk
10+
from tkinter import filedialog, messagebox, ttk
11+
12+
import ttkbootstrap as tb
13+
from ttkbootstrap.constants import *
14+
15+
# Drag & drop support is optional: the app still works without tkinterdnd2,
# so a missing package only disables the feature and prints an install hint.
try:
    from tkinterdnd2 import TkinterDnD, DND_FILES
    DND_ENABLED = True
except ImportError:
    DND_ENABLED = False
    print("Drag & Drop requires tkinterdnd2: pip install tkinterdnd2")
21+
22+
# ML libs
23+
from sklearn.feature_extraction.text import TfidfVectorizer
24+
from sklearn.naive_bayes import MultinomialNB
25+
from sklearn.pipeline import make_pipeline
26+
from sklearn.model_selection import train_test_split
27+
from sklearn.metrics import accuracy_score
28+
import pandas as pd
29+
import joblib
30+
31+
import urllib.request
32+
import zipfile
33+
34+
# ---------------------- UTIL ----------------------
35+
36+
def resource_path(file_name):
    """Return the absolute path of *file_name* inside the application folder.

    The base folder is ``sys._MEIPASS`` when that attribute exists (it is set
    by PyInstaller-frozen builds); otherwise it is the directory that contains
    this source file.

    Args:
        file_name: File name or relative path to resolve. An empty string
            yields the base folder itself (with a trailing separator).

    Returns:
        The joined path as a string.
    """
    base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
    return os.path.join(base_path, file_name)
39+
40+
def download_dataset():
    """Download the SMS Spam Collection archive and unpack it next to the app.

    The zip is fetched to ``resource_path("smsspamcollection.zip")``, extracted
    into ``resource_path("")`` and then deleted.

    Raises:
        SystemExit: If the download fails; an error dialog is shown first.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
    zip_path = resource_path("smsspamcollection.zip")

    try:
        try:
            urllib.request.urlretrieve(url, zip_path)
        except Exception as e:
            messagebox.showerror("Download Failed", f"Failed to download dataset:\n{e}")
            sys.exit(1)

        with zipfile.ZipFile(zip_path, "r") as archive:
            archive.extractall(resource_path(""))
    finally:
        # Always remove the archive, including a partial download or one that
        # failed to extract, so a broken zip never lingers on disk.
        if os.path.exists(zip_path):
            os.remove(zip_path)
52+
53+
# ---------------------- ML MODEL ----------------------
54+
55+
def train_sms_model():
    """Train the spam classifier, save it to disk and return it.

    The dataset is read from ``resource_path("SMSSpamCollection")`` and is
    downloaded first when that file does not exist. A TF-IDF + multinomial
    naive Bayes pipeline is fitted on an 80/20 train/test split, the test
    accuracy is printed, and the fitted pipeline is written to
    ``resource_path("sms_spam_model.pkl")``.

    Returns:
        The fitted scikit-learn pipeline (labels: 0 = ham, 1 = spam).
    """
    ds_path = resource_path("SMSSpamCollection")
    if not os.path.exists(ds_path):
        download_dataset()

    # The file is plain tab-separated text whose messages contain literal
    # double quotes; disabling quote handling stops pandas from merging rows
    # across an unbalanced quote.
    df = pd.read_csv(
        ds_path,
        sep="\t",
        header=None,
        names=["label", "text"],
        quoting=csv.QUOTE_NONE,
    )
    df["label_num"] = df["label"].map({"ham": 0, "spam": 1})

    # Rows with an unknown label or no text would put NaN into the training
    # data and make fit() fail, so they are dropped up front.
    df = df.dropna(subset=["label_num", "text"])
    df["label_num"] = df["label_num"].astype(int)

    X_train, X_test, y_train, y_test = train_test_split(
        df["text"], df["label_num"], test_size=0.2, random_state=42
    )

    model = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    print(f"[INFO] Model trained — Test Accuracy: {accuracy_score(y_test, y_pred)*100:.2f}%")

    model_path = resource_path("sms_spam_model.pkl")
    joblib.dump(model, model_path)

    return model
76+
77+
def load_model():
    """Return the cached classifier, training a new one when necessary.

    The model is loaded from ``resource_path("sms_spam_model.pkl")`` when that
    file exists. If the file is missing, or cannot be loaded (for example a
    truncated file), a fresh model is trained via ``train_sms_model()``.

    Returns:
        The fitted classification pipeline.
    """
    model_path = resource_path("sms_spam_model.pkl")
    if os.path.exists(model_path):
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code; only load a model file this application produced.
        try:
            return joblib.load(model_path)
        except Exception as e:
            # A stale or corrupt cache must not stop the app from starting.
            print(f"[WARN] Could not load cached model ({e}); retraining.")
    return train_sms_model()
82+
83+
# ---------------------- SPAM WORKER ----------------------
84+
85+
class SpamWorker:
    """Classify the first column of each input file and report via callbacks.

    Args:
        files: Sequence of file paths to process, in order.
        model: Object with a ``predict(list_of_str)`` method; a prediction
            equal to ``1`` is reported as ``"SPAM"``, anything else as
            ``"HAM"``.
        callbacks: Mapping of optional handlers:
            ``"found"(path, text, label)`` per classified message,
            ``"progress"(percent)`` and ``"stats"(dict)`` after each file,
            ``"finished"()`` once at the end.
        max_results: Upper bound on the total number of classified messages
            across all files.
    """

    def __init__(self, files, model, callbacks, max_results=200_000):
        self.files = files
        self.model = model
        self.callbacks = callbacks
        self.max_results = max_results
        self._running = True

    def stop(self):
        """Ask ``run`` to stop as soon as possible."""
        self._running = False

    def _emit(self, name, *args):
        """Invoke the callback registered under *name*, if there is one."""
        handler = self.callbacks.get(name)
        if handler is not None:
            handler(*args)

    @staticmethod
    def _read_texts(path):
        """Return the stripped, non-empty first-column values of *path*."""
        texts = []
        with open(path, newline="", encoding="utf-8", errors="ignore") as f:
            for row in csv.reader(f):
                if not row:
                    continue
                text = row[0].strip()
                if text:  # a blank message carries nothing to classify
                    texts.append(text)
        return texts

    def run(self):
        """Process every file until done, stopped, or ``max_results`` is hit.

        A file that cannot be read or classified is reported on stdout and
        skipped; the remaining files are still processed. The ``"finished"``
        callback is always invoked exactly once at the end.
        """
        total_files = len(self.files)
        stats = {"TOTAL": 0, "SPAM": 0, "HAM": 0}

        for i, path in enumerate(self.files):
            remaining = self.max_results - stats["TOTAL"]
            # Stop the whole run, not just the current file, once the result
            # cap is reached or a stop was requested.
            if not self._running or remaining <= 0:
                break

            try:
                # Truncate before predicting so the cap is never exceeded and
                # no work is wasted on messages that would be discarded.
                texts = self._read_texts(path)[:remaining]

                if texts:
                    predictions = self.model.predict(texts)
                    for text, prediction in zip(texts, predictions):
                        if not self._running:
                            break
                        label = "SPAM" if prediction == 1 else "HAM"
                        stats[label] += 1
                        stats["TOTAL"] += 1
                        self._emit("found", path, text, label)
            except Exception as e:
                # Best effort: one bad file must not abort the whole batch.
                print(f"[WARN] Could not read {path}: {e}")

            self._emit("progress", int((i + 1) / total_files * 100))
            self._emit("stats", dict(stats))

        self._emit("finished")
137+
138+
# ---------------------- MAIN APP ----------------------
139+
140+
class SpamShieldApp:
    """Desktop GUI that queues SMS data files and classifies them in bulk.

    Queued files and classification results are shown in the same tree view.
    Queued rows are tracked in ``self.queued_items`` so that Start only ever
    processes files and Export only ever writes classified messages.
    """

    APP_NAME = "SpamShield"
    APP_VERSION = "3.1"
    SUPPORTED_EXT = (".csv", ".txt")

    # Marker shown in the first column of every selected row.
    CHECKED = "☑️"

    def __init__(self):
        """Create the root window, load the model and build the interface."""
        if DND_ENABLED:
            self.root = TkinterDnD.Tk()
        else:
            self.root = tb.Window(themename="darkly")

        self.root.title(f"{self.APP_NAME} v{self.APP_VERSION}")
        self.root.minsize(1200, 650)

        self.model = load_model()

        self.worker = None
        self.smooth = 0   # progress value currently drawn
        self.target = 0   # progress value the animation moves towards
        self.file_set = set()
        # Tree item id -> file path for rows that represent queued files
        # (insertion-ordered), as opposed to classified-message rows.
        self.queued_items = {}

        self._build_ui()
        self._apply_styles()

    def _build_ui(self):
        """Create all widgets and start the progress animation loop."""
        main = tb.Frame(self.root, padding=10)
        main.pack(fill=tk.BOTH, expand=True)

        tb.Label(main, text=f"📩 {self.APP_NAME} - AI SMS Spam Detector",
                 font=("Segoe UI", 22, "bold")).pack(pady=(0, 4))

        tb.Label(main, text="Batch classification — handles large SMS datasets",
                 font=("Segoe UI", 10, "italic"), foreground="#9ca3af").pack(pady=(0, 12))

        row1 = tb.Frame(main)
        row1.pack(fill=tk.X)

        self.path_input = tb.Entry(row1, width=90)
        self.path_input.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 6))
        self.path_input.insert(0, "Drag & drop CSV/TXT files with SMS data…")

        btn_browse = tb.Button(row1, text="📂 Browse", bootstyle=INFO, command=self.browse)
        btn_browse.pack(side=tk.LEFT, padx=3)

        self.btn_start = tb.Button(row1, text="🚀 Start", bootstyle=SUCCESS, command=self.start)
        self.btn_start.pack(side=tk.LEFT, padx=3)

        self.btn_cancel = tb.Button(row1, text="⏹ Cancel", bootstyle=DANGER, command=self.cancel)
        self.btn_cancel.pack(side=tk.LEFT, padx=3)
        self.btn_cancel.config(state=tk.DISABLED)

        btn_export = tb.Button(row1, text="💾 Export", bootstyle=PRIMARY, command=self.export_results)
        btn_export.pack(side=tk.LEFT, padx=3)

        btn_about = tb.Button(row1, text="ℹ️ About", bootstyle=INFO, command=self.show_about)
        btn_about.pack(side=tk.LEFT, padx=3)

        self.progress = tb.Progressbar(main, bootstyle="success-striped", maximum=100)
        self.progress.pack(fill=tk.X, pady=(6, 6))

        columns = ("selected", "text", "label")
        self.tree = ttk.Treeview(main, columns=columns, show="headings", height=20)
        self.tree.heading("selected", text="☑️")
        self.tree.heading("text", text="SMS Text")
        self.tree.heading("label", text="Prediction")

        self.tree.column("selected", width=50, anchor=tk.CENTER)
        self.tree.column("text", width=800)
        self.tree.column("label", width=120)

        # Configure the result colours once instead of once per inserted row.
        self.tree.tag_configure("SPAM", foreground="#dc2626")
        self.tree.tag_configure("HAM", foreground="#4ade80")

        self.tree.pack(fill=tk.BOTH, expand=True)

        self.stats_lbl = tb.Label(main, text="TOTAL: 0 | SPAM: 0 | HAM: 0")
        self.stats_lbl.pack(anchor=tk.E)

        self.root.after(15, self._anim_progress)

        if DND_ENABLED:
            self.tree.drop_target_register(DND_FILES)
            self.tree.dnd_bind("<<Drop>>", self.on_drop)

    # ---- File Queue ----

    def browse(self):
        """Let the user pick data files and add them to the queue."""
        files = filedialog.askopenfilenames(title="Select SMS Data Files",
                                            filetypes=[("CSV Files", "*.csv"), ("Text Files", "*.txt")])
        if files:
            self._queue_files(files)

    def on_drop(self, event):
        """Queue the files dropped onto the tree view."""
        paths = self.root.tk.splitlist(event.data)
        self._queue_files(paths)

    def _queue_files(self, paths):
        """Add each supported, not yet queued path to the queue and the tree."""
        for p in paths:
            ext = os.path.splitext(p)[1].lower()
            if ext in self.SUPPORTED_EXT and p not in self.file_set:
                self.file_set.add(p)
                iid = self.tree.insert("", tk.END, values=(self.CHECKED, p, "Queued"))
                self.queued_items[iid] = p
        self.path_input.delete(0, tk.END)
        self.path_input.insert(0, f"{len(self.file_set)} files queued")

    # ---- Actions ----

    def start(self):
        """Classify all selected queued files on a background thread."""
        # Only rows that represent queued files are considered; classified
        # message rows live in the same tree and must never be used as paths.
        selected = [
            path for iid, path in self.queued_items.items()
            if self.tree.exists(iid)
            and self.tree.item(iid)["values"][0] == self.CHECKED
        ]
        if not selected:
            messagebox.showwarning("No Data", "Select at least one file to classify.")
            return

        self.btn_start.config(state=tk.DISABLED)
        self.btn_cancel.config(state=tk.NORMAL)
        self.progress["value"] = 0
        self.smooth = 0
        self.target = 0

        threading.Thread(target=self._run_worker, args=(selected,), daemon=True).start()

    def _run_worker(self, files):
        """Thread target: build a ``SpamWorker`` for *files* and run it.

        NOTE(review): the callbacks below are invoked from this worker thread
        and touch Tk widgets directly - confirm this is acceptable for the
        Tcl/Tk build in use.
        """
        self.worker = SpamWorker(files, self.model,
                                 callbacks={"found": self._add_row,
                                            "progress": self._set_target,
                                            "stats": self._update_stats,
                                            "finished": self._finish})
        self.worker.run()

    def _add_row(self, file, text, label):
        """Append one classified message, coloured by its label."""
        # Passing the tag at insert time avoids fetching the full child list
        # for every row, which made large batches quadratic.
        self.tree.insert("", tk.END, values=(self.CHECKED, text, label), tags=(label,))

    def _update_stats(self, stats):
        """Show the running totals in the status label."""
        self.stats_lbl.config(text=f"TOTAL: {stats['TOTAL']} | SPAM: {stats['SPAM']} | HAM: {stats['HAM']}")

    def _set_target(self, v):
        """Set the percentage the progress animation should move towards."""
        self.target = v

    def _anim_progress(self):
        """Advance the bar one step towards ``target`` and reschedule itself."""
        if self.smooth < self.target:
            self.smooth += 1
            self.progress["value"] = self.smooth
        self.root.after(15, self._anim_progress)

    def cancel(self):
        """Ask the running worker to stop and reset the controls."""
        if self.worker:
            self.worker.stop()
        self._finish()

    def _finish(self):
        """Re-enable Start, disable Cancel and show the bar as complete."""
        self.btn_start.config(state=tk.NORMAL)
        self.btn_cancel.config(state=tk.DISABLED)
        # Keep the animation state in sync; otherwise the next animation tick
        # would pull the bar back down to the stale ``smooth`` value.
        self.smooth = self.target = 100
        self.progress["value"] = 100

    # ---- Export ----

    def export_results(self):
        """Write every selected classified message to a text file."""
        rows = []
        for iid in self.tree.get_children():
            if iid in self.queued_items:
                continue  # queued-file rows are not classification results
            values = self.tree.item(iid)["values"]
            if values[0] == self.CHECKED:
                rows.append(values)
        if not rows:
            messagebox.showwarning("Export", "No classified messages to export.")
            return

        path = filedialog.asksaveasfilename(defaultextension=".txt",
                                            filetypes=[("Text Files", "*.txt")])
        if path:
            with open(path, "w", encoding="utf-8") as f:
                f.writelines(f"{text} | {lbl}\n" for _, text, lbl in rows)
            messagebox.showinfo("Export", "Results exported successfully!")

    # ---- About ----

    def show_about(self):
        """Show the About dialog."""
        messagebox.showinfo(
            f"About {self.APP_NAME}",
            f"{self.APP_NAME} v{self.APP_VERSION}\n\n"
            "• Drag & drop SMS dataset files\n"
            "• Auto-downloads needed dataset\n"
            "• Batch ML classification\n"
            "• SPAM/HAM highlighting\n"
            "• Export results\n\n"
            "🏢 Built with ❤️"
        )

    def _apply_styles(self):
        """Apply the dark theme and the custom progress bar colours."""
        self.root.style = tb.Style(theme="darkly")
        self.root.style.configure("TProgressbar", troughcolor="#1b1f3a",
                                  background="#7c3aed", thickness=14)

    def run(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.root.mainloop()
333+
334+
# ---- Run App ----
335+
336+
# Launch the GUI only when executed as a script, not when imported.
if __name__ == "__main__":
    app = SpamShieldApp()
    app.run()

# Commit comments: 0