Skip to content

Commit 0d457a4

Browse files
authored
Create Titanic_Survival_Predictor.py
1 parent df20bbe commit 0d457a4

1 file changed

Lines changed: 347 additions & 0 deletions

File tree

Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
"""
2+
Titanic Survival Predictor v3.1
3+
ML-based Survival Prediction GUI
4+
Auto-detects columns and can auto-train on any uploaded CSV
5+
Fully compatible with pandas 3.x (no FutureWarnings)
6+
"""
7+
8+
import os, sys, threading
9+
import pandas as pd
10+
import numpy as np
11+
import tkinter as tk
12+
from tkinter import filedialog, messagebox, ttk
13+
import ttkbootstrap as tb
14+
from ttkbootstrap.constants import *
15+
16+
try:
17+
from tkinterdnd2 import TkinterDnD, DND_FILES
18+
DND_ENABLED = True
19+
except ImportError:
20+
DND_ENABLED = False
21+
print("Drag & Drop requires tkinterdnd2: pip install tkinterdnd2")
22+
23+
from sklearn.ensemble import RandomForestClassifier
24+
25+
# ---------------------- UTIL ----------------------
26+
def resource_path(file_name):
    """Return the absolute location of a file shipped next to the application.

    The lookup root is ``sys._MEIPASS`` when that attribute exists (it is set
    by frozen/bundled builds, e.g. PyInstaller -- NOTE(review): confirm the
    packaging tool actually used); otherwise it is the directory that
    contains this source file.

    Args:
        file_name: File name (or relative path) to resolve against the root.

    Returns:
        ``file_name`` joined onto the chosen root directory.
    """
    # Resolved unconditionally so the fallback is always available.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    try:
        root_dir = sys._MEIPASS
    except AttributeError:
        root_dir = script_dir
    return os.path.join(root_dir, file_name)
29+
30+
# ---------------------- WORKER ----------------------
31+
class PredictionWorker:
    """Run survival predictions over a list of CSV files.

    The worker is meant to be driven from a background thread via ``run``.
    It reports back exclusively through the ``callbacks`` mapping; every
    entry is optional:

    * ``'found'(path, row, pred)`` -- once per predicted passenger; ``row``
      is the preprocessed pandas row, ``pred`` the model's prediction.
    * ``'progress'(percent)`` -- after each file, as an ``int`` in 0..100.
    * ``'error'(path, exc)`` -- when a file could not be processed. If this
      callback is absent the error is printed to stdout instead.
    * ``'finished'()`` -- exactly once when ``run`` ends, including after
      ``stop`` or an unexpected failure.

    Callbacks are invoked on the thread that executes ``run``; callers that
    update a GUI must marshal them to the GUI thread themselves.
    """

    # Feature columns handed to ``model.predict`` (order matters).
    FEATURES = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']

    # Canonical column name -> accepted spellings, in priority order.
    COLUMN_ALIASES = {
        'PassengerId': ['PassengerId', 'passengerid', 'Passenger ID', 'pid'],
        'Name': ['Name', 'FullName', 'full_name'],
        'Pclass': ['Pclass', 'Class'],
        'Sex': ['Sex', 'Gender'],
        'Age': ['Age', 'age'],
        'SibSp': ['SibSp', 'Siblings/Spouses'],
        'Parch': ['Parch', 'Parents/Children'],
        'Fare': ['Fare', 'fare'],
    }

    # Canonical columns that default to 0 (instead of "") when absent.
    NUMERIC_COLUMNS = ('Age', 'SibSp', 'Parch', 'Fare', 'Pclass')

    # Numeric encoding of the ``Sex`` feature; anything else becomes 0.
    SEX_CODES = {'male': 0, 'female': 1}

    def __init__(self, files, model, callbacks):
        """Store the work list, the fitted model and the callback mapping.

        Args:
            files: Sequence of CSV paths to process, in order.
            model: Object exposing ``predict(X)`` for the ``FEATURES`` frame.
            callbacks: Mapping of callback name to callable (see class doc).
        """
        self.files = files
        self.model = model
        self.callbacks = callbacks
        self._running = True

    def stop(self):
        """Ask ``run`` to stop at the next file or row boundary."""
        self._running = False

    def run(self):
        """Process every file in turn, reporting through the callbacks.

        A failure in one file is reported and does not abort the remaining
        files. ``'finished'`` is emitted even if a callback raises.
        """
        total_files = len(self.files)
        try:
            for i, path in enumerate(self.files):
                if not self._running:
                    break
                try:
                    self._predict_file(path)
                except Exception as e:  # per-file boundary: keep going
                    self._report_error(path, e)
                self._emit('progress', int((i + 1) / total_files * 100))
        finally:
            # Always signal completion so the UI can re-enable its controls.
            self._emit('finished')

    def _predict_file(self, path):
        """Load one CSV, predict every row and emit ``'found'`` per row."""
        df_pre = self._preprocess(pd.read_csv(path))
        if df_pre.empty:
            # Nothing to predict; estimators reject zero-row input.
            return
        preds = self.model.predict(df_pre[self.FEATURES])
        for idx, pred in enumerate(preds):
            if not self._running:
                break
            self._emit('found', path, df_pre.iloc[idx], pred)

    def _emit(self, name, *args):
        """Invoke the callback registered under ``name``, if there is one."""
        callback = self.callbacks.get(name)
        if callback is not None:
            callback(*args)

    def _report_error(self, path, exc):
        """Forward a per-file failure to ``'error'`` or print it."""
        if 'error' in self.callbacks:
            self.callbacks['error'](path, exc)
        else:
            print(f"Error processing {path}: {exc}")

    @staticmethod
    def _find_column(df, options):
        """Return the column of ``df`` matching one of ``options``.

        Exact matches win (in ``options`` order); otherwise names are
        compared ignoring case and surrounding whitespace. Returns ``None``
        when nothing matches.
        """
        for opt in options:
            if opt in df.columns:
                return opt
        folded = {}
        for col in df.columns:
            # setdefault: the left-most column wins on folded-name clashes.
            folded.setdefault(str(col).strip().lower(), col)
        for opt in options:
            key = opt.strip().lower()
            if key in folded:
                return folded[key]
        return None

    @classmethod
    def _encode_sex(cls, value):
        """Map a raw sex/gender cell to its numeric code (unknown -> 0)."""
        return cls.SEX_CODES.get(str(value).strip().lower(), 0)

    def _preprocess(self, df):
        """Return a copy of ``df`` with canonical, model-ready columns.

        * Every key of ``COLUMN_ALIASES`` is guaranteed to exist, copied from
          the first matching alias or filled with ``0`` / ``""``.
        * ``Sex`` is encoded through ``SEX_CODES``, ignoring case and
          whitespace; unrecognised or missing values become 0.
        * ``Age`` and ``Fare`` are coerced to numbers and missing values are
          replaced by the column median (0 when the median is undefined).
        * ``SibSp``, ``Parch`` and ``Pclass`` are coerced to numbers with
          missing values replaced by 0.

        Args:
            df: Raw frame as read from a CSV. It is not modified.

        Returns:
            The prepared copy; all original columns are kept.
        """
        df = df.copy()

        for key, options in self.COLUMN_ALIASES.items():
            source = self._find_column(df, options)
            if source is not None:
                df[key] = df[source]
            else:
                df[key] = 0 if key in self.NUMERIC_COLUMNS else ""

        df['Sex'] = df['Sex'].map(self._encode_sex)

        # Plain re-assignment (no inplace/chained ops) stays valid on pandas 3.x.
        for col in ('Age', 'Fare'):
            values = pd.to_numeric(df[col], errors='coerce')
            median = values.median()
            # An empty or all-missing column has a NaN median; fall back to 0
            # so no NaN ever reaches the model.
            df[col] = values.fillna(0 if pd.isna(median) else median)
        for col in ('SibSp', 'Parch', 'Pclass'):
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

        return df
96+
97+
# ---------------------- MAIN APP ----------------------
98+
class TitanicApp:
    """Tkinter/ttkbootstrap front end for the Titanic survival model.

    Files are queued through "Browse" or drag & drop, predicted on a
    background ``PredictionWorker`` thread, listed in a tree view and can be
    exported to CSV. Everything that touches a widget runs on the Tk main
    loop: the worker's callbacks are queued and replayed by
    ``animate_progress``.
    """

    APP_NAME = "Titanic Survival Predictor"
    APP_VERSION = "3.1"
    SUPPORTED_EXT = (".csv",)

    # Feature columns used for fitting (must match what the worker predicts on).
    FEATURES = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
    # Marker stored in the first tree column of rows that are "selected".
    CHECKED = "☑️"

    def __init__(self):
        """Create the root window, load/train the model and build the UI."""
        # Root window: the DnD-capable Tk when available, themed window otherwise.
        if DND_ENABLED:
            self.root = TkinterDnD.Tk()
        else:
            self.root = tb.Window(themename="darkly")

        self.root.title(f"{self.APP_NAME} v{self.APP_VERSION}")
        self.root.minsize(1000, 600)

        self.worker_obj = None        # worker currently running, if any
        self.smooth_value = 0         # value currently shown by the bar
        self.target_progress = 0      # value the bar animates towards
        self.file_set = set()         # full paths already queued
        self.file_items = {}          # tree item id -> full path of a queued file
        self.result_items = []        # tree item ids of prediction rows

        # Hand-off from the worker thread to the Tk thread.
        self._ui_lock = threading.Lock()
        self._ui_pending = []

        # Load model
        self.model = self._load_or_train_model()

        self._build_ui()
        self._apply_styles()

    # ---------------------- MODEL ----------------------
    def _load_or_train_model(self):
        """Return a classifier, fitted from ``train.csv`` when possible.

        When ``train.csv`` is missing, unreadable or unlabelled, the returned
        model is left unfitted; it is then trained from the first labelled
        CSV the user uploads.
        """
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        train_file = resource_path("train.csv")
        if not os.path.exists(train_file):
            # No train.csv -> smart retrain on first uploaded CSV
            messagebox.showinfo("Model", "train.csv not found. The first CSV uploaded will be used to train the model automatically.")
            return model
        try:
            trained = self._fit_model(model, pd.read_csv(train_file))
        except (OSError, ValueError) as exc:
            # pandas parser/decoding errors and sklearn label errors are ValueErrors.
            messagebox.showerror("Model", f"Could not train from train.csv: {exc}")
            return model
        if trained:
            messagebox.showinfo("Model", "Loaded train.csv for model training.")
        else:
            messagebox.showwarning("Model", "train.csv has no usable 'Survived' labels. The first labelled CSV uploaded will be used to train the model instead.")
        return model

    def _fit_model(self, model, df):
        """Fit ``model`` on the labelled rows of ``df``.

        Returns:
            ``True`` when the model was fitted, ``False`` when ``df`` has no
            ``Survived`` column or no row with a label (fitting on made-up
            labels would yield a model that always predicts one class).

        Raises:
            ValueError: Propagated from ``fit`` for unusable label values.
        """
        if 'Survived' not in df.columns:
            return False
        labelled = df.dropna(subset=['Survived'])
        if labelled.empty:
            return False
        prepared = self._preprocess(labelled)
        model.fit(prepared[self.FEATURES], prepared['Survived'])
        return True

    def _model_is_fitted(self):
        """Tell whether ``self.model`` has been trained (has ``estimators_``)."""
        return hasattr(self.model, 'estimators_')

    def _preprocess(self, df):
        """Prepare ``df`` exactly the way the prediction worker does.

        Training and prediction must see identically engineered features, so
        this delegates to ``PredictionWorker._preprocess`` instead of keeping
        a second copy of the logic.
        """
        return PredictionWorker([], None, {})._preprocess(df)

    # ---------------------- UI ----------------------
    def _build_ui(self):
        """Create all widgets and start the progress/animation timer."""
        main = tb.Frame(self.root, padding=10)
        main.pack(fill=tk.BOTH, expand=True)

        tb.Label(main, text=f"🛳 {self.APP_NAME}",
                 font=("Segoe UI", 22, "bold")).pack(pady=(0, 4))
        tb.Label(main, text="Predict Titanic Passenger Survival",
                 font=("Segoe UI", 10, "italic"), foreground="#9ca3af").pack(pady=(0, 20))

        # Row 1: File selection
        row1 = tb.Frame(main)
        row1.pack(fill=tk.X, pady=(0, 6))

        self.path_input = tb.Entry(row1, width=80)
        self.path_input.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 6))
        self.path_input.insert(0, "Drag & drop CSV files here…")

        browse_btn = tb.Button(row1, text="📂 Browse", bootstyle=INFO, command=self.browse)
        browse_btn.pack(side=tk.LEFT, padx=3)

        self.start_btn = tb.Button(row1, text="🚀 Start Prediction", bootstyle=SUCCESS, command=self.start)
        self.start_btn.pack(side=tk.LEFT, padx=3)

        self.cancel_btn = tb.Button(row1, text="⏹ Cancel", bootstyle=DANGER, command=self.cancel)
        self.cancel_btn.pack(side=tk.LEFT, padx=3)
        self.cancel_btn.config(state=DISABLED)

        export_btn = tb.Button(row1, text="💾 Export Results", bootstyle=PRIMARY, command=self.export_results)
        export_btn.pack(side=tk.LEFT, padx=3)

        about_btn = tb.Button(row1, text="ℹ️ About", bootstyle=INFO, command=self.show_about)
        about_btn.pack(side=tk.LEFT, padx=3)

        # Progress bar
        self.progress = tb.Progressbar(main, bootstyle="success-striped", maximum=100)
        self.progress.pack(fill=tk.X, pady=(0, 6))

        # Treeview: holds both queued-file rows and prediction rows
        columns = ("selected", "PassengerID", "Name", "Prediction")
        self.tree = ttk.Treeview(main, columns=columns, show="headings", selectmode="extended", height=20)
        self.tree.heading("selected", text="✅")
        self.tree.heading("PassengerID", text="Passenger ID", anchor=W)
        self.tree.heading("Name", text="Name", anchor=W)
        self.tree.heading("Prediction", text="Survived", anchor=W)
        self.tree.column("selected", width=50, anchor=tk.CENTER)
        self.tree.column("PassengerID", width=80)
        self.tree.column("Name", width=400)
        self.tree.column("Prediction", width=100)
        self.tree.pack(fill=tk.BOTH, expand=True, pady=(0, 6))

        # Timer that animates the bar and replays queued worker callbacks.
        self.root.after(15, self.animate_progress)

        # Drag & drop
        if DND_ENABLED:
            self.tree.drop_target_register(DND_FILES)
            self.tree.dnd_bind("<<Drop>>", self.on_drop)

    # ---------------------- Browse / Drop ----------------------
    def browse(self):
        """Let the user pick CSV files through a file dialog and queue them."""
        files = filedialog.askopenfilenames(title="Select CSV Files", filetypes=[("CSV Files", "*.csv")])
        if files:
            self._process_uploaded_files(files)

    def on_drop(self, event):
        """Queue the files carried by a drag & drop event."""
        files = self.root.tk.splitlist(event.data)
        self._process_uploaded_files(files)

    def _process_uploaded_files(self, files):
        """Queue new CSV files and, if needed, train the model from them.

        Files with an unsupported extension or that are already queued are
        ignored. Only files accepted by this call are considered as training
        data.
        """
        added = []
        for f in files:
            if f.lower().endswith(self.SUPPORTED_EXT) and f not in self.file_set:
                self.file_set.add(f)
                item = self.tree.insert("", tk.END, values=(self.CHECKED, "", os.path.basename(f), "Queued"))
                self.file_items[item] = f
                added.append(f)

        # Leave the buttons alone while a prediction is running.
        if self.worker_obj is None:
            self.start_btn.config(state=tk.NORMAL)
            self.cancel_btn.config(state=tk.DISABLED)
        self.path_input.delete(0, tk.END)
        self.path_input.insert(0, f"{len(self.file_set)} files queued")

        # Smart retrain if model has no training
        if added and not self._model_is_fitted():
            self._smart_retrain(added)

    def _smart_retrain(self, candidates):
        """Train the model from the first usable file among ``candidates``.

        Returns:
            ``True`` when the model was trained, ``False`` otherwise (the
            user is told why).
        """
        for path in candidates:
            try:
                trained = self._fit_model(self.model, pd.read_csv(path))
            except (OSError, ValueError) as exc:
                messagebox.showerror("Smart Retrain", f"Could not train on {path}: {exc}")
                continue
            if trained:
                messagebox.showinfo("Smart Retrain", f"Model trained using uploaded CSV: {path}")
                return True
        messagebox.showwarning(
            "Smart Retrain",
            "None of the uploaded CSV files has a usable 'Survived' column, so the "
            "model could not be trained. Upload a labelled CSV first.")
        return False

    # ---------------------- Actions ----------------------
    def start(self):
        """Start predicting all checked, queued files on a worker thread."""
        # Only rows created for queued files count; each maps to its full
        # path, so equally named files in different folders stay distinct.
        files_to_process = [
            path for item, path in self.file_items.items()
            if self.tree.exists(item) and self.tree.item(item)['values'][0] == self.CHECKED
        ]
        if not files_to_process:
            messagebox.showwarning("No Selection", "Select CSV files before prediction.")
            return
        if not self._model_is_fitted():
            messagebox.showwarning("Model", "The model has not been trained yet. Upload a CSV with a 'Survived' column first.")
            return

        # Drop the rows of a previous run so results are not duplicated.
        for item in self.result_items:
            if self.tree.exists(item):
                self.tree.delete(item)
        self.result_items.clear()

        self.progress["value"] = 0
        self.smooth_value = 0
        self.target_progress = 0
        self.start_btn.config(state=DISABLED)
        self.cancel_btn.config(state=NORMAL)

        # The callbacks only enqueue work; animate_progress replays it on the
        # Tk thread. The lambdas look ``worker`` up when called, i.e. after
        # it has been assigned below.
        worker = PredictionWorker(files_to_process, self.model, {
            "found": lambda *args: self._post_to_ui(worker, self.add_result, *args),
            "progress": lambda value: self._post_to_ui(worker, self.set_target, value),
            "finished": lambda: self._post_to_ui(worker, self.finish),
        })
        self.worker_obj = worker
        threading.Thread(target=worker.run, daemon=True).start()

    def _post_to_ui(self, worker, func, *args):
        """Queue ``func(*args)`` for the Tk thread (callable from any thread)."""
        with self._ui_lock:
            self._ui_pending.append((worker, func, args))

    def _drain_ui_queue(self):
        """Run queued worker callbacks; must be called on the Tk thread."""
        with self._ui_lock:
            pending, self._ui_pending = self._ui_pending, []
        for worker, func, args in pending:
            # Ignore leftovers from a worker that was cancelled or replaced.
            if worker is self.worker_obj:
                func(*args)

    def add_result(self, file, row, pred):
        """Append one prediction row to the tree.

        Args:
            file: Path of the CSV the row came from (currently not shown).
            row: Preprocessed passenger row (supports ``.get``).
            pred: Model prediction; ``1`` is displayed as "Yes".
        """
        pid = row.get('PassengerId', '')
        name = row.get('Name', '')
        survived = "Yes" if pred == 1 else "No"
        item = self.tree.insert("", tk.END, values=(self.CHECKED, pid, name, survived))
        self.result_items.append(item)

    def set_target(self, v):
        """Set the percentage the progress bar should animate towards."""
        self.target_progress = v

    def animate_progress(self):
        """Periodic Tk timer: replay worker callbacks and advance the bar."""
        try:
            self._drain_ui_queue()
            if self.smooth_value < self.target_progress:
                self.smooth_value += 1
            self.progress["value"] = self.smooth_value
        finally:
            # Re-arm even if a callback failed, or the UI would stop updating.
            self.root.after(15, self.animate_progress)

    def cancel(self):
        """Stop the running worker and restore the idle button state."""
        if self.worker_obj:
            self.worker_obj.stop()
            # Forget the worker so its late callbacks are discarded.
            self.worker_obj = None
        self.start_btn.config(state=tk.NORMAL)
        self.cancel_btn.config(state=tk.DISABLED)
        # Freeze the bar where it is instead of pretending the run completed.
        self.target_progress = self.smooth_value

    def finish(self):
        """Mark the run as complete: idle buttons, progress bar at 100 %."""
        self.worker_obj = None
        self.start_btn.config(state=tk.NORMAL)
        self.cancel_btn.config(state=tk.DISABLED)
        # Keep the animation state in sync so the timer does not undo this.
        self.target_progress = 100
        self.smooth_value = 100
        self.progress["value"] = 100

    # ---------------------- Export ----------------------
    def export_results(self):
        """Write the checked prediction rows to a CSV chosen by the user.

        Queued-file placeholder rows are not exported.
        """
        selected = []
        for item in self.result_items:
            if not self.tree.exists(item):
                continue
            values = self.tree.item(item)['values']
            if values[0] == self.CHECKED:
                selected.append(values)
        if not selected:
            messagebox.showwarning("Export", "No selected rows to export")
            return
        path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV Files", "*.csv")])
        if path:
            df = pd.DataFrame(selected, columns=["Selected", "PassengerID", "Name", "Survived"])
            try:
                df.to_csv(path, index=False)
            except OSError as exc:
                messagebox.showerror("Export", f"Could not write {path}: {exc}")
                return
            messagebox.showinfo("Export", f"Exported {len(df)} rows to {path}")

    # ---------------------- About ----------------------
    def show_about(self):
        """Show the "About" dialog."""
        messagebox.showinfo(
            f"About {self.APP_NAME}",
            f"{self.APP_NAME} v{self.APP_VERSION}\n\n"
            "• Drag & drop CSV files\n"
            "• Auto-detect columns in any Titanic CSV\n"
            "• Smart auto-train model if train.csv is missing\n"
            "• Multi-file threaded prediction\n"
            "• Export prediction results to CSV\n\n"
            "🏢 Built for Learning Purposes"
        )

    # ---------------------- Styles ----------------------
    def _apply_styles(self):
        """Apply the dark theme and the custom progress-bar colours."""
        self.root.style = tb.Style(theme="darkly")
        self.root.style.configure("TProgressbar", troughcolor="#1b1f3a", background="#7c3aed", thickness=14)

    # ---------------------- Run ----------------------
    def run(self):
        """Enter the Tk main loop (blocks until the window is closed)."""
        self.root.mainloop()
343+
344+
# ---------------------- RUN ----------------------
345+
if __name__ == "__main__":
    # Script entry point: build the application window, then hand control
    # to its event loop until the window is closed.
    TitanicApp().run()

0 commit comments

Comments
 (0)