"""
Titanic Survival Predictor v3.1
ML-based Survival Prediction GUI
Auto-detects columns and can auto-train on any uploaded CSV
Fully compatible with pandas 3.x (no FutureWarnings)
"""

import os, sys, threading
import queue
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import numpy as np
import pandas as pd
import ttkbootstrap as tb
from ttkbootstrap.constants import *
from sklearn.ensemble import RandomForestClassifier

try:
    from tkinterdnd2 import TkinterDnD, DND_FILES
    DND_ENABLED = True
except ImportError:
    DND_ENABLED = False
    print("Drag & Drop requires tkinterdnd2: pip install tkinterdnd2")

# ---------------------- UTIL ----------------------
def resource_path(file_name: str) -> str:
    """Return the path of ``file_name`` inside the application's base folder.

    The base folder is ``sys._MEIPASS`` when that attribute exists (the
    attribute PyInstaller sets for frozen bundles); otherwise it is the
    directory that contains this source file.

    Args:
        file_name: File name (or relative path) to resolve.

    Returns:
        ``file_name`` joined onto the base folder.
    """
    base_path = getattr(sys, "_MEIPASS", None)
    if base_path is None:
        # Not running from a bundle: resolve next to this module.
        base_path = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(base_path, file_name)

# ---------------------- WORKER ----------------------
class PredictionWorker:
    """Predict survival for every row of a list of CSV files.

    The worker reads each file, normalises its columns with
    :meth:`_preprocess`, asks ``model.predict`` for one prediction per row
    and reports back through the ``callbacks`` mapping.  All keys are
    optional:

    * ``'found'(path, row, pred)`` - called once per predicted row; ``row``
      is the preprocessed row as a pandas Series.
    * ``'progress'(percent)`` - called after each file with an int 0-100.
    * ``'error'(path, exc)`` - called when a file could not be processed.
    * ``'finished'()`` - called once when :meth:`run` is done or stopped.

    Callbacks are invoked on whichever thread executes :meth:`run`.
    """

    # Feature columns handed to the model, in this order.
    FEATURES = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']

    # Canonical column name -> accepted spellings, tried in order.
    COLUMN_ALIASES = {
        'PassengerId': ['PassengerId', 'passengerid', 'Passenger ID', 'pid'],
        'Name': ['Name', 'FullName', 'full_name'],
        'Pclass': ['Pclass', 'Class'],
        'Sex': ['Sex', 'Gender'],
        'Age': ['Age', 'age'],
        'SibSp': ['SibSp', 'Siblings/Spouses'],
        'Parch': ['Parch', 'Parents/Children'],
        'Fare': ['Fare', 'fare'],
    }

    # Columns that default to 0 when absent and are coerced to numbers.
    NUMERIC_COLUMNS = ('Pclass', 'Age', 'SibSp', 'Parch', 'Fare')

    def __init__(self, files, model, callbacks):
        """Store the work list, the model and the callback mapping.

        Args:
            files: Sequence of CSV paths to process.
            model: Object exposing ``predict(X)``.
            callbacks: Mapping of callback names to callables (may be None).
        """
        self.files = files
        self.model = model
        self.callbacks = callbacks if callbacks is not None else {}
        self._running = True

    def stop(self):
        """Ask :meth:`run` to stop before the next row or file."""
        self._running = False

    def run(self):
        """Process every file and report rows, progress, errors and completion."""
        on_found = self.callbacks.get('found')
        on_progress = self.callbacks.get('progress')
        on_error = self.callbacks.get('error')
        on_finished = self.callbacks.get('finished')

        total_files = len(self.files)
        for i, path in enumerate(self.files):
            if not self._running:
                break
            try:
                df_pre = self._preprocess(pd.read_csv(path))
                preds = self.model.predict(df_pre[self.FEATURES])
                for idx, pred in enumerate(preds):
                    if not self._running:
                        break
                    if on_found is not None:
                        on_found(path, df_pre.iloc[idx], pred)
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch.
                print(f"Error processing {path}: {e}")
                if on_error is not None:
                    on_error(path, e)
            if on_progress is not None:
                on_progress(int((i + 1) / total_files * 100))
        if on_finished is not None:
            on_finished()

    def _preprocess(self, df):
        """Return a copy of ``df`` with canonical, model-ready columns.

        Every key of ``COLUMN_ALIASES`` is guaranteed to exist afterwards.
        An alias is matched exactly first and then case-insensitively;
        missing numeric columns become 0 and missing text columns ``""``.
        ``Sex`` is encoded as male=0 / female=1 (anything else 0), numeric
        columns are coerced with unparseable values treated as missing, Age
        and Fare gaps take the column median (0 if there is none), and
        SibSp/Parch/Pclass gaps become 0.

        Args:
            df: Raw DataFrame; it is not modified.

        Returns:
            The normalised DataFrame.
        """
        df = df.copy()

        # Lower-cased header -> real header, used as a fallback so that
        # e.g. "sex" or "FARE" are still recognised.
        by_lower = {}
        for col in df.columns:
            by_lower.setdefault(str(col).strip().lower(), col)

        for key, options in self.COLUMN_ALIASES.items():
            source = next((opt for opt in options if opt in df.columns), None)
            if source is None:
                source = next(
                    (by_lower[opt.lower()] for opt in options
                     if opt.lower() in by_lower),
                    None,
                )
            if source is not None:
                df[key] = df[source]
            elif key in self.NUMERIC_COLUMNS:
                df[key] = 0
            else:
                df[key] = ""

        sex = df['Sex']
        if pd.api.types.is_numeric_dtype(sex):
            # Already encoded; only fill the gaps.
            df['Sex'] = sex.fillna(0)
        else:
            codes = (sex.astype(str).str.strip().str.lower()
                     .map({'male': 0, 'female': 1}))
            df['Sex'] = codes.fillna(0).astype(float)

        # Text such as "unknown" would make median() raise; turn it into NaN.
        for col in self.NUMERIC_COLUMNS:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Results are assigned back rather than filled in place.
        for col in ['Age', 'Fare']:
            median = df[col].median()
            df[col] = df[col].fillna(0 if pd.isna(median) else median)
        for col in ['SibSp', 'Parch', 'Pclass']:
            df[col] = df[col].fillna(0)

        return df

# ---------------------- MAIN APP ----------------------
class TitanicApp:
    """Tkinter GUI that queues CSV files and shows survival predictions.

    Files are queued through the Browse button or drag & drop, predicted on
    a background thread by ``PredictionWorker`` and listed in a Treeview
    whose checked result rows can be exported to CSV.

    Widgets are only touched on the Tk main loop: the worker's callbacks
    push events onto ``self._events`` and :meth:`animate_progress` (a 15 ms
    timer) drains them.
    """

    APP_NAME = "Titanic Survival Predictor"
    APP_VERSION = "3.1"
    SUPPORTED_EXT = (".csv",)

    # Feature columns handed to the model, in this order.
    FEATURES = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']

    # Canonical column name -> accepted spellings, tried in order.
    COLUMN_ALIASES = {
        'PassengerId': ['PassengerId', 'passengerid', 'Passenger ID', 'pid'],
        'Name': ['Name', 'FullName', 'full_name'],
        'Pclass': ['Pclass', 'Class'],
        'Sex': ['Sex', 'Gender'],
        'Age': ['Age', 'age'],
        'SibSp': ['SibSp', 'Siblings/Spouses'],
        'Parch': ['Parch', 'Parents/Children'],
        'Fare': ['Fare', 'fare'],
    }

    # Columns that default to 0 when absent and are coerced to numbers.
    NUMERIC_COLUMNS = ('Pclass', 'Age', 'SibSp', 'Parch', 'Fare')

    def __init__(self):
        """Create the root window, load or prepare the model, build the UI."""
        # Root window
        if DND_ENABLED:
            self.root = TkinterDnD.Tk()
        else:
            self.root = tb.Window(themename="darkly")

        self.root.title(f"{self.APP_NAME} v{self.APP_VERSION}")
        self.root.minsize(1000, 600)

        self.worker_obj = None
        self.smooth_value = 0
        self.target_progress = 0
        self.file_set = set()

        self._queued_items = {}              # tree item id -> full CSV path
        self._events = queue.SimpleQueue()   # (run_id, callback, args) from the worker
        self._run_id = 0                     # bumped per run/cancel to drop stale events
        self._busy = False                   # True while a prediction run is active

        # Load model
        self.model = self._load_or_train_model()

        self._build_ui()
        self._apply_styles()

    # ---------------------- MODEL ----------------------
    def _load_or_train_model(self):
        """Return a RandomForestClassifier, fitted on train.csv when possible.

        When ``train.csv`` is missing, unreadable or has no usable
        ``Survived`` column, the returned model is left unfitted so that the
        first suitable uploaded CSV can train it.
        """
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        train_file = resource_path("train.csv")
        if not os.path.exists(train_file):
            # No train.csv → smart retrain on first uploaded CSV
            messagebox.showinfo("Model", "train.csv not found. The first CSV uploaded will be used to train the model automatically.")
            return model

        try:
            df = pd.read_csv(train_file)
        except Exception as exc:
            messagebox.showwarning("Model", f"train.csv could not be read: {exc}")
            return model

        if self._fit_model(model, df):
            messagebox.showinfo("Model", "Loaded train.csv for model training.")
        else:
            messagebox.showwarning(
                "Model",
                "train.csv has no usable 'Survived' column. The first uploaded CSV "
                "that has one will be used to train the model.",
            )
        return model

    def _fit_model(self, model, df):
        """Fit ``model`` on ``df`` and report whether training happened.

        Training needs real labels: a frame without a ``Survived`` column, or
        whose labels are all missing, is rejected instead of being fitted on
        made-up zeros.

        Returns:
            True if ``model.fit`` was called, False otherwise.
        """
        if 'Survived' not in df.columns:
            return False
        df = self._preprocess(df)
        labels = pd.to_numeric(df['Survived'], errors='coerce')
        usable = labels.notna()
        if not usable.any():
            return False
        model.fit(df.loc[usable, self.FEATURES], labels[usable].astype(int))
        return True

    def _model_is_trained(self):
        """Return True once the model has been fitted."""
        # scikit-learn ensembles gain ``estimators_`` when fit() completes.
        return hasattr(self.model, 'estimators_')

    def _preprocess(self, df):
        """Return a copy of ``df`` with canonical, model-ready columns.

        Every key of ``COLUMN_ALIASES`` is guaranteed to exist afterwards.
        An alias is matched exactly first and then case-insensitively;
        missing numeric columns become 0 and missing text columns ``""``.
        ``Sex`` is encoded as male=0 / female=1 (anything else 0), numeric
        columns are coerced with unparseable values treated as missing, Age
        and Fare gaps take the column median (0 if there is none), and
        SibSp/Parch/Pclass gaps become 0.  ``df`` itself is not modified.
        """
        df = df.copy()

        # Lower-cased header -> real header, used as a fallback so that
        # e.g. "sex" or "FARE" are still recognised.
        by_lower = {}
        for col in df.columns:
            by_lower.setdefault(str(col).strip().lower(), col)

        for key, options in self.COLUMN_ALIASES.items():
            source = next((opt for opt in options if opt in df.columns), None)
            if source is None:
                source = next(
                    (by_lower[opt.lower()] for opt in options
                     if opt.lower() in by_lower),
                    None,
                )
            if source is not None:
                df[key] = df[source]
            elif key in self.NUMERIC_COLUMNS:
                df[key] = 0
            else:
                df[key] = ""

        sex = df['Sex']
        if pd.api.types.is_numeric_dtype(sex):
            # Already encoded; only fill the gaps.
            df['Sex'] = sex.fillna(0)
        else:
            codes = (sex.astype(str).str.strip().str.lower()
                     .map({'male': 0, 'female': 1}))
            df['Sex'] = codes.fillna(0).astype(float)

        # Text such as "unknown" would make median() raise; turn it into NaN.
        for col in self.NUMERIC_COLUMNS:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Results are assigned back rather than filled in place.
        for col in ['Age', 'Fare']:
            median = df[col].median()
            df[col] = df[col].fillna(0 if pd.isna(median) else median)
        for col in ['SibSp', 'Parch', 'Pclass']:
            df[col] = df[col].fillna(0)

        return df

    # ---------------------- UI ----------------------
    def _build_ui(self):
        """Create the header, the button row, the progress bar and the table."""
        main = tb.Frame(self.root, padding=10)
        main.pack(fill=tk.BOTH, expand=True)

        tb.Label(main, text=f"🛳 {self.APP_NAME}",
                 font=("Segoe UI", 22, "bold")).pack(pady=(0, 4))
        tb.Label(main, text="Predict Titanic Passenger Survival",
                 font=("Segoe UI", 10, "italic"), foreground="#9ca3af").pack(pady=(0, 20))

        # Row 1: File selection
        row1 = tb.Frame(main)
        row1.pack(fill=tk.X, pady=(0, 6))

        self.path_input = tb.Entry(row1, width=80)
        self.path_input.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 6))
        self.path_input.insert(0, "Drag & drop CSV files here…")

        browse_btn = tb.Button(row1, text="📂 Browse", bootstyle=INFO, command=self.browse)
        browse_btn.pack(side=tk.LEFT, padx=3)

        self.start_btn = tb.Button(row1, text="🚀 Start Prediction", bootstyle=SUCCESS, command=self.start)
        self.start_btn.pack(side=tk.LEFT, padx=3)

        self.cancel_btn = tb.Button(row1, text="⏹ Cancel", bootstyle=DANGER, command=self.cancel)
        self.cancel_btn.pack(side=tk.LEFT, padx=3)
        self.cancel_btn.config(state=DISABLED)

        export_btn = tb.Button(row1, text="💾 Export Results", bootstyle=PRIMARY, command=self.export_results)
        export_btn.pack(side=tk.LEFT, padx=3)

        about_btn = tb.Button(row1, text="ℹ️ About", bootstyle=INFO, command=self.show_about)
        about_btn.pack(side=tk.LEFT, padx=3)

        # Progress bar
        self.progress = tb.Progressbar(main, bootstyle="success-striped", maximum=100)
        self.progress.pack(fill=tk.X, pady=(0, 6))

        # Treeview
        columns = ("selected", "PassengerID", "Name", "Prediction")
        self.tree = ttk.Treeview(main, columns=columns, show="headings", selectmode="extended", height=20)
        self.tree.heading("selected", text="✅")
        self.tree.heading("PassengerID", text="Passenger ID", anchor=W)
        self.tree.heading("Name", text="Name", anchor=W)
        self.tree.heading("Prediction", text="Survived", anchor=W)
        self.tree.column("selected", width=50, anchor=tk.CENTER)
        self.tree.column("PassengerID", width=80)
        self.tree.column("Name", width=400)
        self.tree.column("Prediction", width=100)
        self.tree.pack(fill=tk.BOTH, expand=True, pady=(0, 6))

        # Timer that animates the bar and delivers worker events.
        self.root.after(15, self.animate_progress)

        # Drag & drop
        if DND_ENABLED:
            self.tree.drop_target_register(DND_FILES)
            self.tree.dnd_bind("<<Drop>>", self.on_drop)

    # ---------------------- Browse / Drop ----------------------
    def browse(self):
        """Let the user pick CSV files and queue them."""
        files = filedialog.askopenfilenames(title="Select CSV Files", filetypes=[("CSV Files", "*.csv")])
        if files:
            self._process_uploaded_files(files)

    def on_drop(self, event):
        """Queue the files carried by a drag & drop event."""
        files = self.root.tk.splitlist(event.data)
        self._process_uploaded_files(files)

    def _process_uploaded_files(self, files):
        """Queue new supported files and train the model if it is untrained.

        Files with an unsupported extension or already queued are ignored.
        Each accepted file gets a "Queued" row whose item id is remembered
        together with the full path, so files that share a base name stay
        distinct.
        """
        added = []
        for f in files:
            path = os.path.normpath(f)
            if not path.lower().endswith(self.SUPPORTED_EXT) or path in self.file_set:
                continue
            self.file_set.add(path)
            item = self.tree.insert("", tk.END, values=("☑️", "", os.path.basename(path), "Queued"))
            self._queued_items[item] = path
            added.append(path)

        if not self._busy:
            # Leave the buttons alone while a run is in progress.
            self.start_btn.config(state=tk.NORMAL)
            self.cancel_btn.config(state=tk.DISABLED)
        self.path_input.delete(0, tk.END)
        self.path_input.insert(0, f"{len(self.file_set)} files queued")

        # Smart retrain if model has no training
        if added and not self._model_is_trained():
            self._smart_retrain(added)

    def _smart_retrain(self, paths):
        """Train the model on the first of ``paths`` that carries labels.

        Returns:
            True if the model was fitted, False if no file was usable.
        """
        for path in paths:
            try:
                df = pd.read_csv(path)
            except Exception as exc:
                print(f"Error reading {path}: {exc}")
                continue
            if 'Survived' not in df.columns:
                continue
            messagebox.showinfo("Smart Retrain", f"Training model using first uploaded CSV: {path}")
            if self._fit_model(self.model, df):
                return True
        messagebox.showwarning(
            "Smart Retrain",
            "None of the uploaded CSV files has a usable 'Survived' column, "
            "so the model could not be trained yet.",
        )
        return False

    # ---------------------- Actions ----------------------
    def start(self):
        """Start predicting every checked queued file on a background thread."""
        files_to_process = [
            self._queued_items[item]
            for item in self.tree.get_children()
            if item in self._queued_items and self.tree.set(item, "selected") == "☑️"
        ]
        if not files_to_process:
            messagebox.showwarning("No Selection", "Select CSV files before prediction.")
            return
        if not self._model_is_trained():
            messagebox.showwarning(
                "Model",
                "The model has not been trained yet. Upload a CSV that contains "
                "a 'Survived' column first.",
            )
            return

        self.progress["value"] = 0
        self.smooth_value = 0
        self.target_progress = 0
        self.start_btn.config(state=DISABLED)
        self.cancel_btn.config(state=NORMAL)
        self._busy = True

        # Events are stamped with the run id so that anything a cancelled or
        # superseded worker still emits is ignored.
        self._run_id += 1
        run_id = self._run_id
        self.worker_obj = PredictionWorker(files_to_process, self.model, {
            "found": lambda file, row, pred: self._post(run_id, self.add_result, file, row, pred),
            "progress": lambda v: self._post(run_id, self.set_target, v),
            "finished": lambda: self._post(run_id, self.finish),
        })
        threading.Thread(target=self.worker_obj.run, daemon=True).start()

    def _post(self, run_id, callback, *args):
        """Queue ``callback(*args)`` for the main loop (callable from any thread)."""
        self._events.put((run_id, callback, args))

    def _drain_events(self, limit=500):
        """Run up to ``limit`` queued worker events that belong to the current run."""
        for _ in range(limit):
            try:
                run_id, callback, args = self._events.get_nowait()
            except queue.Empty:
                break
            if run_id == self._run_id:
                callback(*args)

    def add_result(self, file, row, pred):
        """Append one prediction row to the table."""
        pid = row.get('PassengerId', '')
        name = row.get('Name', '')
        survived = "Yes" if pred == 1 else "No"
        self.tree.insert("", tk.END, values=("☑️", pid, name, survived))

    def set_target(self, v):
        """Set the percentage the progress bar animates towards."""
        self.target_progress = v

    def animate_progress(self):
        """Deliver pending worker events, advance the bar, and reschedule."""
        try:
            self._drain_events()
            if self.smooth_value < self.target_progress:
                self.smooth_value += 1
                self.progress["value"] = self.smooth_value
        finally:
            # Keep the timer alive even if a callback raised.
            self.root.after(15, self.animate_progress)

    def cancel(self):
        """Stop the running worker and reset the controls."""
        if self.worker_obj:
            self.worker_obj.stop()
        self._run_id += 1  # discard whatever the stopped worker already queued
        self.finish()

    def finish(self):
        """Re-enable Start, disable Cancel and fill the progress bar."""
        self._busy = False
        self.start_btn.config(state=tk.NORMAL)
        self.cancel_btn.config(state=tk.DISABLED)
        # Keep the animation state in step so the bar is not rewound.
        self.smooth_value = 100
        self.target_progress = 100
        self.progress["value"] = 100

    # ---------------------- Export ----------------------
    def export_results(self):
        """Write the checked prediction rows to a CSV file chosen by the user."""
        selected = [
            self.tree.item(item)['values']
            for item in self.tree.get_children()
            if item not in self._queued_items and self.tree.set(item, "selected") == "☑️"
        ]
        if not selected:
            messagebox.showwarning("Export", "No selected rows to export")
            return
        path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV Files", "*.csv")])
        if path:
            df = pd.DataFrame(selected, columns=["Selected", "PassengerID", "Name", "Survived"])
            df.to_csv(path, index=False)
            messagebox.showinfo("Export", f"Exported {len(df)} rows to {path}")

    # ---------------------- About ----------------------
    def show_about(self):
        """Show the About dialog."""
        messagebox.showinfo(
            f"About {self.APP_NAME}",
            f"{self.APP_NAME} v{self.APP_VERSION}\n\n"
            "• Drag & drop CSV files\n"
            "• Auto-detect columns in any Titanic CSV\n"
            "• Smart auto-train model if train.csv is missing\n"
            "• Multi-file threaded prediction\n"
            "• Export prediction results to CSV\n\n"
            "🏢 Built for Learning Purposes"
        )

    # ---------------------- Styles ----------------------
    def _apply_styles(self):
        """Apply the "darkly" theme and the custom progress bar colours."""
        self.root.style = tb.Style(theme="darkly")
        self.root.style.configure("TProgressbar", troughcolor="#1b1f3a", background="#7c3aed", thickness=14)

    # ---------------------- Run ----------------------
    def run(self):
        """Enter the Tk main loop."""
        self.root.mainloop()

# ---------------------- RUN ----------------------
# Script entry point: build the application and hand control to Tk.
if __name__ == "__main__":
    app = TitanicApp()
    app.run()
# (scraped page footer) 0 commit comments