|
| 1 | +"""Advanced capstone: Tkinter app with three ML windows (intentional practice bugs included).""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | + |
| 10 | +import tkinter as tk |
| 11 | +from tkinter import ttk, messagebox |
| 12 | + |
| 13 | +from matplotlib.figure import Figure |
| 14 | +from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg |
| 15 | + |
| 16 | +from sklearn.cluster import KMeans |
| 17 | +from sklearn.compose import ColumnTransformer |
| 18 | +from sklearn.impute import SimpleImputer |
| 19 | +from sklearn.linear_model import LinearRegression, LogisticRegression |
| 20 | +from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, silhouette_score |
| 21 | +from sklearn.model_selection import train_test_split |
| 22 | +from sklearn.pipeline import Pipeline |
| 23 | +from sklearn.preprocessing import OneHotEncoder, StandardScaler |
| 24 | + |
# Dataset locations: an "assets" directory that sits beside the directory
# containing this module, holding one CSV per demo window.
ASSETS = Path(__file__).resolve().parent.parent.joinpath("assets")
REG_PATH = ASSETS.joinpath("ml_regression.csv")
CLS_PATH = ASSETS.joinpath("ml_classification.csv")
SALES_PATH = ASSETS.joinpath("sales.csv")
| 29 | + |
| 30 | + |
def quick_shape(df: pd.DataFrame) -> tuple[int, int]:
    """Return the dimensions of *df* as ``(rows, columns)``.

    The row count comes first, so callers can unpack the result as
    ``rows, cols = quick_shape(df)``.
    """
    return (len(df), len(df.columns))
| 35 | + |
| 36 | + |
def regression_rmse(y_true, y_pred) -> float:
    """Return the root-mean-squared error between targets and predictions.

    RMSE is ``sqrt(mean((y_true - y_pred) ** 2))``. Both arguments may be any
    array-like of numbers (lists, NumPy arrays, pandas Series). A 1-D vector
    and a single-column 2-D array with the same number of samples are
    treated as equivalent.

    Raises:
        ValueError: if either input is empty or the two inputs do not
            describe the same number of samples/outputs.
    """
    actual = np.atleast_1d(np.asarray(y_true, dtype=float))
    predicted = np.atleast_1d(np.asarray(y_pred, dtype=float))
    if actual.size == 0 or predicted.size == 0:
        raise ValueError("RMSE is undefined for empty inputs")

    # Normalise to (n_samples, n_outputs) so (n,) and (n, 1) compare equal
    # instead of silently broadcasting to an (n, n) difference matrix.
    actual = actual.reshape(actual.shape[0], -1)
    predicted = predicted.reshape(predicted.shape[0], -1)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred have inconsistent shapes: {actual.shape} vs {predicted.shape}"
        )

    return float(np.sqrt(np.mean((actual - predicted) ** 2)))
| 41 | + |
| 42 | + |
class MLWindow(tk.Toplevel):
    """Base Toplevel window shared by the ML demos.

    The left half holds a text summary of the dataset above a preview table;
    the right half holds a matplotlib canvas with a metrics label below it.
    Subclasses fill these widgets through ``fill_summary``, ``fill_table``,
    ``set_metrics`` and by drawing on ``self.ax``.
    """

    def __init__(self, master: tk.Tk, title: str):
        """Create the window under *master*, set its title and build the widgets."""
        super().__init__(master)
        self.title(title)
        self.geometry("1080x620")

        # Placeholders so the attributes exist before _build_layout() assigns
        # the real widgets.
        self.left_text: tk.Text | None = None
        self.tree: ttk.Treeview | None = None
        self.metric_label: ttk.Label | None = None
        self.figure: Figure | None = None
        self.ax = None
        self.canvas: FigureCanvasTkAgg | None = None

        self._build_layout()

    def _build_layout(self) -> None:
        """Create the two-column layout and all child widgets."""
        container = ttk.Frame(self, padding=10)
        container.pack(fill="both", expand=True)
        # Both columns share horizontal space equally.
        container.columnconfigure(0, weight=1)
        container.columnconfigure(1, weight=1)
        container.rowconfigure(0, weight=1)

        left = ttk.Frame(container)
        left.grid(row=0, column=0, sticky="nsew", padx=(0, 8))
        # Row 1 (the table) absorbs extra vertical space; the summary stays fixed.
        left.rowconfigure(1, weight=1)
        left.columnconfigure(0, weight=1)

        self.left_text = tk.Text(left, height=10, width=50)
        self.left_text.grid(row=0, column=0, sticky="ew", pady=(0, 8))

        self.tree = ttk.Treeview(left, show="headings", height=18)
        self.tree.grid(row=1, column=0, sticky="nsew")
        scrollbar = ttk.Scrollbar(left, orient="vertical", command=self.tree.yview)
        scrollbar.grid(row=1, column=1, sticky="ns")
        self.tree.configure(yscrollcommand=scrollbar.set)

        right = ttk.Frame(container)
        right.grid(row=0, column=1, sticky="nsew")

        self.figure = Figure(figsize=(5.2, 4.2), dpi=100)
        self.ax = self.figure.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(self.figure, master=right)
        self.canvas.get_tk_widget().pack(fill="both", expand=True)

        self.metric_label = ttk.Label(right, text="Metrics: N/A", font=("TkDefaultFont", 10, "bold"))
        self.metric_label.pack(anchor="w", pady=(6, 0))

    def fill_table(self, df: pd.DataFrame, n: int = 20) -> None:
        """Replace the preview table's contents with the first *n* rows of *df*.

        Does nothing if the table widget has not been created.
        """
        if self.tree is None:
            return
        self.tree.delete(*self.tree.get_children())
        cols = list(df.columns)
        self.tree["columns"] = cols
        for col in cols:
            self.tree.heading(col, text=col)
            self.tree.column(col, width=110, anchor="center")
        # itertuples() keeps each column's own dtype; iterrows() would build a
        # Series per row and upcast mixed int/float rows to float, so integer
        # cells would be displayed as e.g. "3.0".
        for values in df.head(n).itertuples(index=False, name=None):
            self.tree.insert("", "end", values=values)

    def fill_summary(self, df: pd.DataFrame, title: str) -> None:
        """Write *title*, the shape, column names and basic stats of *df* to the summary box.

        Mean and standard deviation are listed for at most the first four
        numeric columns. Does nothing if the text widget has not been created.
        """
        if self.left_text is None:
            return
        r, c = quick_shape(df)
        numeric = df.select_dtypes(include=[np.number]).columns.tolist()
        lines = [
            title,
            f"Rows: {r}",
            f"Columns: {c}",
            # str() each label: column labels are not guaranteed to be strings.
            f"Column names: {', '.join(map(str, df.columns))}",
            "",
            "Basic stats (first 4 numeric columns):",
        ]
        for col in numeric[:4]:
            lines.append(f"- {col}: mean={df[col].mean():.3f}, std={df[col].std():.3f}")

        self.left_text.delete("1.0", tk.END)
        self.left_text.insert("1.0", "\n".join(lines))

    def set_metrics(self, text: str) -> None:
        """Show *text* in the metrics label, if the label exists."""
        if self.metric_label is not None:
            self.metric_label.config(text=text)
| 126 | + |
| 127 | + |
class RegressionWindow(MLWindow):
    """Regression demonstration window.

    Loads the regression CSV, fits a scaled linear regression on every column
    except ``y``, and plots actual versus predicted values for the test split.
    """

    def __init__(self, master: tk.Tk):
        """Create the window under *master* and render the demo immediately."""
        super().__init__(master, "Regression Demo")
        self.render()

    def render(self) -> None:
        """Load the data, train the model, then fill the summary, table, plot and metrics.

        If the dataset file is missing or has no ``y`` column, an error dialog
        is shown and the window is destroyed.
        """
        if not REG_PATH.exists():
            messagebox.showerror("Missing dataset", f"Missing: {REG_PATH}")
            self.destroy()
            return

        df = pd.read_csv(REG_PATH)
        # Without this guard a malformed CSV raises KeyError inside the Tk
        # callback and leaves a half-populated window open.
        if "y" not in df.columns:
            messagebox.showerror("Dataset error", "Regression dataset needs a 'y' target column")
            self.destroy()
            return

        self.fill_summary(df, "Regression dataset")
        self.fill_table(df)

        X = df.drop(columns=["y"])
        y = df["y"]
        # Fixed random_state keeps the split (and the displayed metrics) reproducible.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

        pipe = Pipeline(
            steps=[
                ("scaler", StandardScaler()),
                ("model", LinearRegression()),
            ]
        )
        pipe.fit(X_train, y_train)
        pred = pipe.predict(X_test)

        rmse = regression_rmse(y_test, pred)
        mse = float(mean_squared_error(y_test, pred))

        self.ax.clear()
        self.ax.scatter(y_test, pred, alpha=0.7, color="tab:blue", label="pred")
        # Diagonal spanning the combined value range: points on it are perfect predictions.
        lims = [min(y_test.min(), pred.min()), max(y_test.max(), pred.max())]
        self.ax.plot(lims, lims, "r--", label="ideal")
        self.ax.set_title("Regression: Actual vs Predicted")
        self.ax.set_xlabel("Actual")
        self.ax.set_ylabel("Predicted")
        self.ax.legend()
        self.figure.tight_layout()
        self.canvas.draw()

        self.set_metrics(f"Metrics: RMSE={rmse:.4f}, MSE={mse:.4f}")
| 173 | + |
| 174 | + |
class ClassificationWindow(MLWindow):
    """Classification demonstration window.

    Loads the classification CSV, fits a logistic regression behind an
    impute/scale/one-hot preprocessing step, and plots the test rows in the
    ``x1``/``x2`` plane coloured by predicted class (0 or 1).
    """

    def __init__(self, master: tk.Tk):
        """Create the window under *master* and render the demo immediately."""
        super().__init__(master, "Classification Demo")
        self.render()

    def render(self) -> None:
        """Load the data, train the classifier, then fill the summary, table, plot and metrics.

        If the dataset file is missing, or lacks the ``label``, ``x1`` or
        ``x2`` columns this method relies on, an error dialog is shown and the
        window is destroyed.
        """
        if not CLS_PATH.exists():
            messagebox.showerror("Missing dataset", f"Missing: {CLS_PATH}")
            self.destroy()
            return

        df = pd.read_csv(CLS_PATH)
        # "label" is the target; "x1"/"x2" are the plot axes. Fail with a
        # dialog rather than a KeyError traceback inside the Tk callback.
        missing = [name for name in ("label", "x1", "x2") if name not in df.columns]
        if missing:
            messagebox.showerror(
                "Dataset error",
                f"Classification dataset is missing columns: {', '.join(missing)}",
            )
            self.destroy()
            return

        self.fill_summary(df, "Classification dataset")
        self.fill_table(df)

        X = df.drop(columns=["label"])
        y = df["label"]

        numeric_cols = X.select_dtypes(include=["number"]).columns.tolist()
        categorical_cols = X.select_dtypes(exclude=["number"]).columns.tolist()

        pre = ColumnTransformer(
            transformers=[
                (
                    "num",
                    Pipeline([
                        ("imputer", SimpleImputer(strategy="median")),
                        ("scaler", StandardScaler()),
                    ]),
                    numeric_cols,
                ),
                (
                    "cat",
                    Pipeline([
                        ("imputer", SimpleImputer(strategy="most_frequent")),
                        ("onehot", OneHotEncoder(handle_unknown="ignore")),
                    ]),
                    categorical_cols,
                ),
            ]
        )

        model = Pipeline(
            steps=[
                ("pre", pre),
                ("clf", LogisticRegression(max_iter=1000)),
            ]
        )

        # Stratify so both splits keep the label proportions of the full dataset.
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=0.3,
            random_state=42,
            stratify=y,
        )
        model.fit(X_train, y_train)
        pred = model.predict(X_test)

        # Manual accuracy: fraction of predictions equal to the true labels.
        # It should agree with sklearn's accuracy_score below.
        acc = float(np.mean(pred == y_test.to_numpy()))
        sk_acc = float(accuracy_score(y_test, pred))

        self.ax.clear()
        cls0 = X_test[pred == 0]
        cls1 = X_test[pred == 1]
        self.ax.scatter(cls0["x1"], cls0["x2"], alpha=0.65, color="tab:green", label="pred 0")
        self.ax.scatter(cls1["x1"], cls1["x2"], alpha=0.65, color="tab:orange", label="pred 1")
        self.ax.set_title("Classification: Predicted Classes")
        self.ax.set_xlabel("x1")
        self.ax.set_ylabel("x2")
        self.ax.legend()
        self.figure.tight_layout()
        self.canvas.draw()

        self.set_metrics(f"Metrics: accuracy={acc:.4f}, sklearn_acc={sk_acc:.4f}")
| 252 | + |
| 253 | + |
class ClusteringWindow(MLWindow):
    """Third ML window: KMeans clustering demo on the sales dataset.

    Clusters the first two numeric columns (after standardisation) into three
    groups and plots the scaled points together with the cluster centres.
    """

    def __init__(self, master: tk.Tk):
        """Create the window under *master* and render the demo immediately."""
        super().__init__(master, "Clustering Demo")
        self.render()

    def render(self) -> None:
        """Load the data, run KMeans, then fill the summary, table, plot and metrics.

        If the dataset file is missing or has fewer than two numeric columns,
        an error dialog is shown and the window is destroyed.
        """
        if not SALES_PATH.exists():
            messagebox.showerror("Missing dataset", f"Missing: {SALES_PATH}")
            self.destroy()
            return

        df = pd.read_csv(SALES_PATH)
        self.fill_summary(df, "Sales clustering dataset")
        self.fill_table(df)

        numeric = df.select_dtypes(include=[np.number])
        if numeric.shape[1] < 2:
            messagebox.showerror("Dataset error", "Sales dataset needs >=2 numeric columns for clustering")
            self.destroy()
            return

        # Only the first two numeric columns are used so the clusters can be
        # drawn directly on a 2D scatter plot.
        X = numeric.iloc[:, :2].to_numpy()
        scaler = StandardScaler()
        Xs = scaler.fit_transform(X)

        km = KMeans(n_clusters=3, random_state=42, n_init=10)
        labels = km.fit_predict(Xs)

        # Report the silhouette score as returned: higher means better-separated
        # clusters, so flipping the sign would invert its meaning.
        sil = float(silhouette_score(Xs, labels))

        self.ax.clear()
        self.ax.scatter(Xs[:, 0], Xs[:, 1], c=labels, cmap="viridis", alpha=0.75)
        centers = km.cluster_centers_
        self.ax.scatter(centers[:, 0], centers[:, 1], color="red", marker="X", s=140, label="centers")
        self.ax.set_title("KMeans Clusters (scaled 2D features)")
        self.ax.set_xlabel(numeric.columns[0])
        self.ax.set_ylabel(numeric.columns[1])
        self.ax.legend()
        self.figure.tight_layout()
        self.canvas.draw()

        self.set_metrics(f"Metrics: silhouette={sil:.4f}")
| 298 | + |
| 299 | + |
class AdvancedMLApp(tk.Tk):
    """Root launcher window offering one button per ML demo window."""

    def __init__(self):
        """Create the root window, then populate it with the launcher widgets."""
        super().__init__()
        self.title("Advanced ML Tkinter Boss")
        self.geometry("520x280")
        self._build_ui()

    def _build_ui(self) -> None:
        """Lay out the heading, the three launcher buttons and the dataset hint."""
        frame = ttk.Frame(self, padding=16)
        frame.pack(fill="both", expand=True)

        heading = ttk.Label(frame, text="Open one ML demo window:", font=("TkDefaultFont", 12, "bold"))
        heading.pack(anchor="w", pady=(0, 12))

        # Button caption -> window class opened (with this root as master) on click.
        launchers = (
            ("Regression Window", RegressionWindow),
            ("Classification Window", ClassificationWindow),
            ("Clustering Window", ClusteringWindow),
        )
        for caption, window_cls in launchers:
            # Bind the class as a default argument so each button keeps its own
            # target instead of all sharing the loop's last value.
            button = ttk.Button(frame, text=caption, command=lambda cls=window_cls: cls(self))
            button.pack(fill="x", pady=6)

        hint = ttk.Label(
            frame,
            text=f"Datasets expected in assets/: {REG_PATH.name}, {CLS_PATH.name}, {SALES_PATH.name}",
        )
        hint.pack(anchor="w", pady=(12, 0))
| 327 | + |
| 328 | + |
| 329 | + |
def run_app() -> None:
    """Create the launcher window and run its Tk event loop until it exits."""
    AdvancedMLApp().mainloop()
| 334 | + |
| 335 | + |
# Launch the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_app()
0 commit comments