Skip to content

Commit 3e93172

Browse files
authored
Merge pull request #144 from JanhaviM-07/janhavi
Fix: Basics
2 parents f397267 + 889c9f8 commit 3e93172

45 files changed

Lines changed: 4171 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
"""Advanced capstone: Tkinter app with three ML windows (intentional practice bugs included)."""
2+
3+
from __future__ import annotations
4+
5+
from pathlib import Path
6+
7+
import numpy as np
8+
import pandas as pd
9+
10+
import tkinter as tk
11+
from tkinter import ttk, messagebox
12+
13+
from matplotlib.figure import Figure
14+
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
15+
16+
from sklearn.cluster import KMeans
17+
from sklearn.compose import ColumnTransformer
18+
from sklearn.impute import SimpleImputer
19+
from sklearn.linear_model import LinearRegression, LogisticRegression
20+
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, silhouette_score
21+
from sklearn.model_selection import train_test_split
22+
from sklearn.pipeline import Pipeline
23+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
24+
25+
# Dataset locations. The "assets" folder is resolved relative to this file:
# it sits beside the directory that contains this module.
_MODULE_DIR = Path(__file__).resolve().parent
ASSETS = _MODULE_DIR.parent / "assets"
REG_PATH = ASSETS.joinpath("ml_regression.csv")
CLS_PATH = ASSETS.joinpath("ml_classification.csv")
SALES_PATH = ASSETS.joinpath("sales.csv")
29+
30+
31+
# helper: DataFrame dimensions in (rows, columns) order
def quick_shape(df: pd.DataFrame) -> tuple[int, int]:
    """Return the dimensions of *df* as ``(rows, columns)``.

    Args:
        df: The frame to measure.

    Returns:
        A 2-tuple whose first item is the number of rows and whose second
        item is the number of columns.
    """
    # Rows first, then columns -- the order the docstring promises and the
    # order callers unpack (``r, c = quick_shape(df)``).
    return (len(df), len(df.columns))
35+
36+
37+
# helper: root-mean-squared error for regression predictions
def regression_rmse(y_true, y_pred) -> float:
    """Return the root-mean-squared error (RMSE) of regression predictions.

    RMSE is the square root of the mean of the squared residuals. It is
    computed directly with NumPy here.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values with the same number of
            elements as ``y_true``.

    Returns:
        The RMSE as a plain Python ``float``.

    Raises:
        ValueError: If the inputs are empty or contain a different number of
            elements.
    """
    # Flatten both sides so an (n,) target and an (n, 1) prediction are
    # compared element by element instead of being broadcast to (n, n).
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("regression_rmse requires at least one sample")
    if actual.size != predicted.size:
        raise ValueError(
            "y_true and y_pred must contain the same number of elements "
            f"(got {actual.size} and {predicted.size})"
        )

    residuals = actual - predicted
    return float(np.sqrt(np.mean(residuals**2)))
41+
42+
43+
class MLWindow(tk.Toplevel):
    """Base Toplevel window with a left info panel and a right plot panel.

    The left panel stacks a text summary above a scrollable table preview.
    The right panel holds an embedded Matplotlib canvas with a metrics label
    underneath. The helper methods below fill those widgets.
    """

    def __init__(self, master: tk.Tk, title: str):
        """Create the window under *master*, set *title* and build the widgets."""
        super().__init__(master)
        self.title(title)
        self.geometry("1080x620")

        # Widget handles; all of them are assigned in _build_layout().
        self.left_text: tk.Text | None = None
        self.tree: ttk.Treeview | None = None
        self.metric_label: ttk.Label | None = None
        self.figure: Figure | None = None
        self.ax = None
        self.canvas: FigureCanvasTkAgg | None = None

        self._build_layout()

    def _build_layout(self) -> None:
        """Build the two-column layout: summary + table left, plot + metrics right."""
        container = ttk.Frame(self, padding=10)
        container.pack(fill="both", expand=True)
        # Both columns share extra width equally; the single row takes all height.
        container.columnconfigure(0, weight=1)
        container.columnconfigure(1, weight=1)
        container.rowconfigure(0, weight=1)

        left = ttk.Frame(container)
        left.grid(row=0, column=0, sticky="nsew", padx=(0, 8))
        # Row 1 (the table) absorbs extra vertical space, not the summary text.
        left.rowconfigure(1, weight=1)
        left.columnconfigure(0, weight=1)

        self.left_text = tk.Text(left, height=10, width=50)
        self.left_text.grid(row=0, column=0, sticky="ew", pady=(0, 8))

        self.tree = ttk.Treeview(left, show="headings", height=18)
        self.tree.grid(row=1, column=0, sticky="nsew")
        scrollbar = ttk.Scrollbar(left, orient="vertical", command=self.tree.yview)
        scrollbar.grid(row=1, column=1, sticky="ns")
        self.tree.configure(yscrollcommand=scrollbar.set)

        right = ttk.Frame(container)
        right.grid(row=0, column=1, sticky="nsew")

        self.figure = Figure(figsize=(5.2, 4.2), dpi=100)
        self.ax = self.figure.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(self.figure, master=right)
        self.canvas.get_tk_widget().pack(fill="both", expand=True)

        self.metric_label = ttk.Label(
            right,
            text="Metrics: N/A",
            font=("TkDefaultFont", 10, "bold"),
        )
        self.metric_label.pack(anchor="w", pady=(6, 0))

    def fill_table(self, df: pd.DataFrame, n: int = 20) -> None:
        """Show the first *n* rows of *df* in the table widget.

        Existing rows are removed first and the table's columns are replaced
        by the columns of *df*. Does nothing when the table widget is unset.

        Args:
            df: Frame to preview.
            n: Maximum number of rows to display.
        """
        if self.tree is None:
            return
        self.tree.delete(*self.tree.get_children())
        cols = list(df.columns)
        self.tree["columns"] = cols
        for col in cols:
            self.tree.heading(col, text=col)
            self.tree.column(col, width=110, anchor="center")
        # itertuples() keeps each column's own dtype, whereas iterrows() builds
        # one Series per row and upcasts integers to float in numeric frames.
        # It also yields values positionally, so duplicate column labels are
        # harmless.
        for record in df.head(n).itertuples(index=False, name=None):
            self.tree.insert("", "end", values=list(record))

    def fill_summary(self, df: pd.DataFrame, title: str) -> None:
        """Write a short text summary of *df* into the left text widget.

        The summary lists *title*, the row/column counts reported by
        ``quick_shape``, the column names, and the mean and standard deviation
        of the first four numeric columns. Previous text is replaced. Does
        nothing when the text widget is unset.

        Args:
            df: Frame to summarise.
            title: Heading placed on the first line of the summary.
        """
        if self.left_text is None:
            return
        r, c = quick_shape(df)
        numeric = df.select_dtypes(include=[np.number]).columns.tolist()
        lines = [
            title,
            f"Rows: {r}",
            f"Columns: {c}",
            # Column labels are not guaranteed to be strings; str.join would
            # raise TypeError on e.g. integer labels.
            f"Column names: {', '.join(map(str, df.columns))}",
            "",
            "Basic stats (first 4 numeric columns):",
        ]
        lines.extend(
            f"- {col}: mean={df[col].mean():.3f}, std={df[col].std():.3f}"
            for col in numeric[:4]
        )

        self.left_text.delete("1.0", tk.END)
        self.left_text.insert("1.0", "\n".join(lines))

    def set_metrics(self, text: str) -> None:
        """Replace the metrics label text with *text* (no-op if the label is unset)."""
        if self.metric_label is not None:
            self.metric_label.config(text=text)
126+
127+
128+
class RegressionWindow(MLWindow):
    """Window that fits a linear regression and plots actual vs. predicted values."""

    def __init__(self, master: tk.Tk):
        """Open the window under *master* and render the demo immediately."""
        super().__init__(master, "Regression Demo")
        self.render()

    def render(self) -> None:
        """Load the regression CSV, fit the model, then draw the plot and metrics.

        If the dataset file is missing, an error dialog is shown and the
        window is destroyed instead.
        """
        if not REG_PATH.exists():
            messagebox.showerror("Missing dataset", f"Missing: {REG_PATH}")
            self.destroy()
            return

        frame = pd.read_csv(REG_PATH)
        self.fill_summary(frame, "Regression dataset")
        self.fill_table(frame)

        # Column "y" is the target; every other column is used as a feature.
        features = frame.drop(columns=["y"])
        target = frame["y"]
        feat_train, feat_test, target_train, target_test = train_test_split(
            features,
            target,
            test_size=0.25,
            random_state=42,
        )

        # Scale the features, then fit ordinary least squares.
        pipeline = Pipeline(
            steps=[
                ("scaler", StandardScaler()),
                ("model", LinearRegression()),
            ]
        )
        pipeline.fit(feat_train, target_train)
        predicted = pipeline.predict(feat_test)

        rmse = regression_rmse(target_test, predicted)
        mse = float(mean_squared_error(target_test, predicted))

        self._draw_actual_vs_predicted(target_test, predicted)
        self.set_metrics(f"Metrics: RMSE={rmse:.4f}, MSE={mse:.4f}")

    def _draw_actual_vs_predicted(self, actual, predicted) -> None:
        """Scatter predictions against actual values with a dashed identity line."""
        axes = self.ax
        axes.clear()
        axes.scatter(actual, predicted, alpha=0.7, color="tab:blue", label="pred")

        # The identity line spans the combined range of both series.
        low = min(actual.min(), predicted.min())
        high = max(actual.max(), predicted.max())
        axes.plot([low, high], [low, high], "r--", label="ideal")

        axes.set_title("Regression: Actual vs Predicted")
        axes.set_xlabel("Actual")
        axes.set_ylabel("Predicted")
        axes.legend()
        self.figure.tight_layout()
        self.canvas.draw()
173+
174+
175+
class ClassificationWindow(MLWindow):
    """Classification demonstration window.

    Fits a logistic-regression pipeline on the classification CSV and plots
    the predicted classes of the held-out rows in the ``x1``/``x2`` plane.
    """

    def __init__(self, master: tk.Tk):
        """Open the window under *master* and render the demo immediately."""
        super().__init__(master, "Classification Demo")
        self.render()

    def render(self) -> None:
        """Load the data, fit the classifier, then draw the plot and metrics.

        If the dataset file is missing, an error dialog is shown and the
        window is destroyed instead.
        """
        if not CLS_PATH.exists():
            messagebox.showerror("Missing dataset", f"Missing: {CLS_PATH}")
            self.destroy()
            return

        df = pd.read_csv(CLS_PATH)
        self.fill_summary(df, "Classification dataset")
        self.fill_table(df)

        # Column "label" is the target; every other column is a feature.
        X = df.drop(columns=["label"])
        y = df["label"]

        numeric_cols = X.select_dtypes(include=["number"]).columns.tolist()
        categorical_cols = X.select_dtypes(exclude=["number"]).columns.tolist()

        # Numeric columns: median-impute then scale.
        # Other columns: fill with the most frequent value then one-hot encode.
        pre = ColumnTransformer(
            transformers=[
                (
                    "num",
                    Pipeline([
                        ("imputer", SimpleImputer(strategy="median")),
                        ("scaler", StandardScaler()),
                    ]),
                    numeric_cols,
                ),
                (
                    "cat",
                    Pipeline([
                        ("imputer", SimpleImputer(strategy="most_frequent")),
                        ("onehot", OneHotEncoder(handle_unknown="ignore")),
                    ]),
                    categorical_cols,
                ),
            ]
        )

        model = Pipeline(
            steps=[
                ("pre", pre),
                ("clf", LogisticRegression(max_iter=1000)),
            ]
        )

        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=0.3,
            random_state=42,
            stratify=y,
        )
        model.fit(X_train, y_train)
        pred = model.predict(X_test)

        # Accuracy is the share of predictions that match the true labels.
        # Compare against the raw label values so the Series index cannot
        # interfere with the element-wise comparison.
        acc = float(np.mean(pred == y_test.to_numpy()))
        sk_acc = float(accuracy_score(y_test, pred))

        self.ax.clear()
        # NOTE(review): the plot assumes labels 0/1 and feature columns named
        # "x1" and "x2" -- confirm against the dataset.
        cls0 = X_test[pred == 0]
        cls1 = X_test[pred == 1]
        self.ax.scatter(cls0["x1"], cls0["x2"], alpha=0.65, color="tab:green", label="pred 0")
        self.ax.scatter(cls1["x1"], cls1["x2"], alpha=0.65, color="tab:orange", label="pred 1")
        self.ax.set_title("Classification: Predicted Classes")
        self.ax.set_xlabel("x1")
        self.ax.set_ylabel("x2")
        self.ax.legend()
        self.figure.tight_layout()
        self.canvas.draw()

        self.set_metrics(f"Metrics: accuracy={acc:.4f}, sklearn_acc={sk_acc:.4f}")
252+
253+
254+
class ClusteringWindow(MLWindow):
    """Third ML window: clustering demo on sales-style numeric features.

    Runs KMeans with three clusters on the first two numeric columns of the
    sales CSV, after standardising them, and plots the result.
    """

    def __init__(self, master: tk.Tk):
        """Open the window under *master* and render the demo immediately."""
        super().__init__(master, "Clustering Demo")
        self.render()

    def render(self) -> None:
        """Load the data, cluster it, then draw the plot and the silhouette score.

        If the dataset file is missing, or has fewer than two numeric columns,
        an error dialog is shown and the window is destroyed instead.
        """
        if not SALES_PATH.exists():
            messagebox.showerror("Missing dataset", f"Missing: {SALES_PATH}")
            self.destroy()
            return

        df = pd.read_csv(SALES_PATH)
        self.fill_summary(df, "Sales clustering dataset")
        self.fill_table(df)

        numeric = df.select_dtypes(include=[np.number])
        if numeric.shape[1] < 2:
            messagebox.showerror("Dataset error", "Sales dataset needs >=2 numeric columns for clustering")
            self.destroy()
            return

        # Only the first two numeric columns are clustered so the result can
        # be drawn directly on a 2D scatter plot.
        X = numeric.iloc[:, :2].to_numpy()
        scaler = StandardScaler()
        Xs = scaler.fit_transform(X)

        km = KMeans(n_clusters=3, random_state=42, n_init=10)
        labels = km.fit_predict(Xs)

        # Report the silhouette score as returned: higher means better
        # separated clusters, so flipping the sign would invert its meaning.
        sil = float(silhouette_score(Xs, labels))

        self.ax.clear()
        self.ax.scatter(Xs[:, 0], Xs[:, 1], c=labels, cmap="viridis", alpha=0.75)
        centers = km.cluster_centers_
        self.ax.scatter(centers[:, 0], centers[:, 1], color="red", marker="X", s=140, label="centers")
        self.ax.set_title("KMeans Clusters (scaled 2D features)")
        self.ax.set_xlabel(numeric.columns[0])
        self.ax.set_ylabel(numeric.columns[1])
        self.ax.legend()
        self.figure.tight_layout()
        self.canvas.draw()

        self.set_metrics(f"Metrics: silhouette={sil:.4f}")
298+
299+
300+
class AdvancedMLApp(tk.Tk):
    """Root launcher window with one button per ML demo window."""

    def __init__(self):
        """Create the root window and populate it with the launcher widgets."""
        super().__init__()
        self.title("Advanced ML Tkinter Boss")
        self.geometry("520x280")
        self._build_ui()

    def _build_ui(self) -> None:
        """Lay out the heading, the three launcher buttons and the dataset hint."""
        wrap = ttk.Frame(self, padding=16)
        wrap.pack(fill="both", expand=True)

        heading = ttk.Label(
            wrap,
            text="Open one ML demo window:",
            font=("TkDefaultFont", 12, "bold"),
        )
        heading.pack(anchor="w", pady=(0, 12))

        # (button caption, callback) pairs in display order. Each callback
        # opens the matching demo window as a child of this launcher.
        launchers = (
            ("Regression Window", lambda: RegressionWindow(self)),
            ("Classification Window", lambda: ClassificationWindow(self)),
            ("Clustering Window", lambda: ClusteringWindow(self)),
        )
        for caption, opener in launchers:
            button = ttk.Button(wrap, text=caption, command=opener)
            button.pack(fill="x", pady=6)

        dataset_hint = f"Datasets expected in assets/: {REG_PATH.name}, {CLS_PATH.name}, {SALES_PATH.name}"
        ttk.Label(wrap, text=dataset_hint).pack(anchor="w", pady=(12, 0))
327+
328+
329+
330+
def run_app() -> None:
    """Create the launcher window and enter its Tk main loop."""
    AdvancedMLApp().mainloop()


if __name__ == "__main__":
    # Start the GUI only when this file is executed as a script.
    run_app()

0 commit comments

Comments
 (0)