diff --git a/README.md b/README.md index 38ab2a2..6b4704b 100644 --- a/README.md +++ b/README.md @@ -103,4 +103,5 @@ Follow these simple steps to contribute: 6. **Open a Pull Request (PR)** Go to your fork on GitHub → Click “Compare & Pull Request” Add a clear title and description of what you changed. - Submit the PR for review ✅ \ No newline at end of file + Submit the PR for review ✅ + done \ No newline at end of file diff --git a/models/linear_regressuin.py b/models/linear_regressuin.py index fe3b53a..8ebb33a 100644 --- a/models/linear_regressuin.py +++ b/models/linear_regressuin.py @@ -1,23 +1,14 @@ -# Contributing Guide - -We ❤️ contributions! This project is part of **Hacktoberfest**. - -## Steps to Contribute -1. Fork the repo -2. Create a new branch (`git checkout -b feature-model`) -3. Add your model/page under `/pages` -4. Use helper functions from `/utils` -5. Commit and push (`git push origin feature-model`) -6. Open a Pull Request (PR) - -## What You Can Work On -- Add a new ML model (e.g., Decision Tree, KNN, SVM, etc.) -- Improve plotting helpers -- Add more datasets to `data_helpers` -- Enhance UI/UX in Streamlit - -## Labels -- `good first issue` → beginner-friendly -- `feature` → add a new model -- `bug` → fix something broken -- `documentation` → improve docs +# models/linear_regression_model.py +from sklearn.linear_model import LinearRegression +import numpy as np + +# Train a simple model for demonstration +model = LinearRegression() +X = np.array([[1], [2], [3], [4], [5]]) +y = np.array([2, 4, 6, 8, 10]) +model.fit(X, y) + +def predict(features): + arr = np.array(features).reshape(1, -1) + prediction = model.predict(arr) + return prediction.tolist() diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md new file mode 100644 index 0000000..218c17d --- /dev/null +++ b/pages/Linear_Regression.md @@ -0,0 +1,104 @@ +# pages/Logistic_Regression.py + +import streamlit as st +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.metrics import confusion_matrix, roc_auc_score +import seaborn as sns +import matplotlib.pyplot as plt +import numpy as np + +# Import existing helpers +from utils.data_helpers import generate_classification_dataset +from utils.plot_helpers import plot_roc_curve + +# ------------------------------- +# 🏷️ Page Configuration +# ------------------------------- +st.set_page_config(page_title="Logistic Regression Simulator", layout="wide") +st.title("🔹 Logistic Regression Model") + +st.write(""" +This page trains a **Logistic Regression** model on a generated dataset, +displays **predictions**, a **confusion matrix**, and an **ROC curve**. +""") + +# ------------------------------- +# ⚙️ Sidebar Controls +# ------------------------------- +st.sidebar.header("Dataset Configuration") +n_samples = st.sidebar.slider("Number of Samples", 50, 1000, 200, 50) +n_features = st.sidebar.slider("Number of Features", 2, 20, 5) +n_informative = st.sidebar.slider("Informative Features", 1, n_features, 3) +n_classes = st.sidebar.slider("Number of Classes", 2, 5, 2) + +# Generate dataset +data = generate_classification_dataset( + n_samples=n_samples, + n_features=n_features, + n_informative=n_informative, + n_classes=n_classes +) + +st.subheader("📊 Sample of Generated Dataset") +st.dataframe(data.head()) + +# ------------------------------- +# 🧠 Model Training +# ------------------------------- +X = data.drop("target", axis=1) +y = data["target"] + +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, random_state=42 +) + +st.subheader("⚙️ Model Training") +model = LogisticRegression(max_iter=1000) +model.fit(X_train, y_train) + +st.success("✅ Model trained successfully!") + +# ------------------------------- +# 🔮 Predictions +# ------------------------------- +st.subheader("🔮 Predictions on Test Set") +y_pred = model.predict(X_test) +y_pred_prob = model.predict_proba(X_test)[:, 1] if n_classes == 2 else None + +st.write("**Sample Predictions:**") +pred_df = X_test.copy() +pred_df["Actual"] = y_test.values +pred_df["Predicted"] = y_pred +st.dataframe(pred_df.head(10)) + +# ------------------------------- +# 📉 Confusion Matrix +# ------------------------------- +st.subheader("📉 Confusion Matrix") +cm = confusion_matrix(y_test, y_pred) +fig_cm, ax = plt.subplots(figsize=(5, 4)) +sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax) +ax.set_xlabel("Predicted Label") +ax.set_ylabel("True Label") +ax.set_title("Confusion Matrix") +st.pyplot(fig_cm) + +# ------------------------------- +# 📈 ROC Curve (only for binary classification) +# ------------------------------- +if n_classes == 2: + st.subheader("📈 ROC Curve") + roc_fig = plot_roc_curve(y_test, y_pred_prob) + roc_auc = roc_auc_score(y_test, y_pred_prob) + st.write(f"**ROC AUC Score:** {roc_auc:.2f}") + st.pyplot(roc_fig) +else: + st.info("ROC Curve is only available for binary classification.") + + + + + + + diff --git a/utils/data_helpers.py b/utils/data_helpers.py index 80c10f4..5a11a80 100644 --- a/utils/data_helpers.py +++ b/utils/data_helpers.py @@ -1,30 +1,47 @@ # utils/data_helpers.py - -from sklearn.datasets import make_regression +from sklearn.datasets import make_classification import pandas as pd -def generate_sample_regression(n_samples=100, n_features=1, noise=0.0, random_state=None): +def generate_classification_dataset( + n_samples: int = 100, + n_features: int = 10, + n_informative: int = 5, + n_classes: int = 2, + random_state: int = 42 +): """ - Generate a sample regression dataset. + Generate a synthetic classification dataset. - Parameters: - n_samples (int): Number of data points. - n_features (int): Number of features. - noise (float): Standard deviation of Gaussian noise added to the output. - random_state (int or None): Random seed for reproducibility. + Parameters + ---------- + n_samples : int, optional + Number of samples to generate (default=100). + n_features : int, optional + Total number of features (default=10). + n_informative : int, optional + Number of informative features (default=5). + n_classes : int, optional + Number of target classes (default=2). + random_state : int, optional + Random seed for reproducibility (default=42). - Returns: - X (pd.DataFrame): Feature dataframe of shape (n_samples, n_features) - y (pd.Series): Target variable of shape (n_samples,) + Returns + ------- + data : pandas.DataFrame + A DataFrame containing the generated features and target column ('target'). """ - X, y = make_regression( + + X, y = make_classification( n_samples=n_samples, n_features=n_features, - noise=noise, + n_informative=n_informative, + n_redundant=0, + n_classes=n_classes, random_state=random_state ) - # Convert to pandas for convenience - X_df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(n_features)]) - y_series = pd.Series(y, name='target') - - return X_df, y_series + + feature_names = [f"feature_{i}" for i in range(n_features)] + data = pd.DataFrame(X, columns=feature_names) + data["target"] = y + + return data diff --git a/utils/plot_helpers.py b/utils/plot_helpers.py index c20d604..eceb807 100644 --- a/utils/plot_helpers.py +++ b/utils/plot_helpers.py @@ -1,28 +1,40 @@ +# utils/plot_helpers.py import matplotlib.pyplot as plt import seaborn as sns -from sklearn.metrics import confusion_matrix, roc_curve, auc +from sklearn.metrics import roc_curve, auc -def plot_regression_line(X, y, model): - plt.figure() - plt.scatter(X, y, color="blue", label="Data") - y_pred = model.predict(X) - plt.plot(X, y_pred, color="red", label="Prediction") - plt.legend() - return plt +def plot_roc_curve(y_true, y_score): + """ + Plot ROC curve for a classification model. -def plot_confusion_matrix(y_true, y_pred, labels): - cm = confusion_matrix(y_true, y_pred) - plt.figure() - sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels) - plt.xlabel("Predicted") - plt.ylabel("Actual") - return plt + Parameters + ---------- + y_true : array-like + True class labels (0 or 1). + y_score : array-like + Predicted probabilities or scores for the positive class. -def plot_roc_curve(y_true, y_scores): - fpr, tpr, _ = roc_curve(y_true, y_scores) + Returns + ------- + fig : matplotlib.figure.Figure + ROC curve figure object (for Streamlit display). + """ + + # Compute ROC curve and AUC + fpr, tpr, _ = roc_curve(y_true, y_score) roc_auc = auc(fpr, tpr) - plt.figure() - plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}") - plt.plot([0, 1], [0, 1], linestyle="--") - plt.legend() - return plt + + # Create figure + sns.set(style="whitegrid") + fig, ax = plt.subplots(figsize=(6, 5)) + + ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})') + ax.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Random Guess') + + ax.set_title("ROC Curve", fontsize=14) + ax.set_xlabel("False Positive Rate") + ax.set_ylabel("True Positive Rate") + ax.legend(loc="lower right") + + plt.tight_layout() + return fig