Model Evaluation

Lastrophysicien · Lastrophysicien · commit 928feeabd0d8 · 2025-12-06T10:34:34.000-05:00
diff --git a/4_data_analysis/MLProject.ipynb b/4_data_analysis/MLProject.ipynb
@@ -686,7 +686,139 @@
   },
   {
    "cell_type": "markdown",
-   "id": "59382706",
+   "id": "ab4b0414",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "59a1c65c",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a94562b",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a0d6569",
+   "metadata": {},
+   "source": [
+    "## 7. Model Evaluation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "57d0ecd7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Step 7.1: Calculate evaluation metrics\n",
+    "print(\"=== MODEL EVALUATION ===\")\n",
+    "\n",
+    "\n",
+    "def _print_metrics(header, mse, rmse, mae, r2):\n",
+    "    \"\"\"Print one split's scores under `header`, each to two decimals.\"\"\"\n",
+    "    print(header)\n",
+    "    print(f\"Mean Squared Error (MSE): {mse:.2f}\")\n",
+    "    print(f\"Root Mean Squared Error (RMSE): {rmse:.2f}\")\n",
+    "    print(f\"Mean Absolute Error (MAE): {mae:.2f}\")\n",
+    "    print(f\"R-squared Score: {r2:.2f}\")\n",
+    "\n",
+    "\n",
+    "def _plot_actual_vs_predicted(ax, actual, predicted, title):\n",
+    "    \"\"\"Scatter predictions against actuals with a dashed y = x reference line.\"\"\"\n",
+    "    ax.scatter(actual, predicted, alpha=0.5)\n",
+    "    # Both endpoints come from the actual values, so the line is y = x\n",
+    "    diagonal = [actual.min(), actual.max()]\n",
+    "    ax.plot(diagonal, diagonal, 'r--', lw=2)\n",
+    "    ax.set_xlabel('Actual Values')\n",
+    "    ax.set_ylabel('Predicted Values')\n",
+    "    ax.set_title(title)\n",
+    "    ax.grid(True, alpha=0.3)\n",
+    "\n",
+    "\n",
+    "# Scores on the training split (RMSE is the square root of MSE)\n",
+    "train_mse = mean_squared_error(y_train, y_pred_train)\n",
+    "train_rmse = np.sqrt(train_mse)\n",
+    "train_mae = mean_absolute_error(y_train, y_pred_train)\n",
+    "train_r2 = r2_score(y_train, y_pred_train)\n",
+    "\n",
+    "# Same scores on the testing split\n",
+    "test_mse = mean_squared_error(y_test, y_pred_test)\n",
+    "test_rmse = np.sqrt(test_mse)\n",
+    "test_mae = mean_absolute_error(y_test, y_pred_test)\n",
+    "test_r2 = r2_score(y_test, y_pred_test)\n",
+    "\n",
+    "_print_metrics(\n",
+    "    \"\\n=== TRAINING SET METRICS ===\",\n",
+    "    train_mse, train_rmse, train_mae, train_r2,\n",
+    ")\n",
+    "_print_metrics(\n",
+    "    \"\\n=== TESTING SET METRICS ===\",\n",
+    "    test_mse, test_rmse, test_mae, test_r2,\n",
+    ")\n",
+    "\n",
+    "# Step 7.2: Visualize predictions vs actual values\n",
+    "print(\"\\n=== VISUALIZING PREDICTIONS ===\")\n",
+    "\n",
+    "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n",
+    "_plot_actual_vs_predicted(\n",
+    "    axes[0], y_train, y_pred_train, f'Training Set (R² = {train_r2:.2f})'\n",
+    ")\n",
+    "_plot_actual_vs_predicted(\n",
+    "    axes[1], y_test, y_pred_test, f'Testing Set (R² = {test_r2:.2f})'\n",
+    ")\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "\n",
+    "# Step 7.3: Residual Analysis\n",
+    "print(\"\\n=== RESIDUAL ANALYSIS ===\")\n",
+    "\n",
+    "# Residual = actual minus predicted, on the testing split\n",
+    "residuals = y_test - y_pred_test\n",
+    "\n",
+    "resid_fig, (ax_scatter, ax_hist) = plt.subplots(1, 2, figsize=(12, 5))\n",
+    "\n",
+    "# Left panel: residuals against predictions, with a zero reference line\n",
+    "ax_scatter.scatter(y_pred_test, residuals, alpha=0.5)\n",
+    "ax_scatter.axhline(y=0, color='r', linestyle='--')\n",
+    "ax_scatter.set_xlabel('Predicted Values')\n",
+    "ax_scatter.set_ylabel('Residuals')\n",
+    "ax_scatter.set_title('Residuals vs Predicted Values')\n",
+    "ax_scatter.grid(True, alpha=0.3)\n",
+    "\n",
+    "# Right panel: histogram of the residuals\n",
+    "ax_hist.hist(residuals, bins=30, edgecolor='black', alpha=0.7)\n",
+    "ax_hist.set_xlabel('Residuals')\n",
+    "ax_hist.set_ylabel('Frequency')\n",
+    "ax_hist.set_title('Distribution of Residuals')\n",
+    "ax_hist.grid(True, alpha=0.3)\n",
+    "\n",
+    "resid_fig.tight_layout()\n",
+    "plt.show()\n",
+    "\n",
+    "# Check residual statistics\n",
+    "print(f\"Residual mean: {residuals.mean():.2f}\")\n",
+    "print(f\"Residual std: {residuals.std():.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e8f8472f",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40f4d419",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aaac01dc",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a9e3f8b3",
    "metadata": {},
    "source": []
   }