Skip to content

Commit 928feea

Browse files
Model Evaluation
1 parent 714463a commit 928feea

1 file changed

Lines changed: 133 additions & 1 deletion

File tree

4_data_analysis/MLProject.ipynb

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,139 @@
686686
},
687687
{
688688
"cell_type": "markdown",
689-
"id": "59382706",
689+
"id": "ab4b0414",
690+
"metadata": {},
691+
"source": []
692+
},
693+
{
694+
"cell_type": "markdown",
695+
"id": "59a1c65c",
696+
"metadata": {},
697+
"source": []
698+
},
699+
{
700+
"cell_type": "markdown",
701+
"id": "0a94562b",
702+
"metadata": {},
703+
"source": []
704+
},
705+
{
706+
"cell_type": "markdown",
707+
"id": "8a0d6569",
708+
"metadata": {},
709+
"source": [
710+
"## 7. Model Evaluation"
711+
]
712+
},
713+
{
714+
"cell_type": "code", "execution_count": null, "outputs": [],
715+
"id": "57d0ecd7",
716+
"metadata": {},
717+
"source": [
718+
"# Step 7.1: Calculate evaluation metrics\n",
719+
"print(\"=== MODEL EVALUATION ===\")\n",
720+
"\n",
721+
"# Training set metrics\n",
722+
"train_mse = mean_squared_error(y_train, y_pred_train)\n",
723+
"train_rmse = np.sqrt(train_mse)\n",
724+
"train_mae = mean_absolute_error(y_train, y_pred_train)\n",
725+
"train_r2 = r2_score(y_train, y_pred_train)\n",
726+
"\n",
727+
"# Testing set metrics\n",
728+
"test_mse = mean_squared_error(y_test, y_pred_test)\n",
729+
"test_rmse = np.sqrt(test_mse)\n",
730+
"test_mae = mean_absolute_error(y_test, y_pred_test)\n",
731+
"test_r2 = r2_score(y_test, y_pred_test)\n",
732+
"\n",
733+
"print(\"\\n=== TRAINING SET METRICS ===\")\n",
734+
"print(f\"Mean Squared Error (MSE): {train_mse:.2f}\")\n",
735+
"print(f\"Root Mean Squared Error (RMSE): {train_rmse:.2f}\")\n",
736+
"print(f\"Mean Absolute Error (MAE): {train_mae:.2f}\")\n",
737+
"print(f\"R-squared Score: {train_r2:.2f}\")\n",
738+
"\n",
739+
"print(\"\\n=== TESTING SET METRICS ===\")\n",
740+
"print(f\"Mean Squared Error (MSE): {test_mse:.2f}\")\n",
741+
"print(f\"Root Mean Squared Error (RMSE): {test_rmse:.2f}\")\n",
742+
"print(f\"Mean Absolute Error (MAE): {test_mae:.2f}\")\n",
743+
"print(f\"R-squared Score: {test_r2:.2f}\")\n",
744+
"\n",
745+
"# Step 7.2: Visualize predictions vs actual values\n",
746+
"print(\"\\n=== VISUALIZING PREDICTIONS ===\")\n",
747+
"\n",
748+
"fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n",
749+
"\n",
750+
"# Training set\n",
751+
"axes[0].scatter(y_train, y_pred_train, alpha=0.5)\n",
752+
"axes[0].plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], \n",
753+
" 'r--', lw=2)\n",
754+
"axes[0].set_xlabel('Actual Values')\n",
755+
"axes[0].set_ylabel('Predicted Values')\n",
756+
"axes[0].set_title(f'Training Set (R² = {train_r2:.2f})')\n",
757+
"axes[0].grid(True, alpha=0.3)\n",
758+
"\n",
759+
"# Testing set\n",
760+
"axes[1].scatter(y_test, y_pred_test, alpha=0.5)\n",
761+
"axes[1].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], \n",
762+
" 'r--', lw=2)\n",
763+
"axes[1].set_xlabel('Actual Values')\n",
764+
"axes[1].set_ylabel('Predicted Values')\n",
765+
"axes[1].set_title(f'Testing Set (R² = {test_r2:.2f})')\n",
766+
"axes[1].grid(True, alpha=0.3)\n",
767+
"\n",
768+
"plt.tight_layout()\n",
769+
"plt.show()\n",
770+
"\n",
771+
"# Step 7.3: Residual Analysis\n",
772+
"print(\"\\n=== RESIDUAL ANALYSIS ===\")\n",
773+
"\n",
774+
"residuals = y_test - y_pred_test\n",
775+
"\n",
776+
"plt.figure(figsize=(12, 5))\n",
777+
"\n",
778+
"plt.subplot(1, 2, 1)\n",
779+
"plt.scatter(y_pred_test, residuals, alpha=0.5)\n",
780+
"plt.axhline(y=0, color='r', linestyle='--')\n",
781+
"plt.xlabel('Predicted Values')\n",
782+
"plt.ylabel('Residuals')\n",
783+
"plt.title('Residuals vs Predicted Values')\n",
784+
"plt.grid(True, alpha=0.3)\n",
785+
"\n",
786+
"plt.subplot(1, 2, 2)\n",
787+
"plt.hist(residuals, bins=30, edgecolor='black', alpha=0.7)\n",
788+
"plt.xlabel('Residuals')\n",
789+
"plt.ylabel('Frequency')\n",
790+
"plt.title('Distribution of Residuals')\n",
791+
"plt.grid(True, alpha=0.3)\n",
792+
"\n",
793+
"plt.tight_layout()\n",
794+
"plt.show()\n",
795+
"\n",
796+
"# Check residual statistics\n",
797+
"print(f\"Residual mean: {residuals.mean():.2f}\")\n",
798+
"print(f\"Residual std: {residuals.std():.2f}\")"
799+
]
800+
},
801+
{
802+
"cell_type": "markdown",
803+
"id": "e8f8472f",
804+
"metadata": {},
805+
"source": []
806+
},
807+
{
808+
"cell_type": "markdown",
809+
"id": "40f4d419",
810+
"metadata": {},
811+
"source": []
812+
},
813+
{
814+
"cell_type": "markdown",
815+
"id": "aaac01dc",
816+
"metadata": {},
817+
"source": []
818+
},
819+
{
820+
"cell_type": "markdown",
821+
"id": "a9e3f8b3",
690822
"metadata": {},
691823
"source": []
692824
}

0 commit comments

Comments
 (0)