|
686 | 686 | }, |
687 | 687 | { |
688 | 688 | "cell_type": "markdown", |
689 | | - "id": "59382706", |
| 689 | + "id": "ab4b0414", |
| 690 | + "metadata": {}, |
| 691 | + "source": [] |
| 692 | + }, |
| 693 | + { |
| 694 | + "cell_type": "markdown", |
| 695 | + "id": "59a1c65c", |
| 696 | + "metadata": {}, |
| 697 | + "source": [] |
| 698 | + }, |
| 699 | + { |
| 700 | + "cell_type": "markdown", |
| 701 | + "id": "0a94562b", |
| 702 | + "metadata": {}, |
| 703 | + "source": [] |
| 704 | + }, |
| 705 | + { |
| 706 | + "cell_type": "markdown", |
| 707 | + "id": "8a0d6569", |
| 708 | + "metadata": {}, |
| 709 | + "source": [ |
| 710 | + "## 7. Model Evaluation" |
| 711 | + ] |
| 712 | + }, |
| 713 | + { |
| 714 | + "cell_type": "code", |
| 715 | + "id": "57d0ecd7", |
| 716 | + "metadata": {}, "execution_count": null, "outputs": [], |
| 717 | + "source": [ |
| 718 | + "# Step 7.1: Calculate evaluation metrics\n", |
| 719 | + "print(\"=== MODEL EVALUATION ===\")\n", |
| 720 | + "\n", |
| 721 | + "# Training set metrics\n", |
| 722 | + "train_mse = mean_squared_error(y_train, y_pred_train)\n", |
| 723 | + "train_rmse = np.sqrt(train_mse)\n", |
| 724 | + "train_mae = mean_absolute_error(y_train, y_pred_train)\n", |
| 725 | + "train_r2 = r2_score(y_train, y_pred_train)\n", |
| 726 | + "\n", |
| 727 | + "# Testing set metrics\n", |
| 728 | + "test_mse = mean_squared_error(y_test, y_pred_test)\n", |
| 729 | + "test_rmse = np.sqrt(test_mse)\n", |
| 730 | + "test_mae = mean_absolute_error(y_test, y_pred_test)\n", |
| 731 | + "test_r2 = r2_score(y_test, y_pred_test)\n", |
| 732 | + "\n", |
| 733 | + "print(\"\\n=== TRAINING SET METRICS ===\")\n", |
| 734 | + "print(f\"Mean Squared Error (MSE): {train_mse:.2f}\")\n", |
| 735 | + "print(f\"Root Mean Squared Error (RMSE): {train_rmse:.2f}\")\n", |
| 736 | + "print(f\"Mean Absolute Error (MAE): {train_mae:.2f}\")\n", |
| 737 | + "print(f\"R-squared Score: {train_r2:.2f}\")\n", |
| 738 | + "\n", |
| 739 | + "print(\"\\n=== TESTING SET METRICS ===\")\n", |
| 740 | + "print(f\"Mean Squared Error (MSE): {test_mse:.2f}\")\n", |
| 741 | + "print(f\"Root Mean Squared Error (RMSE): {test_rmse:.2f}\")\n", |
| 742 | + "print(f\"Mean Absolute Error (MAE): {test_mae:.2f}\")\n", |
| 743 | + "print(f\"R-squared Score: {test_r2:.2f}\")\n", |
| 744 | + "\n", |
| 745 | + "# Step 7.2: Visualize predictions vs actual values\n", |
| 746 | + "print(\"\\n=== VISUALIZING PREDICTIONS ===\")\n", |
| 747 | + "\n", |
| 748 | + "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n", |
| 749 | + "\n", |
| 750 | + "# Training set\n", |
| 751 | + "axes[0].scatter(y_train, y_pred_train, alpha=0.5)\n", |
| 752 | + "axes[0].plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], \n", |
| 753 | + " 'r--', lw=2)\n", |
| 754 | + "axes[0].set_xlabel('Actual Values')\n", |
| 755 | + "axes[0].set_ylabel('Predicted Values')\n", |
| 756 | + "axes[0].set_title(f'Training Set (R² = {train_r2:.2f})')\n", |
| 757 | + "axes[0].grid(True, alpha=0.3)\n", |
| 758 | + "\n", |
| 759 | + "# Testing set\n", |
| 760 | + "axes[1].scatter(y_test, y_pred_test, alpha=0.5)\n", |
| 761 | + "axes[1].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], \n", |
| 762 | + " 'r--', lw=2)\n", |
| 763 | + "axes[1].set_xlabel('Actual Values')\n", |
| 764 | + "axes[1].set_ylabel('Predicted Values')\n", |
| 765 | + "axes[1].set_title(f'Testing Set (R² = {test_r2:.2f})')\n", |
| 766 | + "axes[1].grid(True, alpha=0.3)\n", |
| 767 | + "\n", |
| 768 | + "plt.tight_layout()\n", |
| 769 | + "plt.show()\n", |
| 770 | + "\n", |
| 771 | + "# Step 7.3: Residual Analysis\n", |
| 772 | + "print(\"\\n=== RESIDUAL ANALYSIS ===\")\n", |
| 773 | + "\n", |
| 774 | + "residuals = y_test - y_pred_test\n", |
| 775 | + "\n", |
| 776 | + "plt.figure(figsize=(12, 5))\n", |
| 777 | + "\n", |
| 778 | + "plt.subplot(1, 2, 1)\n", |
| 779 | + "plt.scatter(y_pred_test, residuals, alpha=0.5)\n", |
| 780 | + "plt.axhline(y=0, color='r', linestyle='--')\n", |
| 781 | + "plt.xlabel('Predicted Values')\n", |
| 782 | + "plt.ylabel('Residuals')\n", |
| 783 | + "plt.title('Residuals vs Predicted Values')\n", |
| 784 | + "plt.grid(True, alpha=0.3)\n", |
| 785 | + "\n", |
| 786 | + "plt.subplot(1, 2, 2)\n", |
| 787 | + "plt.hist(residuals, bins=30, edgecolor='black', alpha=0.7)\n", |
| 788 | + "plt.xlabel('Residuals')\n", |
| 789 | + "plt.ylabel('Frequency')\n", |
| 790 | + "plt.title('Distribution of Residuals')\n", |
| 791 | + "plt.grid(True, alpha=0.3)\n", |
| 792 | + "\n", |
| 793 | + "plt.tight_layout()\n", |
| 794 | + "plt.show()\n", |
| 795 | + "\n", |
| 796 | + "# Check residual statistics\n", |
| 797 | + "print(f\"Residual mean: {residuals.mean():.2f}\")\n", |
| 798 | + "print(f\"Residual std: {residuals.std():.2f}\")" |
| 799 | + ] |
| 800 | + }, |
| 801 | + { |
| 802 | + "cell_type": "markdown", |
| 803 | + "id": "e8f8472f", |
| 804 | + "metadata": {}, |
| 805 | + "source": [] |
| 806 | + }, |
| 807 | + { |
| 808 | + "cell_type": "markdown", |
| 809 | + "id": "40f4d419", |
| 810 | + "metadata": {}, |
| 811 | + "source": [] |
| 812 | + }, |
| 813 | + { |
| 814 | + "cell_type": "markdown", |
| 815 | + "id": "aaac01dc", |
| 816 | + "metadata": {}, |
| 817 | + "source": [] |
| 818 | + }, |
| 819 | + { |
| 820 | + "cell_type": "markdown", |
| 821 | + "id": "a9e3f8b3", |
690 | 822 | "metadata": {}, |
691 | 823 | "source": [] |
692 | 824 | } |
|
0 commit comments