carecodeconnect · jv11l · Feb 3, 2024 · Feb 20, 2024 · Feb 20, 2024
diff --git a/notebook/base_pipeline.ipynb b/notebook/base_pipeline.ipynb
@@ -13,7 +13,10 @@
     "sys.path.append('../src')\n",
     "sys.path.append('../submissions')\n",
     "\n",
-    "from utils import Data, Model, Submission\n",
+    "from utils import load_train_data, load_test_data\n",
+    "from utils import evaluate_model, save_model\n",
+    "from utils import save_submission\n",
+    "\n",
     "\n",
     "import pandas as pd\n",
     "import numpy as np\n",
@@ -37,10 +40,22 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(260601, 39)"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Load training data\n",
-    "train_data = Data.load_train_data()"
+    "train_data = load_train_data()\n",
+    "train_data.shape"
    ]
   },
   {
@@ -60,7 +75,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
     "# Prepare data for preprocessing and modelling\n",
     "TARGET = 'damage_grade'\n",
     "\n",
@@ -131,8 +145,7 @@
    ],
    "source": [
     "# Evaluate model performance with base model Logistic Regression\n",
-    "base_model = Model(base_pipe)\n",
-    "base_score_valid, base_score_train = base_model.evaluate_model(X_train, X_valid, \n",
+    "base_score_valid, base_score_train = evaluate_model(base_pipe, X_train, X_valid, \n",
     "                                                               y_train, y_valid)\n",
     "\n",
     "print(f\"F1-score of the base model: {base_score_valid :.3f}\")"
@@ -142,10 +155,22 @@
    "cell_type": "code",
    "execution_count": 9,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(86868, 38)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Load test data and save predictions into a file for submission\n",
-    "test_data = Data.load_test_data()"
+    "# Load test data\n",
+    "test_data = load_test_data()\n",
+    "test_data.shape"
    ]
   },
   {
@@ -156,7 +181,7 @@
     {
      "data": {
       "text/plain": [
-       "PosixPath('../submissions/submission1706959851.csv')"
+       "PosixPath('../submissions/submission1708407950.csv')"
       ]
      },
      "execution_count": 10,
@@ -166,29 +191,29 @@
    ],
    "source": [
     "timestamp =  datetime.now().timestamp()\n",
-    "# Create and save a submission file in submissions/\n",
-    "Submission(base_model, test_data).save_submission(timestamp)"
+    "# Save predictions to a file for submission\n",
+    "save_submission(base_pipe, test_data, timestamp, label_enc)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "PosixPath('../models/model_1706959851.pickle')"
+       "PosixPath('../models/model_1708407950.pickle')"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# Save model as .pickle file in models/\n",
-    "base_model.save_model(timestamp)"
+    "save_model(base_pipe, timestamp)"
    ]
   },
   {

diff --git a/notebook/hyperopt_xgb_classifier.ipynb → notebook/xgb_hyperopt.ipynb b/notebook/hyperopt_xgb_classifier.ipynb → notebook/xgb_hyperopt.ipynb
@@ -6,6 +6,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from datetime import datetime\n",
+    "from pathlib import Path\n",
     "import sys\n",
     "sys.path.append('../src')\n",
     "sys.path.append('../submissions')\n",
@@ -14,12 +16,9 @@
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
     "import seaborn as sns\n",
-    "from pathlib import Path\n",
-    "from utils import load_train_data, load_test_data\n",
-    "from evaluate import evaluate_model\n",
+    "\n",
     "from utils import save_submission, save_model, load_model\n",
     "from encoding import freq_encode, get_house_volume\n",
-    "from datetime import datetime\n",
     "\n",
     "pd.set_option('display.max_columns', None)"
    ]