Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 41 additions & 16 deletions notebook/base_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
"sys.path.append('../src')\n",
"sys.path.append('../submissions')\n",
"\n",
"from utils import Data, Model, Submission\n",
"from utils import load_train_data, load_test_data\n",
"from utils import evaluate_model, save_model\n",
"from utils import save_submission\n",
"\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
Expand All @@ -37,10 +40,22 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"(260601, 39)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load training data\n",
"train_data = Data.load_train_data()"
"train_data = load_train_data()\n",
"train_data.shape"
]
},
{
Expand All @@ -60,7 +75,6 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Prepare data for preprocessing and modelling\n",
"TARGET = 'damage_grade'\n",
"\n",
Expand Down Expand Up @@ -131,8 +145,7 @@
],
"source": [
"# Evaluate model performance with base model Logistic Regression\n",
"base_model = Model(base_pipe)\n",
"base_score_valid, base_score_train = base_model.evaluate_model(X_train, X_valid, \n",
"base_score_valid, base_score_train = evaluate_model(base_pipe, X_train, X_valid, \n",
" y_train, y_valid)\n",
"\n",
"print(f\"F1-score of the base model: {base_score_valid :.3f}\")"
Expand All @@ -142,10 +155,22 @@
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"(86868, 38)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load test data and save predictions into a file for submission\n",
"test_data = Data.load_test_data()"
"# Load test data\n",
"test_data = load_test_data()\n",
"test_data.shape"
]
},
{
Expand All @@ -156,7 +181,7 @@
{
"data": {
"text/plain": [
"PosixPath('../submissions/submission1706959851.csv')"
"PosixPath('../submissions/submission1708407950.csv')"
]
},
"execution_count": 10,
Expand All @@ -166,29 +191,29 @@
],
"source": [
"timestamp = datetime.now().timestamp()\n",
"# Create and save a submission file in submissions/\n",
"Submission(base_model, test_data).save_submission(timestamp)"
"# Save predictions in a file for submission\n",
"save_submission(base_pipe, test_data, timestamp, label_enc)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('../models/model_1706959851.pickle')"
"PosixPath('../models/model_1708407950.pickle')"
]
},
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Save model as .pickle file in models/\n",
"base_model.save_model(timestamp)"
"save_model(base_pipe, timestamp)"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"from pathlib import Path\n",
"import sys\n",
"sys.path.append('../src')\n",
"sys.path.append('../submissions')\n",
Expand All @@ -14,12 +16,9 @@
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from pathlib import Path\n",
"from utils import load_train_data, load_test_data\n",
"from evaluate import evaluate_model\n",
"\n",
"from utils import save_submission, save_model, load_model\n",
"from encoding import freq_encode, get_house_volume\n",
"from datetime import datetime\n",
"\n",
"pd.set_option('display.max_columns', None)"
]
Expand Down
Loading