Skip to content

Commit be1d4b6

Browse files
Preparing Data for Regression
1 parent e8e6678 commit be1d4b6

1 file changed

Lines changed: 66 additions & 1 deletion

File tree

4_data_analysis/MLProject.ipynb

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,10 +568,75 @@
568568
"print(visitor_type_stats)\n"
569569
]
570570
},
571+
{
572+
"cell_type": "markdown",
573+
"id": "3c32b2c6",
574+
"metadata": {},
575+
"source": [
576+
"## 5. Preparing Data for Regression"
577+
]
578+
},
579+
{
580+
"cell_type": "code",
581+
"execution_count": 23,
582+
"id": "142d44a3",
583+
"metadata": {},
584+
"outputs": [
585+
{
586+
"name": "stdout",
587+
"output_type": "stream",
588+
"text": [
589+
"=== PREPARING DATA FOR REGRESSION ===\n",
590+
"Features shape: (3025, 7)\n",
591+
"Target shape: (3025,)\n",
592+
"\n",
593+
"Training set size: 2420 samples\n",
594+
"Testing set size: 605 samples\n",
595+
"Features scaled using StandardScaler\n"
596+
]
597+
}
598+
],
599+
"source": [
600+
"# Step 5.1: Select features and target variable\n",
601+
"print(\"=== PREPARING DATA FOR REGRESSION ===\")\n",
602+
"\n",
603+
"# Define features (X) and target (y)\n",
604+
"X = df[\n",
605+
" [\n",
606+
" \"year\",\n",
607+
" \"country_encoded\",\n",
608+
" \"visitor_type_encoded\",\n",
609+
" \"decade\",\n",
610+
" \"post_2000\",\n",
611+
" \"post_2010\",\n",
612+
" \"covid_period\",\n",
613+
" ]\n",
614+
"]\n",
615+
"y = df[\"number_of_tourist\"]\n",
616+
"\n",
617+
"print(f\"Features shape: {X.shape}\")\n",
618+
"print(f\"Target shape: {y.shape}\")\n",
619+
"\n",
620+
"# Step 5.2: Split data into training and testing sets\n",
621+
"X_train, X_test, y_train, y_test = train_test_split(\n",
622+
" X, y, test_size=0.2, random_state=42, shuffle=True\n",
623+
")\n",
624+
"\n",
625+
"print(f\"\\nTraining set size: {X_train.shape[0]} samples\")\n",
626+
"print(f\"Testing set size: {X_test.shape[0]} samples\")\n",
627+
"\n",
628+
"# Step 5.3: Standardize the features (scaler is fit on the training set only, then applied to the test set)\n",
629+
"scaler = StandardScaler()\n",
630+
"X_train_scaled = scaler.fit_transform(X_train)\n",
631+
"X_test_scaled = scaler.transform(X_test)\n",
632+
"\n",
633+
"print(\"Features scaled using StandardScaler\")\n"
634+
]
635+
},
571636
{
572637
"cell_type": "code",
573638
"execution_count": null,
574-
"id": "5a269c6f",
639+
"id": "fceebc18",
575640
"metadata": {},
576641
"outputs": [],
577642
"source": []

0 commit comments

Comments
 (0)