|
568 | 568 | "print(visitor_type_stats)\n" |
569 | 569 | ] |
570 | 570 | }, |
| 571 | + { |
| 572 | + "cell_type": "markdown", |
| 573 | + "id": "3c32b2c6", |
| 574 | + "metadata": {}, |
| 575 | + "source": [ |
| 576 | + "## 5. Preparing Data for Regression" |
| 577 | + ] |
| 578 | + }, |
| 579 | + { |
| 580 | + "cell_type": "code", |
| 581 | + "execution_count": 23, |
| 582 | + "id": "142d44a3", |
| 583 | + "metadata": {}, |
| 584 | + "outputs": [ |
| 585 | + { |
| 586 | + "name": "stdout", |
| 587 | + "output_type": "stream", |
| 588 | + "text": [ |
| 589 | + "=== PREPARING DATA FOR REGRESSION ===\n", |
| 590 | + "Features shape: (3025, 7)\n", |
| 591 | + "Target shape: (3025,)\n", |
| 592 | + "\n", |
| 593 | + "Training set size: 2420 samples\n", |
| 594 | + "Testing set size: 605 samples\n", |
| 595 | + "Features scaled using StandardScaler\n" |
| 596 | + ] |
| 597 | + } |
| 598 | + ], |
| 599 | + "source": [ |
| 600 | + "# Step 5.1: Select features and target variable\n", |
| 601 | + "print(\"=== PREPARING DATA FOR REGRESSION ===\")\n", |
| 602 | + "\n", |
| 603 | + "# Define features (X) and target (y)\n", |
| 604 | + "X = df[\n", |
| 605 | + " [\n", |
| 606 | + " \"year\",\n", |
| 607 | + " \"country_encoded\",\n", |
| 608 | + " \"visitor_type_encoded\",\n", |
| 609 | + " \"decade\",\n", |
| 610 | + " \"post_2000\",\n", |
| 611 | + " \"post_2010\",\n", |
| 612 | + " \"covid_period\",\n", |
| 613 | + " ]\n", |
| 614 | + "]\n", |
| 615 | + "y = df[\"number_of_tourist\"]\n", |
| 616 | + "\n", |
| 617 | + "print(f\"Features shape: {X.shape}\")\n", |
| 618 | + "print(f\"Target shape: {y.shape}\")\n", |
| 619 | + "\n", |
| 620 | + "# Step 5.2: Split data into training (80%) and testing (20%) sets, shuffled with a fixed seed\n", |
| 621 | + "X_train, X_test, y_train, y_test = train_test_split(\n", |
| 622 | + " X, y, test_size=0.2, random_state=42, shuffle=True\n", |
| 623 | + ")\n", |
| 624 | + "\n", |
| 625 | + "print(f\"\\nTraining set size: {X_train.shape[0]} samples\")\n", |
| 626 | + "print(f\"Testing set size: {X_test.shape[0]} samples\")\n", |
| 627 | + "\n", |
| 628 | + "# Step 5.3: Standardize the features - fit the scaler on the training split only, then reuse it on the test split\n", |
| 629 | + "scaler = StandardScaler()\n", |
| 630 | + "X_train_scaled = scaler.fit_transform(X_train)\n", |
| 631 | + "X_test_scaled = scaler.transform(X_test)\n", |
| 632 | + "\n", |
| 633 | + "print(\"Features scaled using StandardScaler\")\n" |
| 634 | + ] |
| 635 | + }, |
571 | 636 | { |
572 | 637 | "cell_type": "code", |
573 | 638 | "execution_count": null, |
574 | | - "id": "5a269c6f", |
| 639 | + "id": "fceebc18", |
575 | 640 | "metadata": {}, |
576 | 641 | "outputs": [], |
577 | 642 | "source": [] |
|
0 commit comments