|
54 | 54 | "import pandas as pd\n", |
55 | 55 | "from tutorial.my_bubbly import bubbleplot \n", |
56 | 56 | "from plotly.offline import iplot\n", |
57 | | - "path = \"data/plotly_intro\"\n", |
| 57 | + "path = \"data/data_exploration\"\n", |
58 | 58 | "gapminder_indicators = pd.read_csv(path + '/gapminder.tsv', delimiter='\\t')\n", |
59 | 59 | "\n", |
60 | 60 | "figure = bubbleplot(dataset=gapminder_indicators, x_column='gdpPercap', y_column='lifeExp', \n", |
|
258 | 258 | " Reads in a CSV file containing happiness data and returns it as a pandas DataFrame.\n", |
259 | 259 | "\n", |
260 | 260 | " Instructions:\n", |
261 | | - " - Use the `path_to_happiness` which will be `data/plotly_intro/World-happiness-report-updated_2024.csv`.\n", |
| 261 | + " - Use the `path_to_happiness` which will be `data/data_exploration/World-happiness-report-updated_2024.csv`.\n", |
262 | 262 | " - Read in the CSV into a DataFrame using `pd.read_csv`.\n", |
263 | 263 | " - Ensure the encoding is set to 'latin1' as the file is formatted accordingly.\n", |
264 | 264 | "\n", |
|
286 | 286 | "metadata": {}, |
287 | 287 | "outputs": [], |
288 | 288 | "source": [ |
289 | | - "happyness = pd.read_csv('data/plotly_intro/World-happiness-report-updated_2024.csv', encoding='latin1')\n", |
| 289 | + "happyness = pd.read_csv('data/data_exploration/World-happiness-report-updated_2024.csv', encoding='latin1')\n", |
290 | 290 | "happyness.describe()\n" |
291 | 291 | ] |
292 | 292 | }, |
|
347 | 347 | "source": [ |
348 | 348 | "import pandas as pd\n", |
349 | 349 | "\n", |
350 | | - "happyness = pd.read_csv('data/plotly_intro/World-happiness-report-updated_2024.csv', encoding='latin1')\n", |
| 350 | + "happyness = pd.read_csv('data/data_exploration/World-happiness-report-updated_2024.csv', encoding='latin1')\n", |
351 | 351 | "\n", |
352 | 352 | "# Assuming your dataframe is loaded into 'df'\n", |
353 | 353 | "df = happyness\n", |
|
437 | 437 | "import pandas as pd\n", |
438 | 438 | "import matplotlib.pyplot as plt\n", |
439 | 439 | "\n", |
440 | | - "happiness = pd.read_csv('data/plotly_intro/World-happiness-report-updated_2024.csv', encoding='latin1')\n", |
| 440 | + "happiness = pd.read_csv('data/data_exploration/World-happiness-report-updated_2024.csv', encoding='latin1')\n", |
441 | 441 | "years = happiness['year'].unique()\n", |
442 | 442 | "print(f\"Unique years in the dataset: {sorted(years)}\")\n", |
443 | 443 | "\n", |
|
482 | 482 | "\n", |
483 | 483 | "In this exercise we want to complete the dataframe with missing values. Complete the function below to \n", |
484 | 484 | "\n", |
485 | | - " 1) Fill in missing years for every country (so we have an entry for every year between 2005 and 2023 and every country). Do this by initializing a DataFrame with `pd.DataFrame()` with a list.\n", |
| 485 | + " 1) Fill in missing years for every country (so we have an entry for every year between 2005 and 2023 and every country). Do this by initializing a DataFrame from a list with `pd.DataFrame()`. Then left-merge the happiness dataframe onto it with `pd.merge()`.\n", |
486 | 486 | " 2) Fill all missing values in the year 2005 with the value 1. Use the `.fillna()` function.\n", |
487 | 487 | " 3) Forward-fill all the remaining years with the function `.ffill()`. (For forward filling, the order of the dataframe is important! Make sure to sort first.)" |
488 | 488 | ] |
|
681 | 681 | "outputs": [], |
682 | 682 | "source": [ |
683 | 683 | "# Define the dataset and the columns\n", |
684 | | - "from tutorial.intro_plotly_helper import get_happiness_data, get_clean_dataset_with_region\n", |
| 684 | + "from tutorial.data_exploration_helper import get_happiness_data, get_clean_dataset_with_region\n", |
685 | 685 | "from plotly.offline import iplot\n", |
686 | 686 | "dataset = get_clean_dataset_with_region(get_happiness_data())\n", |
687 | 687 | "x_column = 'Freedom to make life choices'\n", |
|
698 | 698 | " 'frames': []\n", |
699 | 699 | "}\n", |
700 | 700 | "\n", |
701 | | - "# Get a random representative year\n", |
| 701 | + "# Pick an arbitrary (fixed) year present in the dataset\n", |
702 | 702 | "year = 2010\n", |
703 | 703 | "\n", |
704 | 704 | "# Make the trace\n", |
|
743 | 743 | }, |
744 | 744 | "outputs": [], |
745 | 745 | "source": [ |
746 | | - "from tutorial.intro_plotly_helper import get_happiness_data, get_clean_dataset_with_region, get_scatter_figure\n", |
| 746 | + "from tutorial.data_exploration_helper import get_happiness_data, get_clean_dataset_with_region, get_scatter_figure\n", |
747 | 747 | "from plotly.offline import iplot\n", |
748 | 748 | "\n", |
749 | 749 | "dataset = get_clean_dataset_with_region(get_happiness_data())\n", |
|
806 | 806 | }, |
807 | 807 | "outputs": [], |
808 | 808 | "source": [ |
809 | | - "from tutorial.intro_plotly_helper import full_clean_dataset, get_scatter_figure_with_years\n", |
| 809 | + "from tutorial.data_exploration_helper import full_clean_dataset, get_scatter_figure_with_years\n", |
810 | 810 | "from plotly.offline import iplot\n", |
811 | 811 | "\n", |
812 | 812 | "dataset = full_clean_dataset()\n", |
|
955 | 955 | "metadata": {}, |
956 | 956 | "outputs": [], |
957 | 957 | "source": [ |
958 | | - "from tutorial.intro_plotly_helper import get_happiness_data, get_clean_dataset_with_region\n", |
| 958 | + "from tutorial.data_exploration_helper import get_happiness_data, get_clean_dataset_with_region\n", |
959 | 959 | "import pandas as pd\n", |
960 | 960 | "import numpy as np\n", |
961 | 961 | "\n", |
|
993 | 993 | }, |
994 | 994 | "outputs": [], |
995 | 995 | "source": [ |
996 | | - "from tutorial.intro_plotly_helper import set_layout, full_clean_dataset\n", |
| 996 | + "from tutorial.data_exploration_helper import set_layout, full_clean_dataset\n", |
997 | 997 | "from plotly.offline import iplot\n", |
998 | 998 | "\n", |
999 | 999 | "\n", |
|
1176 | 1176 | "metadata": {}, |
1177 | 1177 | "outputs": [], |
1178 | 1178 | "source": [ |
1179 | | - "from tutorial.intro_plotly_helper import load_full_happiness_figure\n", |
| 1179 | + "from tutorial.data_exploration_helper import load_full_happiness_figure\n", |
1180 | 1180 | "from plotly.offline import iplot\n", |
1181 | 1181 | "\n", |
1182 | 1182 | "figure = load_full_happiness_figure()\n", |
|
1201 | 1201 | "So as an exercise we exported the bubbly library as a file bubbly.py into the folder data/data_exploration. It is quite a short library, so it is quite manageable.\n", |
1202 | 1202 | "Try to figure out what the error is exactly and then fix the library locally by modifying only the file `data/data_exploration/bubbly.py` until the same code below runs without errors.\n", |
1203 | 1203 | "\n", |
| 1204 | + "Note: You will need to restart the kernel after changes to the packages.\n", |
| 1205 | + "\n", |
1204 | 1206 | "(If you are interested in a solution, we have a fixed version under `tutorial/my_bubbly.py`; feel free to check the differences.)\n" |
1205 | 1207 | ] |
1206 | 1208 | }, |
|
1212 | 1214 | "source": [ |
1213 | 1215 | "import pandas as pd\n", |
1214 | 1216 | "# from bubbly.bubbly import bubbleplot\n", |
1215 | | - "from data.plotly_intro.bubbly import bubbleplot \n", |
| 1217 | + "from data.data_exploration.bubbly import bubbleplot \n", |
1216 | 1218 | "from plotly.offline import iplot\n", |
1217 | | - "path = \"data/plotly_intro\"\n", |
| 1219 | + "path = \"data/data_exploration\"\n", |
1218 | 1220 | "gapminder_indicators = pd.read_csv(path + '/gapminder.tsv', delimiter='\\t')\n", |
1219 | 1221 | "\n", |
1220 | 1222 | "figure = bubbleplot(dataset=gapminder_indicators, x_column='gdpPercap', y_column='lifeExp', \n", |
|
1242 | 1244 | ], |
1243 | 1245 | "metadata": { |
1244 | 1246 | "kernelspec": { |
1245 | | - "display_name": "Python 3 (ipykernel)", |
| 1247 | + "display_name": "python-tutorial", |
1246 | 1248 | "language": "python", |
1247 | 1249 | "name": "python3" |
1248 | 1250 | }, |
|
1256 | 1258 | "name": "python", |
1257 | 1259 | "nbconvert_exporter": "python", |
1258 | 1260 | "pygments_lexer": "ipython3", |
1259 | | - "version": "3.12.10" |
| 1261 | + "version": "3.10.15" |
1260 | 1262 | } |
1261 | 1263 | }, |
1262 | 1264 | "nbformat": 4, |
|
0 commit comments