Skip to content

Commit f38f721

Browse files
committed
finished renaming and smaller fixes
1 parent 32f0bf0 commit f38f721

File tree

7 files changed

+33
-25
lines changed

7 files changed

+33
-25
lines changed

30_introduction_data_exploration.ipynb

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
"import pandas as pd\n",
5555
"from tutorial.my_bubbly import bubbleplot \n",
5656
"from plotly.offline import iplot\n",
57-
"path = \"data/plotly_intro\"\n",
57+
"path = \"data/data_exploration\"\n",
5858
"gapminder_indicators = pd.read_csv(path + '/gapminder.tsv', delimiter='\\t')\n",
5959
"\n",
6060
"figure = bubbleplot(dataset=gapminder_indicators, x_column='gdpPercap', y_column='lifeExp', \n",
@@ -258,7 +258,7 @@
258258
" Reads in a CSV file containing happiness data and returns it as a pandas DataFrame.\n",
259259
"\n",
260260
" Instructions:\n",
261-
" - Use the `path_to_happiness` which will be `data/plotly_intro/World-happiness-report-updated_2024.csv`.\n",
261+
" - Use the `path_to_happiness` which will be `data/data_exploration/World-happiness-report-updated_2024.csv`.\n",
262262
" - Read in the CSV into a DataFrame using `pd.read_csv`.\n",
263263
" - Ensure the encoding is set to 'latin1' as the file is formatted accordingly.\n",
264264
"\n",
@@ -286,7 +286,7 @@
286286
"metadata": {},
287287
"outputs": [],
288288
"source": [
289-
"happyness = pd.read_csv('data/plotly_intro/World-happiness-report-updated_2024.csv', encoding='latin1')\n",
289+
"happyness = pd.read_csv('data/data_exploration/World-happiness-report-updated_2024.csv', encoding='latin1')\n",
290290
"happyness.describe()\n"
291291
]
292292
},
@@ -347,7 +347,7 @@
347347
"source": [
348348
"import pandas as pd\n",
349349
"\n",
350-
"happyness = pd.read_csv('data/plotly_intro/World-happiness-report-updated_2024.csv', encoding='latin1')\n",
350+
"happyness = pd.read_csv('data/data_exploration/World-happiness-report-updated_2024.csv', encoding='latin1')\n",
351351
"\n",
352352
"# Assuming your dataframe is loaded into 'df'\n",
353353
"df = happyness\n",
@@ -437,7 +437,7 @@
437437
"import pandas as pd\n",
438438
"import matplotlib.pyplot as plt\n",
439439
"\n",
440-
"happiness = pd.read_csv('data/plotly_intro/World-happiness-report-updated_2024.csv', encoding='latin1')\n",
440+
"happiness = pd.read_csv('data/data_exploration/World-happiness-report-updated_2024.csv', encoding='latin1')\n",
441441
"years = happiness['year'].unique()\n",
442442
"print(f\"Unique years in the dataset: {sorted(years)}\")\n",
443443
"\n",
@@ -482,7 +482,7 @@
482482
"\n",
483483
"In this exercise we want to fill in the missing values of the DataFrame. Complete the function below to:\n",
484484
"\n",
485-
" 1) Fill in missing years for every country (so we have an entry for every year between 2005 and 2023 and every country). Do this by initializing a DataFrame with `pd.DataFrame()` with a list.\n",
485+
" 1) Fill in missing years for every country (so we have an entry for every year between 2005 and 2023 and every country). Do this by initializing a DataFrame with `pd.DataFrame()` from a list, then left-merge the happiness DataFrame onto it with `pd.merge()`.\n",
486486
" 2) Fill all missing values in the year 2005 with the value 1. Use the `.fillna()` function.\n",
487487
" 3) Forward-fill all the remaining years with the function `.ffill()`. (For forward-filling, the order of the DataFrame is important! Make sure to sort first.)"
488488
]
@@ -681,7 +681,7 @@
681681
"outputs": [],
682682
"source": [
683683
"# Define the dataset and the columns\n",
684-
"from tutorial.intro_plotly_helper import get_happiness_data, get_clean_dataset_with_region\n",
684+
"from tutorial.data_exploration_helper import get_happiness_data, get_clean_dataset_with_region\n",
685685
"from plotly.offline import iplot\n",
686686
"dataset = get_clean_dataset_with_region(get_happiness_data())\n",
687687
"x_column = 'Freedom to make life choices'\n",
@@ -698,7 +698,7 @@
698698
" 'frames': []\n",
699699
"}\n",
700700
"\n",
701-
"# Get a random representative year\n",
701+
"# Take a random year present in the dataset\n",
702702
"year = 2010\n",
703703
"\n",
704704
"# Make the trace\n",
@@ -743,7 +743,7 @@
743743
},
744744
"outputs": [],
745745
"source": [
746-
"from tutorial.intro_plotly_helper import get_happiness_data, get_clean_dataset_with_region, get_scatter_figure\n",
746+
"from tutorial.data_exploration_helper import get_happiness_data, get_clean_dataset_with_region, get_scatter_figure\n",
747747
"from plotly.offline import iplot\n",
748748
"\n",
749749
"dataset = get_clean_dataset_with_region(get_happiness_data())\n",
@@ -806,7 +806,7 @@
806806
},
807807
"outputs": [],
808808
"source": [
809-
"from tutorial.intro_plotly_helper import full_clean_dataset, get_scatter_figure_with_years\n",
809+
"from tutorial.data_exploration_helper import full_clean_dataset, get_scatter_figure_with_years\n",
810810
"from plotly.offline import iplot\n",
811811
"\n",
812812
"dataset = full_clean_dataset()\n",
@@ -955,7 +955,7 @@
955955
"metadata": {},
956956
"outputs": [],
957957
"source": [
958-
"from tutorial.intro_plotly_helper import get_happiness_data, get_clean_dataset_with_region\n",
958+
"from tutorial.data_exploration_helper import get_happiness_data, get_clean_dataset_with_region\n",
959959
"import pandas as pd\n",
960960
"import numpy as np\n",
961961
"\n",
@@ -993,7 +993,7 @@
993993
},
994994
"outputs": [],
995995
"source": [
996-
"from tutorial.intro_plotly_helper import set_layout, full_clean_dataset\n",
996+
"from tutorial.data_exploration_helper import set_layout, full_clean_dataset\n",
997997
"from plotly.offline import iplot\n",
998998
"\n",
999999
"\n",
@@ -1176,7 +1176,7 @@
11761176
"metadata": {},
11771177
"outputs": [],
11781178
"source": [
1179-
"from tutorial.intro_plotly_helper import load_full_happiness_figure\n",
1179+
"from tutorial.data_exploration_helper import load_full_happiness_figure\n",
11801180
"from plotly.offline import iplot\n",
11811181
"\n",
11821182
"figure = load_full_happiness_figure()\n",
@@ -1201,6 +1201,8 @@
12011201
"So as an exercise we exported the bubbly library as a file `bubbly.py` into the folder `data/data_exploration`. It is quite a short library, so it is quite manageable.\n",
12021202
"Try to figure out what exactly the error is, and then fix the library locally by modifying only the file `data/data_exploration/bubbly.py` until the code below runs without errors.\n",
12031203
"\n",
1204+
"Note: You will need to restart the kernel after changes to the packages.\n",
1205+
"\n",
12041206
"(If you are interested in a solution, we have a fixed version in `tutorial/my_bubbly.py`; feel free to check the differences.)\n"
12051207
]
12061208
},
@@ -1212,9 +1214,9 @@
12121214
"source": [
12131215
"import pandas as pd\n",
12141216
"# from bubbly.bubbly import bubbleplot\n",
1215-
"from data.plotly_intro.bubbly import bubbleplot \n",
1217+
"from data.data_exploration.bubbly import bubbleplot \n",
12161218
"from plotly.offline import iplot\n",
1217-
"path = \"data/plotly_intro\"\n",
1219+
"path = \"data/data_exploration\"\n",
12181220
"gapminder_indicators = pd.read_csv(path + '/gapminder.tsv', delimiter='\\t')\n",
12191221
"\n",
12201222
"figure = bubbleplot(dataset=gapminder_indicators, x_column='gdpPercap', y_column='lifeExp', \n",
@@ -1242,7 +1244,7 @@
12421244
],
12431245
"metadata": {
12441246
"kernelspec": {
1245-
"display_name": "Python 3 (ipykernel)",
1247+
"display_name": "python-tutorial",
12461248
"language": "python",
12471249
"name": "python3"
12481250
},
@@ -1256,7 +1258,7 @@
12561258
"name": "python",
12571259
"nbconvert_exporter": "python",
12581260
"pygments_lexer": "ipython3",
1259-
"version": "3.12.10"
1261+
"version": "3.10.15"
12601262
}
12611263
},
12621264
"nbformat": 4,

data/plotly_intro/World-happiness-report-updated_2024.csv renamed to data/data_exploration/World-happiness-report-updated_2024.csv

File renamed without changes.
File renamed without changes.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
def get_happiness_data():
88
# Load the dataset
99
happiness_df = pd.read_csv(
10-
"data/plotly_intro/World-happiness-report-updated_2024.csv",
10+
"data/data_exploration/World-happiness-report-updated_2024.csv",
1111
encoding="latin1",
1212
usecols=[
1313
"Freedom to make life choices",
@@ -61,7 +61,7 @@ def get_clean_dataset_with_region(happiness_df: pd.DataFrame) -> pd.DataFrame:
6161

6262
# Load the region mapping
6363
region_df = pd.read_csv(
64-
"data/plotly_intro/country_region_mapping.csv",
64+
"data/data_exploration/country_region_mapping.csv",
6565
encoding="latin1",
6666
usecols=["Country name", "Regional indicator"],
6767
).drop_duplicates()

tutorial/tests/test_30_plotly_intro_to_libraries.py renamed to tutorial/tests/test_30_introduction_data_exploration.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import numpy as np
22
import pandas as pd
3-
import pytest
43

5-
from tutorial.intro_plotly_helper import (
4+
import pytest
5+
from tutorial.data_exploration_helper import (
66
full_clean_dataset,
77
get_clean_dataset,
88
get_happiness_data,
@@ -24,7 +24,7 @@ def reference_read_in_dataframe(path_to_happiness: str) -> pd.DataFrame:
2424
def test_read_in_dataframe(input_arg, function_to_test):
2525
"""The test case(s)"""
2626
# Get the path to the data
27-
path_to_happiness = "data/plotly_intro/World-happiness-report-updated_2024.csv"
27+
path_to_happiness = "data/data_exploration/World-happiness-report-updated_2024.csv"
2828

2929
# Read in the data
3030
happiness_df = reference_read_in_dataframe(path_to_happiness)
@@ -71,9 +71,15 @@ def test_clean_dataset(input_arg, function_to_test):
7171

7272
clean_ref = reference_clean_dataset(hapiness_df)
7373
clean_sol = function_to_test(hapiness_df)
74+
clean_ref_sorted = clean_ref.sort_values(by=["Country name", "year"]).reset_index(
75+
drop=True
76+
)
77+
clean_sol_sorted = clean_sol.sort_values(by=["Country name", "year"]).reset_index(
78+
drop=True
79+
)
7480

75-
# Check if the two DataFrames are equal
76-
assert clean_ref.equals(clean_sol)
81+
# Check if the two DataFrames are equal, ignoring the index
82+
assert clean_ref_sorted.equals(clean_sol_sorted)
7783

7884
import matplotlib.pyplot as plt
7985

@@ -112,7 +118,7 @@ def test_add_regional_indicator(input_arg, function_to_test):
112118

113119
# Load the region mapping
114120
region_df = pd.read_csv(
115-
"data/plotly_intro/country_region_mapping.csv",
121+
"data/data_exploration/country_region_mapping.csv",
116122
encoding="latin1",
117123
usecols=["Country name", "Regional indicator"],
118124
).drop_duplicates()

0 commit comments

Comments
 (0)