Skip to content

Commit bf6d243

Browse files
EliEli
authored andcommitted
Fixes to climatology and merging, added tests and revised notebooks.
1 parent a9bd8fb commit bf6d243

File tree

9 files changed

+1046
-200
lines changed

9 files changed

+1046
-200
lines changed

docsrc/notebooks/merge_splice.ipynb

Lines changed: 175 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,103 @@
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": null,
25+
"execution_count": 1,
2626
"id": "e52fb077",
2727
"metadata": {},
2828
"outputs": [
2929
{
30-
"ename": "NameError",
31-
"evalue": "name 'pd' is not defined",
32-
"output_type": "error",
33-
"traceback": [
34-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
35-
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
36-
"Cell \u001b[1;32mIn[1], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# ========================================\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# 1️⃣ Creating Regular Time Series (1D Frequency with Missing Data)\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# ========================================\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m idx1 \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2023-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1D\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m idx2 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2023-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m12\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1D\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 6\u001b[0m idx3 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2022-12-31\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m14\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1D\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
37-
"\u001b[1;31mNameError\u001b[0m: name 'pd' is not defined"
30+
"name": "stdout",
31+
"output_type": "stream",
32+
"text": [
33+
"Series 1 (Primary):\n"
34+
]
35+
},
36+
{
37+
"data": {
38+
"text/plain": [
39+
"2023-01-01 1.0\n",
40+
"2023-01-02 NaN\n",
41+
"2023-01-03 3.0\n",
42+
"2023-01-04 NaN\n",
43+
"2023-01-05 5.0\n",
44+
"2023-01-06 6.0\n",
45+
"2023-01-07 NaN\n",
46+
"2023-01-08 8.0\n",
47+
"2023-01-09 9.0\n",
48+
"2023-01-10 10.0\n",
49+
"Freq: D, Name: A, dtype: float64"
50+
]
51+
},
52+
"metadata": {},
53+
"output_type": "display_data"
54+
},
55+
{
56+
"name": "stdout",
57+
"output_type": "stream",
58+
"text": [
59+
"\n",
60+
"Series 2 (Secondary - Fills Gaps):\n"
61+
]
62+
},
63+
{
64+
"data": {
65+
"text/plain": [
66+
"2023-01-01 NaN\n",
67+
"2023-01-02 2.0\n",
68+
"2023-01-03 NaN\n",
69+
"2023-01-04 4.0\n",
70+
"2023-01-05 NaN\n",
71+
"2023-01-06 NaN\n",
72+
"2023-01-07 7.0\n",
73+
"2023-01-08 NaN\n",
74+
"2023-01-09 NaN\n",
75+
"2023-01-10 NaN\n",
76+
"2023-01-11 3.0\n",
77+
"2023-01-12 4.0\n",
78+
"Freq: D, Name: A, dtype: float64"
79+
]
80+
},
81+
"metadata": {},
82+
"output_type": "display_data"
83+
},
84+
{
85+
"name": "stdout",
86+
"output_type": "stream",
87+
"text": [
88+
"\n",
89+
"Series 3 (Tertiary - Fills Gaps):\n"
3890
]
91+
},
92+
{
93+
"data": {
94+
"text/plain": [
95+
"2022-12-31 1000.0\n",
96+
"2023-01-01 1001.0\n",
97+
"2023-01-02 1002.0\n",
98+
"2023-01-03 NaN\n",
99+
"2023-01-04 1004.0\n",
100+
"2023-01-05 NaN\n",
101+
"2023-01-06 NaN\n",
102+
"2023-01-07 1007.0\n",
103+
"2023-01-08 NaN\n",
104+
"2023-01-09 NaN\n",
105+
"2023-01-10 NaN\n",
106+
"2023-01-11 1005.0\n",
107+
"2023-01-12 1006.0\n",
108+
"2023-01-13 1007.0\n",
109+
"Freq: D, Name: A, dtype: float64"
110+
]
111+
},
112+
"metadata": {},
113+
"output_type": "display_data"
39114
}
40115
],
41116
"source": [
117+
"import pandas as pd\n",
118+
"import numpy as np\n",
119+
"from vtools import ts_merge, ts_splice\n",
42120
"# ========================================\n",
43-
"# 1️⃣ Creating Regular Time Series (1D Frequency with Missing Data)\n",
121+
"# Creating Regular Time Series (1D Frequency with Missing Data)\n",
44122
"# ========================================\n",
45123
"idx1 = pd.date_range(\"2023-01-01\", periods=10, freq=\"1D\")\n",
46124
"idx2 = pd.date_range(\"2023-01-01\", periods=12, freq=\"1D\")\n",
@@ -73,7 +151,7 @@
73151
},
74152
{
75153
"cell_type": "code",
76-
"execution_count": null,
154+
"execution_count": 2,
77155
"id": "5dd08914",
78156
"metadata": {},
79157
"outputs": [
@@ -129,7 +207,7 @@
129207
},
130208
{
131209
"cell_type": "code",
132-
"execution_count": null,
210+
"execution_count": 3,
133211
"id": "ae88f210",
134212
"metadata": {},
135213
"outputs": [
@@ -138,7 +216,7 @@
138216
"output_type": "stream",
139217
"text": [
140218
"\n",
141-
"Spliced Series with Prioritization:\n"
219+
"Spliced Series with Prioritization and default `prefer last`:\n"
142220
]
143221
},
144222
{
@@ -216,7 +294,7 @@
216294
},
217295
{
218296
"cell_type": "code",
219-
"execution_count": null,
297+
"execution_count": 4,
220298
"id": "9a1d0dae",
221299
"metadata": {},
222300
"outputs": [
@@ -354,7 +432,7 @@
354432
},
355433
{
356434
"cell_type": "code",
357-
"execution_count": null,
435+
"execution_count": 5,
358436
"id": "35cfc422",
359437
"metadata": {},
360438
"outputs": [
@@ -683,53 +761,35 @@
683761
"name": "stdout",
684762
"output_type": "stream",
685763
"text": [
764+
"2023-01-01 1.0\n",
765+
"2023-01-03 NaN\n",
766+
"2023-01-05 3.0\n",
767+
"2023-01-07 4.0\n",
768+
"2023-01-09 5.0\n",
769+
"Freq: 2D, Name: A, dtype: float64\n",
770+
"2023-01-02 10.0\n",
771+
"2023-01-04 20.0\n",
772+
"2023-01-06 30.0\n",
773+
"2023-01-08 NaN\n",
774+
"2023-01-10 50.0\n",
775+
"Freq: 2D, Name: B, dtype: float64\n",
686776
"Merged Series not renamed:\n"
687777
]
688778
},
689779
{
690780
"data": {
691781
"text/plain": [
692782
"2023-01-01 1.0\n",
693-
"2023-01-02 2.0\n",
694-
"2023-01-03 3.0\n",
695-
"2023-01-04 4.0\n",
696-
"2023-01-05 5.0\n",
697-
"2023-01-06 6.0\n",
698-
"2023-01-07 7.0\n",
699-
"2023-01-08 8.0\n",
700-
"2023-01-09 9.0\n",
701-
"2023-01-10 10.0\n",
702-
"2023-01-11 3.0\n",
703-
"2023-01-12 4.0\n",
704-
"Freq: D, Name: A, dtype: float64"
705-
]
706-
},
707-
"metadata": {},
708-
"output_type": "display_data"
709-
},
710-
{
711-
"name": "stdout",
712-
"output_type": "stream",
713-
"text": [
714-
"Merged Series renamed:\n"
715-
]
716-
},
717-
{
718-
"data": {
719-
"text/plain": [
720-
"2023-01-01 1.0\n",
721-
"2023-01-02 2.0\n",
722-
"2023-01-03 3.0\n",
723-
"2023-01-04 4.0\n",
724-
"2023-01-05 5.0\n",
725-
"2023-01-06 6.0\n",
726-
"2023-01-07 7.0\n",
727-
"2023-01-08 8.0\n",
728-
"2023-01-09 9.0\n",
729-
"2023-01-10 10.0\n",
730-
"2023-01-11 3.0\n",
731-
"2023-01-12 4.0\n",
732-
"Freq: D, Name: Renamed_A, dtype: float64"
783+
"2023-01-02 10.0\n",
784+
"2023-01-03 NaN\n",
785+
"2023-01-04 20.0\n",
786+
"2023-01-05 3.0\n",
787+
"2023-01-06 30.0\n",
788+
"2023-01-07 4.0\n",
789+
"2023-01-08 NaN\n",
790+
"2023-01-09 5.0\n",
791+
"2023-01-10 50.0\n",
792+
"Name: C, dtype: float64"
733793
]
734794
},
735795
"metadata": {},
@@ -943,19 +1003,13 @@
9431003
{
9441004
"data": {
9451005
"text/plain": [
946-
"2023-01-01 1.0\n",
947-
"2023-01-02 2.0\n",
948-
"2023-01-03 NaN\n",
949-
"2023-01-04 4.0\n",
950-
"2023-01-05 NaN\n",
951-
"2023-01-06 NaN\n",
952-
"2023-01-07 7.0\n",
953-
"2023-01-08 NaN\n",
954-
"2023-01-09 NaN\n",
955-
"2023-01-10 NaN\n",
956-
"2023-01-11 3.0\n",
957-
"2023-01-12 4.0\n",
958-
"Freq: D, Name: Renamed_A, dtype: float64"
1006+
"2023-01-01 1.0\n",
1007+
"2023-01-02 10.0\n",
1008+
"2023-01-04 20.0\n",
1009+
"2023-01-06 30.0\n",
1010+
"2023-01-08 NaN\n",
1011+
"2023-01-10 50.0\n",
1012+
"Name: Renamed_A, dtype: float64"
9591013
]
9601014
},
9611015
"metadata": {},
@@ -964,16 +1018,15 @@
9641018
],
9651019
"source": [
9661020
"# Example: Using `names` to rename output columns\n",
1021+
"print(\"Original univariate series\")\n",
1022+
"print(series1)\n",
1023+
"print(series2)\n",
9671024
"\n",
968-
"# Merging without a rename\n",
969-
"merged_series_named = ts_merge((series1, series2))\n",
970-
"print(\"Merged Series not renamed:\")\n",
1025+
"# Merging univariate with different names and using names to rename\n",
1026+
"merged_series_named = ts_merge((series1, series2), names=[\"C\"])\n",
1027+
"print(\"Merged univariate series renamed:\")\n",
9711028
"display(merged_series_named)\n",
9721029
"\n",
973-
"# Rename a single column\n",
974-
"merged_series_named = ts_merge((series1, series2), names=\"Renamed_A\")\n",
975-
"print(\"Merged Series renamed:\")\n",
976-
"display(merged_series_named)\n",
9771030
"\n",
9781031
"# Select specific columns in DataFrame\n",
9791032
"try:\n",
@@ -1015,6 +1068,7 @@
10151068
},
10161069
{
10171070
"cell_type": "markdown",
1071+
"id": "d615df22",
10181072
"metadata": {},
10191073
"source": [
10201074
"# `ts_merge`: strict priority option\n",
@@ -1068,9 +1122,53 @@
10681122
},
10691123
{
10701124
"cell_type": "code",
1071-
"execution_count": null,
1125+
"execution_count": 7,
1126+
"id": "d31654ba",
10721127
"metadata": {},
1073-
"outputs": [],
1128+
"outputs": [
1129+
{
1130+
"name": "stdout",
1131+
"output_type": "stream",
1132+
"text": [
1133+
"Example 1 strict=False:\n",
1134+
"2023-01-01 1.0\n",
1135+
"2023-01-02 2.0\n",
1136+
"2023-01-03 10.0\n",
1137+
"2023-01-04 4.0\n",
1138+
"2023-01-05 5.0\n",
1139+
"2023-01-06 NaN\n",
1140+
"2023-01-07 50.0\n",
1141+
"Freq: D, Name: A, dtype: float64\n",
1142+
"Example 1 strict=True:\n",
1143+
"2023-01-01 1.0\n",
1144+
"2023-01-02 2.0\n",
1145+
"2023-01-03 NaN\n",
1146+
"2023-01-04 4.0\n",
1147+
"2023-01-05 5.0\n",
1148+
"2023-01-06 NaN\n",
1149+
"2023-01-07 50.0\n",
1150+
"Freq: D, Name: A, dtype: float64\n",
1151+
"\n",
1152+
"Example 2 strict=True:\n",
1153+
" A B\n",
1154+
"2023-01-01 1.0 1.0\n",
1155+
"2023-01-02 NaN NaN\n",
1156+
"2023-01-03 3.0 NaN\n",
1157+
"2023-01-04 4.0 4.0\n",
1158+
"2023-01-05 5.0 5.0\n",
1159+
"2023-01-06 40.0 40.0\n",
1160+
"2023-01-07 50.0 50.0\n",
1161+
"\n",
1162+
"Example 3 strict=True:\n",
1163+
"2023-01-01 1.0\n",
1164+
"2023-01-03 2.0\n",
1165+
"2023-01-07 3.0\n",
1166+
"2023-01-10 4.0\n",
1167+
"2023-01-11 40.0\n",
1168+
"Name: A, dtype: float64\n"
1169+
]
1170+
}
1171+
],
10741172
"source": [
10751173
"import numpy as np, pandas as pd\n",
10761174
"from vtools.functions.merge import ts_merge\n",

0 commit comments

Comments
 (0)