def _idx(start="2023-01-01", periods=6, freq="h"):
    """Build a regular ``DatetimeIndex`` for test fixtures.

    ``start``, ``periods`` and ``freq`` are forwarded to ``pd.date_range``;
    the defaults give six hourly stamps beginning at 2023-01-01.
    """
    stamps = pd.date_range(start, periods=periods, freq=freq)
    return stamps
1010
11+
def _gap(i0="2023-01-02", i1="2023-01-03"):
    """Return the two gap bounds as a ``[start, end]`` list."""
    bounds = [i0, i1]
    return bounds
1314
1415
1516# ---------- type & frequency contracts ----------
1617
18+
def test_type_mismatch_raises():
    """A Series paired with a DataFrame must be rejected with ``ValueError``."""
    series_in = pd.Series([1, 2, 3, 4, 5, 6], index=_idx("2023-01-01"), name="A")
    frame_in = pd.DataFrame(
        {"A": [10, 20, 30, 40, 50, 60]}, index=_idx("2023-01-02")
    )
    call_kwargs = {
        "method": "linear",
        "create_gap": _gap(),
        "return_type": "series",
    }
    with pytest.raises(ValueError, match="same type"):
        transition_ts(series_in, frame_in, **call_kwargs)
2327
2428
def test_frequency_mismatch_raises():
    """Two Series with different index frequencies must raise ``ValueError``."""
    values = [1, 2, 3, 4, 5, 6]
    hourly = pd.Series(values, index=_idx("2023-01-01", freq="h"), name="A")
    half_hourly = pd.Series(
        values, index=_idx("2023-01-02", freq="30min"), name="A"
    )
    gap = _gap()
    with pytest.raises(ValueError, match="same frequency"):
        transition_ts(
            hourly,
            half_hourly,
            method="linear",
            create_gap=gap,
            return_type="series",
        )
3139
3240
3341# ---------- strict column alignment contracts (names=None) ----------
3442
43+
def test_df_columns_mismatch_raises_when_names_none():
    """Without ``names``, frames with different column labels are rejected."""
    left = pd.DataFrame(
        {"A": [1, 2, 3, 4, 5, 6]}, index=_idx("2023-01-01", freq="h")
    )
    right = pd.DataFrame(
        {"B": [10, 20, 30, 40, 50, 60]}, index=_idx("2023-01-02", freq="h")
    )
    expected_msg = r"All input columns must be identical when `names` is None"
    gap = _gap()
    with pytest.raises(ValueError, match=expected_msg):
        transition_ts(
            left, right, method="linear", create_gap=gap, return_type="series"
        )
4255
4356
def test_df_column_order_mismatch_raises_when_names_none():
    """Without ``names``, the same column set in a different order is rejected."""
    ramp = [1, 2, 3, 4, 5, 6]
    zeros = [0, 0, 0, 0, 0, 0]
    left = pd.DataFrame({"A": ramp, "B": zeros}, index=_idx("2023-01-01", freq="h"))
    # Same labels as ``left`` but inserted in the opposite order.
    right = pd.DataFrame({"B": zeros, "A": ramp}, index=_idx("2023-01-02", freq="h"))
    expected_msg = r"All input columns must be identical when `names` is None"
    gap = _gap()
    with pytest.raises(ValueError, match=expected_msg):
        transition_ts(
            left, right, method="linear", create_gap=gap, return_type="series"
        )
5170
5271
5372# ---------- names=str / names=[str] on univariate inputs ----------
5473
74+
def test_series_univariate_names_str_returns_series_named():
    """A string ``names`` on Series inputs yields a Series carrying that name."""
    first = pd.Series(
        [1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A"
    )
    second = pd.Series(
        [np.nan, np.nan, 3, 4, 5, 6],
        index=_idx("2023-01-02", freq="h"),
        name="B",
    )
    gap = ["2023-01-01 12:00", "2023-01-01 18:00"]
    result = transition_ts(
        first,
        second,
        method="linear",
        create_gap=gap,
        return_type="series",
        names="X",
    )
    assert isinstance(result, pd.Series)
    assert result.name == "X"
6190
6291
6392def test_series_univariate_names_list_single_equiv_to_str ():
64- s0 = pd .Series ([1 ,2 ,3 ,4 ,5 ,6 ], index = _idx ("2023-01-01" , freq = "h" ), name = "A" )
65- s1 = pd .Series ([np .nan ,np .nan ,3 ,4 ,5 ,6 ], index = _idx ("2023-01-02" , freq = "h" ), name = "B" )
93+ s0 = pd .Series ([1 , 2 , 3 , 4 , 5 , 6 ], index = _idx ("2023-01-01" , freq = "h" ), name = "A" )
94+ s1 = pd .Series (
95+ [np .nan , np .nan , 3 , 4 , 5 , 6 ], index = _idx ("2023-01-02" , freq = "h" ), name = "B"
96+ )
6697 out1 = transition_ts (
67- s0 , s1 ,
98+ s0 ,
99+ s1 ,
68100 method = "linear" ,
69101 create_gap = ["2023-01-01 12:00" , "2023-01-01 18:00" ], # <-- inside natural gap
70102 return_type = "series" ,
71103 names = "X" ,
72104 )
73105
74106 out2 = transition_ts (
75- s0 , s1 ,
107+ s0 ,
108+ s1 ,
76109 method = "linear" ,
77110 create_gap = ["2023-01-01 12:00" , "2023-01-01 18:00" ], # <-- same valid gap
78111 return_type = "series" ,
@@ -82,155 +115,222 @@ def test_series_univariate_names_list_single_equiv_to_str():
82115
83116
def test_series_univariate_names_list_multi_raises():
    """Several names for univariate Series inputs must raise ``ValueError``."""
    first = pd.Series(
        [1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A"
    )
    second = pd.Series(
        [np.nan, np.nan, 3, 4, 5, 6],
        index=_idx("2023-01-02", freq="h"),
        name="B",
    )
    call_kwargs = {
        "method": "linear",
        "create_gap": _gap(),
        "return_type": "series",
        "names": ["X", "Y"],
    }
    with pytest.raises(ValueError, match="multiple names"):
        transition_ts(first, second, **call_kwargs)
89131
90132
91133# ---------- names=[...] selection on multivariate DFs ----------
92134
135+
def test_df_names_list_selection_subset_and_order_preserved():
    """``names=[...]`` on DataFrame inputs selects columns in the requested order.

    Both frames carry columns ``A`` and ``B``; the call asks for
    ``["B", "A"]`` and the result is expected to be a DataFrame whose columns
    appear in exactly that order.
    """
    idx0 = _idx("2023-01-01", freq="h")
    idx1 = _idx("2023-01-02", freq="h")
    df0 = pd.DataFrame(
        {"A": [1, 2, 3, 4, 5, 6], "B": [10, 20, 30, 40, 50, 60]}, index=idx0
    )
    df1 = pd.DataFrame(
        {"A": [2, 3, 4, 5, 6, 7], "B": [11, 21, 31, 41, 51, 61]}, index=idx1
    )

    # The gap spans one step after df0's last stamp to one step before df1's first.
    gap_start = df0.index[-1] + df0.index.freq  # 2023-01-01 06:00
    gap_end = df1.index[0] - df1.index.freq  # 2023-01-01 23:00
    out = transition_ts(
        df0,
        df1,
        method="linear",
        create_gap=[gap_start, gap_end],
        return_type="series",
        names=["B", "A"],
    )

    # Previously the result was computed but never checked, so the test could
    # not fail on a wrong selection or ordering.
    assert isinstance(out, pd.DataFrame)
    assert list(out.columns) == ["B", "A"]
108156
109157
110-
111-
def test_df_names_list_missing_column_raises():
    """Requesting a column that one input lacks must raise ``ValueError``."""
    complete = pd.DataFrame(
        {"A": [1, 2, 3, 4, 5, 6], "B": [10, 20, 30, 40, 50, 60]},
        index=_idx("2023-01-01", freq="h"),
    )
    # The second frame has no 'B' column.
    partial = pd.DataFrame(
        {"A": [2, 3, 4, 5, 6, 7]}, index=_idx("2023-01-02", freq="h")
    )
    requested = ["A", "B"]
    gap = _gap()
    with pytest.raises(ValueError, match=r"missing requested columns"):
        transition_ts(
            complete,
            partial,
            method="linear",
            create_gap=gap,
            return_type="series",
            names=requested,
        )
119174
120175
def test_gap_end_after_ts1_last_raises():
    """A ``create_gap`` end of 2023-01-03 00:00 must raise ``ValueError``."""
    first = pd.Series(range(6), index=_idx("2023-01-01", freq="h"), name="A")
    second = pd.Series(range(6), index=_idx("2023-01-02", freq="h"), name="A")
    gap = ["2023-01-02 12:00", "2023-01-03 00:00"]
    with pytest.raises(ValueError, match="create_gap end"):
        transition_ts(first, second, method="linear", create_gap=gap)
183+
126184
def test_gap_start_not_before_ts0_any_sample_raises():
    """A ``create_gap`` lying wholly on 2022-12-31 must raise ``ValueError``."""
    first = pd.Series(range(6), index=_idx("2023-01-01", freq="h"), name="A")
    second = pd.Series(range(6), index=_idx("2023-01-02", freq="h"), name="A")
    gap = ["2022-12-31 00:00", "2022-12-31 12:00"]
    with pytest.raises(ValueError, match="create_gap start"):
        transition_ts(first, second, method="linear", create_gap=gap)
133192
134193
135194# ---------- names=[] guard ----------
136195
196+
def test_empty_names_list_raises():
    """An empty ``names`` list must raise ``ValueError``."""
    first = pd.Series(
        [1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A"
    )
    second = pd.Series(
        [np.nan, np.nan, 3, 4, 5, 6],
        index=_idx("2023-01-02", freq="h"),
        name="B",
    )
    gap = _gap()
    no_names = []
    with pytest.raises(ValueError, match="selection is empty"):
        transition_ts(
            first,
            second,
            method="linear",
            create_gap=gap,
            return_type="series",
            names=no_names,
        )
142206
143207
144208import pytest
145209import pandas as pd
146210import numpy as np
147211from vtools import transition_ts
148212
213+
def _idx(start="2023-01-01", periods=6, freq="h"):
    """Build a regular ``DatetimeIndex`` for test fixtures.

    ``start``, ``periods`` and ``freq`` are forwarded to ``pd.date_range``;
    the defaults give six hourly stamps beginning at 2023-01-01.
    """
    stamps = pd.date_range(start, periods=periods, freq=freq)
    return stamps
151216
217+
152218# --- helpers to build simple data ---
def _s(name, start, periods=6, freq="h", offset=0):
    """Build a named Series holding ``0 .. periods-1`` on a regular index.

    The index comes from ``_idx(start, periods, freq)`` and is shifted by
    ``offset`` units of ``freq``.
    """
    base_index = _idx(start, periods=periods, freq=freq)
    shift = pd.Timedelta(offset, unit=freq)
    return pd.Series(range(periods), index=base_index + shift, name=name)
156222
223+
def _df(names, start, periods=6, freq="h"):
    """Build a DataFrame with one ``np.arange(periods)`` column per name.

    The index comes from ``_idx(start, periods, freq)``.
    """
    stamps = _idx(start, periods=periods, freq=freq)
    columns = {}
    for label in names:
        columns[label] = np.arange(periods)
    return pd.DataFrame(columns, index=stamps)
161228
229+
162230# ---------- CONTRACT: explicit create_gap strict domain checks ----------
163231
232+
def test_gap_start_before_ts0_first_errors():
    """A gap starting a day before ``ts0``'s first stamp must raise ``ValueError``."""
    earlier = _s("A", "2023-01-02")
    later = _s("A", "2023-01-03")
    gap = ["2023-01-01 00:00", "2023-01-02 12:00"]
    with pytest.raises(ValueError, match=r"create_gap start.*"):
        transition_ts(earlier, later, create_gap=gap, method="linear")
243+
169244
def test_gap_end_after_ts1_last_errors():
    """A gap ending on 2023-01-03 12:00 must raise ``ValueError``."""
    earlier = _s("A", "2023-01-01")
    later = _s("A", "2023-01-02")
    gap = ["2023-01-02 00:00", "2023-01-03 12:00"]
    with pytest.raises(ValueError, match=r"create_gap end.*"):
        transition_ts(earlier, later, create_gap=gap, method="linear")
255+
175256
def test_gap_start_ge_end_errors():
    """A gap whose start equals its end must raise ``ValueError``."""
    earlier = _s("A", "2023-01-01")
    later = _s("A", "2023-01-02")
    same_instant = "2023-01-01 10:00"
    with pytest.raises(ValueError, match="start must be strictly before end"):
        transition_ts(
            earlier,
            later,
            create_gap=[same_instant, same_instant],
            method="linear",
        )
267+
182268
183269# ---------- OPTIONAL SNAP: only when gap ⊂ natural gap ----------
184270
271+
def test_max_snap_expands_inside_natural_gap_symmetrically():
    """A small sub-gap inside the natural gap is accepted with ``max_snap="1D"``.

    Only the result type is checked here; the widened endpoints are not.
    """
    # The inputs do not overlap: ts0's last stamp is 2023-01-01 05:00 and
    # ts1's first stamp is 2023-01-03 00:00.
    earlier = _s("A", "2023-01-01")
    later = _s("A", "2023-01-03")
    # One-hour sub-gap chosen well inside that natural gap.
    sub_gap = ["2023-01-02 06:00", "2023-01-02 07:00"]
    result = transition_ts(
        earlier,
        later,
        method="linear",
        create_gap=sub_gap,
        max_snap="1D",  # allow widening up to 24h
        return_type="series",
    )
    accepted_types = (pd.Series, pd.DataFrame)
    assert isinstance(result, accepted_types)
197286
287+
def test_max_snap_ignored_when_overlap():
    """With overlapping inputs, supplying ``max_snap`` must not cause an error.

    Only the result type is checked; how the gap is handled inside the overlap
    is left to the algorithm.
    """
    # Overlap (no natural gap): ts1 starts six hours into ts0's twelve-hour span.
    ts0 = _s("A", "2023-01-01", periods=12, freq="h")
    ts1 = _s("A", "2023-01-01 06:00", periods=12, freq="h")
    # Valid gap inside the overlap; max_snap should be ignored.
    out = transition_ts(
        ts0,
        ts1,
        method="linear",
        create_gap=["2023-01-01 08:00", "2023-01-01 10:00"],
        # Lowercase hour alias: uppercase "H" is deprecated since pandas 2.2,
        # and the rest of this module already uses "h" / "12h".
        max_snap="12h",
        return_type="series",
    )
    assert isinstance(out, (pd.Series, pd.DataFrame))
210302
303+
def test_max_snap_does_not_cross_natural_bounds():
    """A sub-gap near the left edge with a large ``max_snap`` still succeeds.

    Internal endpoints are not asserted; the call completing and returning a
    pandas object is the guard against snapping past the natural bounds.
    """
    earlier = _s("A", "2023-01-01", periods=6, freq="h")  # last = 2023-01-01 05:00
    later = _s("A", "2023-01-02", periods=6, freq="h")  # first = 2023-01-02 00:00
    # Sub-gap hugging the left edge of the natural gap, with a generous snap.
    left_edge_gap = ["2023-01-01 05:30", "2023-01-01 06:00"]
    result = transition_ts(
        earlier,
        later,
        method="linear",
        create_gap=left_edge_gap,
        max_snap="1D",
        return_type="series",
    )
    accepted_types = (pd.Series, pd.DataFrame)
    assert isinstance(result, accepted_types)
223318
319+
224320# ---------- NAMES contract still holds with gap handling ----------
225321
322+
def test_df_subset_names_with_gap_inside_natural_gap():
    """Column selection via ``names`` still applies when ``max_snap`` is given."""
    labels = ["A", "B"]
    left = _df(labels, "2023-01-01")
    right = _df(labels, "2023-01-03")
    requested = ["B", "A"]
    sub_gap = ["2023-01-02 06:00", "2023-01-02 07:00"]
    result = transition_ts(
        left,
        right,
        method="linear",
        create_gap=sub_gap,
        names=requested,
        max_snap="12h",
        return_type="series",
    )
    assert isinstance(result, pd.DataFrame)
    assert list(result.columns) == ["B", "A"]
0 commit comments