Skip to content

Commit 9cb76a4

Browse files
EliEli
authored and committed
Added test.
1 parent bf6d243 commit 9cb76a4

2 files changed

Lines changed: 165 additions & 62 deletions

File tree

tests/test_merge_splice.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -340,3 +340,6 @@ def test_ts_merge_strict_priority_irregular(irregular_sample_data):
340340
index=pd.to_datetime(["2023-01-01","2023-01-03","2023-01-07","2023-01-10","2023-01-11"]),
341341
name="A")
342342
pd.testing.assert_series_equal(result, expected)
343+
344+
345+

tests/test_transition_arg_alignment.py

Lines changed: 162 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -8,71 +8,104 @@
88
def _idx(start="2023-01-01", periods=6, freq="h"):
99
return pd.date_range(start, periods=periods, freq=freq)
1010

11+
1112
def _gap(i0="2023-01-02", i1="2023-01-03"):
1213
return [i0, i1]
1314

1415

1516
# ---------- type & frequency contracts ----------
1617

18+
1719
def test_type_mismatch_raises():
1820
# Series vs DataFrame should fail
19-
ts0 = pd.Series([1,2,3,4,5,6], index=_idx("2023-01-01"), name="A")
20-
ts1 = pd.DataFrame({"A":[10,20,30,40,50,60]}, index=_idx("2023-01-02"))
21+
ts0 = pd.Series([1, 2, 3, 4, 5, 6], index=_idx("2023-01-01"), name="A")
22+
ts1 = pd.DataFrame({"A": [10, 20, 30, 40, 50, 60]}, index=_idx("2023-01-02"))
2123
with pytest.raises(ValueError, match="same type"):
22-
transition_ts(ts0, ts1, method="linear", create_gap=_gap(), return_type="series")
24+
transition_ts(
25+
ts0, ts1, method="linear", create_gap=_gap(), return_type="series"
26+
)
2327

2428

2529
def test_frequency_mismatch_raises():
2630
# Same type, different freq should fail
27-
ts0 = pd.Series([1,2,3,4,5,6], index=_idx("2023-01-01", freq="h"), name="A")
28-
ts1 = pd.Series([1,2,3,4,5,6], index=_idx("2023-01-02", freq="30min"), name="A")
31+
ts0 = pd.Series([1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A")
32+
ts1 = pd.Series(
33+
[1, 2, 3, 4, 5, 6], index=_idx("2023-01-02", freq="30min"), name="A"
34+
)
2935
with pytest.raises(ValueError, match="same frequency"):
30-
transition_ts(ts0, ts1, method="linear", create_gap=_gap(), return_type="series")
36+
transition_ts(
37+
ts0, ts1, method="linear", create_gap=_gap(), return_type="series"
38+
)
3139

3240

3341
# ---------- strict column alignment contracts (names=None) ----------
3442

43+
3544
def test_df_columns_mismatch_raises_when_names_none():
3645
idx0 = _idx("2023-01-01", freq="h")
3746
idx1 = _idx("2023-01-02", freq="h")
38-
df0 = pd.DataFrame({"A":[1,2,3,4,5,6]}, index=idx0)
39-
df1 = pd.DataFrame({"B":[10,20,30,40,50,60]}, index=idx1)
40-
with pytest.raises(ValueError, match=r"All input columns must be identical when `names` is None"):
41-
transition_ts(df0, df1, method="linear", create_gap=_gap(), return_type="series")
47+
df0 = pd.DataFrame({"A": [1, 2, 3, 4, 5, 6]}, index=idx0)
48+
df1 = pd.DataFrame({"B": [10, 20, 30, 40, 50, 60]}, index=idx1)
49+
with pytest.raises(
50+
ValueError, match=r"All input columns must be identical when `names` is None"
51+
):
52+
transition_ts(
53+
df0, df1, method="linear", create_gap=_gap(), return_type="series"
54+
)
4255

4356

4457
def test_df_column_order_mismatch_raises_when_names_none():
4558
idx0 = _idx("2023-01-01", freq="h")
4659
idx1 = _idx("2023-01-02", freq="h")
47-
df0 = pd.DataFrame({"A":[1,2,3,4,5,6], "B":[0,0,0,0,0,0]}, index=idx0)
48-
df1 = pd.DataFrame({"B":[0,0,0,0,0,0], "A":[1,2,3,4,5,6]}, index=idx1) # same set, different order
49-
with pytest.raises(ValueError, match=r"All input columns must be identical when `names` is None"):
50-
transition_ts(df0, df1, method="linear", create_gap=_gap(), return_type="series")
60+
df0 = pd.DataFrame({"A": [1, 2, 3, 4, 5, 6], "B": [0, 0, 0, 0, 0, 0]}, index=idx0)
61+
df1 = pd.DataFrame(
62+
{"B": [0, 0, 0, 0, 0, 0], "A": [1, 2, 3, 4, 5, 6]}, index=idx1
63+
) # same set, different order
64+
with pytest.raises(
65+
ValueError, match=r"All input columns must be identical when `names` is None"
66+
):
67+
transition_ts(
68+
df0, df1, method="linear", create_gap=_gap(), return_type="series"
69+
)
5170

5271

5372
# ---------- names=str / names=[str] on univariate inputs ----------
5473

74+
5575
def test_series_univariate_names_str_returns_series_named():
56-
s0 = pd.Series([1,2,3,4,5,6], index=_idx("2023-01-01", freq="h"), name="A")
57-
s1 = pd.Series([np.nan,np.nan,3,4,5,6], index=_idx("2023-01-02", freq="h"), name="B")
58-
out = transition_ts(s0, s1, method="linear", create_gap=["2023-01-01 12:00", "2023-01-01 18:00"], return_type="series", names="X")
76+
s0 = pd.Series([1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A")
77+
s1 = pd.Series(
78+
[np.nan, np.nan, 3, 4, 5, 6], index=_idx("2023-01-02", freq="h"), name="B"
79+
)
80+
out = transition_ts(
81+
s0,
82+
s1,
83+
method="linear",
84+
create_gap=["2023-01-01 12:00", "2023-01-01 18:00"],
85+
return_type="series",
86+
names="X",
87+
)
5988
assert isinstance(out, pd.Series)
6089
assert out.name == "X"
6190

6291

6392
def test_series_univariate_names_list_single_equiv_to_str():
64-
s0 = pd.Series([1,2,3,4,5,6], index=_idx("2023-01-01", freq="h"), name="A")
65-
s1 = pd.Series([np.nan,np.nan,3,4,5,6], index=_idx("2023-01-02", freq="h"), name="B")
93+
s0 = pd.Series([1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A")
94+
s1 = pd.Series(
95+
[np.nan, np.nan, 3, 4, 5, 6], index=_idx("2023-01-02", freq="h"), name="B"
96+
)
6697
out1 = transition_ts(
67-
s0, s1,
98+
s0,
99+
s1,
68100
method="linear",
69101
create_gap=["2023-01-01 12:00", "2023-01-01 18:00"], # <-- inside natural gap
70102
return_type="series",
71103
names="X",
72104
)
73105

74106
out2 = transition_ts(
75-
s0, s1,
107+
s0,
108+
s1,
76109
method="linear",
77110
create_gap=["2023-01-01 12:00", "2023-01-01 18:00"], # <-- same valid gap
78111
return_type="series",
@@ -82,155 +115,222 @@ def test_series_univariate_names_list_single_equiv_to_str():
82115

83116

84117
def test_series_univariate_names_list_multi_raises():
85-
s0 = pd.Series([1,2,3,4,5,6], index=_idx("2023-01-01", freq="h"), name="A")
86-
s1 = pd.Series([np.nan,np.nan,3,4,5,6], index=_idx("2023-01-02", freq="h"), name="B")
118+
s0 = pd.Series([1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A")
119+
s1 = pd.Series(
120+
[np.nan, np.nan, 3, 4, 5, 6], index=_idx("2023-01-02", freq="h"), name="B"
121+
)
87122
with pytest.raises(ValueError, match="multiple names"):
88-
transition_ts(s0, s1, method="linear", create_gap=_gap(), return_type="series", names=["X","Y"])
123+
transition_ts(
124+
s0,
125+
s1,
126+
method="linear",
127+
create_gap=_gap(),
128+
return_type="series",
129+
names=["X", "Y"],
130+
)
89131

90132

91133
# ---------- names=[...] selection on multivariate DFs ----------
92134

135+
93136
def test_df_names_list_selection_subset_and_order_preserved():
94137
idx0 = _idx("2023-01-01", freq="h")
95138
idx1 = _idx("2023-01-02", freq="h")
96-
df0 = pd.DataFrame({"A":[1,2,3,4,5,6], "B":[10,20,30,40,50,60]}, index=idx0)
97-
df1 = pd.DataFrame({"A":[2,3,4,5,6,7], "B":[11,21,31,41,51,61]}, index=idx1)
139+
df0 = pd.DataFrame(
140+
{"A": [1, 2, 3, 4, 5, 6], "B": [10, 20, 30, 40, 50, 60]}, index=idx0
141+
)
142+
df1 = pd.DataFrame(
143+
{"A": [2, 3, 4, 5, 6, 7], "B": [11, 21, 31, 41, 51, 61]}, index=idx1
144+
)
98145

99-
gap_start = df0.index[-1] + df0.index.freq # 2023-01-01 06:00
100-
gap_end = df1.index[0] - df1.index.freq # 2023-01-01 23:00
146+
gap_start = df0.index[-1] + df0.index.freq # 2023-01-01 06:00
147+
gap_end = df1.index[0] - df1.index.freq # 2023-01-01 23:00
101148
out = transition_ts(
102-
df0, df1,
149+
df0,
150+
df1,
103151
method="linear",
104152
create_gap=[gap_start, gap_end],
105153
return_type="series",
106-
names=["B","A"]
154+
names=["B", "A"],
107155
)
108156

109157

110-
111-
112158
def test_df_names_list_missing_column_raises():
113159
idx0 = _idx("2023-01-01", freq="h")
114160
idx1 = _idx("2023-01-02", freq="h")
115-
df0 = pd.DataFrame({"A":[1,2,3,4,5,6], "B":[10,20,30,40,50,60]}, index=idx0)
116-
df1 = pd.DataFrame({"A":[2,3,4,5,6,7]}, index=idx1) # missing 'B'
161+
df0 = pd.DataFrame(
162+
{"A": [1, 2, 3, 4, 5, 6], "B": [10, 20, 30, 40, 50, 60]}, index=idx0
163+
)
164+
df1 = pd.DataFrame({"A": [2, 3, 4, 5, 6, 7]}, index=idx1) # missing 'B'
117165
with pytest.raises(ValueError, match=r"missing requested columns"):
118-
transition_ts(df0, df1, method="linear", create_gap=_gap(), return_type="series", names=["A","B"])
166+
transition_ts(
167+
df0,
168+
df1,
169+
method="linear",
170+
create_gap=_gap(),
171+
return_type="series",
172+
names=["A", "B"],
173+
)
119174

120175

121176
def test_gap_end_after_ts1_last_raises():
122177
s0 = pd.Series(range(6), index=_idx("2023-01-01", freq="h"), name="A")
123178
s1 = pd.Series(range(6), index=_idx("2023-01-02", freq="h"), name="A")
124179
with pytest.raises(ValueError, match="create_gap end"):
125-
transition_ts(s0, s1, method="linear", create_gap=["2023-01-02 12:00", "2023-01-03 00:00"])
180+
transition_ts(
181+
s0, s1, method="linear", create_gap=["2023-01-02 12:00", "2023-01-03 00:00"]
182+
)
183+
126184

127185
def test_gap_start_not_before_ts0_any_sample_raises():
128186
s0 = pd.Series(range(6), index=_idx("2023-01-01", freq="h"), name="A")
129187
s1 = pd.Series(range(6), index=_idx("2023-01-02", freq="h"), name="A")
130188
with pytest.raises(ValueError, match="create_gap start"):
131-
transition_ts(s0, s1, method="linear",
132-
create_gap=["2022-12-31 00:00", "2022-12-31 12:00"])
189+
transition_ts(
190+
s0, s1, method="linear", create_gap=["2022-12-31 00:00", "2022-12-31 12:00"]
191+
)
133192

134193

135194
# ---------- names=[] guard ----------
136195

196+
137197
def test_empty_names_list_raises():
138-
s0 = pd.Series([1,2,3,4,5,6], index=_idx("2023-01-01", freq="h"), name="A")
139-
s1 = pd.Series([np.nan,np.nan,3,4,5,6], index=_idx("2023-01-02", freq="h"), name="B")
198+
s0 = pd.Series([1, 2, 3, 4, 5, 6], index=_idx("2023-01-01", freq="h"), name="A")
199+
s1 = pd.Series(
200+
[np.nan, np.nan, 3, 4, 5, 6], index=_idx("2023-01-02", freq="h"), name="B"
201+
)
140202
with pytest.raises(ValueError, match="selection is empty"):
141-
transition_ts(s0, s1, method="linear", create_gap=_gap(), return_type="series", names=[])
203+
transition_ts(
204+
s0, s1, method="linear", create_gap=_gap(), return_type="series", names=[]
205+
)
142206

143207

144208
import pytest
145209
import pandas as pd
146210
import numpy as np
147211
from vtools import transition_ts
148212

213+
149214
def _idx(start="2023-01-01", periods=6, freq="h"):
150215
return pd.date_range(start, periods=periods, freq=freq)
151216

217+
152218
# --- helpers to build simple data ---
153219
def _s(name, start, periods=6, freq="h", offset=0):
154220
idx = _idx(start, periods=periods, freq=freq) + pd.Timedelta(offset, unit=freq)
155221
return pd.Series(range(periods), index=idx, name=name)
156222

223+
157224
def _df(names, start, periods=6, freq="h"):
158225
idx = _idx(start, periods=periods, freq=freq)
159226
data = {n: np.arange(periods) for n in names}
160227
return pd.DataFrame(data, index=idx)
161228

229+
162230
# ---------- CONTRACT: explicit create_gap strict domain checks ----------
163231

232+
164233
def test_gap_start_before_ts0_first_errors():
165234
ts0 = _s("A", "2023-01-02")
166235
ts1 = _s("A", "2023-01-03")
167236
with pytest.raises(ValueError, match=r"create_gap start.*"):
168-
transition_ts(ts0, ts1, create_gap=["2023-01-01 00:00", "2023-01-02 12:00"], method="linear")
237+
transition_ts(
238+
ts0,
239+
ts1,
240+
create_gap=["2023-01-01 00:00", "2023-01-02 12:00"],
241+
method="linear",
242+
)
243+
169244

170245
def test_gap_end_after_ts1_last_errors():
171246
ts0 = _s("A", "2023-01-01")
172247
ts1 = _s("A", "2023-01-02")
173248
with pytest.raises(ValueError, match=r"create_gap end.*"):
174-
transition_ts(ts0, ts1, create_gap=["2023-01-02 00:00", "2023-01-03 12:00"], method="linear")
249+
transition_ts(
250+
ts0,
251+
ts1,
252+
create_gap=["2023-01-02 00:00", "2023-01-03 12:00"],
253+
method="linear",
254+
)
255+
175256

176257
def test_gap_start_ge_end_errors():
177258
ts0 = _s("A", "2023-01-01")
178259
ts1 = _s("A", "2023-01-02")
179260
with pytest.raises(ValueError, match="start must be strictly before end"):
180-
transition_ts(ts0, ts1,
181-
create_gap=["2023-01-01 10:00", "2023-01-01 10:00"], method="linear")
261+
transition_ts(
262+
ts0,
263+
ts1,
264+
create_gap=["2023-01-01 10:00", "2023-01-01 10:00"],
265+
method="linear",
266+
)
267+
182268

183269
# ---------- OPTIONAL SNAP: only when gap ⊂ natural gap ----------
184270

271+
185272
def test_max_snap_expands_inside_natural_gap_symmetrically():
186273
# Natural gap: ts0.last < ts1.first (24h apart)
187-
ts0 = _s("A", "2023-01-01") # ends ~ 2023-01-01 05:00
188-
ts1 = _s("A", "2023-01-03") # starts 2023-01-03 00:00
274+
ts0 = _s("A", "2023-01-01") # ends ~ 2023-01-01 05:00
275+
ts1 = _s("A", "2023-01-03") # starts 2023-01-03 00:00
189276
# User picks a very small sub-gap in the middle of the natural gap
190277
out = transition_ts(
191-
ts0, ts1, method="linear",
278+
ts0,
279+
ts1,
280+
method="linear",
192281
create_gap=["2023-01-02 06:00", "2023-01-02 07:00"],
193-
max_snap="1D", # allow widening up to 24h
194-
return_type="series"
282+
max_snap="1D", # allow widening up to 24h
283+
return_type="series",
195284
)
196285
assert isinstance(out, (pd.Series, pd.DataFrame))
197286

287+
198288
def test_max_snap_ignored_when_overlap():
199289
# Overlap (no natural gap)
200290
ts0 = _s("A", "2023-01-01", periods=12, freq="h")
201291
ts1 = _s("A", "2023-01-01 06:00", periods=12, freq="h")
202292
# Valid gap inside overlap; max_snap should be ignored (no errors; algorithms decide)
203293
out = transition_ts(
204-
ts0, ts1, method="linear",
294+
ts0,
295+
ts1,
296+
method="linear",
205297
create_gap=["2023-01-01 08:00", "2023-01-01 10:00"],
206298
max_snap="12H",
207-
return_type="series"
299+
return_type="series",
208300
)
209301
assert isinstance(out, (pd.Series, pd.DataFrame))
210302

303+
211304
def test_max_snap_does_not_cross_natural_bounds():
212-
ts0 = _s("A", "2023-01-01", periods=6, freq="h") # last = 2023-01-01 05:00
213-
ts1 = _s("A", "2023-01-02", periods=6, freq="h") # first = 2023-01-02 00:00
305+
ts0 = _s("A", "2023-01-01", periods=6, freq="h") # last = 2023-01-01 05:00
306+
ts1 = _s("A", "2023-01-02", periods=6, freq="h") # first = 2023-01-02 00:00
214307
# choose a sub-gap near the left edge; allow large snap
215308
out = transition_ts(
216-
ts0, ts1, method="linear",
309+
ts0,
310+
ts1,
311+
method="linear",
217312
create_gap=["2023-01-01 05:30", "2023-01-01 06:00"],
218313
max_snap="1D",
219-
return_type="series"
314+
return_type="series",
220315
)
221316
assert isinstance(out, (pd.Series, pd.DataFrame))
222317
# We don’t assert internal endpoints here, but this protects against crossing bounds.
223318

319+
224320
# ---------- NAMES contract still holds with gap handling ----------
225321

322+
226323
def test_df_subset_names_with_gap_inside_natural_gap():
227-
df0 = _df(["A","B"], "2023-01-01")
228-
df1 = _df(["A","B"], "2023-01-03")
324+
df0 = _df(["A", "B"], "2023-01-01")
325+
df1 = _df(["A", "B"], "2023-01-03")
229326
out = transition_ts(
230-
df0, df1, method="linear",
327+
df0,
328+
df1,
329+
method="linear",
231330
create_gap=["2023-01-02 06:00", "2023-01-02 07:00"],
232-
names=["B","A"], max_snap="12h",
233-
return_type="series"
331+
names=["B", "A"],
332+
max_snap="12h",
333+
return_type="series",
234334
)
235335
assert isinstance(out, pd.DataFrame)
236-
assert list(out.columns) == ["B","A"]
336+
assert list(out.columns) == ["B", "A"]

0 commit comments

Comments
 (0)