Skip to content

Commit 43a7103

Browse files
EliEli
authored and committed
moved infer_freq_robust to vtools.data.index and added tests.
1 parent e1de627 commit 43a7103

File tree

2 files changed

+421
-0
lines changed

2 files changed

+421
-0
lines changed

tests/test_indexing.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
from __future__ import annotations
2+
3+
import pandas as pd
4+
import numpy as np
5+
import pytest
6+
7+
from vtools.data.indexing import (
8+
resolve_common_freq,
9+
regular_index_from_valid_extent,
10+
reindex_to_continuous,
11+
inferred_regular_freq,
12+
compare_regular_freq,
13+
)
14+
15+
16+
def test_resolve_common_freq_same():
    """Two daily indexes resolve to the frequency they share."""
    daily_a = pd.date_range("2024-01-01", periods=5, freq="D")
    daily_b = pd.date_range("2024-02-01", periods=3, freq="D")

    resolved = resolve_common_freq([daily_a, daily_b], preserve_freq=True)

    assert resolved == daily_a.freq
21+
22+
23+
def test_resolve_common_freq_none_when_preserve_false():
    """With preserve_freq=False the result is None even for differing frequencies."""
    daily = pd.date_range("2024-01-01", periods=5, freq="D")
    two_daily = pd.date_range("2024-02-01", periods=3, freq="2D")

    resolved = resolve_common_freq([daily, two_daily], preserve_freq=False)

    assert resolved is None
27+
28+
29+
def test_resolve_common_freq_raises_on_mismatch():
    """Differing frequencies with preserve_freq=True must raise ValueError."""
    daily = pd.date_range("2024-01-01", periods=5, freq="D")
    two_daily = pd.date_range("2024-02-01", periods=3, freq="2D")
    mismatched = [daily, two_daily]

    with pytest.raises(ValueError, match="inconsistent frequencies"):
        resolve_common_freq(mismatched, preserve_freq=True)
34+
35+
36+
def test_regular_index_from_valid_extent_series():
    """The result spans the first to last non-NaN timestamp across both series."""
    first_idx = pd.date_range("2024-01-01", periods=5, freq="D")
    second_idx = pd.date_range("2024-01-03", periods=5, freq="D")
    # NaNs at the edges mean the valid extent runs 2024-01-02 .. 2024-01-06.
    first = pd.Series([np.nan, 1.0, 2.0, 3.0, np.nan], index=first_idx)
    second = pd.Series([10.0, 11.0, 12.0, 13.0, np.nan], index=second_idx)

    result = regular_index_from_valid_extent([first, second], first_idx.freq)

    pd.testing.assert_index_equal(
        result,
        pd.date_range("2024-01-02", "2024-01-06", freq="D"),
    )
45+
46+
47+
def test_regular_index_from_valid_extent_empty_valid_returns_empty_like_first():
    """All-NaN inputs yield an empty DatetimeIndex."""
    days = pd.date_range("2024-01-01", periods=3, freq="D")
    all_nan_a = pd.Series([np.nan, np.nan, np.nan], index=days)
    all_nan_b = pd.Series([np.nan, np.nan, np.nan], index=days)

    result = regular_index_from_valid_extent([all_nan_a, all_nan_b], days.freq)

    assert len(result) == 0
    assert isinstance(result, pd.DatetimeIndex)
55+
56+
57+
def test_reindex_to_continuous_regularizes_when_aligned():
    """A series with a missing day is filled out to a continuous daily index."""
    sparse_idx = pd.to_datetime(["2024-01-01", "2024-01-03"])
    sparse = pd.Series([1.0, 3.0], index=sparse_idx, name="x")
    daily = pd.tseries.frequencies.to_offset("D")

    result = reindex_to_continuous(sparse, daily)

    # The skipped day (2024-01-02) is expected to appear as NaN.
    full_idx = pd.date_range("2024-01-01", periods=3, freq="D")
    wanted = pd.Series([1.0, np.nan, 3.0], index=full_idx, name="x")
    pd.testing.assert_series_equal(result, wanted)
68+
69+
70+
def test_reindex_to_continuous_returns_original_when_misaligned():
    """A timestamp off the 15-minute grid leaves the series unchanged."""
    stamps = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:10", "2024-01-01 00:15"])
    original = pd.Series([1.0, 2.0, 3.0], index=stamps, name="x")
    quarter_hour = pd.tseries.frequencies.to_offset("15min")

    result = reindex_to_continuous(original, quarter_hour)

    pd.testing.assert_series_equal(result, original)
76+
77+
78+
def test_inferred_regular_freq_datetime_ok():
    """A regular daily series reports reason "ok" and the daily frequency."""
    days = pd.date_range("2024-01-01", periods=5, freq="D")
    series = pd.Series(range(5), index=days)

    inferred, why = inferred_regular_freq(series)

    assert why == "ok"
    assert inferred == days.freq
84+
85+
86+
def test_inferred_regular_freq_non_datetime():
    """An integer-indexed series gives no frequency and "not_datetime_like"."""
    int_indexed = pd.Series([1, 2, 3], index=[1, 2, 3])

    inferred, why = inferred_regular_freq(int_indexed)

    assert inferred is None
    assert why == "not_datetime_like"
91+
92+
93+
def test_inferred_regular_freq_not_monotonic():
    """Out-of-order timestamps give no frequency and "not_monotonic"."""
    shuffled = pd.to_datetime(["2024-01-02", "2024-01-01", "2024-01-03"])
    series = pd.Series([1, 2, 3], index=shuffled)

    inferred, why = inferred_regular_freq(series)

    assert inferred is None
    assert why == "not_monotonic"
99+
100+
101+
def test_inferred_regular_freq_duplicates():
    """A repeated timestamp gives no frequency and "duplicates"."""
    repeated = pd.to_datetime(["2024-01-01", "2024-01-01", "2024-01-02"])
    series = pd.Series([1, 2, 3], index=repeated)

    inferred, why = inferred_regular_freq(series)

    assert inferred is None
    assert why == "duplicates"
107+
108+
109+
def test_inferred_regular_freq_irregular_infer_failed():
    """Uneven spacing (1 day, then 2 days) gives no frequency and "infer_failed"."""
    uneven = pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-04"])
    series = pd.Series([1, 2, 3], index=uneven)

    inferred, why = inferred_regular_freq(series)

    assert inferred is None
    assert why == "infer_failed"
115+
116+
117+
def test_inferred_regular_freq_single_point_degenerate():
    """A one-point series reports "degenerate" with a zero Timedelta frequency."""
    single = pd.date_range("2024-01-01", periods=1, freq="D")
    series = pd.Series([1.0], index=single)

    inferred, why = inferred_regular_freq(series)

    assert why == "degenerate"
    assert inferred == pd.Timedelta(0)
123+
124+
125+
def test_compare_regular_freq_both_regular_same():
    """Two daily series compare as "both_regular_same" with each side's frequency."""
    staged_idx = pd.date_range("2024-01-01", periods=5, freq="D")
    repo_idx = pd.date_range("2024-02-01", periods=3, freq="D")
    staged = pd.Series(range(5), index=staged_idx)
    repo = pd.Series(range(3), index=repo_idx)

    # The reason element is not checked by this test.
    outcome, _reason, staged_freq, repo_freq = compare_regular_freq(staged, repo)

    assert outcome == "both_regular_same"
    assert staged_freq == staged_idx.freq
    assert repo_freq == repo_idx.freq
135+
136+
137+
def test_compare_regular_freq_both_regular_different():
    """Daily versus two-daily series compare as "both_regular_different"."""
    staged_idx = pd.date_range("2024-01-01", periods=5, freq="D")
    repo_idx = pd.date_range("2024-02-01", periods=3, freq="2D")
    staged = pd.Series(range(5), index=staged_idx)
    repo = pd.Series(range(3), index=repo_idx)

    outcome, _, staged_freq, repo_freq = compare_regular_freq(staged, repo)

    assert outcome == "both_regular_different"
    assert staged_freq == staged_idx.freq
    assert repo_freq == repo_idx.freq
147+
148+
149+
def test_compare_regular_freq_src_irregular():
    """An irregular first series with a regular second one is "src_irregular"."""
    uneven = pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-04"])
    staged = pd.Series([1, 2, 3], index=uneven)
    repo = pd.Series([1, 2, 3], index=pd.date_range("2024-01-01", periods=3, freq="D"))

    outcome, why, staged_freq, repo_freq = compare_regular_freq(staged, repo)

    assert outcome == "src_irregular"
    assert staged_freq is None
    assert repo_freq == repo.index.freq
    assert "staged not regular" in why
158+
159+
160+
def test_compare_regular_freq_dst_irregular():
    """A regular first series with an irregular second one is "dst_irregular"."""
    staged = pd.Series([1, 2, 3], index=pd.date_range("2024-01-01", periods=3, freq="D"))
    uneven = pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-04"])
    repo = pd.Series([1, 2, 3], index=uneven)

    outcome, why, staged_freq, repo_freq = compare_regular_freq(staged, repo)

    assert outcome == "dst_irregular"
    assert staged_freq == staged.index.freq
    assert repo_freq is None
    assert "repo not regular" in why
169+
170+
171+
def test_compare_regular_freq_both_irregular():
    """Two unevenly spaced series compare as "both_irregular" with no frequencies."""
    staged_stamps = pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-04"])
    repo_stamps = pd.to_datetime(["2024-02-01", "2024-02-03", "2024-02-04"])
    staged = pd.Series([1, 2, 3], index=staged_stamps)
    repo = pd.Series([1, 2, 3], index=repo_stamps)

    outcome, why, staged_freq, repo_freq = compare_regular_freq(staged, repo)

    assert outcome == "both_irregular"
    assert staged_freq is None
    assert repo_freq is None
    assert "both irregular" in why
180+
181+
182+
def test_compare_regular_freq_single_point_compatible():
    """Two one-point series compare as "both_regular_same" with no frequencies."""
    staged_idx = pd.date_range("2024-01-01", periods=1, freq="D")
    repo_idx = pd.date_range("2024-02-01", periods=1, freq="D")
    staged = pd.Series([1.0], index=staged_idx)
    repo = pd.Series([2.0], index=repo_idx)

    outcome, why, staged_freq, repo_freq = compare_regular_freq(staged, repo)

    assert outcome == "both_regular_same"
    assert why == "degenerate_single_point"
    assert staged_freq is None
    assert repo_freq is None

0 commit comments

Comments
 (0)