Skip to content

Commit e8850db

Browse files
EliEli
authored and committed
Revised frequency handling (partially) and added helpers for division.
1 parent d8e5e59 commit e8850db

8 files changed

Lines changed: 184 additions & 63 deletions

File tree

tests/test_extrapolation.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212

1313
# Core test cases
1414
def test_constant_forward():
15-
ts = pd.Series([1, 2, 3], index=pd.date_range("2020-01-01", periods=3, freq="d"))
15+
ts = pd.Series([1, 2, 3], index=pd.date_range("2020-01-01", periods=3, freq="D"))
1616
result = extrapolate_ts(ts, end="2020-01-05", method="constant", val=10)
17-
expected = pd.Series([1, 2, 3, 10, 10], index=pd.date_range("2020-01-01", periods=5, freq="d"))
17+
expected = pd.Series([1, 2, 3, 10, 10], index=pd.date_range("2020-01-01", periods=5, freq="D"))
1818
assert_series_equal(result, expected)
1919

2020
def test_constant_backward():
21-
ts = pd.Series([4, 5, 6], index=pd.date_range("2020-01-03", periods=3, freq="d"))
21+
ts = pd.Series([4, 5, 6], index=pd.date_range("2020-01-03", periods=3, freq="D"))
2222
result = extrapolate_ts(ts, start="2020-01-01", method="constant", val=0)
23-
expected = pd.Series([0, 0, 4, 5, 6], index=pd.date_range("2020-01-01", periods=5, freq="d"))
23+
expected = pd.Series([0, 0, 4, 5, 6], index=pd.date_range("2020-01-01", periods=5, freq="D"))
2424
assert_series_equal(result, expected)
2525

2626
def test_taper_forward():
@@ -58,7 +58,7 @@ def test_taper_without_val():
5858
extrapolate_ts(ts, end="2020-01-02", method="taper")
5959

6060
def test_linear_with_val_error():
61-
ts = pd.Series([1, 2], index=pd.date_range("2020-01-01", periods=2, freq="d"))
61+
ts = pd.Series([1, 2], index=pd.date_range("2020-01-01", periods=2, freq="D"))
6262
with pytest.raises(ValueError, match="does not use 'val'"):
6363
extrapolate_ts(ts, start="2019-12-30", end="2020-01-03", method="linear_slope", val=99)
6464

@@ -91,7 +91,7 @@ def generate_series(start, periods, freq, values=None):
9191

9292

9393
def test_taper_across_frequencies():
94-
freqs = ["15min", "h", "d"]
94+
freqs = ["15min", "h", "D"]
9595
for freq in freqs:
9696
ts = generate_series("2020-01-01", periods=2, freq=freq, values=[10, 20])
9797
interval = ts.index[1] - ts.index[0]
@@ -100,13 +100,13 @@ def test_taper_across_frequencies():
100100
assert result.index[-1] == end_time
101101

102102

103-
def test_taper_across_frequencies():
103+
def test_taper_across_frequencies2():
104104
results = {}
105-
for freq in ["15min", "h", "d"]:
105+
for freq in ["15min", "h", "D"]:
106106
ts = generate_series("2020-01-01", periods=2, freq=freq, values=[10, 10]) # Ensure 2+ points
107107
step = ts.index[1] - ts.index[0]
108108
end_time = ts.index[-1] + 3 * step
109109
result = extrapolate_ts(ts, end=end_time, method="taper", val=0.0)
110-
assert result.index.freqstr.lower() == freq
110+
assert result.index.freqstr == freq
111111
results[freq] = result
112112

tests/test_interval_ops.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import pytest
2+
import pandas as pd
3+
from vtools.data.vtime import safe_divide_interval
4+
5+
6+
def test_int_division():
    """Two plain integers divide to the expected whole-number quotient."""
    quotient = safe_divide_interval(24, 1)
    assert quotient == 24
8+
9+
10+
def test_string_offsets():
    """Interval strings divide to the expected whole-number quotient."""
    cases = (
        ("1D", "1h", 24),
        ("15min", "5min", 3),
    )
    for dividend, divisor, expected in cases:
        assert safe_divide_interval(dividend, divisor) == expected
13+
14+
15+
def test_timedelta():
    """Two pandas.Timedelta operands divide to the expected quotient."""
    one_day = pd.Timedelta("1D")
    one_hour = pd.Timedelta("1h")
    assert safe_divide_interval(one_day, one_hour) == 24
17+
18+
19+
def test_mixed_types():
    """A string operand may be combined with a Timedelta on either side."""
    one_hour = pd.Timedelta("1h")
    one_day = pd.Timedelta("1D")
    assert safe_divide_interval("1D", one_hour) == 24
    assert safe_divide_interval(one_day, "1h") == 24
22+
23+
24+
def test_non_integer_division_fails():
    """Dividing one day by seven hours is expected to raise ValueError."""
    pytest.raises(ValueError, safe_divide_interval, "1D", "7h")
27+
28+
29+
def test_month_rejected():
    """The interval string "1M" is expected to be rejected with TypeError."""
    pytest.raises(TypeError, safe_divide_interval, "1M", "1D")
32+
33+
34+
def test_year_rejected():
    """The interval string "1Y" is expected to be rejected with TypeError."""
    pytest.raises(TypeError, safe_divide_interval, "1Y", "1D")
37+
38+
39+
def test_zero_division():
    """A zero-length divisor is expected to raise ZeroDivisionError."""
    pytest.raises(ZeroDivisionError, safe_divide_interval, "1D", "0H")

tests/test_merge_splice.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
@pytest.fixture
1414
def sample_data():
1515
"""Create sample time series data for tests."""
16-
idx1 = pd.date_range("2023-01-01", periods=5, freq="d")
17-
idx2 = pd.date_range("2023-01-03", periods=5, freq="d")
16+
idx1 = pd.date_range("2023-01-01", periods=5, freq="D")
17+
idx2 = pd.date_range("2023-01-03", periods=5, freq="D")
1818
series1 = pd.Series([1, 2, np.nan, 4, 5], index=idx1, name="A")
1919
series2 = pd.Series([10, 20, 30, np.nan, 50], index=idx2, name="A")
2020
df1 = pd.DataFrame({"A": [1, np.nan, 3, 4, 5]}, index=idx1)
@@ -110,7 +110,7 @@ def test_series_merge(self, sample_data):
110110
result = ts_merge((s1, s2), names="A")
111111
expected = pd.Series(
112112
[1, 2, 10, 4, 5, np.nan, 50],
113-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
113+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
114114
name="A"
115115
)
116116
pd.testing.assert_series_equal(result, expected)
@@ -120,7 +120,7 @@ def test_single_column_dataframe(self, sample_data):
120120
result = ts_merge((df1, df2))
121121
expected = pd.DataFrame(
122122
{"A": [1., np.nan, 3., 4., 5., 40., 50.]},
123-
index=pd.date_range("2023-01-01", periods=7, freq="d")
123+
index=pd.date_range("2023-01-01", periods=7, freq="D")
124124
)
125125
pd.testing.assert_frame_equal(result, expected)
126126

@@ -130,15 +130,15 @@ def test_multi_column_dataframe(self, sample_data):
130130
expected = pd.DataFrame({
131131
"A": [1., 2., 100, 4, 5, np.nan, 500],
132132
"B": [10., np.nan, 30, 40, 50, 4000, 5000]
133-
}, index=pd.date_range("2023-01-01", periods=7, freq="d"))
133+
}, index=pd.date_range("2023-01-01", periods=7, freq="D"))
134134
pd.testing.assert_frame_equal(result, expected)
135135

136136
def test_series_merge_with_names(self, sample_data):
137137
s1, s2 = sample_data["series1"], sample_data["series2"]
138138
result = ts_merge((s1, s2), names="new_name")
139139
expected = pd.Series(
140140
[1., 2., 10, 4, 5, np.nan, 50],
141-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
141+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
142142
name="new_name"
143143
)
144144
pd.testing.assert_series_equal(result, expected)
@@ -148,7 +148,7 @@ def test_dataframe_merge_with_renaming(self, sample_data):
148148
result = ts_merge((df1, df2), names="Renamed_A")
149149
expected = pd.DataFrame(
150150
{"Renamed_A": [1., np.nan, 3., 4., 5., 40, 50]},
151-
index=pd.date_range("2023-01-01", periods=7, freq="d")
151+
index=pd.date_range("2023-01-01", periods=7, freq="D")
152152
)
153153
pd.testing.assert_frame_equal(result, expected)
154154

@@ -157,7 +157,7 @@ def test_dataframe_merge_with_custom_column_selection(self, sample_data):
157157
result = ts_merge((df_multi1, df_multi2), names=["A"])
158158
expected = pd.DataFrame(
159159
{"A": [1, 2, 100, 4, 5, np.nan, 500]},
160-
index=pd.date_range("2023-01-01", periods=7, freq="d")
160+
index=pd.date_range("2023-01-01", periods=7, freq="D")
161161
)
162162
pd.testing.assert_frame_equal(result, expected)
163163

@@ -179,7 +179,7 @@ def test_splice_prefer_first(self, sample_data):
179179
result = ts_splice((s1, s2), transition="prefer_first")
180180
expected = pd.Series(
181181
[1., 2, np.nan, 4, 5, np.nan, 50],
182-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
182+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
183183
name="A"
184184
)
185185
pd.testing.assert_series_equal(result, expected)
@@ -189,7 +189,7 @@ def test_splice_prefer_last(self, sample_data):
189189
result = ts_splice((s1, s2), transition="prefer_last")
190190
expected = pd.Series(
191191
[1., 2, 10, 20., 30., np.nan, 50],
192-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
192+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
193193
name="A"
194194
)
195195
pd.testing.assert_series_equal(result, expected)
@@ -199,7 +199,7 @@ def test_splice_series_with_names(self, sample_data):
199199
result = ts_splice((s1, s2), names="new_name", transition="prefer_last")
200200
expected = pd.Series(
201201
[1, 2, 10, 20, 30, np.nan, 50],
202-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
202+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
203203
name="new_name"
204204
)
205205
pd.testing.assert_series_equal(result, expected)
@@ -210,7 +210,7 @@ def test_splice_with_custom_transitions(self, sample_data):
210210
result = ts_splice((s1, s2), transition=transition_points)
211211
expected = pd.Series(
212212
[1, 2, np.nan, 4., 30., np.nan, 50],
213-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
213+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
214214
name="A"
215215
)
216216
pd.testing.assert_series_equal(result, expected)
@@ -220,7 +220,7 @@ def test_splice_dataframe_prefer_first(self, sample_data):
220220
result = ts_splice((df1, df2), transition="prefer_first")
221221
expected = pd.DataFrame(
222222
{"A": [1, np.nan, 3, 4, 5, 40, 50]},
223-
index=pd.date_range("2023-01-01", periods=7, freq="d")
223+
index=pd.date_range("2023-01-01", periods=7, freq="D")
224224
)
225225
pd.testing.assert_frame_equal(result, expected)
226226

@@ -229,7 +229,7 @@ def test_splice_dataframe_prefer_last(self, sample_data):
229229
result = ts_splice((df1, df2), transition="prefer_last")
230230
expected = pd.DataFrame(
231231
{"A": [1., np.nan, 10, 20., np.nan, 40, 50]},
232-
index=pd.date_range("2023-01-01", periods=7, freq="d")
232+
index=pd.date_range("2023-01-01", periods=7, freq="D")
233233
)
234234
pd.testing.assert_frame_equal(result, expected)
235235

@@ -239,15 +239,15 @@ def test_splice_multi_column_dataframe(self, sample_data):
239239
expected = pd.DataFrame({
240240
"A": [1., 2, 100, 200, 300., np.nan, 500],
241241
"B": [10., np.nan, 1000., 2000., np.nan, 4000, 5000]
242-
}, index=pd.date_range("2023-01-01", periods=7, freq="d"))
242+
}, index=pd.date_range("2023-01-01", periods=7, freq="D"))
243243
pd.testing.assert_frame_equal(result, expected)
244244

245245
def test_splice_with_renaming(self, sample_data):
246246
s1, s2 = sample_data["series1"], sample_data["series2"]
247247
result = ts_splice((s1, s2), names="Renamed_A", transition="prefer_last")
248248
expected = pd.Series(
249249
[1., 2, 10., 20., 30, np.nan, 50],
250-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
250+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
251251
name="Renamed_A"
252252
)
253253
pd.testing.assert_series_equal(result, expected)
@@ -257,7 +257,7 @@ def test_splice_multi_column_dataframe_with_column_selection(self, sample_data):
257257
result = ts_splice((df_multi1, df_multi2), names=["A"], transition="prefer_last")
258258
expected = pd.DataFrame(
259259
{"A": [1., 2, 100, 200, 300, np.nan, 500]},
260-
index=pd.date_range("2023-01-01", periods=7, freq="d")
260+
index=pd.date_range("2023-01-01", periods=7, freq="D")
261261
)
262262
pd.testing.assert_frame_equal(result, expected)
263263

@@ -266,7 +266,7 @@ def test_splice_floor_dates(self, sample_data):
266266
result = ts_splice((s1, s2), transition="prefer_last", floor_dates=True)
267267
expected = pd.Series(
268268
[1., 2, 10, 20., 30., np.nan, 50],
269-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
269+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
270270
name="A"
271271
)
272272
pd.testing.assert_series_equal(result, expected)
@@ -288,9 +288,9 @@ def test_splice_irregular_series(self, irregular_sample_data):
288288
class TestErrorConditions:
289289
def test_mismatched_column_names(self, sample_data):
290290
df1 = pd.DataFrame({"X": [1, 2, np.nan, 4, 5]},
291-
index=pd.date_range("2023-01-01", periods=5, freq="d"))
291+
index=pd.date_range("2023-01-01", periods=5, freq="D"))
292292
df2 = pd.DataFrame({"Y": [10, 20, np.nan, 40, 50]},
293-
index=pd.date_range("2023-01-03", periods=5, freq="d"))
293+
index=pd.date_range("2023-01-03", periods=5, freq="D"))
294294
with pytest.raises(ValueError, match="All input columns must be identical when `names` is None"):
295295
ts_merge((df1, df2))
296296

@@ -366,7 +366,7 @@ def test_blend_series_no_blend_length_equiv_to_merge(self, sample_data):
366366
# For reference, this is the same expectation as TestTsMerge.test_series_merge
367367
expected = pd.Series(
368368
[1., 2., 10., 4., 5., np.nan, 50.],
369-
index=pd.date_range("2023-01-01", periods=7, freq="d"),
369+
index=pd.date_range("2023-01-01", periods=7, freq="D"),
370370
name="A",
371371
)
372372
pd.testing.assert_series_equal(result, expected)
@@ -384,7 +384,7 @@ def test_blend_series_integer_window(self, sample_data):
384384
result = ts_blend((s1, s2), blend_length=2)
385385

386386
# Union index: 2023-01-01..07
387-
idx = pd.date_range("2023-01-01", periods=7, freq="d")
387+
idx = pd.date_range("2023-01-01", periods=7, freq="D")
388388

389389
# Expected values (see derivation in chat):
390390
# high-priority gaps (NaN) at days 3, 6, 7 (in union),
@@ -407,7 +407,7 @@ def test_blend_dataframe_integer_window(self, sample_data):
407407

408408
result = ts_blend((df1, df2), blend_length=2)
409409

410-
idx = pd.date_range("2023-01-01", periods=7, freq="d")
410+
idx = pd.date_range("2023-01-01", periods=7, freq="D")
411411
# Derived expectations for column "A":
412412
# - Fill high-priority gaps using low where possible
413413
# - Then blend near gaps using the same kernel as in the series test
@@ -447,7 +447,7 @@ def test_blend_series_with_names(self, sample_data):
447447
s1, s2 = sample_data["series1"], sample_data["series2"]
448448
result = ts_blend((s1, s2), names="new_name", blend_length=2)
449449

450-
idx = pd.date_range("2023-01-01", periods=7, freq="d")
450+
idx = pd.date_range("2023-01-01", periods=7, freq="D")
451451
expected = pd.Series(
452452
[1.0, 2.0, 10.0, 8.0, 11.25, np.nan, 50.0],
453453
index=idx,

vtools/data/vtime.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import pandas as pd
1919

2020

21-
__all__ = ["seconds", "minutes", "hours", "days", "months", "years"]
21+
__all__ = ["seconds", "minutes", "hours", "days", "months", "years","to_timedelta", "safe_divide_interval"]
2222

2323

2424
def seconds(s):
@@ -91,3 +91,70 @@ def dst_to_standard_naive(ts, dst_zone="US/Pacific", standard_zone="Etc/GMT+8"):
9191
.tz_localize(None)
9292
)
9393
return ts2
94+
95+
96+
_FIXED_OFFSET_CLASSES = (
97+
pd.offsets.Day,
98+
pd.offsets.Hour,
99+
pd.offsets.Minute,
100+
pd.offsets.Second,
101+
pd.offsets.Milli,
102+
pd.offsets.Micro,
103+
pd.offsets.Nano,
104+
)
105+
106+
def to_timedelta(x):
107+
"""
108+
Convert x to pandas.Timedelta if and only if it represents
109+
a fixed-length duration.
110+
"""
111+
if isinstance(x, (int, np.integer, float)):
112+
return pd.Timedelta(x, unit="ns")
113+
114+
if isinstance(x, pd.Timedelta):
115+
return x
116+
117+
# FIRST: try Timedelta parsing (handles "1H", "1D", etc.)
118+
try:
119+
if isinstance(x, str):
120+
x = x.replace("H", "h").replace("d", "D").replace("T", "t") # standardize case
121+
print(x)
122+
return pd.Timedelta(x)
123+
except Exception:
124+
pass
125+
126+
# FALLBACK: try fixed pandas offsets
127+
try:
128+
off = pd.tseries.frequencies.to_offset(x)
129+
except Exception as e:
130+
raise TypeError(f"Cannot interpret interval {x!r}") from e
131+
132+
if not isinstance(off, _FIXED_OFFSET_CLASSES):
133+
raise TypeError(
134+
f"Offset {type(off).__name__} is calendar-dependent "
135+
"and not a fixed-length interval"
136+
)
137+
138+
return pd.Timedelta(off.nanos, unit="ns")
139+
140+
141+
142+
def safe_divide_interval(a, b, *, tol=1e-12, require_int=True):
    """
    Divide interval a by interval b after converting both with to_timedelta.

    Parameters
    ----------
    a, b : anything accepted by to_timedelta
        Dividend and divisor intervals.
    tol : float, optional
        Largest allowed absolute gap between the raw ratio and its nearest
        integer when ``require_int`` is true.
    require_int : bool, optional
        When true (default) return an int and reject uneven divisions;
        when false return the raw ratio as a float.

    Returns
    -------
    int or float

    Raises
    ------
    ZeroDivisionError
        If b converts to a zero-length interval.
    ValueError
        If ``require_int`` is true and the ratio is further than ``tol``
        from a whole number.
    """
    numerator, denominator = to_timedelta(a), to_timedelta(b)

    if denominator == pd.Timedelta(0):
        raise ZeroDivisionError("Division by zero interval")

    quotient = numerator / denominator

    if not require_int:
        return float(quotient)

    nearest = int(round(quotient))
    if abs(quotient - nearest) <= tol:
        return nearest

    raise ValueError(
        f"Intervals are not evenly divisible: {a!r} / {b!r} = {quotient}"
    )
160+

0 commit comments

Comments
 (0)