Skip to content

Commit 65eb261

Browse files
EliEli
authored and committed
Refactor merge/splice/blend index handling; restrict blend to regular data
- Centralize frequency/index logic in vtools.data.indexing.
- Make ts_merge/ts_splice index-driven and fail-fast on freq mismatch.
- Remove post-hoc reindexing.
- Restrict ts_blend to a common regular frequency (no irregular blending; use ts_splice instead).
- Update tests for stricter semantics and explicit rejection cases.
1 parent 8954186 commit 65eb261

File tree

4 files changed

+722
-94
lines changed

4 files changed

+722
-94
lines changed

tests/test_merge_splice.py

Lines changed: 124 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -455,15 +455,11 @@ def test_blend_series_with_names(self, sample_data):
455455
)
456456
pd.testing.assert_series_equal(result, expected)
457457

458-
def test_blend_time_based_requires_regular_index(self, irregular_sample_data):
459-
"""
460-
Time-based blend_length on an irregular index (no .freq) should
461-
raise a clear error.
462-
"""
458+
def test_blend_irregular_inputs_raise(self, irregular_sample_data):
463459
s1 = irregular_sample_data["series1"]
464460
s2 = irregular_sample_data["series2"]
465461

466-
with pytest.raises(ValueError, match="requires a regular index with a .freq"):
462+
with pytest.raises(ValueError, match="common regular frequency"):
467463
ts_blend((s1, s2), blend_length="2D")
468464

469465
def test_blend_non_datetime_index_raises(self):
@@ -475,3 +471,125 @@ def test_blend_non_datetime_index_raises(self):
475471

476472
with pytest.raises(ValueError, match="DatetimeIndex or PeriodIndex"):
477473
ts_blend((s1, s2), blend_length=2)
474+
475+
def test_merge_conflicting_freq_raises():
    """ts_merge rejects inputs whose indexes carry different ``.freq`` values."""
    daily_index = pd.date_range("2023-01-01", periods=3, freq="D")
    two_day_index = pd.date_range("2023-01-01", periods=3, freq="2D")

    daily = pd.Series([1., 2., 3.], index=daily_index, name="A")
    two_day = pd.Series([10., 20., 30.], index=two_day_index, name="A")

    with pytest.raises(ValueError, match="inconsistent frequencies"):
        ts_merge((daily, two_day))
489+
490+
def test_blend_conflicting_freq_raises():
    """ts_blend rejects inputs whose indexes carry different ``.freq`` values."""
    daily_index = pd.date_range("2023-01-01", periods=3, freq="D")
    two_day_index = pd.date_range("2023-01-01", periods=3, freq="2D")

    daily = pd.Series([1., 2., 3.], index=daily_index, name="A")
    two_day = pd.Series([10., 20., 30.], index=two_day_index, name="A")

    with pytest.raises(ValueError, match="inconsistent frequencies"):
        ts_blend((daily, two_day), blend_length=1)
504+
505+
def test_merge_regular_disjoint_inserts_nan_gap():
    """Merging two disjoint daily series yields one daily grid with a NaN gap."""
    early = pd.Series(
        [1., 2.],
        index=pd.date_range("2023-01-01", periods=2, freq="D"),
        name="A",
    )
    late = pd.Series(
        [10., 20.],
        index=pd.date_range("2023-01-05", periods=2, freq="D"),
        name="A",
    )

    # Jan 3 and Jan 4 are covered by neither input, so they must appear as NaN.
    full_grid = pd.date_range("2023-01-01", periods=6, freq="D")
    expected = pd.Series(
        [1., 2., np.nan, np.nan, 10., 20.],
        index=full_grid,
        name="A",
    )

    merged = ts_merge((early, late))
    pd.testing.assert_series_equal(merged, expected)
524+
525+
def test_merge_irregular_warns_when_preserve_freq_false_and_not_strict(irregular_sample_data):
    """Non-strict merge of irregular inputs without freq preservation warns."""
    first, second = (
        irregular_sample_data[key] for key in ("series1", "series2")
    )

    with pytest.warns(UserWarning, match="irregular union-index artifacts"):
        ts_merge((first, second), preserve_freq=False, strict_priority=False)
531+
532+
533+
def test_blend_regular_disjoint_inserts_nan_gap():
    """Blending two disjoint daily series yields one daily grid with a NaN gap."""
    early = pd.Series(
        [1., 2.],
        index=pd.date_range("2023-01-01", periods=2, freq="D"),
        name="A",
    )
    late = pd.Series(
        [10., 20.],
        index=pd.date_range("2023-01-05", periods=2, freq="D"),
        name="A",
    )

    # Jan 3 and Jan 4 are covered by neither input, so they must appear as NaN.
    full_grid = pd.date_range("2023-01-01", periods=6, freq="D")
    expected = pd.Series(
        [1., 2., np.nan, np.nan, 10., 20.],
        index=full_grid,
        name="A",
    )

    blended = ts_blend((early, late), blend_length=None)
    pd.testing.assert_series_equal(blended, expected)
552+
553+
def test_blend_irregular_inputs_raise(irregular_sample_data):
    """ts_blend refuses irregular inputs even when no blend_length is given."""
    first, second = (
        irregular_sample_data[key] for key in ("series1", "series2")
    )

    with pytest.raises(ValueError, match="common regular frequency"):
        ts_blend((first, second), blend_length=None)
559+
560+
def test_blend_irregular_raises(irregular_sample_data):
    """Irregular inputs to ts_blend raise the common-regular-frequency error."""
    inputs = tuple(
        irregular_sample_data[key] for key in ("series1", "series2")
    )

    with pytest.raises(ValueError, match="common regular frequency"):
        ts_blend(inputs, blend_length=None)
566+
567+
def test_blend_conflicting_freq_raises():
    """
    ts_blend rejects inputs whose indexes carry different ``.freq`` values.

    This span previously held two byte-identical definitions of this test
    back to back; the second silently replaced the first at import time, so
    only one ever ran. They are collapsed into a single definition here.
    """
    # NOTE(review): a test with this same name and body is also defined
    # earlier in this module; this later definition shadows it, so pytest
    # collects only one of them. Consider deleting one or giving it a
    # distinct name and scenario.
    s1 = pd.Series(
        [1., 2., 3.],
        index=pd.date_range("2023-01-01", periods=3, freq="D"),
        name="A",
    )
    s2 = pd.Series(
        [10., 20., 30.],
        index=pd.date_range("2023-01-01", periods=3, freq="2D"),
        name="A",
    )

    with pytest.raises(ValueError, match="inconsistent frequencies"):
        ts_blend((s1, s2), blend_length=1)

vtools/data/indexing.py

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
# vtools/data/indexing.py
"""
Index construction and frequency-handling utilities for time series operations.

This module provides low-level helpers for:
- enforcing frequency consistency across multiple time series,
- constructing regular time indexes from valid data extents, and
- reindexing existing series onto continuous grids when possible.

These functions are used by higher-level operations such as merging,
splicing, and blending of time series.
"""

# The docstring must be the first statement in the module to become
# ``__doc__``; imports therefore follow it.
import pandas as pd
15+
16+
def resolve_common_freq(indexes, preserve_freq=True):
    """
    Determine a common frequency across a collection of pandas indexes.

    Only the ``.freq`` attribute of each index is inspected. Indexes whose
    ``.freq`` is missing or ``None`` are ignored; all remaining frequencies
    must compare equal.

    Parameters
    ----------
    indexes : iterable of pandas.Index
        Index objects (typically DatetimeIndex or PeriodIndex). Each index
        may or may not have a ``.freq`` attribute.

    preserve_freq : bool, default True
        If True, enforce that all indexes with a non-null ``.freq`` share
        the same frequency. If False, no checking is performed (``indexes``
        is not even iterated) and the function returns None.

    Returns
    -------
    freq : pandas offset or None
        The shared frequency, or None if ``preserve_freq`` is False or no
        index has a non-null ``.freq``.

    Raises
    ------
    ValueError
        If ``preserve_freq`` is True and two indexes define different
        ``.freq`` values. The message names the two conflicting frequencies.

    Notes
    -----
    - No frequency inference (e.g. ``pandas.infer_freq``) is attempted; an
      index that is evenly spaced but has ``freq=None`` is simply ignored.

    See Also
    --------
    regular_index_from_valid_extent : Construct a regular index once a
        common frequency has been established.
    """
    if not preserve_freq:
        return None

    freqs = [
        freq
        for freq in (getattr(idx, "freq", None) for idx in indexes)
        if freq is not None
    ]
    if not freqs:
        return None

    first = freqs[0]
    # Entries in ``freqs`` are never None, so None is a safe "no conflict"
    # sentinel for ``next``.
    conflicting = next((f for f in freqs[1:] if f != first), None)
    if conflicting is not None:
        raise ValueError(
            "Input series have inconsistent frequencies; cannot preserve frequency."
            f" Found {first!r} and {conflicting!r}."
        )
    return first
78+
79+
80+
def regular_index_from_valid_extent(series, freq):
    """
    Construct a regular index spanning the valid data extent of input series.

    The result runs from the earliest ``first_valid_index()`` to the latest
    ``last_valid_index()`` across all inputs, at the given frequency.

    Parameters
    ----------
    series : iterable of pandas.Series or pandas.DataFrame
        Input time series objects. Any iterable is accepted (it is
        materialized once). The first element's index type decides whether
        a DatetimeIndex or a PeriodIndex is built.

    freq : pandas offset
        Frequency of the regular index, typically obtained from
        ``resolve_common_freq``.

    Returns
    -------
    index : pandas.DatetimeIndex or pandas.PeriodIndex
        Regular index including both endpoints. If no input contains any
        valid (non-NaN) value, an empty slice of the first input's index is
        returned.

    Raises
    ------
    ValueError
        If ``series`` is empty, or if valid data exists but the first
        input's index is neither a DatetimeIndex nor a PeriodIndex.

    Notes
    -----
    - Leading and trailing NaNs are ignored when determining the extent.
    - No validation is performed to ensure that the inputs conform to
      ``freq``.

    See Also
    --------
    resolve_common_freq : Determine whether a shared frequency exists
        across input indexes.
    """
    # Materialize once so generators work and ``series[0]`` is always valid.
    series = list(series)
    if not series:
        raise ValueError(
            "At least one series is required to construct a regular index."
        )

    firsts = []
    lasts = []
    for item in series:
        first = item.first_valid_index()
        if first is None:
            # No valid data at all in this input; it contributes no extent.
            continue
        firsts.append(first)
        lasts.append(item.last_valid_index())

    idx0 = series[0].index
    if not firsts:
        return idx0[:0]

    start = min(firsts)
    end = max(lasts)

    if isinstance(idx0, pd.DatetimeIndex):
        return pd.date_range(start=start, end=end, freq=freq)
    if isinstance(idx0, pd.PeriodIndex):
        return pd.period_range(start=start, end=end, freq=freq)
    raise ValueError("Unsupported index type for frequency preservation.")
145+
146+
147+
def _set_index_freq(index, freq):
    """Best-effort assignment of ``index.freq``; return True on success."""
    try:
        index.freq = freq
    except (AttributeError, ValueError):
        # ValueError: the freq does not conform to the index values.
        # AttributeError: the index type does not allow assigning ``.freq``
        # (PeriodIndex is read-only in this respect, as far as I know).
        return False
    return True


def reindex_to_continuous(result, freq):
    """
    Reindex a time series onto a regular grid if possible.

    A regular index is built from the minimum to the maximum timestamp of
    ``result`` at ``freq``. If every existing timestamp lies on that grid,
    ``result`` is reindexed onto it; otherwise it is returned as-is.

    Parameters
    ----------
    result : pandas.Series or pandas.DataFrame
        Time series to be reindexed. Only DatetimeIndex and PeriodIndex are
        handled; any other index type is returned unchanged.

    freq : pandas offset or None
        Target frequency for the regular index. If None, no action is
        taken and ``result`` is returned unchanged.

    Returns
    -------
    out : pandas.Series or pandas.DataFrame
        The reindexed object when all timestamps align with the grid
        (missing grid points are filled with NaN). Otherwise the original
        object.

    Notes
    -----
    - An empty ``result`` is returned unchanged: there is no extent from
      which a grid could be built.
    - When timestamps do not align with the grid, ``.freq`` on the index of
      ``result`` is cleared in place where the index type allows it. This
      mutates the caller's index object.
    - When reindexing succeeds, ``.freq`` is set on the new index where the
      index type allows it.
    - The behavior is intentionally conservative to avoid silently dropping
      or shifting data; irregular spacing is never inferred or repaired.

    See Also
    --------
    regular_index_from_valid_extent : Construct a regular index prior
        to composition operations.
    resolve_common_freq : Determine if a shared frequency can be enforced.
    """
    if freq is None:
        return result

    index = result.index
    is_datetime = isinstance(index, pd.DatetimeIndex)
    if not (is_datetime or isinstance(index, pd.PeriodIndex)):
        return result
    if len(index) == 0:
        # min()/max() would be NaT, which the range constructors reject.
        return result

    start = index.min()
    end = index.max()
    if is_datetime:
        cont = pd.date_range(start=start, end=end, freq=freq)
    else:
        cont = pd.period_range(start=start, end=end, freq=freq)

    try:
        aligned = bool(pd.Index(index).isin(cont).all())
    except (TypeError, ValueError):
        # Index values not comparable with the grid: leave the data alone.
        return result

    if not aligned:
        _set_index_freq(index, None)
        return result

    result = result.reindex(cont)
    if not _set_index_freq(result.index, freq):
        _set_index_freq(result.index, None)
    return result

0 commit comments

Comments
 (0)