22import numpy as np
33from vtools import to_timedelta
44from vtools .functions .colname_align import align_inputs_strict
5- from vtools .data .indexing import resolve_common_freq , regular_index_from_valid_extent
5+ from vtools .data .indexing import resolve_common_freq
66
77__all__ = ["ts_blend" ]
88
@@ -12,7 +12,8 @@ def _blend_output_index(series):
1212 Determine the working index for ts_blend.
1313
1414 Blending requires inputs with a common regular frequency. The working
15- index is the regular index spanning the valid-data extent of the inputs.
15+ index is the regular index spanning the full union extent of the inputs
16+ (earliest start to latest end across all series).
1617
1718 Parameters
1819 ----------
@@ -21,7 +22,7 @@ def _blend_output_index(series):
2122
2223 Returns
2324 -------
24- pandas.Index
25+ pandas.DatetimeIndex
2526 Regular index on which blending should be performed.
2627
2728 Raises
@@ -40,250 +41,9 @@ def _blend_output_index(series):
4041 "For irregular handoff behavior, use ts_splice."
4142 )
4243
43- return regular_index_from_valid_extent (series , output_freq )
44-
45-
46- def _distance_to_gap (hi_col : pd .Series , mode : str = "count" ) -> pd .Series :
47- """
48- Distance to nearest gap (NaN) in hi_col.
49-
50- Parameters
51- ----------
52- hi_col : Series
53- Higher-priority series.
54- mode : {'count', 'freq'}
55- 'count' -> distance in # of samples (0 at gaps).
56- 'freq' -> distance as Timedelta, using hi_col.index.freq.
57-
58- Returns
59- -------
60- Series
61- Same index as hi_col, distance to nearest NaN.
62- """
63- idx = hi_col .index
64- n = len (idx )
65- mask = hi_col .isna ().to_numpy ()
66-
67- # No gaps -> everything is effectively "far away"
68- if not mask .any ():
69- dist = np .full (n , np .inf , dtype = float )
70- return pd .Series (dist , index = idx )
71-
72- dist = np .full (n , np .inf , dtype = float )
73-
74- # Forward pass: distance from the last gap
75- last_gap = None
76- for i in range (n ):
77- if mask [i ]:
78- dist [i ] = 0.0
79- last_gap = i
80- elif last_gap is not None :
81- dist [i ] = float (i - last_gap )
82-
83- # Backward pass: distance from the next gap
84- last_gap = None
85- for i in range (n - 1 , - 1 , - 1 ):
86- if mask [i ]:
87- last_gap = i
88- elif last_gap is not None :
89- dist [i ] = min (dist [i ], float (last_gap - i ))
90-
91- dist_s = pd .Series (dist , index = idx )
92-
93- if mode == "count" :
94- return dist_s
95-
96- if mode == "freq" :
97- freq = idx .freq
98- if freq is None :
99- raise ValueError (
100- "Time-based blending requires a regular index with .freq set."
101- )
102- # counts * freq → Timedelta
103- return dist_s * to_timedelta (freq )
104-
105- raise ValueError ("mode must be 'count' or 'freq'" )
106-
107-
108- def _normalize_blend_length (blend_length , index ):
109- """
110- Interpret blend_length as sample count or time span.
111-
112- Returns
113- -------
114- (mode, L)
115- mode : {'count', 'freq'} or None
116- L : numeric (count) or Timedelta
117- """
118- if blend_length is None :
119- return None , None
120- if isinstance (blend_length , str ):
121- blend_length = blend_length .replace ("H" , "h" )
122- blend_length = blend_length .replace ("d" , "D" )
123-
124- # Integer: number of samples
125- if isinstance (blend_length , (int , np .integer )):
126- if blend_length <= 0 :
127- return None , None
128- return "count" , float (blend_length )
129-
130- # Timedelta-like: e.g. '2h', '30min'
131- td = pd .to_timedelta (blend_length )
132- if not isinstance (index , (pd .DatetimeIndex , pd .PeriodIndex )):
133- raise ValueError (
134- "Time-based blend_length requires a DatetimeIndex or PeriodIndex."
135- )
136- if index .freq is None :
137- raise ValueError (
138- "Time-based blend_length requires a regular index with a .freq attribute."
139- )
140- if td <= pd .Timedelta (0 ):
141- return None , None
142-
143- return "freq" , td
144-
145-
146- def _blend_two (
147- aligned_hi : pd .DataFrame ,
148- aligned_lo : pd .DataFrame ,
149- blend_mode : str ,
150- blend_L ,
151- ) -> pd .DataFrame :
152- """
153- Blend a lower-priority DataFrame into a higher-priority DataFrame.
154-
155- Parameters
156- ----------
157- aligned_hi, aligned_lo : DataFrame
158- Same index. Higher priority is 'aligned_hi'.
159- blend_mode : {'count', 'freq'} or None
160- blend_L : float or Timedelta
161-
162- Returns
163- -------
164- DataFrame
165- Blended result.
166- """
167- # No blending requested → just do priority overlay
168- if blend_mode is None or blend_L is None :
169- return aligned_hi .combine_first (aligned_lo )
170-
171- idx = aligned_hi .index
172- out = aligned_hi .copy ()
173- cols = sorted (set (aligned_hi .columns ) | set (aligned_lo .columns ))
174-
175- for col in cols :
176- hi_col = (
177- aligned_hi [col ]
178- if col in aligned_hi .columns
179- else pd .Series (index = idx , dtype = float )
180- )
181- lo_col = (
182- aligned_lo [col ]
183- if col in aligned_lo .columns
184- else pd .Series (index = idx , dtype = float )
185- )
186-
187- hi_nan = hi_col .isna ()
188- lo_nan = lo_col .isna ()
189-
190- # Priority baseline: hi where present, otherwise lo
191- merged = hi_col .copy ()
192- fill_mask = hi_nan & (~ lo_nan )
193- merged [fill_mask ] = lo_col [fill_mask ]
194-
195- # Distance to nearest gap in the *high-priority* series
196- dist_to_gap = _distance_to_gap (
197- hi_col ,
198- mode = "count" if blend_mode == "count" else "freq" ,
199- )
200-
201- # Candidate points for blending on the shoulders of gaps:
202- # - hi has data
203- # - lo has data
204- near_gap = (~ hi_nan ) & (~ lo_nan )
205-
206- if blend_mode == "count" :
207- near_gap &= (dist_to_gap > 0 ) & (dist_to_gap <= blend_L )
208- if not near_gap .any ():
209- out [col ] = merged
210- continue
211- d = dist_to_gap [near_gap ].astype (float )
212- t = (blend_L - d ) / blend_L
213- else : # 'freq' mode (Timedelta)
214- near_gap &= (dist_to_gap > pd .Timedelta (0 )) & (dist_to_gap <= blend_L )
215- if not near_gap .any ():
216- out [col ] = merged
217- continue
218- d = dist_to_gap [near_gap ]
219- t = 1.0 - (d / blend_L )
220-
221- t = t .clip (lower = 0.0 , upper = 1.0 )
222-
223- # Kernel: lower-priority gets up to 0.5 weight at the gap edge,
224- # tapering to 0 at distance >= blend_L.
225- w_lo = 0.5 * t
226- w_hi = 1.0 - w_lo
227-
228- hi_vals = hi_col [near_gap ].astype (float )
229- lo_vals = lo_col [near_gap ].astype (float )
230-
231- blended_vals = (
232- w_hi .to_numpy () * hi_vals .to_numpy () + w_lo .to_numpy () * lo_vals .to_numpy ()
233- )
234-
235- # IMPORTANT: use .loc with a boolean mask, not .at, so we never hit
236- # DataFrame._set_value with a non-scalar index.
237- merged .loc [near_gap ] = blended_vals
238-
239- out [col ] = merged
240-
241- return out
242-
243-
244- import pandas as pd
245- import numpy as np
246- from vtools import to_timedelta
247- from vtools .functions .colname_align import align_inputs_strict
248- from vtools .data .indexing import resolve_common_freq , regular_index_from_valid_extent
249-
250- __all__ = ["ts_blend" ]
251-
252-
def _blend_output_index(series):
    """
    Work out the index on which ts_blend operates.

    All inputs must share one regular frequency. That frequency is resolved
    from the input indexes and then passed, together with the inputs, to
    ``regular_index_from_valid_extent`` to build the working index.

    Parameters
    ----------
    series : sequence of pandas.Series or pandas.DataFrame
        Input time series.

    Returns
    -------
    pandas.Index
        Regular index on which blending should be performed.

    Raises
    ------
    ValueError
        If a common regular frequency cannot be established.
    """
    input_indexes = [item.index for item in series]
    common_freq = resolve_common_freq(input_indexes, preserve_freq=True)

    if common_freq is not None:
        return regular_index_from_valid_extent(series, common_freq)

    raise ValueError(
        "ts_blend requires inputs with a common regular frequency. "
        "For irregular handoff behavior, use ts_splice."
    )
44+ start = min (s .index .min () for s in series )
45+ end = max (s .index .max () for s in series )
46+ return pd .date_range (start , end , freq = output_freq )
28747
28848
28949def _distance_to_gap (hi_col : pd .Series , mode : str = "count" ) -> pd .Series :
0 commit comments