Skip to content

Commit e1de627

Browse files
Eli
authored and committed
Bolster API docs and some black formatting.
1 parent 74fee16 commit e1de627

4 files changed

Lines changed: 113 additions & 56 deletions

File tree

vtools/functions/blend.py

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
import pandas as pd
32
import numpy as np
43
from vtools import to_timedelta
@@ -7,6 +6,7 @@
76

87
__all__ = ["ts_blend"]
98

9+
1010
def _blend_output_index(series):
1111
"""
1212
Determine the working index for ts_blend.
@@ -42,6 +42,7 @@ def _blend_output_index(series):
4242

4343
return regular_index_from_valid_extent(series, output_freq)
4444

45+
4546
def _distance_to_gap(hi_col: pd.Series, mode: str = "count") -> pd.Series:
4647
"""
4748
Distance to nearest gap (NaN) in hi_col.
@@ -95,7 +96,9 @@ def _distance_to_gap(hi_col: pd.Series, mode: str = "count") -> pd.Series:
9596
if mode == "freq":
9697
freq = idx.freq
9798
if freq is None:
98-
raise ValueError("Time-based blending requires a regular index with .freq set.")
99+
raise ValueError(
100+
"Time-based blending requires a regular index with .freq set."
101+
)
99102
# counts * freq → Timedelta
100103
return dist_s * to_timedelta(freq)
101104

@@ -127,9 +130,13 @@ def _normalize_blend_length(blend_length, index):
127130
# Timedelta-like: e.g. '2h', '30min'
128131
td = pd.to_timedelta(blend_length)
129132
if not isinstance(index, (pd.DatetimeIndex, pd.PeriodIndex)):
130-
raise ValueError("Time-based blend_length requires a DatetimeIndex or PeriodIndex.")
133+
raise ValueError(
134+
"Time-based blend_length requires a DatetimeIndex or PeriodIndex."
135+
)
131136
if index.freq is None:
132-
raise ValueError("Time-based blend_length requires a regular index with a .freq attribute.")
137+
raise ValueError(
138+
"Time-based blend_length requires a regular index with a .freq attribute."
139+
)
133140
if td <= pd.Timedelta(0):
134141
return None, None
135142

@@ -166,8 +173,16 @@ def _blend_two(
166173
cols = sorted(set(aligned_hi.columns) | set(aligned_lo.columns))
167174

168175
for col in cols:
169-
hi_col = aligned_hi[col] if col in aligned_hi.columns else pd.Series(index=idx, dtype=float)
170-
lo_col = aligned_lo[col] if col in aligned_lo.columns else pd.Series(index=idx, dtype=float)
176+
hi_col = (
177+
aligned_hi[col]
178+
if col in aligned_hi.columns
179+
else pd.Series(index=idx, dtype=float)
180+
)
181+
lo_col = (
182+
aligned_lo[col]
183+
if col in aligned_lo.columns
184+
else pd.Series(index=idx, dtype=float)
185+
)
171186

172187
hi_nan = hi_col.isna()
173188
lo_nan = lo_col.isna()
@@ -214,8 +229,7 @@ def _blend_two(
214229
lo_vals = lo_col[near_gap].astype(float)
215230

216231
blended_vals = (
217-
w_hi.to_numpy() * hi_vals.to_numpy()
218-
+ w_lo.to_numpy() * lo_vals.to_numpy()
232+
w_hi.to_numpy() * hi_vals.to_numpy() + w_lo.to_numpy() * lo_vals.to_numpy()
219233
)
220234

221235
# IMPORTANT: use .loc with a boolean mask, not .at, so we never hit
@@ -235,6 +249,7 @@ def _blend_two(
235249

236250
__all__ = ["ts_blend"]
237251

252+
238253
def _blend_output_index(series):
239254
"""
240255
Determine the working index for ts_blend.
@@ -270,6 +285,7 @@ def _blend_output_index(series):
270285

271286
return regular_index_from_valid_extent(series, output_freq)
272287

288+
273289
def _distance_to_gap(hi_col: pd.Series, mode: str = "count") -> pd.Series:
274290
"""
275291
Distance to nearest gap (NaN) in hi_col.
@@ -323,7 +339,9 @@ def _distance_to_gap(hi_col: pd.Series, mode: str = "count") -> pd.Series:
323339
if mode == "freq":
324340
freq = idx.freq
325341
if freq is None:
326-
raise ValueError("Time-based blending requires a regular index with .freq set.")
342+
raise ValueError(
343+
"Time-based blending requires a regular index with .freq set."
344+
)
327345
# counts * freq → Timedelta
328346
return dist_s * to_timedelta(freq)
329347

@@ -355,9 +373,13 @@ def _normalize_blend_length(blend_length, index):
355373
# Timedelta-like: e.g. '2h', '30min'
356374
td = pd.to_timedelta(blend_length)
357375
if not isinstance(index, (pd.DatetimeIndex, pd.PeriodIndex)):
358-
raise ValueError("Time-based blend_length requires a DatetimeIndex or PeriodIndex.")
376+
raise ValueError(
377+
"Time-based blend_length requires a DatetimeIndex or PeriodIndex."
378+
)
359379
if index.freq is None:
360-
raise ValueError("Time-based blend_length requires a regular index with a .freq attribute.")
380+
raise ValueError(
381+
"Time-based blend_length requires a regular index with a .freq attribute."
382+
)
361383
if td <= pd.Timedelta(0):
362384
return None, None
363385

@@ -394,8 +416,16 @@ def _blend_two(
394416
cols = sorted(set(aligned_hi.columns) | set(aligned_lo.columns))
395417

396418
for col in cols:
397-
hi_col = aligned_hi[col] if col in aligned_hi.columns else pd.Series(index=idx, dtype=float)
398-
lo_col = aligned_lo[col] if col in aligned_lo.columns else pd.Series(index=idx, dtype=float)
419+
hi_col = (
420+
aligned_hi[col]
421+
if col in aligned_hi.columns
422+
else pd.Series(index=idx, dtype=float)
423+
)
424+
lo_col = (
425+
aligned_lo[col]
426+
if col in aligned_lo.columns
427+
else pd.Series(index=idx, dtype=float)
428+
)
399429

400430
hi_nan = hi_col.isna()
401431
lo_nan = lo_col.isna()
@@ -442,8 +472,7 @@ def _blend_two(
442472
lo_vals = lo_col[near_gap].astype(float)
443473

444474
blended_vals = (
445-
w_hi.to_numpy() * hi_vals.to_numpy()
446-
+ w_lo.to_numpy() * lo_vals.to_numpy()
475+
w_hi.to_numpy() * hi_vals.to_numpy() + w_lo.to_numpy() * lo_vals.to_numpy()
447476
)
448477

449478
# IMPORTANT: use .loc with a boolean mask, not .at, so we never hit
@@ -454,12 +483,9 @@ def _blend_two(
454483

455484
return out
456485

486+
457487
@align_inputs_strict(seq_arg=0, names_kw="names")
458-
def ts_blend(
459-
series,
460-
names=None,
461-
blend_length=None
462-
):
488+
def ts_blend(series, names=None, blend_length=None):
463489
"""
464490
Blend multiple time series together, using higher priority where possible,
465491
but ramping in lower-priority data near gaps in the higher-priority series.
@@ -507,7 +533,9 @@ def ts_blend(
507533
# If any DataFrame is present, normalize all to DataFrame
508534
any_df = any(isinstance(s, pd.DataFrame) for s in series)
509535
if any_df:
510-
series = [s.to_frame(name=s.name) if isinstance(s, pd.Series) else s for s in series]
536+
series = [
537+
s.to_frame(name=s.name) if isinstance(s, pd.Series) else s for s in series
538+
]
511539

512540
all_df = all(isinstance(s, pd.DataFrame) for s in series)
513541
any_series = any(isinstance(s, pd.Series) for s in series)
@@ -523,17 +551,17 @@ def ts_blend(
523551
)
524552
elif any_df and any_series:
525553
if names is None:
526-
df_cols = {c for s in series if isinstance(s, pd.DataFrame) for c in s.columns}
554+
df_cols = {
555+
c for s in series if isinstance(s, pd.DataFrame) for c in s.columns
556+
}
527557
for s in series:
528558
if isinstance(s, pd.Series) and s.name not in df_cols:
529559
raise ValueError(
530560
"Mixed Series and DataFrames require Series names to match DataFrame columns."
531561
)
532562

533563
# Build working index according to frequency policy.
534-
full_index = _blend_output_index(
535-
series
536-
)
564+
full_index = _blend_output_index(series)
537565

538566
# Normalize blend_length against the working index
539567
blend_mode, blend_L = _normalize_blend_length(blend_length, full_index)
@@ -557,4 +585,4 @@ def ts_blend(
557585
elif isinstance(blended, pd.Series):
558586
blended = blended.to_frame()
559587

560-
return blended
588+
return blended

vtools/functions/error_detect.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,10 @@ def bounds_test(ts, bounds):
119119
if bounds is not None:
120120
lo, hi = bounds
121121
if lo is not None:
122-
anomaly |= (ts < lo)
122+
anomaly |= ts < lo
123123
if hi is not None:
124-
anomaly |= (ts > hi)
125-
124+
anomaly |= ts > hi
125+
126126
return anomaly
127127

128128

@@ -136,6 +136,7 @@ def median_test(ts, level=4, filt_len=7, quantiles=(0.005, 0.095), copy=True):
136136
as_anomaly=True,
137137
)
138138

139+
139140
def choose_npartitions(
140141
nrows,
141142
*,
@@ -169,6 +170,7 @@ def choose_npartitions(
169170
max_safe = max(1, nrows // min_partition_size)
170171
return max(1, min(proposed, max_safe))
171172

173+
172174
def median_test_oneside(
173175
ts,
174176
scale=None,
@@ -186,7 +188,7 @@ def median_test_oneside(
186188
vals = ts[::-1]
187189
else:
188190
vals = ts
189-
191+
190192
if isinstance(vals, pd.Series):
191193
vals = vals.to_frame(name="ts")
192194
elif isinstance(vals, pd.DataFrame):
@@ -342,7 +344,6 @@ def median_test_twoside(
342344

343345
vals = ts_out.to_numpy()
344346

345-
346347
def mseq(flen):
347348
halflen = flen // 2
348349
a = np.arange(0, halflen)
@@ -482,7 +483,9 @@ def steep_then_nan(
482483
elif gap_aggregation == "all":
483484
near_t = near.all(axis=1)
484485
else:
485-
raise ValueError(f"gap_aggregation must be 'any' or 'all', got {gap_aggregation!r}")
486+
raise ValueError(
487+
f"gap_aggregation must be 'any' or 'all', got {gap_aggregation!r}"
488+
)
486489
else:
487490
near_t = near
488491

@@ -523,6 +526,7 @@ def steep_then_nan(
523526
warnings.resetwarnings()
524527
return ts_out
525528

529+
526530
def despike(arr, n1=2, n2=20, block=10, *, as_anomaly=False):
527531
"""Detect and optionally remove isolated spikes using overlapping-window statistics.
528532

0 commit comments

Comments (0)