Skip to content

Commit 82f13e4

Browse files
EliEli
authored and committed
Improve time interval handling and irregular time series identification.
1 parent a5d4f19 commit 82f13e4

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

dms_datastore/read_ts.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import os
1212
import fnmatch
1313
from vtools.functions.merge import *
14-
from vtools.data.vtime import days, minutes
14+
from vtools.data.vtime import days, minutes, hours, months, seconds, years, to_timedelta
1515
from dms_datastore.filename import extract_year_fname
1616

1717
__all__ = [
@@ -1341,7 +1341,7 @@ def read_ts(
13411341
if "freq" in kwargs and freq is not None:
13421342
raise ValueError("freq must be None if passed in kwargs to avoid conflict")
13431343
if force_regular:
1344-
if freq in (None, "None"): freq = "infer"
1344+
if freq in (None, "None","none"): freq = "infer"
13451345
else:
13461346
if freq not in ["None", None]:
13471347
raise ValueError("freq must be None or 'None' if force_regular is False")
@@ -1433,7 +1433,6 @@ def infer_freq_robust(
14331433
index, preferred=["h", "15min", "6min", "10min", "h", "d"], **kwargs
14341434
):
14351435
index = index.round("1min")
1436-
14371436
if len(index) < 8:
14381437
# fewer than 8 points: not enough to quibble, so infer from all of them
14391438
f = pd.infer_freq(index)
@@ -1452,8 +1451,9 @@ def infer_freq_robust(
14521451
f = pd.infer_freq(index[0:7])
14531452
if f is None:
14541453
for p in preferred:
1455-
freq = pd.tseries.frequencies.to_offset(p)
1454+
freq = to_timedelta(p)
14561455
tester = index.round(p)
1456+
14571457
diff = (index - tester) < (freq / 5)
14581458
frac = diff.mean()
14591459
if frac > 0.98:

examples/dropbox/dropbox_spec_ccf.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ data:
99
- name: ccfb
1010
skip: False
1111
collect:
12-
file_pattern: "CC_Gate_Positions*2024*.csv"
12+
file_pattern: "CC_Gate_Positions*.csv"
1313
wildcard: time_overlap # time_sharded | time_overlap | data | none.
1414
# If time_sharded, indicates non-overlapping timestamps that use year chunking.
1515
# If time_overlap, indicates wildcards are ordered by date, e.g. 20250531 (lexicographic order must equal chronological order)
@@ -27,7 +27,7 @@ data:
2727
transition: prefer_first
2828
selector: null # Usually a column name. When read_last_resort_csv is used, use null.
2929
transforms:
30-
- name: dst_tz # SCADA is delivered local time. Often this is not needed from better data sources
30+
- name: dst_st # SCADA is delivered local time. Often this is not needed from better data sources
3131
args:
3232
src_tz: US/Pacific
3333
target_tz: ${target_tz}
@@ -55,10 +55,10 @@ data:
5555
output:
5656
staging_dir: ./drop_staging/formatted
5757
write_args:
58-
float_format: "%0.3f" # optional, fallback = 0.6f
58+
float_format: "%0.3f"
5959
chunk_years: False
6060
repo_dir: ./fake_repo/formatted
61-
merge_priority: staged
61+
merge_priority: staged # reconcile
6262
allow_new_series: true
6363
inspection:
6464
recent_years: 3

0 commit comments

Comments
 (0)