Skip to content

Commit 4ec9062

Browse files
add read_dss function
1 parent df5c187 commit 4ec9062

2 files changed

Lines changed: 109 additions & 1 deletion

File tree

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ dependencies = [
2828
"matplotlib",
2929
"scikit-learn",
3030
"scipy",
31-
"statsmodels>=0.13"
31+
"statsmodels>=0.13",
32+
"pyhecdss"
3233
]
3334

3435
[project.optional-dependencies]

vtools/functions/read_dss.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
from vtools.functions.interpolate import rhistinterp
2+
from vtools.data.vtime import days, minutes
3+
from pyhecdss import get_ts, DSSFile
4+
from pathlib import Path
5+
import pandas as pd
6+
import re
7+
import os
8+
9+
10+
# Maps the E part (time interval) of a DSS pathname to the pandas frequency
# alias that read_dss passes to pd.period_range. An E part that is not a key
# here makes the lookup in read_dss raise KeyError.
dss_e2_freq = {"1HOUR": "H", "1DAY": "D", "1MON": "M"}
11+
12+
13+
def check_exclude(pathname, exclude_pathname):
    """
    Return True if any part of pathname matches the same part of exclude_pathname.

    Both arguments are slash-delimited DSS-style pathnames
    (e.g. '/A_PART/B_PART/C_PART/D_PART/E_PART/F_PART/'). They are split on '/'
    and compared part by part, in order.

    Parameters
    ----------
    pathname: str
        Pathname to test.
    exclude_pathname: str
        Exclusion pattern. Empty parts are ignored (they exclude nothing).
        In a non-empty part, '*' matches any run of characters; every other
        character is matched literally.

    Returns
    -------
    bool
        True as soon as one non-empty exclude part matches the corresponding
        part of pathname (a message is printed in that case), otherwise False.
    """
    path_parts = pathname.split("/")[1:-1]
    exclude_parts = exclude_pathname.split("/")[1:-1]
    for part, exclude_part in zip(path_parts, exclude_parts):
        if not exclude_part:
            continue  # empty part in the pattern: nothing to exclude on
        # Escape the literal pieces so that only '*' acts as a wildcard;
        # characters such as '.', '+' or '(' must not be read as regex syntax.
        pattern = ".*".join(re.escape(piece) for piece in exclude_part.split("*"))
        if re.fullmatch(pattern, part):
            print(
                f"\t\tSkipping path: {pathname}\n\t\t\t{part} matches {exclude_part} from exclude_pathname: \n\t\t\t{exclude_pathname}"
            )
            return True
    return False
31+
32+
33+
def read_dss(
    filename,
    pathname,
    dt=minutes(15),
    p=2.0,
    start_date=None,
    end_date=None,
    exclude_pathname=None,
):
    """
    Reads in a DSM2 dss file and interpolates
    Outputs an interpolated DataFrame of that variable

    Parameters
    ----------
    filename: str|Path
        Path to the DSS file to read
    pathname: str
        Pathname within the DSS file to read.
        Needs to be in the format '/A_PART/B_PART/C_PART/D_PART/E_PART/F_PART/'
        (e.g. '//RSAN112/FLOW////')
    dt:
        Target time step handed to rhistinterp (default: minutes(15)).
    p: float
        Selects the interpolation: p > 0 calls rhistinterp with that p,
        p == 0 calls rhistinterp without passing p, and p < 0 skips
        interpolation and returns the series as read from the file.
    start_date, end_date:
        Optional bounds of the window to extract. When left as None, each
        series is read over its own full period of record.
    exclude_pathname: str, optional
        Pattern passed to check_exclude; series whose pathname matches it
        are skipped.

    Returns
    -------
    pandas.DataFrame
        One column per retained series, named with the series' DSS pathname.

    Raises
    ------
    ValueError
        If pathname does not have 6 parts, if a series does not overlap the
        requested dates, or if no series is left to return.
    KeyError
        If a series' E part is not a key of dss_e2_freq.
    """
    ts_out_list = []
    col_names = []
    print(f"\tReading pathname: {pathname}")
    if len(pathname.split("/")[1:-1]) != 6:
        raise ValueError(f"Invalid DSS pathname: {pathname}, needs 6 parts (A-F)")

    for tsi in get_ts(str(filename), pathname):
        tt_full = tsi[0]
        ts_path = tt_full.columns.values[0]
        if exclude_pathname is not None and check_exclude(ts_path, exclude_pathname):
            continue
        path_e = ts_path.split("/")[5]  # E part: the time interval

        # Resolve the window per series. The caller's start_date/end_date are
        # deliberately NOT overwritten: doing so would make the first series'
        # period of record the window for every series that follows.
        first, last = tt_full.index[0], tt_full.index[-1]
        series_start = first if start_date is None else start_date
        series_end = last if end_date is None else end_date

        # Only a bound the caller supplied can fall outside the record; a
        # defaulted bound is the series' own first/last entry.
        starts_after_window = (
            end_date is not None and first.to_timestamp() > pd.to_datetime(end_date)
        )
        ends_before_window = (
            start_date is not None
            and last.to_timestamp() < pd.to_datetime(start_date)
        )
        if starts_after_window or ends_before_window:
            raise ValueError(
                f"File: {filename} does not cover the dates requested. \n\tRequested dates are: {series_start} to {series_end}, \n\tand the file covers {first} to {last}"
            )

        tt = tt_full[series_start:series_end]
        pidx = pd.period_range(tt.index[0], tt.index[-1], freq=dss_e2_freq[path_e])
        ptt = pd.DataFrame(tt.values[:, 0], pidx)

        # Interpolate with rhistinterp
        if p > 0:
            col_data = rhistinterp(ptt, dt, p=p)
        elif p == 0:
            col_data = rhistinterp(ptt, dt)
        else:
            # NOTE(review): the un-interpolated branch returns the whole series,
            # not the start_date/end_date window -- confirm this is intended.
            col_data = tt_full
        ts_out_list.append(col_data)
        col_names.append(ts_path)

    if not ts_out_list:
        # Nothing matched: include the file's catalog in the error to help the
        # caller find a valid pathname. str() keeps Path arguments consistent
        # with the get_ts call above.
        with DSSFile(str(filename)) as dssh:
            dfcat = dssh.read_catalog()
        raise ValueError(
            f"Warning: DSS data not found for {pathname}. Preview of available paths in {filename} are: {dfcat}"
        )

    ts_out = pd.concat(ts_out_list, axis=1)
    ts_out.columns = col_names
    return ts_out.copy()  # Defragment the DataFrame

0 commit comments

Comments
 (0)