Skip to content

Commit d9073b9

Browse files
Eli
authored and committed
Added process_yolo
1 parent 850d259 commit d9073b9

File tree

1 file changed

+111
-74
lines changed

1 file changed

+111
-74
lines changed

dms_datastore/processed/process_yolo.py

Lines changed: 111 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,10 @@
55
import pandas as pd
66

77
import matplotlib.pyplot as plt
8-
from vtools.functions.interpolate import *
9-
from vtools.functions.filter import *
10-
from vtools.data.vtime import *
8+
from vtools import *
119
from dms_datastore.read_ts import *
12-
from schimpy.unit_conversions import ec_psu_25c,CFS2CMS,CMS2CFS
10+
from vtools.functions.unit_conversions import ec_psu_25c,CFS2CMS,CMS2CFS
1311
from vtools.functions.error_detect import *
14-
import pyhecdss
1512

1613
def describe_null(dset,name):
1714
print(f"null for {name}")
@@ -29,16 +26,18 @@ def describe_null(dset,name):
2926
print("series")
3027
interiornan = intnan.sum(axis=0)
3128
count = isnan.sum(axis=0)
32-
print("Count: {} Interior: {}".format(count,interiornan))
29+
print("nan count Total: {} Interior: {}".format(count,interiornan))
3330
print(nans)
3431
else: print("None")
3532

36-
def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_plot=False):
33+
def process_yolo(outfname,sdate,edate,do_plot=True):
3734

3835
interval = minutes(15)
39-
continuous_repo = '//cnrastore-bdo/Modeling_Data/continuous_station_repo/'
36+
# for some reason /screened is choking
37+
continuous_repo = '//cnrastore-bdo/Modeling_Data/repo/continuous/formatted'
4038
usgs_dir = continuous_repo
4139
wdl_dir = continuous_repo
40+
rt_repo = './data/cdec_download'
4241

4342
#cdec_dir = "data/cdec_download"
4443
#dsm2_file="D:/Delta/dsm2_v8/timeseries/hist201912.dss"
@@ -52,7 +51,7 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
5251

5352

5453
# This is the RYI station, replaced in roughly 2018-2019 by RYE
55-
cache = read_ts(osp.join(usgs_dir,'usgs_ryi_11455350_flow_*.rdb'))
54+
cache = read_ts(osp.join(usgs_dir,'usgs_ryi_11455350_flow_*.csv'))
5655
cache = cache.interpolate(limit=60) # 60 is way more than we would usually allow
5756
# when we apply a filter -- we do it here given
5857
# that completeness is more important than
@@ -62,7 +61,7 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
6261
cache.columns = ["value"]
6362

6463
# This is the newer station.
65-
cache_ryer = read_ts(osp.join(usgs_dir,'usgs_rye_11455385_flow_*.rdb'))
64+
cache_ryer = read_ts(osp.join(usgs_dir,'usgs_rye_11455385_flow_*.csv'))
6665
cache_ryer = cache_ryer.interpolate(limit=60)
6766
cache_ryer = cosine_lanczos(cache_ryer,hours(40))
6867
cache_ryer.columns=["value"]
@@ -79,7 +78,7 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
7978
# so we have to subtract Miner to get the flow out of the bypass
8079
# out of Yolo to the south
8180
#miner = read_ts(osp.join(usgs_dir,"usgs_hwb_11455165_flow_*.rdb"))
82-
miner = read_ts(osp.join(usgs_dir,"usgs_hwb_11455165_flow_*.rdb"))
81+
miner = read_ts(osp.join(usgs_dir,"usgs_hwb_11455165_flow_*.csv"))
8382
miner = miner.interpolate(limit=60)
8483
miner = cosine_lanczos(miner,hours(40))
8584
yolo_south = cache_interp.sub(miner.squeeze(),axis=0)
@@ -93,27 +92,35 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
9392

9493

9594
# Sacweir. The USGS readers probably don't work for daily data so this is a brittle alternative.
96-
sacweir = read_ts(osp.join('.',"usgs_ssw2_11426000_flow_2000_2022.rdb")).interpolate().to_period()
95+
sacweir = read_ts(osp.join('data',"usgs_ssw2_11426000_flow_2000_*.rdb")).interpolate().to_period()
9796
sacweir.columns = ['value']
9897
sacweir = rhistinterp(sacweir+100.,interval,lowbound=0.0,p=12.)-100.
9998
sacweir.columns=["value"]
10099

101-
woodland = read_ts("usgs_yby_11453000_flow_*.rdb")
102-
woodland = woodland.interpolate(method='linear',limit=1200)
100+
woodland = read_ts(osp.join(usgs_dir,"usgs_yby_11453000_flow_*.csv"))
101+
woodland = woodland.interpolate(method='linear',limit=1200)
103102
woodland.columns = ['value']
104-
sacweir.reindex(woodland.index)
105-
sacweir.fillna(0.0)
103+
#print("woodland 0",woodland.loc["2023-03-14T01:15:00"])
104+
105+
sacweir=sacweir.reindex(woodland.index)
106+
sacweir=sacweir.fillna(0.0)
107+
#print("after sacweir",woodland.loc["2023-03-14T01:15:00"])
108+
#print("after sacweir",sacweir.loc["2023-03-14T01:15:00"])
109+
106110
woodland = woodland + sacweir
107111
woodinterp = woodland.interpolate()
108-
woodland.loc[pd.Timestamp(2019,1,16):pd.Timestamp(2019,1,26)] = woodinterp
112+
#print("woodinterp 0",woodinterp.loc["2023-03-14T01:15:00"])
113+
114+
woodland.loc[pd.Timestamp(2019,1,16):pd.Timestamp(2019,1,26)] = woodinterp.loc[pd.Timestamp(2019,1,16):pd.Timestamp(2019,1,26)]
115+
#print("woodland 0",woodland.loc["2023-03-14T01:15:00"])
109116

110117
lis_flow_fname = "ncro_lis_b91560q_flow_*.csv"
111118
lis_elev_fname = "ncro_lis_b91560_elev_*.csv"
112119
lisbon1 = read_ts(osp.join(wdl_dir,lis_flow_fname))
113120

114121
lisbon1 = med_outliers(lisbon1,range=(-1000.,5000.)) # was scale=50
115122
lisbon1 = lisbon1.interpolate(limit = 20)[sdate:edate]
116-
lisbon1 = lisbon1.resample('15T').interpolate(limit=3)
123+
lisbon1 = lisbon1.resample('15min').interpolate(limit=3)
117124
lisbon_elev1 = read_ts(osp.join(wdl_dir,lis_elev_fname))
118125

119126

@@ -124,24 +131,27 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
124131
end_of_ncro_elev = lisbon_elev1.last_valid_index()
125132

126133

127-
lis_cdec_fname = "cdec_lis_*flow_*9999.csv"
128-
lisbon2 = read_ts(osp.join(wdl_dir,lis_cdec_fname))
134+
lis_cdec_fname = "cdec_lis_*_flow_*.csv"
135+
lisbon2 = read_ts(osp.join(continuous_repo,lis_cdec_fname))
136+
137+
# todo have rt_repo stuff just go into the repo
138+
lisbon2 = read_ts(osp.join(rt_repo,lis_cdec_fname))
139+
140+
129141
lisbon2 = med_outliers(lisbon2,range=(-1000.,5000.))
130142
lisbon2= lisbon2.interpolate(limit = 20)[end_of_ncro:edate]
131143
lisbon2.columns=["value"]
132144
lisbon1 = ts_merge([lisbon1,lisbon2])
133-
lisbon1.plot()
134-
plt.title("Debug")
135-
plt.show()
136145

137146

138-
lis_cdec_elev_fname = "cdec_lis_*elev_*9999.csv"
147+
148+
lis_cdec_elev_fname = "cdec_lis_*elev_*.csv"
139149
lisbon_elev2 = read_ts(osp.join(wdl_dir,lis_cdec_elev_fname))
140150
lisbon_elev2 = med_outliers(lisbon_elev2,range=(-5.,25.))
141151
lisbon_elev2= lisbon_elev2.interpolate(limit = 20)[end_of_ncro_elev:edate]
142152
lisbon_elev2.columns=["value"]
143153
lisbon_elev1 = ts_merge([lisbon_elev1,lisbon_elev2])
144-
print(lisbon_elev1)
154+
145155

146156

147157
#Enumerate fixes since 2008
@@ -154,9 +164,11 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
154164
lis_do_fill[:] = 0.
155165

156166
# When to fill toe with woodland. -1 = interpolate self, +1 = from Woodland Initially force zero
167+
lis_do_fill.loc[pd.Timestamp(2007,6,22):pd.Timestamp(2008,7,31)] = INTERP
157168
lis_do_fill.loc[pd.Timestamp(2008,1,2):pd.Timestamp(2008,1,8)] = INTERP
158169
lis_do_fill.loc[pd.Timestamp(2008,1,26):pd.Timestamp(2008,2,17)] = WOODLND
159-
lis_do_fill.loc[pd.Timestamp(2008,5,28):pd.Timestamp(2008,5,30)] = TIDAL
170+
lis_do_fill.loc[pd.Timestamp(2008,4,28):pd.Timestamp(2008,5,30)] = TIDAL
171+
lis_do_fill.loc[pd.Timestamp(2008,9,29):pd.Timestamp(2008,9,30)] = INTERP
160172
lis_do_fill.loc[pd.Timestamp(2009,1,23): pd.Timestamp(2009,1,27)] = INTERP
161173
lis_do_fill.loc[pd.Timestamp(2009,9,8):pd.Timestamp(2009,9,15)] = TIDAL
162174
lis_do_fill.loc[pd.Timestamp(2010,1,21):pd.Timestamp(2010,2,22)] = WOODLND
@@ -169,8 +181,14 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
169181
lis_do_fill.loc[pd.Timestamp(2012,12,1):pd.Timestamp(2013,1,9)] = WOODLND
170182
lis_do_fill.loc[pd.Timestamp(2014,12,17):pd.Timestamp(2015,1,1)] = WOODLND
171183
lis_do_fill.loc[pd.Timestamp(2015,3,23):pd.Timestamp(2015,4,5)] = INTERP
172-
lis_do_fill.loc[pd.Timestamp(2018,7,26):pd.Timestamp(2018,7,25)] = TIDAL
173-
lis_do_fill.loc[pd.Timestamp(2019,1,16):pd.Timestamp(2019,1,26)] = INTERP
184+
lis_do_fill.loc[pd.Timestamp(2018,6,26):pd.Timestamp(2018,7,25)] = TIDAL
185+
lis_do_fill.loc[pd.Timestamp(2018,11,29):pd.Timestamp(2018,12,5)] = TIDAL
186+
lis_do_fill.loc[pd.Timestamp(2018,12,21):pd.Timestamp(2018,12,27)] = TIDAL
187+
lis_do_fill.loc[pd.Timestamp(2019,1,5):pd.Timestamp(2019,1,8)] = TIDAL
188+
lis_do_fill.loc[pd.Timestamp(2019,1,16):pd.Timestamp(2019,1,29)] = INTERP
189+
lis_do_fill.loc[pd.Timestamp(2019,1,16):pd.Timestamp(2019,1,26)] = INTERP
190+
lis_do_fill.loc[pd.Timestamp(2019,2,13):pd.Timestamp(2019,2,26)] = WOODLND
191+
lis_do_fill.loc[pd.Timestamp(2019,4,19):pd.Timestamp(2019,4,26)] = WOODLND
174192
lis_do_fill.loc[pd.Timestamp(2019,6,8):pd.Timestamp(2022,2,1)] = INTERP
175193
lis_do_fill.loc[pd.Timestamp(2020,2,3):pd.Timestamp(2020,3,23)] = WOODLND
176194

@@ -184,6 +202,7 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
184202
woodupdate.update(woodland,overwrite=True)
185203
toe.loc[lis_do_fill == WOODLND] = woodupdate.loc[lis_do_fill == WOODLND]
186204

205+
print("woodland 1",woodland.loc["2023-03-14T01:15:00"])
187206

188207
# Now create the Yolo time series and mask for when Yolo Bypass flow is assumed
189208
lisbon_elev1.columns = ["value"]; toe.columns = ["value"] ; lisbon1.columns = ["value"] # needed for concat
@@ -199,22 +218,24 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
199218

200219
use_yolo = (lisbon_elev1 > 11.5) | (lisbon1 > 4000.) | (toe > 4000.) | (yolo>4000.)
201220
use_yolo = use_yolo.reindex(full.index)
202-
use_yolo.fillna(method="ffill",inplace=True)
221+
use_yolo.ffill(inplace=True)
203222
use_yolo.loc[pd.Timestamp(2019,1,16):pd.Timestamp(2019,1,26)] = False
204223
full['use_yolo'] = use_yolo
224+
print("woodland 2",use_yolo.loc["2023-03-14T01:15:00"])
225+
205226

206227
# Since we are getting closer to final product and cannot tolerate missing values,
207228
# interpolate Toe drain without a limit in gap size and apply only in areas where
208229
# use_yolo is False.
209-
toeinterp = toe.interpolate()
210-
toe = toe.where(use_yolo,toeinterp)
230+
toeinterp = full.toe.interpolate()
231+
full["toe"] = full.toe.where(full.use_yolo,toeinterp)
211232

212233
# Yolo bypass flow will be zero when use_yolo is False
213-
full.loc[~use_yolo.value,'yolo'] = 0.
234+
full["yolo"]=full["yolo"].where(full.use_yolo,other=0.,axis=0)
214235

215236

216237
# Now add a measure of total Bypass flow, which will be the maximum of the Yolo and Toe Drain
217-
full["yolo_bypass"]=full.max(axis=1) # Total Bypass flow which includes Toe and Yolo flow pathways
238+
full["yolo_bypass"]=full[["toe","yolo"]].max(axis=1) # Total Bypass flow which includes Toe and Yolo flow pathways
218239
full.loc[~full.use_yolo,"yolo_bypass"] = full.toe[~full.use_yolo]
219240

220241
# Todo: how would this differ from use_yolo?
@@ -224,46 +245,50 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
224245
toe_eff=full.toe.clip(upper=4000.)
225246
toe_eff[use_yolo.value & toe_eff.isnull()] = 4000. # Often values above 4000 will be marked as nan
226247
# This should mostly pick the correct instances and mark them as 4000.
227-
toe_eff[full_high] = toe_eff + 0.05*(full.yolo_bypass - toe_eff) # Allocate 5% of excess Bypass flow to Toe Drain channel
248+
249+
toe_eff.mask(full_high,toe_eff + 0.05*(full.yolo_bypass - toe_eff))
250+
#toe_eff[full_high] = toe_eff + 0.05*(full.yolo_bypass - toe_eff) # Allocate 5% of excess Bypass flow to Toe Drain channel
228251
full['toe_eff'] = toe_eff
229252
yolo_eff = full.yolo_bypass - full.toe_eff # Yolo carries the part of Total Bypass flow that is not routed down Toe Drain
230253
yolo_eff[full_low] = 0. # Yolo is zero when the Total Bypass flow including Yolo/Toe is small
231254
full['yolo_eff'] = yolo_eff
232-
233-
ax=full.plot()
234-
#ax.legend(["toe","yolo","full"])
235-
236-
plt.title("Output and some inputs")
237-
plt.show()
238-
239-
240-
241-
242-
fig,(ax0,ax1) = plt.subplots(2,sharex=True)
243-
toe.plot(ax=ax0,linewidth=3,color="0.45")
244-
lisbon1.plot(ax=ax0)
245-
#lisbon.plot(ax=ax0)
246-
#cache_interp.plot(ax=ax0)
247-
#cache.plot(ax=ax0)
248-
yolo.plot(ax=ax0)
249-
#yolo1.plot(ax=ax0)
250-
woodland.plot(ax=ax0)
251-
#(woodupdate-15).plot(ax=ax0)
252-
253-
#yolo2016.plot(ax=ax0)
254-
lisbon_elev1.plot(ax=ax1)
255-
#ax.grid()
256-
ax0.grid()
257-
ax0.set_ylabel("cfs")
258-
#ax0.legend(["Toe","Lisbon","woodland"])
259-
ax0.legend(["Toe","Lisbon WDL","Yolo","Woodland"])
260-
#ax0.legend(["Lisbon WDL","cache_interp","cache","yolo south/miner","woodland","yolo2016"])
261-
ax1.grid()
262-
ax1.set_ylabel("ft")
263-
plt.show()
264-
unitstr = ["cfs","cms"]
265255

256+
if do_plot:
257+
print("Plotting Output and some inputs")
258+
ax=full.plot()
259+
#ax.legend(["toe","yolo","full"])
260+
plt.title("Output and some inputs")
261+
plt.show()
262+
263+
264+
fig,(ax0,ax1) = plt.subplots(2,sharex=True)
265+
toe.plot(ax=ax0,linewidth=3,color="0.45")
266+
lisbon1.plot(ax=ax0)
267+
#lisbon.plot(ax=ax0)
268+
#cache_interp.plot(ax=ax0)
269+
#cache.plot(ax=ax0)
270+
yolo.plot(ax=ax0)
271+
#yolo1.plot(ax=ax0)
272+
woodland.plot(ax=ax0)
273+
#(woodupdate-15).plot(ax=ax0)
274+
275+
#yolo2016.plot(ax=ax0)
276+
lisbon_elev1.plot(ax=ax1)
277+
#ax.grid()
278+
ax0.grid()
279+
ax0.set_ylabel("cfs")
280+
#ax0.legend(["Toe","Lisbon","woodland"])
281+
ax0.legend(["Toe","Lisbon WDL","Yolo","Woodland"])
282+
#ax0.legend(["Lisbon WDL","cache_interp","cache","yolo south/miner","woodland","yolo2016"])
283+
ax1.grid()
284+
ax1.set_ylabel("ft")
285+
plt.show()
286+
287+
288+
289+
unitstr = ["cfs","cms"]
266290

291+
print("Writing")
267292
multipliers = [1.,CFS2CMS]
268293
for unitstring,multiplier in zip(unitstr,multipliers):
269294
outfile = outfname.replace(".csv",f"_{unitstring}.csv")
@@ -273,27 +298,39 @@ def process_yolo(outfname,sdate,edate,do_plot=True,do_lis_plot=False,do_bypass_p
273298
outf.write("# yolo_toedrain: discharge in Toe Drain\n")
274299
outf.write("# yolo: remaining discharge in Yolo Bypass during flood events\n")
275300
output=full[["toe_eff","yolo_eff"]]*multiplier
301+
#output = extrapolate_ts(output,"2024-01-01")
302+
276303
output.columns=["yolo_toedrain","yolo"]
277304
describe_null(output.yolo,"yolo")
278305
describe_null(output.yolo_toedrain,"yolo_toedrain")
279-
output.loc[sdate:edate,:].to_csv(outf,header=True,index=True,date_format="%Y-%m-%dT%H:%M",float_format="%.2f")
306+
print(output.first_valid_index(),output.last_valid_index())
307+
try:
308+
output.loc[sdate:edate].to_csv(outf,header=True,index=True,date_format="%Y-%m-%dT%H:%M",float_format="%.2f")
309+
print("Write successful")
310+
except:
311+
output.loc[sdate:].to_csv(outf,header=True,index=True,date_format="%Y-%m-%dT%H:%M",float_format="%.2f")
312+
print("An error occurred in writing to csv due to end date. Eliminating the end date allowed recovery, but check output file carefully")
313+
280314

281315

282316

283317

284318

285319
def main():
286-
outfile = "yolo_v20220202.csv"
320+
287321
sdate = pd.Timestamp(2005,1,1) # Start date of processed data. Earlier than 2005 will require new data and approach
288-
edate = pd.Timestamp(2022,2,1)
289-
edate = pd.Timestamp.now()
322+
edate = pd.Timestamp(2025,6,30)
323+
outfile = "prepared_yolo.csv"
324+
325+
326+
290327
# The following two flags will produce diagnostic info. If a problem is revealed, the script output probably is bogus but you will know
291328
# how to fix it. Please refer to comments above "FIXES" for LIsbon and Yolo
292329
do_lis_plot = False # Do plots of Lisbon that may help decide how to fill this station
293330
do_bypass_plot = False # Do plots that compare the north (Woodland+Sac Weir) to Southern (RYE/RYI - Miner (HBW) approaches to help decide
294-
# if they need filling/manipulation
295-
do_plot = True # Plot the results
296-
process_yolo(outfile,sdate,edate,do_plot,do_lis_plot,do_bypass_plot)
331+
# if they need filling/manipulation (no longer here)
332+
do_plot = False # Plot the results
333+
process_yolo(outfile,sdate,edate,do_plot)
297334

298335

299336
if __name__ == '__main__':

0 commit comments

Comments (0)