Skip to content

Commit cfb1de6

Browse files
committed
fix: handle None and string errors in cache storage and improve timewindow parsing
1 parent c8f82f1 commit cfb1de6

1 file changed

Lines changed: 41 additions & 11 deletions

File tree

pydsm/analysis/postpro.py

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,10 @@ def get_cache_key(self, bpart, cpart, epart):
135135
return f"/{bpart.upper()}/{cpart.upper()}/{epart.upper()}/"
136136

137137
def store(self, df, units, bpart, cpart, epart):
138-
if type(df) is str:
138+
if df is None or type(df) is str:
139+
error_str = df if isinstance(df, str) else "None returned from process()"
139140
key = self.get_cache_key(bpart, cpart, epart)
140-
self.cache[key] = (df, units.upper(), "ERROR")
141+
self.cache[key] = (error_str, (units or "").upper(), "ERROR")
141142
else:
142143
if df.empty:
143144
return
@@ -274,7 +275,13 @@ def _read_ts(self):
274275
else:
275276
return_df = merge(dflist)
276277
else:
277-
return_df = next(dfgen).data
278+
try:
279+
return_df = next(dfgen).data
280+
except StopIteration:
281+
return (
282+
"Error in postpro._read_ts: no data found for pathname: "
283+
+ pathname.upper()
284+
)
278285
convert_index_to_timestamps(return_df) # inplace change of index
279286
elif self.subtract or self.ratio:
280287
# read in >1 time series, and subtract them. Column names of dataframes are set to 'data' because when
@@ -376,10 +383,19 @@ def _load(self, cpart_suffix="", epart=TIME_INTERVAL, timewindow=""):
376383
return_series = series
377384
elif series is not None:
378385
if timewindow != "":
379-
start, end = timewindow.split("-")
380-
start = pd.Timestamp(start)
381-
end = pd.Timestamp(end)
386+
if " - " in timewindow:
387+
# DSM2 format: "01MAR2015 - 30SEP2024"
388+
tw_parts = [p.strip() for p in timewindow.split(" - ", 1)]
389+
start = pd.Timestamp(tw_parts[0])
390+
end = pd.Timestamp(tw_parts[1])
391+
else:
392+
# Legacy format: "2015-03-01-2024-09-30" (two 10-character ISO dates joined by "-"; sliced at fixed offsets, or split on the dash when there is only one)
393+
start, end = timewindow.split("-", 1) if timewindow.count("-") == 1 else (timewindow[:10], timewindow[11:])
394+
start = pd.Timestamp(start.strip())
395+
end = pd.Timestamp(end.strip())
382396
return_series = series.loc[start:end]
397+
else:
398+
return_series = series
383399
else:
384400
return_series = series
385401
except StopIteration as e:
@@ -414,6 +430,20 @@ def store_processed(self):
414430
self._store(self.amp, "-AMP", PostProCache.IRR_E_PART)
415431
return True
416432

433+
def has_cached_failure(self):
    """Report whether the cache holds a recorded failure for this station.

    Loads the entry cached under this processor's location name, variable
    type name and ``PostProcessor.TIME_INTERVAL``. A failed attempt is
    recognised by a non-empty string payload whose type tag is ``"ERROR"``;
    any other entry is treated as "no recorded failure". This lets callers
    tell an attempted-but-failed station apart from one that was never
    processed, so on-demand processing is not retried for stations that
    are missing from the DSS file.

    Returns:
        bool: True only when the cached payload is a non-empty error string
        tagged ``"ERROR"``.
    """
    payload, _units, tag = self.cache.load(
        self.location.name,
        self.vartype.name,
        PostProcessor.TIME_INTERVAL,
    )
    # Anything not tagged as an error is not a recorded failure.
    if tag != "ERROR":
        return False
    # Error entries carry their message as a string payload.
    if not isinstance(payload, str):
        return False
    return len(payload) > 0
446+
417447
def load_processed(self, timewindow="", invert_series=False):
418448
"""
419449
invert_series (bool): if true, all data will be multiplied by -1. This is needed
@@ -437,11 +467,11 @@ def load_processed(self, timewindow="", invert_series=False):
437467
)
438468
success = False
439469
if (
440-
self.df is not None
441-
and self.gdf is not None
442-
and self.high is not None
443-
and self.low is not None
444-
and self.amp is not None
470+
isinstance(self.df, pd.DataFrame)
471+
and isinstance(self.gdf, pd.DataFrame)
472+
and isinstance(self.high, pd.DataFrame)
473+
and isinstance(self.low, pd.DataFrame)
474+
and isinstance(self.amp, pd.DataFrame)
445475
and len(self.df) > 0
446476
and len(self.gdf) > 0
447477
and len(self.high) > 0

0 commit comments

Comments
 (0)