@@ -135,9 +135,10 @@ def get_cache_key(self, bpart, cpart, epart):
135135 return f"/{ bpart .upper ()} /{ cpart .upper ()} /{ epart .upper ()} /"
136136
137137 def store (self , df , units , bpart , cpart , epart ):
138- if type (df ) is str :
138+ if df is None or type (df ) is str :
139+ error_str = df if isinstance (df , str ) else "None returned from process()"
139140 key = self .get_cache_key (bpart , cpart , epart )
140- self .cache [key ] = (df , units .upper (), "ERROR" )
141+ self .cache [key ] = (error_str , ( units or "" ) .upper (), "ERROR" )
141142 else :
142143 if df .empty :
143144 return
@@ -274,7 +275,13 @@ def _read_ts(self):
274275 else :
275276 return_df = merge (dflist )
276277 else :
277- return_df = next (dfgen ).data
278+ try :
279+ return_df = next (dfgen ).data
280+ except StopIteration :
281+ return (
282+ "Error in postpro._read_ts: no data found for pathname: "
283+ + pathname .upper ()
284+ )
278285 convert_index_to_timestamps (return_df ) # inplace change of index
279286 elif self .subtract or self .ratio :
280287 # read in >1 time series, and subtract them. Column names of dataframes are set to 'data' because when
@@ -376,10 +383,19 @@ def _load(self, cpart_suffix="", epart=TIME_INTERVAL, timewindow=""):
376383 return_series = series
377384 elif series is not None :
378385 if timewindow != "" :
379- start , end = timewindow .split ("-" )
380- start = pd .Timestamp (start )
381- end = pd .Timestamp (end )
386+ if " - " in timewindow :
387+ # DSM2 format: "01MAR2015 - 30SEP2024"
388+ tw_parts = [p .strip () for p in timewindow .split (" - " , 1 )]
389+ start = pd .Timestamp (tw_parts [0 ])
390+ end = pd .Timestamp (tw_parts [1 ])
391+ else :
392+ # Legacy format, e.g. "2015-03-01-2024-09-30": if there is exactly one dash,
393+ start , end = timewindow .split ("-" , 1 ) if timewindow .count ("-" ) == 1 else (timewindow [:10 ], timewindow [11 :])
394+ start = pd .Timestamp (start .strip ())
395+ end = pd .Timestamp (end .strip ())
382396 return_series = series .loc [start :end ]
397+ else :
398+ return_series = series
383399 else :
384400 return_series = series
385401 except StopIteration as e :
@@ -414,6 +430,20 @@ def store_processed(self):
414430 self ._store (self .amp , "-AMP" , PostProCache .IRR_E_PART )
415431 return True
416432
433+ def has_cached_failure (self ):
434+ """Return True if the cache already holds an error result for this station.
435+
436+ Distinguishes "never attempted" (key absent) from "attempted but failed"
437+ (key present with a string payload). Used to avoid repeated on-demand
438+ processing attempts for stations that genuinely don't exist in the DSS file.
439+ """
440+ series , _ , ptype = self .cache .load (
441+ self .location .name ,
442+ self .vartype .name ,
443+ PostProcessor .TIME_INTERVAL ,
444+ )
445+ return ptype == "ERROR" and isinstance (series , str ) and len (series ) > 0
446+
417447 def load_processed (self , timewindow = "" , invert_series = False ):
418448 """
419449 invert_series (bool): if true, all data will be multiplied by -1. This is needed
@@ -437,11 +467,11 @@ def load_processed(self, timewindow="", invert_series=False):
437467 )
438468 success = False
439469 if (
440- self .df is not None
441- and self .gdf is not None
442- and self .high is not None
443- and self .low is not None
444- and self .amp is not None
470+ isinstance ( self .df , pd . DataFrame )
471+ and isinstance ( self .gdf , pd . DataFrame )
472+ and isinstance ( self .high , pd . DataFrame )
473+ and isinstance ( self .low , pd . DataFrame )
474+ and isinstance ( self .amp , pd . DataFrame )
445475 and len (self .df ) > 0
446476 and len (self .gdf ) > 0
447477 and len (self .high ) > 0
0 commit comments