1818from dms_datastore .dstore_config import *
1919from dms_datastore .inventory import *
2020from dms_datastore .write_ts import *
21+ from dms_datastore .filename import meta_to_filename
2122from schimpy .station import *
2223import geopandas as gpd
2324import numpy as np
@@ -148,22 +149,20 @@ def plot_anomalies(
148149 plt .close (fig )
149150
150151
152+
153+
def filter_inventory_(inventory, stations, params):
    """Restrict an inventory frame to the requested stations and/or params.

    Parameters
    ----------
    inventory : pandas.DataFrame
        Inventory with ``station_id`` and ``param`` columns.
    stations : str, list of str, or None
        Station id(s) to keep; None means no station filtering.
    params : str, list of str, or None
        Parameter name(s) to keep; None means no parameter filtering.

    Returns
    -------
    pandas.DataFrame
        The (possibly) filtered inventory.
    """
    # Apply the two optional filters uniformly: a bare string is
    # promoted to a one-element list before membership testing.
    for column, wanted in (("station_id", stations), ("param", params)):
        if wanted is None:
            continue
        if isinstance(wanted, str):
            wanted = [wanted]
        inventory = inventory.loc[inventory[column].isin(wanted), :]
    return inventory
167166
168167def auto_screen (
169168 fpath = "formatted" ,
@@ -208,29 +207,29 @@ def auto_screen(
208207
209208 active = start_station is None
210209 station_db = station_dbase ()
211- inventory = repo_data_inventory (fpath )
210+
211+ source_repo = "formatted"
212+ actual_fpath = fpath if fpath is not None else repo_root (source_repo )
213+ inventory = repo_data_inventory (repo = "formatted" ,in_path = actual_fpath ) # repo is the config repo, in_path is the data storage location
212214 inventory = filter_inventory_ (inventory , stations , params )
213215 failed_read = []
214216
215217 for index , row in inventory .iterrows ():
216- station_id = index [0 ]
217- if not active :
218- if station_id == start_station :
219- active = True
220- else :
221- continue
222- subloc = index [1 ]
223- if type (subloc ) == float :
218+ station_id = row ["station_id" ]
219+ subloc = row ["subloc" ]
220+ param = row ["param" ]
221+
222+ if pd .isna (subloc ) or subloc is None :
224223 subloc = "default"
225- param = index [ 2 ]
224+
226225 if subloc is None :
227226 subloc = "default"
228227 if np .random .uniform () < 0.0 : # 0.95:
229228 logger .debug (f"Randomly rejecting: { station_id } { subloc } { param } " )
230229 continue
231- filename = str ( row . filename )
230+
232231 station_info = station_db .loc [station_id , :]
233- agency = row . agency_dbase
232+ agency = row [ "agency_registry" ] if "agency_registry" in row . index else row [ "agency" ]
234233 if agency .startswith ("dwr_" ):
235234 agency = agency [4 :] # todo: need to take care of des_ vs dwr_des etc
236235
@@ -240,9 +239,11 @@ def auto_screen(
240239 # these may be lists
241240 try :
242241 # logger.debug(f"fetching {fpath},{station_id},{param}")
243- meta_ts = fetcher (fpath , station_id , param , subloc = subloc )
244- except :
245- logger .warning (f"Read failed for { fpath } , { station_id } , { param } , { subloc } " )
242+ meta_ts = fetcher (source_repo , station_id , param , subloc = subloc , data_path = actual_fpath )
243+ except Exception as e :
244+ logger .warning (f"Read failed for { actual_fpath } , { station_id } , { param } , { subloc } , storage loc = { actual_fpath } " )
245+ logger .exception (e )
246+ print (e )
246247 meta_ts = None
247248
248249 if meta_ts is None :
@@ -276,12 +277,23 @@ def auto_screen(
276277 if "value" in screened .columns :
277278 screened = screened [["value" , "user_flag" ]]
278279 meta ["screen" ] = proto
279- if subloc_actual and subloc_actual != "default" :
280- output_fname = (
281- f"{ agency } _{ station_id } @{ subloc_actual } _{ row .agency_id } _{ param } .csv"
282- )
283- else :
284- output_fname = f"{ agency } _{ station_id } _{ row .agency_id } _{ param } .csv"
280+
281+ # Build output filename using configured naming spec for screened repo
282+ output_meta = {
283+ "agency" : agency ,
284+ "station_id" : station_id ,
285+ "subloc" : subloc_actual if subloc_actual != "default" else None ,
286+ "param" : param ,
287+ "agency_id" : row .agency_id ,
288+ }
289+ # Add year info if available from metadata
290+ if "year" in meta :
291+ output_meta ["year" ] = meta ["year" ]
292+ elif "syear" in meta and "eyear" in meta :
293+ output_meta ["syear" ] = meta ["syear" ]
294+ output_meta ["eyear" ] = meta ["eyear" ]
295+
296+ output_fname = meta_to_filename (output_meta , repo = "screened" ,include_shard = False )
285297 output_fpath = os .path .join (dest , output_fname )
286298 logger .debug ("start write" )
287299 write_ts_csv (screened , output_fpath , meta , chunk_years = True )
@@ -479,24 +491,26 @@ def spatial_config(configfile, x, y):
479491 return checker .region_info (x , y )
480492
481493
def ncro_fetcher(repo, station_id, param, subloc, data_path=None):
    """Reads NCRO data, correctly folding together NCRO and CDEC by priority.

    Celsius is converted to Fahrenheit.

    Parameters
    ----------
    repo : str
        Name of the configured repo to read from.
    station_id : str
        Station identifier.
    param : str
        Parameter (variable) name.
    subloc : str or None
        Sublocation, passed through to ``read_ts_repo``.
    data_path : str, optional
        Explicit data storage location; when None the repo's configured
        location is used.

    Returns
    -------
    Time series plus metadata (``meta=True``), with NCRO sources taking
    priority over CDEC.
    """
    return read_ts_repo(
        station_id,
        param,
        subloc=subloc,
        src_priority=["ncro", "cdec"],
        repo=repo,
        data_path=data_path,
        meta=True,
    )
494504
def general_fetcher(repo, station_id, param, subloc, data_path=None):
    """Fetches from a well behaved and standard repo.

    Parameters
    ----------
    repo : str
        Name of the configured repo to read from.
    station_id : str
        Station identifier.
    param : str
        Parameter (variable) name.
    subloc : str or None
        Sublocation, passed through to ``read_ts_repo``.
    data_path : str, optional
        Explicit data storage location; when None the repo's configured
        location is used.

    Returns
    -------
    Time series plus metadata (``meta=True``).
    """
    return read_ts_repo(
        station_id,
        param,
        subloc=subloc,
        repo=repo,
        data_path=data_path,
        meta=True,
    )
500514
501515def custom_fetcher (agency ):
502516 if agency in ["ncro" , "dwr_ncro" ]:
0 commit comments