1616from vtools .data .gap import *
1717from dms_datastore .read_multi import *
1818from dms_datastore .dstore_config import *
19- from dms_datastore .filename import interpret_fname
2019from dms_datastore .inventory import *
2120from dms_datastore .write_ts import *
21+ from dms_datastore .filename import meta_to_filename
2222from schimpy .station import *
2323import geopandas as gpd
2424import numpy as np
@@ -149,22 +149,20 @@ def plot_anomalies(
149149 plt .close (fig )
150150
151151
152+
153+
def filter_inventory_(inventory, stations, params):
    """Restrict an inventory DataFrame to the requested stations and parameters.

    Parameters
    ----------
    inventory : pandas.DataFrame
        Inventory table with (at least) ``station_id`` and ``param`` columns.
    stations : str, list of str, or None
        Station id(s) to keep. A bare string is treated as a single-element
        list. ``None`` means no station filtering.
    params : str, list of str, or None
        Parameter name(s) to keep, with the same conventions as *stations*.

    Returns
    -------
    pandas.DataFrame
        The filtered inventory. When both filters are ``None`` the input is
        returned unchanged.
    """
    if stations is not None:
        if isinstance(stations, str):
            stations = [stations]  # normalize scalar to list for .isin
        inventory = inventory.loc[inventory["station_id"].isin(stations), :]

    if params is not None:
        if isinstance(params, str):
            params = [params]
        inventory = inventory.loc[inventory["param"].isin(params), :]

    return inventory
168166
169167def auto_screen (
170168 fpath = "formatted" ,
@@ -209,29 +207,29 @@ def auto_screen(
209207
210208 active = start_station is None
211209 station_db = station_dbase ()
212- inventory = repo_data_inventory (fpath )
210+
211+ source_repo = "formatted"
212+ actual_fpath = fpath if fpath is not None else repo_root (source_repo )
213+ inventory = repo_data_inventory (repo = "formatted" ,in_path = actual_fpath ) # repo is the config repo, in_path is the data storage location
213214 inventory = filter_inventory_ (inventory , stations , params )
214215 failed_read = []
215216
216217 for index , row in inventory .iterrows ():
217- station_id = index [0 ]
218- if not active :
219- if station_id == start_station :
220- active = True
221- else :
222- continue
223- subloc = index [1 ]
224- if type (subloc ) == float :
218+ station_id = row ["station_id" ]
219+ subloc = row ["subloc" ]
220+ param = row ["param" ]
221+
222+ if pd .isna (subloc ) or subloc is None :
225223 subloc = "default"
226- param = index [ 2 ]
224+
227225 if subloc is None :
228226 subloc = "default"
229227 if np .random .uniform () < 0.0 : # 0.95:
230228 logger .debug (f"Randomly rejecting: { station_id } { subloc } { param } " )
231229 continue
232- filename = str ( row . filename )
230+
233231 station_info = station_db .loc [station_id , :]
234- agency = row . agency_dbase
232+ agency = row [ "agency_registry" ] if "agency_registry" in row . index else row [ "agency" ]
235233 if agency .startswith ("dwr_" ):
236234 agency = agency [4 :] # todo: need to take care of des_ vs dwr_des etc
237235
@@ -241,9 +239,11 @@ def auto_screen(
241239 # these may be lists
242240 try :
243241 # logger.debug(f"fetching {fpath},{station_id},{param}")
244- meta_ts = fetcher (fpath , station_id , param , subloc = subloc )
245- except :
246- logger .warning (f"Read failed for { fpath } , { station_id } , { param } , { subloc } " )
242+ meta_ts = fetcher (source_repo , station_id , param , subloc = subloc , data_path = actual_fpath )
243+ except Exception as e :
244+ logger .warning (f"Read failed for { actual_fpath } , { station_id } , { param } , { subloc } , storage loc = { actual_fpath } " )
245+ logger .exception (e )
246+ print (e )
247247 meta_ts = None
248248
249249 if meta_ts is None :
@@ -277,12 +277,23 @@ def auto_screen(
277277 if "value" in screened .columns :
278278 screened = screened [["value" , "user_flag" ]]
279279 meta ["screen" ] = proto
280- if subloc_actual and subloc_actual != "default" :
281- output_fname = (
282- f"{ agency } _{ station_id } @{ subloc_actual } _{ row .agency_id } _{ param } .csv"
283- )
284- else :
285- output_fname = f"{ agency } _{ station_id } _{ row .agency_id } _{ param } .csv"
280+
281+ # Build output filename using configured naming spec for screened repo
282+ output_meta = {
283+ "agency" : agency ,
284+ "station_id" : station_id ,
285+ "subloc" : subloc_actual if subloc_actual != "default" else None ,
286+ "param" : param ,
287+ "agency_id" : row .agency_id ,
288+ }
289+ # Add year info if available from metadata
290+ if "year" in meta :
291+ output_meta ["year" ] = meta ["year" ]
292+ elif "syear" in meta and "eyear" in meta :
293+ output_meta ["syear" ] = meta ["syear" ]
294+ output_meta ["eyear" ] = meta ["eyear" ]
295+
296+ output_fname = meta_to_filename (output_meta , repo = "screened" ,include_shard = False )
286297 output_fpath = os .path .join (dest , output_fname )
287298 logger .debug ("start write" )
288299 write_ts_csv (screened , output_fpath , meta , chunk_years = True )
@@ -480,24 +491,26 @@ def spatial_config(configfile, x, y):
480491 return checker .region_info (x , y )
481492
482493
def ncro_fetcher(repo, station_id, param, subloc, data_path=None):
    """Read NCRO data, folding together NCRO and CDEC sources by priority.

    NCRO is preferred over CDEC (see ``src_priority`` below).

    NOTE(review): an earlier version of this docstring also claimed Celsius
    is converted to Fahrenheit — that conversion is not visible here; confirm
    whether ``read_ts_repo`` still performs it.

    Parameters
    ----------
    repo : str
        Name of the configured repository to read from.
    station_id : str
        Station identifier.
    param : str
        Parameter (variable) name.
    subloc : str
        Sublocation, or "default".
    data_path : str, optional
        Explicit data storage location overriding the repo default.

    Returns
    -------
    The (metadata, series) result of ``read_ts_repo`` (``meta=True``).
    """
    return read_ts_repo(
        station_id,
        param,
        subloc=subloc,
        src_priority=["ncro", "cdec"],  # NCRO wins over CDEC where both exist
        repo=repo,
        data_path=data_path,
        meta=True,
    )
def general_fetcher(repo, station_id, param, subloc, data_path=None):
    """Fetch a time series from a well behaved and standard repo.

    Thin wrapper over ``read_ts_repo`` that always requests metadata.

    Parameters
    ----------
    repo : str
        Name of the configured repository to read from.
    station_id : str
        Station identifier.
    param : str
        Parameter (variable) name.
    subloc : str
        Sublocation, or "default".
    data_path : str, optional
        Explicit data storage location overriding the repo default.

    Returns
    -------
    The (metadata, series) result of ``read_ts_repo`` (``meta=True``).
    """
    return read_ts_repo(
        station_id,
        param,
        subloc=subloc,
        repo=repo,
        data_path=data_path,
        meta=True,
    )
501514
502515def custom_fetcher (agency ):
503516 if agency in ["ncro" , "dwr_ncro" ]:
0 commit comments