@@ -29,7 +29,11 @@ def _quarantine_file(fname, quarantine_dir="quarantine"):
2929
3030def usgs_scan_series_json (fname ):
3131 hdr = read_yaml_header (fname )
32- orig = yaml .safe_load (hdr ["original_header" ])
32+
33+ orig_txt = hdr ["original_header" ]
34+ if orig_txt is None :
35+ raise ValueError ("No original_header present" )
36+ orig = parse_yaml_header (orig_txt )
3337 subs = orig ["sublocations" ]
3438 var = orig ["variable_code" ]
3539 series = [(str (s ["subloc" ]), var , s ["method_description" ]) for s in subs ]
@@ -123,7 +127,7 @@ def usgs_multivariate(pat, outfile):
123127 files = glob .glob (pat )
124128 data = []
125129 for fname in files :
126- meta = interpret_fname (fname )
130+ meta = interpret_fname (fname , repo = "formatted" )
127131 try :
128132 ts = read_ts (fname , nrows = 4000 )
129133 except :
@@ -132,6 +136,7 @@ def usgs_multivariate(pat, outfile):
132136
133137 multi_cols = ts .shape [1 ] > 1
134138 subloc_df = sublocation_df ()
139+
135140 station_id = meta ["station_id" ]
136141 param = meta ["param" ]
137142 known_multi = (subloc_df ["station_id" ] == station_id ).any ()
@@ -215,23 +220,24 @@ def usgs_multivariate(pat, outfile):
215220 return df
216221
217222
218- def process_multivariate_usgs (fpath , pat = None , rescan = True ):
223+ def process_multivariate_usgs (repo = "formatted" , data_path = None , pat = None , rescan = True ):
219224 """Identify and separate or combine multivariate USGS files.
220225 Sublocations are separated when they are known (typically the vertical ones, such as upper/lower).
221226 Otherwise the columns are aggregated and a value column containing their mean, ignoring NaNs, is added.
222227 Often only one column is active at a time, in which case this treatment is equivalent to selecting
223228 the one that is active.
224229 """
225230 logger .info ("Entering process_multivariate_usgs" )
226-
231+ actual_fpath = data_path if data_path is not None else repo_root ( repo )
227232 # TODO: straighten out how the data directory (formerly fpath) and pat are combined
228233 tempfile .tempdir = "."
229234 tmpdir = tempfile .TemporaryDirectory ()
230235
231236 if pat is None :
232- pat = fpath + "/ usgs*.csv"
237+ pat = os . path . join ( actual_fpath , " usgs*.csv")
233238 else :
234- pat = fpath + "/" + pat # "/usgs*.csv"
239+ pat = os .path .join (actual_fpath , pat )
240+
235241
236242 # This recreates or reuses the list of multivariate files. Whether a file is multivariate has
237243 # to be assessed over the full period of record.
@@ -246,7 +252,7 @@ def process_multivariate_usgs(fpath, pat=None, rescan=True):
246252
247253 for fn in filenames :
248254 direct , filepart = os .path .split (fn )
249- meta = interpret_fname (filepart )
255+ meta = interpret_fname (filepart , repo = "formatted" )
250256 station_id = meta ["station_id" ]
251257 param = meta ["param" ]
252258 logger .info (f"Working on { fn } , { station_id } , { param } " )
@@ -331,19 +337,24 @@ def process_multivariate_usgs(fpath, pat=None, rescan=True):
331337 for fdname in set_of_deletions :
332338 logger .debug (f"Removing { fdname } " )
333339 os .remove (fdname )
334- shutil .copytree (tmpdir .name , fpath , dirs_exist_ok = True )
340+ shutil .copytree (tmpdir .name , actual_fpath , dirs_exist_ok = True )
335341 del tmpdir
336342 logger .info ("Exiting process_multivariate_usgs" )
337343
338344
339345@click .command ()
340346@click .option ("--pat" , default = "usgs*.csv" , help = "Pattern of files to process" )
341- @click .option ("--fpath" , default = "." , help = "Directory of files to process." )
347+ @click .option ("--repo" , default = "formatted" , help = "Configured repo name for naming/parse rules." )
348+ @click .option (
349+ "--data-path" ,
350+ default = None ,
351+ help = "Directory containing the files. Defaults to the configured root of --repo." ,
352+ )
342353@click .option ("--logdir" , type = click .Path (path_type = Path ), default = None )
343354@click .option ("--debug" , is_flag = True )
344355@click .option ("--quiet" , is_flag = True )
345356@click .help_option ("-h" , "--help" )
346- def usgs_multi_cli (pat , fpath , logdir = None , debug = False , quiet = False ):
357+ def usgs_multi_cli (pat , repo , data_path , logdir = None , debug = False , quiet = False ):
347358 """CLI for processing multivariate USGS files."""
348359 # recatalogs the unique series. If false an old catalog will be used, which is useful
349360 # for sequential debugging.
@@ -360,7 +371,7 @@ def usgs_multi_cli(pat, fpath, logdir=None, debug=False, quiet=False):
360371 logdir = logdir ,
361372 logfile_prefix = "usgs_multi"
362373 )
363- process_multivariate_usgs (fpath = fpath , pat = pat , rescan = True )
374+ process_multivariate_usgs (repo = repo , data_path = data_path , pat = pat , rescan = True )
364375
365376
366377if __name__ == "__main__" :
0 commit comments