Skip to content

Commit 0f09843

Browse files
Eli
authored and committed
Updated usgs_multi to use new templated read/write.
1 parent 42ca33c commit 0f09843

1 file changed

Lines changed: 22 additions & 11 deletions

File tree

dms_datastore/usgs_multi.py

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,11 @@ def _quarantine_file(fname, quarantine_dir="quarantine"):
2929

3030
def usgs_scan_series_json(fname):
3131
hdr = read_yaml_header(fname)
32-
orig = yaml.safe_load(hdr["original_header"])
32+
33+
orig_txt = hdr["original_header"]
34+
if orig_txt is None:
35+
raise ValueError("No original_header present")
36+
orig = parse_yaml_header(orig_txt)
3337
subs = orig["sublocations"]
3438
var = orig["variable_code"]
3539
series = [(str(s["subloc"]), var, s["method_description"]) for s in subs]
@@ -123,7 +127,7 @@ def usgs_multivariate(pat, outfile):
123127
files = glob.glob(pat)
124128
data = []
125129
for fname in files:
126-
meta = interpret_fname(fname)
130+
meta = interpret_fname(fname, repo="formatted")
127131
try:
128132
ts = read_ts(fname, nrows=4000)
129133
except:
@@ -132,6 +136,7 @@ def usgs_multivariate(pat, outfile):
132136

133137
multi_cols = ts.shape[1] > 1
134138
subloc_df = sublocation_df()
139+
135140
station_id = meta["station_id"]
136141
param = meta["param"]
137142
known_multi = (subloc_df["station_id"] == station_id).any()
@@ -215,23 +220,24 @@ def usgs_multivariate(pat, outfile):
215220
return df
216221

217222

218-
def process_multivariate_usgs(fpath, pat=None, rescan=True):
223+
def process_multivariate_usgs(repo="formatted", data_path=None, pat=None, rescan=True):
219224
"""Identify and separate or combine multivariate USGS files.
220225
Separate sublocations if they are known (typically the vertical ones like upper/lower)
221226
Otherwise aggregates the columns and adds a value column containing their mean ignoring nans.
222227
Often only one is active at a time and in this case the treatment is equivalent to selecting
223228
the one that is active
224229
"""
225230
logger.info("Entering process_multivariate_usgs")
226-
231+
actual_fpath = data_path if data_path is not None else repo_root(repo)
227232
# todo: straighten out fpath and pat stuff
228233
tempfile.tempdir = "."
229234
tmpdir = tempfile.TemporaryDirectory()
230235

231236
if pat is None:
232-
pat = fpath + "/usgs*.csv"
237+
pat = os.path.join(actual_fpath, "usgs*.csv")
233238
else:
234-
pat = fpath + "/" + pat # "/usgs*.csv"
239+
pat = os.path.join(actual_fpath, pat)
240+
235241

236242
# This recreates or reuses list of multivariate files. Being multivariate is something that has
237243
# to be assessed over the full period of record
@@ -246,7 +252,7 @@ def process_multivariate_usgs(fpath, pat=None, rescan=True):
246252

247253
for fn in filenames:
248254
direct, filepart = os.path.split(fn)
249-
meta = interpret_fname(filepart)
255+
meta = interpret_fname(filepart, repo="formatted")
250256
station_id = meta["station_id"]
251257
param = meta["param"]
252258
logger.info(f"Working on {fn}, {station_id}, {param}")
@@ -331,19 +337,24 @@ def process_multivariate_usgs(fpath, pat=None, rescan=True):
331337
for fdname in set_of_deletions:
332338
logger.debug(f"Removing {fdname}")
333339
os.remove(fdname)
334-
shutil.copytree(tmpdir.name, fpath, dirs_exist_ok=True)
340+
shutil.copytree(tmpdir.name, actual_fpath, dirs_exist_ok=True)
335341
del tmpdir
336342
logger.info("Exiting process_multivariate_usgs")
337343

338344

339345
@click.command()
340346
@click.option("--pat", default="usgs*.csv", help="Pattern of files to process")
341-
@click.option("--fpath", default=".", help="Directory of files to process.")
347+
@click.option("--repo", default="formatted", help="Configured repo name for naming/parse rules.")
348+
@click.option(
349+
"--data-path",
350+
default=None,
351+
help="Directory containing the files. Defaults to the configured root of --repo.",
352+
)
342353
@click.option("--logdir", type=click.Path(path_type=Path), default=None)
343354
@click.option("--debug", is_flag=True)
344355
@click.option("--quiet", is_flag=True)
345356
@click.help_option("-h", "--help")
346-
def usgs_multi_cli(pat, fpath, logdir=None, debug=False, quiet=False):
357+
def usgs_multi_cli(pat, repo, data_path, logdir=None, debug=False, quiet=False):
347358
"""CLI for processing multivariate USGS files."""
348359
# recatalogs the unique series. If false an old catalog will be used, which is useful
349360
# for sequential debugging.
@@ -360,7 +371,7 @@ def usgs_multi_cli(pat, fpath, logdir=None, debug=False, quiet=False):
360371
logdir=logdir,
361372
logfile_prefix="usgs_multi"
362373
)
363-
process_multivariate_usgs(fpath=fpath, pat=pat, rescan=True)
374+
process_multivariate_usgs(repo=repo, data_path=data_path, pat=pat, rescan=True)
364375

365376

366377
if __name__ == "__main__":

0 commit comments

Comments
 (0)