Skip to content

Commit 6e82519

Browse files
committed
DD-124 download_dcc.py to improve action and comments parsing
1 parent da31259 commit 6e82519

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

dms_datastore/download_dcc.py

Lines changed: 19 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@ def main(base_dir="data/raw/dxc_gate"):
1717
"""
1818
Download the Delta Cross Channel gate log from the US Bureau of Reclamation
1919
https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf
20-
2120
"""
2221
utils.ensure_dir(base_dir)
2322
today = datetime.datetime.now()
@@ -31,7 +30,7 @@ def main(base_dir="data/raw/dxc_gate"):
3130
fh.write(response.content)
3231
pages = tabula.read_pdf(
3332
pdfname, pages="all", guess=False, encoding="ISO-8859-1" # for windows maybe?
34-
) # columns=['date','time','remarks'])
33+
) # columns=['date','time', 'action', 'remarks'])
3534
df = pd.concat(pages)
3635
df.columns = ["date", "time", "value"]
3736
df = df.dropna()
@@ -41,15 +40,26 @@ def main(base_dir="data/raw/dxc_gate"):
4140
df = df[["datetime", "value"]]
4241
df = df.set_index("datetime")
4342
df = df.sort_index()
44-
df["action"] = df["value"].str.split(expand=True)[0]
45-
df["comments"] = df["value"].str.split().map(lambda x: " ".join(x[1:]))
43+
df["action"] = df["value"].str.split(n=1, expand=True)[0]
44+
df["comments"] = df["value"].str.split(n=1, expand=True)[1]
4645
df = df.drop(columns=["value"])
47-
# df['action'].unique()
48-
df["action"] = (
49-
df["action"]
50-
.map({"open": 2, "closed": 0, "gate": 0, "partially": 1, "-": 0, "close": 0})
51-
.astype("int")
46+
df.loc[df["comments"].str.strip() == "-", "comments"] = ""
47+
df.loc[df["comments"].isna(), "comments"] = ""
48+
df["comments"] = df["comments"].str.strip()
49+
df["action"] = df["action"].map(
50+
{
51+
"open": "open",
52+
"closed": "closed",
53+
"gate": "closed",
54+
"partially": "partially open",
55+
"-": "closed",
56+
"close": "closed",
57+
}
5258
)
5359
conv_dir = os.path.dirname(pdfname).replace("/raw/", "/converted/")
5460
utils.ensure_dir(conv_dir)
5561
df.to_csv(os.path.join(conv_dir, fname.split(".")[0] + ".csv"))
62+
63+
64+
if __name__ == "__main__":
65+
main()

0 commit comments

Comments
 (0)