@@ -17,7 +17,6 @@ def main(base_dir="data/raw/dxc_gate"):
1717 """
1818 Download the Delta Cross Channel gate log from the US Bureau of Reclamation
1919 https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf
20-
2120 """
2221 utils .ensure_dir (base_dir )
2322 today = datetime .datetime .now ()
@@ -31,7 +30,7 @@ def main(base_dir="data/raw/dxc_gate"):
3130 fh .write (response .content )
3231 pages = tabula .read_pdf (
3332 pdfname , pages = "all" , guess = False , encoding = "ISO-8859-1" # for windows maybe?
34- ) # columns=['date','time','remarks'])
33+ ) # columns=['date','time', 'action', 'remarks'])
3534 df = pd .concat (pages )
3635 df .columns = ["date" , "time" , "value" ]
3736 df = df .dropna ()
@@ -41,15 +40,26 @@ def main(base_dir="data/raw/dxc_gate"):
4140 df = df [["datetime" , "value" ]]
4241 df = df .set_index ("datetime" )
4342 df = df .sort_index ()
44- df ["action" ] = df ["value" ].str .split (expand = True )[0 ]
45- df ["comments" ] = df ["value" ].str .split (). map ( lambda x : " " . join ( x [ 1 :]))
43+ df ["action" ] = df ["value" ].str .split (n = 1 , expand = True )[0 ]
44+ df ["comments" ] = df ["value" ].str .split (n = 1 , expand = True )[ 1 ]
4645 df = df .drop (columns = ["value" ])
47- # df['action'].unique()
48- df ["action" ] = (
49- df ["action" ]
50- .map ({"open" : 2 , "closed" : 0 , "gate" : 0 , "partially" : 1 , "-" : 0 , "close" : 0 })
51- .astype ("int" )
46+ df .loc [df ["comments" ].str .strip () == "-" , "comments" ] = ""
47+ df .loc [df ["comments" ].isna (), "comments" ] = ""
48+ df ["comments" ] = df ["comments" ].str .strip ()
49+ df ["action" ] = df ["action" ].map (
50+ {
51+ "open" : "open" ,
52+ "closed" : "closed" ,
53+ "gate" : "closed" ,
54+ "partially" : "partially open" ,
55+ "-" : "closed" ,
56+ "close" : "closed" ,
57+ }
5258 )
5359 conv_dir = os .path .dirname (pdfname ).replace ("/raw/" , "/converted/" )
5460 utils .ensure_dir (conv_dir )
5561 df .to_csv (os .path .join (conv_dir , fname .split ("." )[0 ] + ".csv" ))
62+
63+
64+ if __name__ == "__main__" :
65+ main ()
0 commit comments