-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_chan.py
More file actions
66 lines (48 loc) · 1.7 KB
/
extract_chan.py
File metadata and controls
66 lines (48 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import json
import numpy as np
import camelot
import os
import hashlib
def parse_float_fix_minus(s):
    """Parse a table cell into a float, repairing a mis-extracted minus sign.

    All space characters are removed first. If the remaining text starts
    with the character '2', that character is treated as a minus sign that
    was mis-parsed as '2' and is replaced by '-' before conversion.

    Args:
        s: Cell text, e.g. ``'2 0.5'``.

    Returns:
        The parsed value as a float.

    Raises:
        ValueError: If the text is empty after removing spaces, or is not a
            valid float literal after the minus-sign repair.
    """
    # remove spaces
    compact = s.replace(' ', '')
    # fix minus which is parsed as 2; startswith() is used instead of
    # indexing so that an empty cell reaches float() and fails with the
    # same ValueError as any other unparsable cell
    if compact.startswith('2'):
        compact = '-' + compact[1:]
    return float(compact)
def main(fname, prefix=''):
    """Extract one table from page 6 of a PDF and dump selected rows as JSON.

    The PDF is read with camelot (``flavor='stream'``) and the last table
    found on page 6 is used. Rows are located by the text in their first
    column; every remaining cell of a located row is converted to a float.
    The result is written to ``prefix + 'e_chan.json'``.

    Args:
        fname: Path of the PDF file to read.
        prefix: String prepended verbatim to the output file name.

    Raises:
        IndexError: If no row has an empty first column.
        ValueError: If one of the labelled rows is not found exactly once,
            or a cell cannot be converted to a float.
    """
    frame = camelot.read_pdf(fname, flavor='stream', pages='6')[-1].df
    first_column = frame[0]

    def matching_rows(label):
        # positional indices of the rows whose first column equals `label`
        positions, = np.where(first_column == label)
        return positions.tolist()

    data_chan = {}

    # 'r' comes from the last row whose first column is empty; the
    # characters 'a', ' ' and '0' are stripped from both ends of each cell
    # (presumably a unit suffix -- confirm against the PDF)
    r_row = matching_rows('')[-1]
    data_chan['r'] = [float(cell.strip('a 0')) for cell in frame.iloc[r_row][1:]]

    # each of these labels must match exactly one row
    for label, key in (
        ('UHF', 'uhf'),
        ('DMRG: 4000', 'dmrg4000_offset'),
        ('CCSD', 'ccsd_offset'),
        ('CCSD~T!', 'ccsd_t_offset'),
    ):
        row, = matching_rows(label)
        data_chan[key] = [parse_float_fix_minus(cell) for cell in frame.iloc[row][1:]]

    with open(prefix + 'e_chan.json', 'w') as out:
        json.dump(data_chan, out)
if __name__ == '__main__':
    # Script entry point: make sure the output directory exists, check that
    # the source PDF has been downloaded, optionally verify its checksum,
    # then run the extraction.
    fname = '6110_1_online.pdf'
    # the file is generated with a different cover page every time, so the
    # hash can't be checked
    md5 = None
    url = 'https://doi.org/10.1063/1.1783212'
    prefix = './reference/'

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(prefix, exist_ok=True)

    if not os.path.isfile(fname):
        print('file not found, please download', fname)
        print('e.g. from ', url)
        # exit() is a convenience injected by the site module and may be
        # unavailable; a bare SystemExit ends the script with the same
        # exit status (0) as before
        raise SystemExit

    if md5 is not None:
        # close the file deterministically, and raise explicitly because an
        # assert would be stripped when running under -O
        with open(fname, 'rb') as pdf:
            digest = hashlib.md5(pdf.read()).hexdigest()
        if digest != md5:
            raise ValueError('md5 mismatch for ' + fname)

    main(fname, prefix=prefix)