This repository was archived by the owner on Jun 12, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtbtl_json.py
More file actions
94 lines (73 loc) · 2.88 KB
/
tbtl_json.py
File metadata and controls
94 lines (73 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import argparse
import json
import re
import sys
# Command-line interface: one positional input path plus an optional
# comma-separated list of column names.
# NOTE: parse_args() runs at module level, so the arguments are read from
# sys.argv as soon as this module is executed or imported.
parser = argparse.ArgumentParser()
parser.add_argument(
    "input",
    help="input file to be parsed",
)
parser.add_argument(
    "--headers",
    help="define headers of columns, separated by commas",
    type=str,
)
args = parser.parse_args()
def parse_tablatal_data(tablatal_data: list, header_names: list = None) -> list:
    """ Convert tablatal data into list of dicts

    The header line is located with get_headers_and_entries(); every line
    after it becomes one dict built by create_entry(). Empty lines and
    lines whose first character is ';' are left out of the result.

    tablatal_data -- the lines of the tablatal file
    header_names  -- optional column names, forwarded to
                     get_headers_and_entries()
    """
    headers, rows = get_headers_and_entries(tablatal_data, header_names)
    return [
        create_entry(headers, row)
        for row in rows
        # Keep only non-empty rows that are not ';' comments.
        if row != '' and row[0] != ';'
    ]
def get_headers_and_entries(tablatal_data: list, header_names: list):
    """ Return headers and entries from data

    The header is the first line that is left unchanged by str.upper().
    Lines that are empty, contain only whitespace, or consist of a lone
    ';' (optionally surrounded by whitespace) are skipped first, because
    they are trivially equal to their upper-cased form and would otherwise
    be mistaken for the header. Lines containing lowercase letters fail
    the upper-case comparison and are passed over.

    tablatal_data -- the lines of the tablatal file
    header_names  -- optional column names, forwarded to
                     get_headers_info_from_line()

    Returns a (headers, entries) tuple: headers is the result of
    get_headers_info_from_line() for the header line, entries is every
    line that follows the header.

    Calls sys.exit() (raising SystemExit) with an explanatory message
    when no header line is found.
    """
    for index, line in enumerate(tablatal_data):
        # Blank / whitespace-only / bare-';' lines carry no column layout.
        if line.strip() in ('', ';'):
            continue
        if line == line.upper():
            headers = get_headers_info_from_line(line, header_names)
            return headers, tablatal_data[index + 1:]
    sys.exit('Header not found. Please use `--headers` flag.')
def get_headers_info_from_line(line: str, header_names: list = None) -> list:
    """ Return the column layout described by a header line

    Each run of word characters plus the whitespace that follows it is one
    column. When the line starts with ';' or a space, an extra leading
    column (named 'ID' by default) is added that covers the text up to the
    end of the first whitespace run.

    line         -- the header line
    header_names -- optional replacement names, one per column in order
                    (the leading ID column, when present, is column 0).
                    Columns beyond the end of this list keep the name
                    found in the header line.

    Returns a list of dicts with keys 'name', 'start' (column offset) and
    'length' (column width). The last column's 'length' is None, meaning
    it extends to the end of each line. An empty list is returned when the
    line describes no columns at all.
    """
    def column_name(position: int, default: str) -> str:
        # Prefer the caller-supplied name for this column when one exists.
        if header_names is not None and position < len(header_names):
            return header_names[position]
        return default

    headers = []
    # line[:1] instead of line[0] so that an empty line does not raise.
    if line[:1] in (';', ' '):
        first_gap = re.search(r'\s+', line)
        headers.append({'name': column_name(0, 'ID'),
                        'start': 0,
                        # Without any whitespace only the marker character
                        # itself belongs to the leading column.
                        'length': first_gap.end() if first_gap else 1})
    for field in re.finditer(r'\w+\s*', line):
        field_text = field.group()
        # len(headers) is this column's position whether or not the
        # leading ID column was added above.
        headers.append({'name': column_name(len(headers), field_text.strip()),
                        'start': field.start(),
                        'length': len(field_text)})
    if headers:
        # The final column is open-ended: it runs to the end of the line.
        headers[-1]['length'] = None
    return headers
def create_entry(headers: list, line: str) -> dict:
    """ Return entry from the line and headers

    Builds a dict keyed by each header's 'name', whose value is the text
    get_value_from_line() extracts from the line for that header.
    """
    return {
        column['name']: get_value_from_line(line, column)
        for column in headers
    }
def get_value_from_line(line: str, field_info: dict):
    """ Return the field value from the line

    field_info supplies the column's 'start' offset and 'length'; a
    'length' of None takes everything from 'start' to the end of the line.
    Trailing whitespace is removed, and None is returned when nothing is
    left after that.
    """
    start, width = field_info['start'], field_info['length']
    text = line[start:][:width].rstrip()
    # An empty string is falsy, so blank cells become None.
    return text or None
if __name__ == '__main__':
    # --headers is an optional comma-separated list; surrounding
    # whitespace is trimmed from each name.
    header_names = (
        None if args.headers is None
        else [name.strip() for name in args.headers.split(',')]
    )

    # Read the whole input file and break it into lines on '\n'.
    with open(args.input, 'r') as source:
        tablatal_entries = source.read().split('\n')

    # Convert the lines and write the result to stdout as JSON.
    parsed_data = parse_tablatal_data(tablatal_entries, header_names)
    print(json.dumps(parsed_data))