-
Notifications
You must be signed in to change notification settings - Fork 52
Expand file tree
/
Copy pathagt_parser.py
More file actions
executable file
·172 lines (148 loc) · 6.08 KB
/
agt_parser.py
File metadata and controls
executable file
·172 lines (148 loc) · 6.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python
from argparse import ArgumentParser
from datetime import datetime
import re
import sys

import numpy as np
import pandas as pd
class AgtException(Exception):
    '''Base class for Agt file parser exceptions.

    The human-readable description is kept in the ``message`` attribute,
    which is also what ``str()`` returns.
    '''

    def __init__(self, message=''):
        '''Constructor, stores the message.

        The message is also forwarded to ``Exception`` so that ``args``
        (and therefore ``repr()``) carries it.
        '''
        super().__init__(message)
        self.message = message

    def __str__(self):
        '''Return the stored message.'''
        return self.message


class NoAgtDataSectionError(AgtException):
    '''Exception raised when no data header is present in the file.'''

    def __init__(self):
        '''Constructor, sets the fixed message.'''
        super().__init__('No data section found')


class AgtDataError(AgtException):
    '''Exception raised when a problem with the data is detected.'''

    def __init__(self, msg):
        '''Constructor that sets the message to msg.'''
        super().__init__(msg)
class AgtParser(object):
    '''Parser for agt files that contain sensor data, i.e., timestamps,
    temperature, and pressure.  These files also contain meta-information
    that is ignored.

    The parser reads the file in three stages: everything up to and
    including the data header line is skipped, then the data section
    (a line starting with the number of measurements, a column header
    line, and the measurement lines) is read, and finally the line
    following the data is checked against the footer marker.
    '''

    def __init__(self, dt_fmt='%Y/%m/%d %H:%M:%S', sep=';',
                 data_header='[Data]', footer_header='END OF DATA',
                 columns=('pressure', 'temperature')):
        '''Constructor for a new parser.

        dt_fmt: strptime format of the timestamp field
        sep: field separator used in the measurement lines
        data_header: prefix of the line that precedes the data section
        footer_header: prefix of the line that follows the data section
        columns: names of the measurement columns, in file order
        '''
        self._dt_fmt = dt_fmt
        self._data_header = data_header
        self._footer_header = footer_header
        self._columns = columns
        self._sep = sep
        # one timestamp field plus one field per measurement column
        self._nr_cols = len(columns) + 1
        self._current_line = None

    def parse(self, file_name, encoding='latin-1'):
        '''parse the file with the given name, return a pandas
        DataFrame with the timestamp as index, and the measurement
        columns (pressure and temperature by default) as columns.

        Raises NoAgtDataSectionError when the data header is missing,
        and AgtDataError when the data section or footer is malformed.
        '''
        self._current_line = 0
        with open(file_name, 'r', encoding=encoding) as agt_file:
            self._parse_meta_data(agt_file)
            df = self._parse_data(agt_file)
            self._parse_footer(agt_file)
        return df

    def _parse_meta_data(self, agt_file):
        '''skip the meta data, leaving the file handle pointing to the
        start of the data section, i.e., the line below the data header.

        Raises NoAgtDataSectionError when no line starts with the data
        header.
        '''
        # Lines are consumed with readline() so the handle is already at
        # the right place when the header is found.  Seeking to a sum of
        # decoded line lengths is wrong whenever characters and bytes
        # differ (CRLF line endings, multi-byte encodings).
        for line in iter(agt_file.readline, ''):
            self._current_line += 1
            if line.startswith(self._data_header):
                return
        raise NoAgtDataSectionError()

    def _read_data(self, agt_file, nr_lines):
        '''read the actual data, this is a hand written parser.

        Reads nr_lines measurement lines and returns them as a
        DataFrame indexed by timestamp.  Raises AgtDataError when a line
        has the wrong number of fields, or when a timestamp or value
        can not be converted.
        '''
        indices = []
        measurements = np.empty((nr_lines, self._nr_cols - 1))
        for line_nr in range(nr_lines):
            self._current_line += 1
            line = agt_file.readline().rstrip()
            data = line.split(self._sep)
            if len(data) != self._nr_cols:
                msg = "line {0:d} is invalid: '{1}'"
                raise AgtDataError(msg.format(self._current_line, line))
            try:
                indices.append(datetime.strptime(data[0], self._dt_fmt))
                measurements[line_nr] = [float(value) for value in data[1:]]
            except ValueError as error:
                msg = 'line {0:d}: {1}'
                raise AgtDataError(
                    msg.format(self._current_line, error)) from error
        df = pd.DataFrame(measurements, index=indices,
                          columns=self._columns)
        df.index.name = 'timestamp'
        return df

    def _parse_data(self, agt_file):
        '''parse the data section, the file pointer is at the line below
        the header.

        Raises AgtDataError when the first line does not start with the
        number of measurements.
        '''
        nr_lines_str = agt_file.readline()
        # count the line before validating it, so that the error message
        # refers to the offending line rather than the one before it
        self._current_line += 1
        match = re.match(r'(\d+)', nr_lines_str)
        if not match:
            msg = "line {0:d}: invalid number of measurements '{1}'"
            raise AgtDataError(msg.format(self._current_line,
                                          nr_lines_str.rstrip()))
        nr_lines = int(match.group(1))
        # ignore header line
        agt_file.readline()
        self._current_line += 1
        return self._read_data(agt_file, nr_lines)

    def _parse_footer(self, agt_file):
        '''parse footer, i.e., file content after data.

        Raises AgtDataError when the line following the data does not
        start with the footer marker.
        '''
        line = agt_file.readline().rstrip()
        self._current_line += 1
        if not (line and line.startswith(self._footer_header)):
            msg = "line {0:d}: invalid footer '{1}'"
            raise AgtDataError(msg.format(self._current_line, line))
class AgtPandasParser(AgtParser):
    '''Parser for agt files that contain sensor data, i.e., timestamps,
    temperature, and pressure.  These files also contain meta-information
    that is ignored.

    Differs from AgtParser in that the measurement lines are read by
    pandas' read_csv rather than by a hand written loop, and in that the
    footer check only verifies that nothing readable is left.
    '''

    def _read_data(self, agt_file, nr_lines):
        '''read data using pandas methods

        Reads nr_lines rows from the current position of agt_file and
        returns a DataFrame indexed by timestamp.
        '''
        def dt_conv(dt):
            # pd.datetime was only an alias of datetime.datetime and is
            # no longer available in pandas 2.x
            return datetime.strptime(dt, self._dt_fmt)
        df = pd.read_csv(agt_file, sep=self._sep, converters={0: dt_conv},
                         names=(['timestamp'] + list(self._columns)),
                         nrows=nr_lines)
        df.set_index('timestamp', inplace=True)
        self._current_line += nr_lines
        return df

    def _parse_footer(self, agt_file):
        '''parse footer, i.e., file content after data.

        Raises AgtDataError when the next line read from the file is
        not empty.
        '''
        # Since the pandas read_csv method swallows an extra line, this
        # parser needs to check for an empty footer
        line = agt_file.readline().rstrip()
        self._current_line += 1
        if line:
            msg = "line {0:d}: invalid footer '{1}'"
            raise AgtDataError(msg.format(self._current_line, line))
if __name__ == '__main__':
    # Command line demo: parse the given file with both parser
    # implementations and print the first and last rows of each result.
    cli = ArgumentParser(
        description='parse AGT files, and print part of data')
    cli.add_argument('file', help='file to parse')
    cli.add_argument('--dt_fmt', default='%Y/%m/%d %H:%M:%S',
                     help='timestamp format to use')
    options = cli.parse_args()

    # Hand written parser: parse errors are reported on stderr, after
    # which the script carries on with the pandas based parser.
    hand_parser = AgtParser(dt_fmt=options.dt_fmt)
    try:
        frame = hand_parser.parse(options.file)
    except AgtException as error:
        print('Parse error at {0}'.format(error), file=sys.stderr)
    else:
        for part in (frame.head(), '...', frame.tail()):
            print(part)

    # Pandas based parser: exceptions are not caught here.
    pandas_parser = AgtPandasParser(dt_fmt=options.dt_fmt)
    frame = pandas_parser.parse(options.file)
    for part in (frame.head(), '...', frame.tail()):
        print(part)