Skip to content

Commit 02b5226

Browse files
committed
implement tal adaptor subclass
1 parent ab07bb3 commit 02b5226

4 files changed

Lines changed: 338 additions & 4 deletions

File tree

circStudio/io/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55
from .dqt import read_dqt
66
from .mesa import read_mesa
77
from .rpx import read_rpx
8+
from .tal import read_tal
89

9-
10-
__all__ = ["Raw", "read_atr", "read_awd", "read_agd", "read_dqt", "read_mesa", "read_rpx"]
10+
__all__ = ["Raw", "read_atr", "read_awd", "read_agd", "read_dqt", "read_mesa", "read_rpx", "read_tal"]

circStudio/io/tal/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .tal import TAL
2+
from .tal import read_tal
3+
4+
5+
__all__ = ["TAL", "read_tal"]

circStudio/io/tal/tal.py

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
import pandas as pd
2+
import os
3+
import re
4+
5+
from ..base import Raw
6+
7+
8+
9+
class TAL(Raw):
    r"""Raw object from .txt file recorded by Tempatilumi (CE Brasil)

    The file is expected to start with a free-form header, followed by a
    column row beginning with ``Data<sep> Hora`` and then the data rows.

    Parameters
    ----------
    input_fname: str
        Path to the Tempatilumi file.
    sep: str, optional
        Delimiter to use.
        Default is ";".
    frequency: str, optional
        Sampling frequency.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        If None, the sampling frequency is inferred from the data. Otherwise,
        the data are resampled to the specified frequency.
        Default is None.
    start_time: datetime-like, optional
        Read data from this time.
        Default is None.
    period: str, optional
        Length of the read data.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        Default is None (i.e all the data).
    encoding: str, optional
        Encoding to use when reading the file.
        Default is "latin-1".

    Raises
    ------
    ValueError
        If the column row (``Data<sep> Hora``) cannot be found in the file,
        if the "Atividade" column is missing, or if no sampling frequency is
        given and none can be inferred from the data.
    """

    def __init__(
        self,
        input_fname,
        sep=';',
        frequency=None,
        start_time=None,
        period=None,
        encoding='latin-1'
    ):

        # get absolute file path
        input_fname = os.path.abspath(input_fname)

        # skip the header, then let pandas parse from the column row onwards
        with open(input_fname, encoding=encoding) as f:
            self._read_header(f, sep)
            index_data = pd.read_csv(
                filepath_or_buffer=f,
                skipinitialspace=True,
                sep=sep,
                index_col=False,
            )

        # Strip whitespace and combine date and time
        index_data['Date_Time'] = pd.to_datetime(
            index_data['Data'].str.strip()
            + ' '
            + index_data['Hora'].str.strip(),
            format='%Y/%m/%d %H:%M:%S'
        )

        # Set Date_Time as index
        index_data.set_index('Date_Time', inplace=True)

        # Check column names
        # Evento Temperatura Luminosidade Atividade
        if 'Atividade' not in index_data.columns:
            raise ValueError(
                'The activity counts cannot be found in the data.\n'
                'Column name in file header should be "Atividade".'
            )

        # Temperature and events are taken from the data as read from the
        # file, i.e. before the resampling and the start_time/period
        # selection performed below.
        self._temperature = self._extract_from_data(
            index_data, 'Temperatura'
        )

        self._events = self._extract_from_data(
            index_data, 'Evento'
        )

        if frequency is not None:
            # NOTE(review): sum() is applied to every column of the frame,
            # not only to the activity counts - confirm this is intended
            # for the other columns (e.g. 'Temperatura').
            index_data = index_data.resample(frequency).sum()
            freq = pd.Timedelta(frequency)
        elif not index_data.index.inferred_freq:
            raise ValueError(
                'The sampling frequency:\n'
                '- cannot be inferred from the data\n'
                'AND\n'
                '- is NOT explicity passed to the reader function.\n'
            )
        else:
            index_data = index_data.asfreq(index_data.index.inferred_freq)
            freq = pd.Timedelta(index_data.index.freq)

        if start_time is not None:
            start_time = pd.to_datetime(start_time)
        else:
            start_time = index_data.index[0]

        if period is not None:
            period = pd.Timedelta(period)
            stop_time = start_time+period
        else:
            stop_time = index_data.index[-1]
            period = stop_time - start_time

        index_data = index_data[start_time:stop_time]

        # Light: the column is optional, so only build the frame when the
        # column exists instead of calling .to_frame() on None.
        index_light = self._extract_from_data(index_data, 'Luminosidade')
        if index_light is not None:
            index_light = index_light.to_frame(name='light')

        # call __init__ function of the base class
        super().__init__(
            df=index_data,
            fpath=input_fname,
            start_time=start_time,
            period=period,
            frequency=freq,
            activity=index_data['Atividade'],
            light=index_light,
        )

    @property
    def white_light(self):
        r"""Value of the light intensity in µw/cm²."""
        # NOTE(review): the light data are handed to the base class as a
        # frame with a single 'light' column - confirm that ``self.light``
        # exposes ``get_channel`` and a "whitelight" channel.
        if self.light is None:
            return None
        else:
            return self.light.get_channel("whitelight")

    @property
    def temperature(self):
        r"""Value of the temperature (in ° C)."""
        return self._temperature

    @property
    def events(self):
        r"""Event markers."""
        return self._events

    @staticmethod
    def _read_header(f, sep):
        r"""Consume the header lines and rewind to the column row.

        Reads `f` line by line until a line starting with ``Data<sep> Hora``
        is met, then seeks back to the beginning of that line so that it can
        be parsed as the column row.

        Returns the list of header lines that precede the column row.

        Raises ValueError if the end of the file is reached first (the
        original loop would never terminate in that case).
        """
        column_row_start = sep.join(["Data", " Hora"])
        header = []
        while True:
            pos = f.tell()
            cur_line = f.readline()
            if not cur_line:
                # readline() returns '' only at end of file
                raise ValueError(
                    'The data cannot be found in the file.\n'
                    'No line starting with "{}" was found.'.format(
                        column_row_start
                    )
                )
            if cur_line.startswith(column_row_start):
                f.seek(pos)
                return header
            header.append(cur_line)

    @classmethod
    def _extract_tal_uuid(cls, header):
        r"""Return the digits following "Série: " in the header lines.

        Raises ValueError if the pattern is not found.
        """
        match = re.search(r'Série: (\d+)', ''.join(header))
        if not match:
            raise ValueError('UUID cannot be extracted from the file header.')
        return match.group(1)

    @classmethod
    def _extract_from_data(cls, data, key):
        r"""Return the column `key` of `data`, or None if it is absent."""
        if key in data.columns:
            return data[key]
        else:
            return None
166+
167+
168+
def read_tal(
    input_fname,
    sep=';',
    frequency=None,
    start_time=None,
    period=None,
    encoding='latin-1'
):
    r"""Reader function for .txt files recorded by Tempatilumi (CE Brasil)

    Convenience wrapper: all arguments are forwarded, unchanged, to the
    constructor of the TAL class.

    Parameters
    ----------
    input_fname: str
        Path to the Tempatilumi file.
    sep: str, optional
        Delimiter to use.
        Default is ';'.
    frequency: str, optional
        Sampling frequency.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        If None, the sampling frequency is inferred from the data. Otherwise,
        the data are resampled to the specified frequency.
        Default is None.
    start_time: datetime-like, optional
        Read data from this time.
        Default is None.
    period: str, optional
        Length of the read data.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        Default is None (i.e all the data).
    encoding: str, optional
        Encoding to use when reading the file.
        Default is 'latin-1'.

    Returns
    -------
    raw : Instance of TAL
        An object containing raw TAL data
    """

    # Gather the optional settings once, then hand everything to TAL.
    reader_options = {
        'sep': sep,
        'frequency': frequency,
        'start_time': start_time,
        'period': period,
        'encoding': encoding,
    }
    return TAL(input_fname=input_fname, **reader_options)

docs/source/opening_files.ipynb

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,10 +191,122 @@
191191
},
192192
{
193193
"cell_type": "code",
194-
"execution_count": null,
194+
"execution_count": 4,
195195
"metadata": {},
196196
"outputs": [],
197-
"source": []
197+
"source": [
198+
"raw = circStudio.io.read_tal(os.path.join(fpath, 'data', 'test_sample_tal.txt'))"
199+
]
200+
},
201+
{
202+
"cell_type": "code",
203+
"execution_count": 6,
204+
"metadata": {},
205+
"outputs": [
206+
{
207+
"data": {
208+
"text/html": [
209+
"<div>\n",
210+
"<style scoped>\n",
211+
" .dataframe tbody tr th:only-of-type {\n",
212+
" vertical-align: middle;\n",
213+
" }\n",
214+
"\n",
215+
" .dataframe tbody tr th {\n",
216+
" vertical-align: top;\n",
217+
" }\n",
218+
"\n",
219+
" .dataframe thead th {\n",
220+
" text-align: right;\n",
221+
" }\n",
222+
"</style>\n",
223+
"<table border=\"1\" class=\"dataframe\">\n",
224+
" <thead>\n",
225+
" <tr style=\"text-align: right;\">\n",
226+
" <th></th>\n",
227+
" <th>light</th>\n",
228+
" </tr>\n",
229+
" <tr>\n",
230+
" <th>Date_Time</th>\n",
231+
" <th></th>\n",
232+
" </tr>\n",
233+
" </thead>\n",
234+
" <tbody>\n",
235+
" <tr>\n",
236+
" <th>2000-01-01 00:00:00</th>\n",
237+
" <td>0</td>\n",
238+
" </tr>\n",
239+
" <tr>\n",
240+
" <th>2000-01-01 00:01:00</th>\n",
241+
" <td>0</td>\n",
242+
" </tr>\n",
243+
" <tr>\n",
244+
" <th>2000-01-01 00:02:00</th>\n",
245+
" <td>0</td>\n",
246+
" </tr>\n",
247+
" <tr>\n",
248+
" <th>2000-01-01 00:03:00</th>\n",
249+
" <td>0</td>\n",
250+
" </tr>\n",
251+
" <tr>\n",
252+
" <th>2000-01-01 00:04:00</th>\n",
253+
" <td>0</td>\n",
254+
" </tr>\n",
255+
" <tr>\n",
256+
" <th>...</th>\n",
257+
" <td>...</td>\n",
258+
" </tr>\n",
259+
" <tr>\n",
260+
" <th>2000-01-07 23:55:00</th>\n",
261+
" <td>0</td>\n",
262+
" </tr>\n",
263+
" <tr>\n",
264+
" <th>2000-01-07 23:56:00</th>\n",
265+
" <td>0</td>\n",
266+
" </tr>\n",
267+
" <tr>\n",
268+
" <th>2000-01-07 23:57:00</th>\n",
269+
" <td>0</td>\n",
270+
" </tr>\n",
271+
" <tr>\n",
272+
" <th>2000-01-07 23:58:00</th>\n",
273+
" <td>0</td>\n",
274+
" </tr>\n",
275+
" <tr>\n",
276+
" <th>2000-01-07 23:59:00</th>\n",
277+
" <td>0</td>\n",
278+
" </tr>\n",
279+
" </tbody>\n",
280+
"</table>\n",
281+
"<p>10080 rows × 1 columns</p>\n",
282+
"</div>"
283+
],
284+
"text/plain": [
285+
" light\n",
286+
"Date_Time \n",
287+
"2000-01-01 00:00:00 0\n",
288+
"2000-01-01 00:01:00 0\n",
289+
"2000-01-01 00:02:00 0\n",
290+
"2000-01-01 00:03:00 0\n",
291+
"2000-01-01 00:04:00 0\n",
292+
"... ...\n",
293+
"2000-01-07 23:55:00 0\n",
294+
"2000-01-07 23:56:00 0\n",
295+
"2000-01-07 23:57:00 0\n",
296+
"2000-01-07 23:58:00 0\n",
297+
"2000-01-07 23:59:00 0\n",
298+
"\n",
299+
"[10080 rows x 1 columns]"
300+
]
301+
},
302+
"execution_count": 6,
303+
"metadata": {},
304+
"output_type": "execute_result"
305+
}
306+
],
307+
"source": [
308+
"raw.light"
309+
]
198310
},
199311
{
200312
"cell_type": "markdown",

0 commit comments

Comments
 (0)