1+ import pandas as pd
2+ import os
3+ import re
4+
5+ from ..base import Raw
6+
7+
8+
class TAL(Raw):
    r"""Raw object from .txt file recorded by Tempatilumi (CE Brasil)

    The file is expected to contain free-form header lines followed by a
    delimited table whose first line starts with ``Data<sep> Hora``. The
    ``Data`` and ``Hora`` columns are combined into a datetime index
    (format ``%Y/%m/%d %H:%M:%S``).

    Parameters
    ----------
    input_fname: str
        Path to the Tempatilumi file.
    sep: str, optional
        Delimiter to use.
        Default is ";".
    frequency: str, optional
        Sampling frequency.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        If None, the sampling frequency is inferred from the data. Otherwise,
        the data are resampled (summed) to the specified frequency.
        Default is None.
    start_time: datetime-like, optional
        Read data from this time.
        Default is None (i.e. the first timestamp of the data).
    period: str, optional
        Length of the read data.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        Default is None (i.e all the data).
    encoding: str, optional
        Encoding to use when reading the file.
        Default is "latin-1".

    Raises
    ------
    ValueError
        If the line starting the data table cannot be found, if the
        "Atividade" column is missing, or if the sampling frequency can
        neither be inferred from the data nor is passed explicitly.
    """

    def __init__(
        self,
        input_fname,
        sep=';',
        frequency=None,
        start_time=None,
        period=None,
        encoding='latin-1'
    ):

        # get absolute file path
        input_fname = os.path.abspath(input_fname)

        # skip the header, then parse the data table
        with open(input_fname, encoding=encoding) as f:
            # The returned header lines are not used by this constructor.
            self._skip_header(f, sep.join(["Data", " Hora"]))
            index_data = pd.read_csv(
                filepath_or_buffer=f,
                skipinitialspace=True,
                sep=sep,
                index_col=False,
            )

        # Strip whitespace and combine date and time
        index_data['Date_Time'] = pd.to_datetime(
            index_data['Data'].str.strip()
            + ' '
            + index_data['Hora'].str.strip(),
            format='%Y/%m/%d %H:%M:%S'
        )

        # Set Date_Time as index
        index_data.set_index('Date_Time', inplace=True)

        # Check column names
        # Evento Temperatura Luminosidade Atividade
        if 'Atividade' not in index_data.columns:
            raise ValueError(
                'The activity counts cannot be found in the data.\n '
                'Column name in file header should be "Atividade".'
            )

        # NOTE(review): temperature and events are taken from the data
        # before resampling and before the start_time/period crop, unlike
        # activity and light below - confirm this is intended.
        self._temperature = self._extract_from_data(
            index_data, 'Temperatura'
        )

        self._events = self._extract_from_data(
            index_data, 'Evento'
        )

        if frequency is not None:
            index_data = index_data.resample(frequency).sum()
            freq = pd.Timedelta(frequency)
        elif not index_data.index.inferred_freq:
            raise ValueError(
                'The sampling frequency:\n '
                '- cannot be inferred from the data\n '
                'AND\n '
                '- is NOT explicity passed to the reader function.\n '
            )
        else:
            index_data = index_data.asfreq(index_data.index.inferred_freq)
            freq = pd.Timedelta(index_data.index.freq)

        if start_time is not None:
            start_time = pd.to_datetime(start_time)
        else:
            start_time = index_data.index[0]

        if period is not None:
            period = pd.Timedelta(period)
            stop_time = start_time + period
        else:
            stop_time = index_data.index[-1]
            period = stop_time - start_time

        index_data = index_data[start_time:stop_time]

        # Light: the column is optional, so only build a frame when present.
        # Assumes the base class accepts light=None, as suggested by the
        # `self.light is None` check in `white_light` - TODO confirm.
        index_light = self._extract_from_data(index_data, 'Luminosidade')
        if index_light is not None:
            light = index_light.to_frame(name='light')
        else:
            light = None

        # call __init__ function of the base class
        super().__init__(
            df=index_data,
            fpath=input_fname,
            start_time=start_time,
            period=period,
            frequency=freq,
            activity=index_data['Atividade'],
            light=light,
        )

    @property
    def white_light(self):
        r"""Value of the light intensity in µw/cm²."""
        # NOTE(review): `get_channel` must be provided by whatever object the
        # base class exposes as `self.light`; not visible from this file.
        if self.light is None:
            return None
        else:
            return self.light.get_channel("whitelight")

    @property
    def temperature(self):
        r"""Value of the temperature (in ° C)."""
        return self._temperature

    @property
    def events(self):
        r"""Event markers."""
        return self._events

    @staticmethod
    def _skip_header(f, marker):
        r"""Position `f` at the first line starting with `marker`.

        Parameters
        ----------
        f: file object
            Text file opened for reading.
        marker: str
            Prefix identifying the first line of the data table.

        Returns
        -------
        header: list of str
            Lines read before the marker line.

        Raises
        ------
        ValueError
            If the end of the file is reached without finding `marker`.
        """
        header = []
        pos = 0
        cur_line = f.readline()
        while not cur_line.startswith(marker):
            # readline() returns '' only at EOF; without this guard the loop
            # would never terminate on a file lacking the marker line.
            if not cur_line:
                raise ValueError(
                    'The data table cannot be found in the file: '
                    'no line starts with "{}".'.format(marker)
                )
            header.append(cur_line)
            pos = f.tell()
            cur_line = f.readline()
        # rewind to the start of the marker line so it is parsed as the
        # column-name row
        f.seek(pos)
        return header

    @classmethod
    def _extract_tal_uuid(cls, header):
        r"""Return the device serial number found in the header lines.

        Raises
        ------
        ValueError
            If no "Série: <digits>" pattern is found in the header.
        """
        match = re.search(r'Série: (\d+)', ''.join(header))
        if not match:
            raise ValueError('UUID cannot be extracted from the file header.')
        return match.group(1)

    @classmethod
    def _extract_from_data(cls, data, key):
        r"""Return column `key` of `data`, or None if it is absent."""
        if key in data.columns:
            return data[key]
        else:
            return None
166+
167+
def read_tal(
    input_fname,
    sep=';',
    frequency=None,
    start_time=None,
    period=None,
    encoding='latin-1'
):
    r"""Read a .txt file recorded by Tempatilumi (CE Brasil)

    Convenience function that builds a TAL object from the given file and
    reading options.

    Parameters
    ----------
    input_fname: str
        Path to the Tempatilumi file.
    sep: str, optional
        Delimiter to use.
        Default is ';'.
    frequency: str, optional
        Sampling frequency.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        If None, the sampling frequency is inferred from the data. Otherwise,
        the data are resampled to the specified frequency.
        Default is None.
    start_time: datetime-like, optional
        Read data from this time.
        Default is None.
    period: str, optional
        Length of the read data.
        Cf. #timeseries-offset-aliases in
        <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        Default is None (i.e all the data).
    encoding: str, optional
        Encoding to use when reading the file.
        Default is 'latin-1'.

    Returns
    -------
    raw : Instance of TAL
        An object containing raw TAL data
    """

    # Gather the reading options once and forward them unchanged.
    reader_options = {
        'sep': sep,
        'frequency': frequency,
        'start_time': start_time,
        'period': period,
        'encoding': encoding,
    }
    return TAL(input_fname, **reader_options)
0 commit comments