@@ -178,7 +178,7 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
178178
179179 # Other options
180180 for k , v in self .options .items ():
181- args += [ f'--{ k } ' , v ]
181+ args += [ f'--{ k } ' , str ( v ) ]
182182
183183 cmd = ' ' .join (args )
184184 try :
@@ -194,8 +194,9 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
194194 # TODO: support feature names. Separate output file, with --features
195195 out = numpy .load (features_path )
196196 windows = pandas .DataFrame (out )
197+ # FIXME: support reading times, instead of inferring them
197198 span = (data .index .max () - data .index .min ()).total_seconds ()
198- dt = span / len (windows ) # XXX: make correct
199+ dt = span / len (windows )
199200 windows ['time' ] = dt * numpy .arange (len (windows ))
200201 elif self .serialization == 'csv' :
201202 windows = pandas .read_csv (features_path )
@@ -208,7 +209,7 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
208209 time_in = data .index
209210 time_out = windows ['time' ]
210211
211- window_duration = pandas .Timedelta (5 .0 , unit = 's' ) # XXX: hardcoded
212+ window_duration = pandas .Timedelta (4 .0 , unit = 's' ) # XXX: hardcoded
212213 start_delta = time_out .min () - time_in .min ()
213214 assert abs (start_delta ) <= window_duration , (start_delta , time_out .min (), time_in .min ())
214215 end_delta = time_out .max () - time_in .max ()
@@ -300,12 +301,29 @@ def process_one(idx, stream : pandas.DataFrame) -> pandas.DataFrame:
300301 # for any time-dependent logic to stabilize, and to merge while ignoring the run-in
301302 def split_sections (data , groupby : list [str ], time_column = 'time' ):
302303 groups = sensordata .groupby (groupby , observed = True )
303- for group_idx , group_df in groups :
304+ for group_idx , df in groups :
304305
305306 # ensure sorted by time
306- group_df = group_df .reset_index ().set_index (time_column ).sort_index ()
307+ df = df .reset_index ()
308+ print ('d' , df .columns )
307309
308- yield group_idx , group_df
310+ # convert to time-delta, if needed
311+ if pandas .api .types .is_datetime64_dtype (df [time_column ]):
312+ df [time_column ] = df [time_column ] - df [time_column ].min ()
313+
314+ df = df .set_index (time_column ).sort_index ()
315+
316+
317+ samplerate = 50
318+ expected_freq = pandas .Timedelta (1 / samplerate , unit = 's' )
319+ diff = df .index .to_series ().diff ()
320+ holes = diff [diff > expected_freq ]
321+ irregular = diff [diff != expected_freq ].dropna ()
322+ assert irregular .empty , irregular
323+
324+ assert pandas .api .types .is_timedelta64_dtype (df .index )
325+
326+ yield group_idx , df
309327
310328 sections = split_sections (sensordata , groupby = groupby , time_column = time_column )
311329 jobs = [ joblib .delayed (process_one )(idx , df ) for idx , df in sections ]
@@ -408,10 +426,18 @@ def run_pipeline(run, hyperparameters, dataset,
408426 features = features ,
409427 )
410428 window_length = model_settings ['window_length' ]
411- samplerate = model_settings .get ('samplerate' , 100 )
412-
429+ samplerate = dataset_config .get ('samplerate' , 100 )
430+ window_hop = window_length
431+
413432 window_duration = (window_length / samplerate )
414433
434+ remap = {
435+ 'x' : 'acc_x' ,
436+ 'y' : 'acc_y' ,
437+ 'z' : 'acc_z' ,
438+ }
439+ data = data .rename (columns = remap )
440+
415441 # Setup feature extraction
416442 if features == 'timebased' :
417443 columns = ['acc_x' , 'acc_y' , 'acc_z' ]
@@ -420,12 +446,13 @@ def run_pipeline(run, hyperparameters, dataset,
420446 elif features == 'custom' :
421447 # FIXME: unhardcode path
422448 executable = ['/home/jon/projects/emlearn/examples/motion_recognition/build/motion_preprocess' ]
423- # FIXME: respect window_length, window_hop
424449 options = dict (
425- #window_length=window_length,
426- #window_hop=window_hop,
450+ window_length = window_length ,
451+ hop_length = window_hop ,
452+ samplerate = samplerate ,
427453 )
428454 columns = ['time' , 'acc_x' , 'acc_y' , 'acc_z' , 'gyro_x' , 'gyro_y' , 'gyro_z' ]
455+ data_columns = [ c for c in columns if not c == 'time' ]
429456 extractor = DataProcessorProgram (program = executable ,
430457 options = options , column_order = columns )
431458
0 commit comments