@@ -189,6 +189,11 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
189189 cmd = cmd , out = e .stdout , err = e .stderr , code = code )
190190 raise e
191191
192+ log .debug ('preprocessor-run' ,
193+ cmd = cmd ,
194+ #out=out,
195+ )
196+
192197 # Load output
193198 if self .serialization == 'npy' :
194199 # TODO: support feature names. Separat output file, with --features
@@ -223,14 +228,24 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
223228
224229class TimebasedFeatureExtractor (DataProcessorProgram ):
225230
226- def __init__ (self , python_bin = 'python' , ** kwargs ):
231+ def __init__ (self , sensitivity = 4.0 , python_bin = 'python' , ** kwargs ):
227232 super ().__init__ (self , serialization = 'npy' , ** kwargs )
228233
229234 here = os .path .dirname (__file__ )
230235 feature_extraction_script = os .path .join (here , 'compute_features.py' )
231236 self .program = [ python_bin , feature_extraction_script ]
232237
238+ self .sensitivity = sensitivity
239+
233240 def process (self , data ):
241+
242+ data = data .copy ()
243+ columns = self .column_order
244+
245+ # Convert from floats in "g" to the sensor scaling in int16
246+ data .loc [:, columns ] = \
247+ ((data .loc [:, columns ] / self .sensitivity ) * (2 ** 15 - 1 )).astype (numpy .int16 )
248+
234249 return super ().process (data )
235250
236251
@@ -240,7 +255,6 @@ def extract_features(sensordata : pandas.DataFrame,
240255 groupby : list [str ],
241256 extractor ,
242257 samplerate = 50 ,
243- sensitivity = 2.0 , # how many g range the int16 sensor data has
244258 label_column = 'activity' ,
245259 time_column = 'time' ,
246260 parallel_jobs : int = 4 ,
@@ -264,22 +278,9 @@ def process_one(idx, stream : pandas.DataFrame) -> pandas.DataFrame:
264278 # drop invalid data
265279 stream = stream .dropna (subset = columns )
266280
267- # Convert from floats in "g" to the sensor scaling in int16
268- stream .loc [:, columns ] = \
269- ((stream .loc [:, columns ] / sensitivity ) * (2 ** 15 - 1 )).astype (numpy .int16 )
270-
271- # FIXME: make sure time-series is regular
272- # potentially zero-fill ?
273-
274281 # Extract features
275282 windows = extractor .process (stream )
276283
277- # Convert features to 16-bit integers
278- # XXX: Assuming that they are already in resonable scale
279- # TODO: consider moving the quantization to inside timebased
280- feature_columns = list (set (windows .columns ) - set ([time_column ]))
281- windows .loc [:,feature_columns ] = windows [feature_columns ].astype (numpy .int16 )
282-
283284 # Combine with identifying information
284285 # time should come from data processing
285286 assert time_column in windows
@@ -312,8 +313,8 @@ def split_sections(data, groupby : list[str], time_column='time'):
312313 df = df .reset_index ()
313314
314315 # convert to time-delta, if neeeded
315- if pandas .api .types .is_datetime64_dtype (df [time_column ]):
316- df [time_column ] = df [time_column ] - df [time_column ].min ()
316+ # if pandas.api.types.is_datetime64_dtype(df[time_column]):
317+ # df[time_column] = df[time_column] - df[time_column].min()
317318
318319 df = df .set_index (time_column ).sort_index ()
319320
@@ -401,8 +402,8 @@ def label_windows(sensordata,
401402 data = data .reset_index ().set_index ('time' ) # XXX: Why is this needed?
402403
403404 # convert to time-delta, if neeeded
404- if pandas .api .types .is_datetime64_dtype (data .index ):
405- data .index = data .index - data .index .min ()
405+ # if pandas.api.types.is_datetime64_dtype(data.index):
406+ # data.index = data.index - data.index.min()
406407
407408 for idx , w in ww .iterrows ():
408409 window_end = idx [- 1 ] # XXX: assuming this is time
@@ -416,6 +417,60 @@ def label_windows(sensordata,
416417
417418 return windows
418419
420+ def plot_timelines (sensordata , windows , groupby , sensor_columns , label_column ):
421+
422+ # Plot
423+ from plotting import make_timeline_plot
424+
425+ sensor_columns = [ c for c in sensor_columns if c .startswith ('gyro_' )] # XXX
426+
427+ sensor_groups = {idx : df for idx , df in sensordata .groupby (groupby , group_keys = False , as_index = False ) }
428+
429+ log .debug ('label-windows' , groups = groupby , g = list (sensor_groups .keys ()))
430+
431+ for idx , ww in windows .groupby (groupby , group_keys = False , as_index = False ):
432+ data = sensor_groups [idx ]
433+ #log.debug('label-window', idx=idx, index_dtype=data.index.dtype)
434+
435+ # XXX: Why is this needed?
436+ data = data .reset_index ().set_index ('time' )
437+ ww = ww .reset_index ().set_index ('time' )
438+
439+ # convert to seconds
440+ #data.index = data.index / pandas.Timedelta(seconds=1)
441+ #ww.index = ww.index / pandas.Timedelta(seconds=1)
442+
443+ feature_columns = list (ww .columns )
444+
445+ feature_columns = [
446+ 'motion_mag_rms' , 'motion_mag_p2p' , 'motion_x_rms' , 'motion_y_rms' , 'motion_z_rms' ,
447+ 'fft_0_4hz' , 'fft_0_8hz' , 'fft_1_2hz' , 'fft_1_6hz' , 'fft_1_10hz' , 'fft_2_3hz' , 'fft_2_7hz' , 'fft_3_1hz' , 'fft_3_5hz' ]
448+
449+ line_features = [
450+ #'orientation_x', 'orientation_y', 'orientation_z',
451+ 'motion_mag_rms'
452+ ]
453+
454+ #print('pp', ww[o])
455+
456+ idx_name = '_' .join ([str (s ) for s in idx ] )
457+ plot_path = f'plot_{ idx_name } .png'
458+ # Make a plot
459+ width = 1600
460+ aspect = 2.0
461+ height = width / aspect
462+ fig = make_timeline_plot (data , ww ,
463+ sensor_columns = sensor_columns ,
464+ label_column = label_column ,
465+ line_feature_columns = line_features ,
466+ heatmap_feature_columns = feature_columns ,
467+ colors = None ,
468+ class_names = ['class_0' , 'class_1' ], # FIXME: pass
469+ predictions = None , # FIXME: pass separate
470+ width = width , aspect = aspect )
471+
472+ fig .write_image (plot_path , scale = 1.5 , width = width , height = height )
473+ print ('Wrote plot' , plot_path )
419474
420475def run_pipeline (run , hyperparameters , dataset ,
421476 config ,
@@ -437,9 +492,6 @@ def run_pipeline(run, hyperparameters, dataset,
437492 log .info ('data-load-start' , dataset = dataset )
438493 data = pandas .read_parquet (data_path )
439494
440- #print(data.index.names)
441- #print(data.columns)
442-
443495 groups = dataset_config ['groups' ]
444496 data_columns = dataset_config ['data_columns' ]
445497 enabled_classes = dataset_config ['classes' ]
@@ -471,24 +523,28 @@ def run_pipeline(run, hyperparameters, dataset,
471523 'y' : 'acc_y' ,
472524 'z' : 'acc_z' ,
473525 }
474- if features == 'timebased' :
475- # XXX: hack
476- remap = {
477- 'acc_x' : 'x' ,
478- 'acc_y' : 'y' ,
479- 'acc_z' : 'z' ,
480- }
481526 data = data .rename (columns = remap )
482527
528+
529+ # convert to time-delta, if needed
530+ def convert_time (data ):
531+ if pandas .api .types .is_datetime64_dtype (data .index ):
532+ data .index = data .index - data .index .min ()
533+ return data
534+
535+ data = data .groupby (groups , as_index = False , group_keys = False ).apply (convert_time )
536+
537+
483538 # Setup feature extraction
484539 extract_options = dict (
485540 window_length = window_length ,
486541 hop_length = window_hop ,
487542 samplerate = samplerate ,
488543 )
489544 if features == 'timebased' :
490- columns = ['x' , 'y' , 'z' ]
491- extractor = TimebasedFeatureExtractor (column_order = columns , options = extract_options )
545+ #columns = ['x', 'y', 'z']
546+ columns = ['acc_x' , 'acc_y' , 'acc_z' ]
547+ extractor = TimebasedFeatureExtractor (sensitivity = sensitivity , column_order = columns , options = extract_options )
492548
493549 elif features == 'custom' :
494550 # FIXME: unhardcode path
@@ -510,10 +566,10 @@ def run_pipeline(run, hyperparameters, dataset,
510566 extractor = extractor ,
511567 columns = data_columns ,
512568 groupby = groups ,
513- sensitivity = sensitivity ,
514569 label_column = label_column ,
515570 time_column = time_column ,
516571 samplerate = samplerate ,
572+ #parallel_jobs=1,
517573 )
518574
519575 # Attach labels
@@ -523,8 +579,6 @@ def run_pipeline(run, hyperparameters, dataset,
523579 window_duration = pandas .Timedelta (window_duration , unit = 's' ),
524580 )
525581
526- print (features .head ())
527-
528582 labeled = numpy .count_nonzero (features [label_column ].notna ())
529583
530584 feature_extraction_duration = time .time () - feature_extraction_start
@@ -535,6 +589,9 @@ def run_pipeline(run, hyperparameters, dataset,
535589 duration = feature_extraction_duration ,
536590 )
537591
592+ #plot_timelines(data, features, groupby=groups,
593+ # sensor_columns=data_columns, label_column=label_column)
594+
538595 # FIXME: keep the windows in evaluation, only ignore for training
539596
540597 # Drop windows without labels