Skip to content

Commit 6613650

Browse files
committed
har_trees: Start of timeline plots
1 parent 8c1d188 commit 6613650

2 files changed

Lines changed: 416 additions & 34 deletions

File tree

examples/har_trees/har_train.py

Lines changed: 91 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,11 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
189189
cmd=cmd, out=e.stdout, err=e.stderr, code=code)
190190
raise e
191191

192+
log.debug('preprocessor-run',
193+
cmd=cmd,
194+
#out=out,
195+
)
196+
192197
# Load output
193198
if self.serialization == 'npy':
194199
# TODO: support feature names. Separate output file, with --features
@@ -223,14 +228,24 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
223228

224229
class TimebasedFeatureExtractor(DataProcessorProgram):
225230

226-
def __init__(self, python_bin='python', **kwargs):
231+
def __init__(self, sensitivity=4.0, python_bin='python', **kwargs):
227232
super().__init__(self, serialization='npy', **kwargs)
228233

229234
here = os.path.dirname(__file__)
230235
feature_extraction_script = os.path.join(here, 'compute_features.py')
231236
self.program = [ python_bin, feature_extraction_script ]
232237

238+
self.sensitivity = sensitivity
239+
233240
def process(self, data):
241+
242+
data = data.copy()
243+
columns = self.column_order
244+
245+
# Convert from floats in "g" to the sensor scaling in int16
246+
data.loc[:, columns] = \
247+
((data.loc[:, columns] / self.sensitivity) * (2**15-1)).astype(numpy.int16)
248+
234249
return super().process(data)
235250

236251

@@ -240,7 +255,6 @@ def extract_features(sensordata : pandas.DataFrame,
240255
groupby : list[str],
241256
extractor,
242257
samplerate = 50,
243-
sensitivity = 2.0, # how many g range the int16 sensor data has
244258
label_column='activity',
245259
time_column='time',
246260
parallel_jobs : int = 4 ,
@@ -264,22 +278,9 @@ def process_one(idx, stream : pandas.DataFrame) -> pandas.DataFrame:
264278
# drop invalid data
265279
stream = stream.dropna(subset=columns)
266280

267-
# Convert from floats in "g" to the sensor scaling in int16
268-
stream.loc[:, columns] = \
269-
((stream.loc[:, columns] / sensitivity) * (2**15-1)).astype(numpy.int16)
270-
271-
# FIXME: make sure time-series is regular
272-
# potentially zero-fill ?
273-
274281
# Extract features
275282
windows = extractor.process(stream)
276283

277-
# Convert features to 16-bit integers
278-
# XXX: Assuming that they are already in reasonable scale
279-
# TODO: consider moving the quantization to inside timebased
280-
feature_columns = list(set(windows.columns) - set([time_column]))
281-
windows.loc[:,feature_columns] = windows[feature_columns].astype(numpy.int16)
282-
283284
# Combine with identifying information
284285
# time should come from data processing
285286
assert time_column in windows
@@ -312,8 +313,8 @@ def split_sections(data, groupby : list[str], time_column='time'):
312313
df = df.reset_index()
313314

314315
# convert to time-delta, if needed
315-
if pandas.api.types.is_datetime64_dtype(df[time_column]):
316-
df[time_column] = df[time_column] - df[time_column].min()
316+
#if pandas.api.types.is_datetime64_dtype(df[time_column]):
317+
# df[time_column] = df[time_column] - df[time_column].min()
317318

318319
df = df.set_index(time_column).sort_index()
319320

@@ -401,8 +402,8 @@ def label_windows(sensordata,
401402
data = data.reset_index().set_index('time') # XXX: Why is this needed?
402403

403404
# convert to time-delta, if needed
404-
if pandas.api.types.is_datetime64_dtype(data.index):
405-
data.index = data.index - data.index.min()
405+
#if pandas.api.types.is_datetime64_dtype(data.index):
406+
# data.index = data.index - data.index.min()
406407

407408
for idx, w in ww.iterrows():
408409
window_end = idx[-1] # XXX: assuming this is time
@@ -416,6 +417,60 @@ def label_windows(sensordata,
416417

417418
return windows
418419

420+
def plot_timelines(sensordata, windows, groupby, sensor_columns, label_column):
421+
422+
# Plot
423+
from plotting import make_timeline_plot
424+
425+
sensor_columns = [ c for c in sensor_columns if c.startswith('gyro_')] # XXX
426+
427+
sensor_groups = {idx: df for idx, df in sensordata.groupby(groupby, group_keys=False, as_index=False) }
428+
429+
log.debug('label-windows', groups=groupby, g=list(sensor_groups.keys()))
430+
431+
for idx, ww in windows.groupby(groupby, group_keys=False, as_index=False):
432+
data = sensor_groups[idx]
433+
#log.debug('label-window', idx=idx, index_dtype=data.index.dtype)
434+
435+
# XXX: Why is this needed?
436+
data = data.reset_index().set_index('time')
437+
ww = ww.reset_index().set_index('time')
438+
439+
# convert to seconds
440+
#data.index = data.index / pandas.Timedelta(seconds=1)
441+
#ww.index = ww.index / pandas.Timedelta(seconds=1)
442+
443+
feature_columns = list(ww.columns)
444+
445+
feature_columns = [
446+
'motion_mag_rms', 'motion_mag_p2p', 'motion_x_rms', 'motion_y_rms', 'motion_z_rms',
447+
'fft_0_4hz', 'fft_0_8hz', 'fft_1_2hz', 'fft_1_6hz', 'fft_1_10hz', 'fft_2_3hz', 'fft_2_7hz', 'fft_3_1hz', 'fft_3_5hz']
448+
449+
line_features = [
450+
#'orientation_x', 'orientation_y', 'orientation_z',
451+
'motion_mag_rms'
452+
]
453+
454+
#print('pp', ww[o])
455+
456+
idx_name = '_'.join([str(s) for s in idx] )
457+
plot_path = f'plot_{idx_name}.png'
458+
# Make a plot
459+
width = 1600
460+
aspect = 2.0
461+
height = width/aspect
462+
fig = make_timeline_plot(data, ww,
463+
sensor_columns=sensor_columns,
464+
label_column=label_column,
465+
line_feature_columns=line_features,
466+
heatmap_feature_columns=feature_columns,
467+
colors=None,
468+
class_names=['class_0', 'class_1'], # FIXME: pass
469+
predictions=None, # FIXME: pass separate
470+
width=width, aspect=aspect)
471+
472+
fig.write_image(plot_path, scale=1.5, width=width, height=height)
473+
print('Wrote plot', plot_path)
419474

420475
def run_pipeline(run, hyperparameters, dataset,
421476
config,
@@ -437,9 +492,6 @@ def run_pipeline(run, hyperparameters, dataset,
437492
log.info('data-load-start', dataset=dataset)
438493
data = pandas.read_parquet(data_path)
439494

440-
#print(data.index.names)
441-
#print(data.columns)
442-
443495
groups = dataset_config['groups']
444496
data_columns = dataset_config['data_columns']
445497
enabled_classes = dataset_config['classes']
@@ -471,24 +523,28 @@ def run_pipeline(run, hyperparameters, dataset,
471523
'y': 'acc_y',
472524
'z': 'acc_z',
473525
}
474-
if features == 'timebased':
475-
# XXX: hack
476-
remap = {
477-
'acc_x': 'x',
478-
'acc_y': 'y',
479-
'acc_z': 'z',
480-
}
481526
data = data.rename(columns=remap)
482527

528+
529+
# convert to time-delta, if needed
530+
def convert_time(data):
531+
if pandas.api.types.is_datetime64_dtype(data.index):
532+
data.index = data.index - data.index.min()
533+
return data
534+
535+
data = data.groupby(groups, as_index=False, group_keys=False).apply(convert_time)
536+
537+
483538
# Setup feature extraction
484539
extract_options = dict(
485540
window_length=window_length,
486541
hop_length=window_hop,
487542
samplerate=samplerate,
488543
)
489544
if features == 'timebased':
490-
columns = ['x', 'y', 'z']
491-
extractor = TimebasedFeatureExtractor(column_order=columns, options=extract_options)
545+
#columns = ['x', 'y', 'z']
546+
columns = ['acc_x', 'acc_y', 'acc_z']
547+
extractor = TimebasedFeatureExtractor(sensitivity=sensitivity, column_order=columns, options=extract_options)
492548

493549
elif features == 'custom':
494550
# FIXME: unhardcode path
@@ -510,10 +566,10 @@ def run_pipeline(run, hyperparameters, dataset,
510566
extractor=extractor,
511567
columns=data_columns,
512568
groupby=groups,
513-
sensitivity=sensitivity,
514569
label_column=label_column,
515570
time_column=time_column,
516571
samplerate=samplerate,
572+
#parallel_jobs=1,
517573
)
518574

519575
# Attach labels
@@ -523,8 +579,6 @@ def run_pipeline(run, hyperparameters, dataset,
523579
window_duration=pandas.Timedelta(window_duration, unit='s'),
524580
)
525581

526-
print(features.head())
527-
528582
labeled = numpy.count_nonzero(features[label_column].notna())
529583

530584
feature_extraction_duration = time.time() - feature_extraction_start
@@ -535,6 +589,9 @@ def run_pipeline(run, hyperparameters, dataset,
535589
duration=feature_extraction_duration,
536590
)
537591

592+
#plot_timelines(data, features, groupby=groups,
593+
# sensor_columns=data_columns, label_column=label_column)
594+
538595
# FIXME: keep the windows in evaluation, only ignore for training
539596

540597
# Drop windows without labels

0 commit comments

Comments
 (0)