Skip to content

Commit fbe332e

Browse files
committed
TEMP: har_trees: Try fix toothbrushing dataset
1 parent 0c137a3 commit fbe332e

2 files changed

Lines changed: 40 additions & 11 deletions

File tree

examples/har_trees/data/configurations/uci_har.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ data_columns:
55
- acc_x
66
- acc_y
77
- acc_z
8+
features: 'custom'
9+
samplerate: 50
810
classes:
911
# - STAND_TO_LIE
1012
# - SIT_TO_LIE

examples/har_trees/har_train.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
178178

179179
# Other options
180180
for k, v in self.options.items():
181-
args += [ f'--{k}', v ]
181+
args += [ f'--{k}', str(v) ]
182182

183183
cmd = ' '.join(args)
184184
try:
@@ -194,8 +194,9 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
194194
# TODO: support feature names. Separate output file, with --features
195195
out = numpy.load(features_path)
196196
windows = pandas.DataFrame(out)
197+
# FIXME: support reading window times from the output, instead of inferring them
197198
span = (data.index.max() - data.index.min()).total_seconds()
198-
dt = span / len(windows) # XXX: make correct
199+
dt = span / len(windows)
199200
windows['time'] = dt * numpy.arange(len(windows))
200201
elif self.serialization == 'csv':
201202
windows = pandas.read_csv(features_path)
@@ -208,7 +209,7 @@ def process(self, data : pandas.DataFrame) -> pandas.DataFrame:
208209
time_in = data.index
209210
time_out = windows['time']
210211

211-
window_duration = pandas.Timedelta(5.0, unit='s') # XXX: hardcoded
212+
window_duration = pandas.Timedelta(4.0, unit='s') # XXX: hardcoded
212213
start_delta = time_out.min() - time_in.min()
213214
assert abs(start_delta) <= window_duration, (start_delta, time_out.min(), time_in.min())
214215
end_delta = time_out.max() - time_in.max()
@@ -300,12 +301,29 @@ def process_one(idx, stream : pandas.DataFrame) -> pandas.DataFrame:
300301
# for any time-dependent logic to stabilize, and to merge while ignoring the run-in
301302
def split_sections(data, groupby : list[str], time_column='time'):
302303
groups = sensordata.groupby(groupby, observed=True)
303-
for group_idx, group_df in groups:
304+
for group_idx, df in groups:
304305

305306
# ensure sorted by time
306-
group_df = group_df.reset_index().set_index(time_column).sort_index()
307+
df = df.reset_index()
308+
print('d', df.columns)
307309

308-
yield group_idx, group_df
310+
# convert to time-delta, if needed
311+
if pandas.api.types.is_datetime64_dtype(df[time_column]):
312+
df[time_column] = df[time_column] - df[time_column].min()
313+
314+
df = df.set_index(time_column).sort_index()
315+
316+
317+
samplerate = 50
318+
expected_freq = pandas.Timedelta(1/samplerate, unit='s')
319+
diff = df.index.to_series().diff()
320+
holes = diff[diff > expected_freq]
321+
irregular = diff[diff != expected_freq].dropna()
322+
assert irregular.empty, irregular
323+
324+
assert pandas.api.types.is_timedelta64_dtype(df.index)
325+
326+
yield group_idx, df
309327

310328
sections = split_sections(sensordata, groupby=groupby, time_column=time_column)
311329
jobs = [ joblib.delayed(process_one)(idx, df) for idx, df in sections]
@@ -408,10 +426,18 @@ def run_pipeline(run, hyperparameters, dataset,
408426
features=features,
409427
)
410428
window_length = model_settings['window_length']
411-
samplerate = model_settings.get('samplerate', 100)
412-
429+
samplerate = dataset_config.get('samplerate', 100)
430+
window_hop = window_length
431+
413432
window_duration = (window_length / samplerate)
414433

434+
remap = {
435+
'x': 'acc_x',
436+
'y': 'acc_y',
437+
'z': 'acc_z',
438+
}
439+
data = data.rename(columns=remap)
440+
415441
# Setup feature extraction
416442
if features == 'timebased':
417443
columns = ['acc_x', 'acc_y', 'acc_z']
@@ -420,12 +446,13 @@ def run_pipeline(run, hyperparameters, dataset,
420446
elif features == 'custom':
421447
# FIXME: unhardcode path
422448
executable = ['/home/jon/projects/emlearn/examples/motion_recognition/build/motion_preprocess']
423-
# FIXME: respect window_length, window_hop
424449
options = dict(
425-
#window_length=window_length,
426-
#window_hop=window_hop,
450+
window_length=window_length,
451+
hop_length=window_hop,
452+
samplerate=samplerate,
427453
)
428454
columns = ['time', 'acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
455+
data_columns = [ c for c in columns if not c == 'time' ]
429456
extractor = DataProcessorProgram(program=executable,
430457
options=options, column_order=columns)
431458

0 commit comments

Comments
 (0)