Skip to content

Commit 8c48e73

Browse files
committed
har_trees: Start supporting custom features via user-specified program
1 parent f61f16f commit 8c48e73

1 file changed

Lines changed: 87 additions & 0 deletions

File tree

examples/har_trees/har_train.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,85 @@ def timebased_features(windows : list[pandas.DataFrame],
198198

199199
return df
200200

201+
202+
def custom_features(windows : list[pandas.DataFrame],
        columns : list[str],
        executable : str = '',
        options : dict = None,
        input_option : str = '--input',
        output_option : str = '--output',
        serialization : str = 'csv') -> pandas.DataFrame:
    """
    Run a program (executable) to compute features

    All windows are concatenated into one table, which is written to a file
    in a temporary directory. The executable is then run with the path of that
    file and the path where it should write its output. The output file is
    read back and returned.

    Parameters
    ----------
    windows:
        Data windows to compute features for.
    columns:
        Currently NOT used. The set of columns passed to the program
        is hardcoded, see the FIXME in the body.
    executable:
        Path of the program to run. Must be non-empty.
    options:
        Extra options for the program. Each item is passed as ``--key value``.
        Values are converted using str(). None means no extra options.
    input_option:
        Option name that precedes the input path.
        If empty, the input path is passed as a positional argument.
    output_option:
        Option name that precedes the output path.
        If empty, the output path is passed as a positional argument.
    serialization:
        File format used to exchange data. Only 'csv' is supported.

    Returns
    -------
    DataFrame with the features written by the program, one row per window.
    An empty DataFrame if there are no windows.

    Raises
    ------
    NotImplementedError: if the serialization is not supported
    ValueError: if no executable is given,
        or the program output does not have one row per window
    subprocess.CalledProcessError: if the program exits with non-zero code
    """

    # Explicit raise instead of assert, so the checks also run under "python -O"
    if serialization != 'csv':
        # TODO: also support .npy
        raise NotImplementedError(f"Unsupported serialization: {serialization}")
    if not executable:
        raise ValueError("No executable specified for custom features")
    if options is None:
        options = {}

    # Nothing to compute. pandas.concat() would fail on an empty list
    if len(windows) == 0:
        return pandas.DataFrame()

    extension = serialization

    # concat() returns a new DataFrame, so the input windows are not modified below
    data = pandas.concat(windows)

    # FIXME: unhardcode
    # The program gets a fixed 7-column layout.
    # time and gyro columns are always overwritten with zeros,
    # and the columns argument is replaced
    data['time'] = 0
    data['gyro_x'] = 0
    data['gyro_y'] = 0
    data['gyro_z'] = 0
    columns = ['time', 'acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

    # Filter columns
    data = data[columns]

    log.debug('custom-features-start', columns=list(data.columns))

    with tempfile.TemporaryDirectory() as tempdir:
        data_path = os.path.join(tempdir, f'data.{extension}')
        features_path = os.path.join(tempdir, f'features.{extension}')

        # Persist the data
        data.to_csv(data_path, index=False)

        # Build arguments
        args = [ executable ]

        # Input and output, either as named options or positional
        args += [ input_option, data_path ] if input_option else [ data_path ]
        args += [ output_option, features_path ] if output_option else [ features_path ]

        # Other options. subprocess requires strings, so convert the values
        for k, v in options.items():
            args += [ f'--{k}', str(v) ]

        # Only used for logging
        cmd = ' '.join(str(a) for a in args)
        try:
            # Capture stdout/stderr so that they are available for the error log
            subprocess.run(args, check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            log.error('preprocessor-error',
                cmd=cmd, out=e.stdout, code=e.returncode, err=e.stderr)
            raise

        # Load output. Must happen before the temporary directory is removed
        # TODO: add feature names
        features = pandas.read_csv(features_path)

    # post-conditions
    # one feature vector per window
    if len(features) != len(windows):
        raise ValueError(
            f"Program returned {len(features)} feature vectors for {len(windows)} windows")

    return features
278+
279+
201280
def batched_iterator(iterable, batch_size):
202281
"""Yield lists of size batch_size from iterable"""
203282
iterator = iter(iterable)
@@ -230,6 +309,13 @@ def extract_features(sensordata : pandas.DataFrame,
230309
raise NotImplementedError
231310
elif features == 'timebased':
232311
feature_extractor = lambda w: timebased_features(w, columns=columns)
312+
elif features == 'custom':
313+
314+
# FIXME: unhardcode
315+
executable = '/home/jon/projects/emlearn/examples/motion_recognition/build/motion_preprocess'
316+
options = {}
317+
318+
feature_extractor = lambda w: custom_features(w, columns=columns, executable=executable, options=options)
233319
else:
234320
raise ValueError(f"Unsupported features: {features}")
235321

@@ -331,6 +417,7 @@ def run_pipeline(run, hyperparameters, dataset,
331417
feature_extraction_start = time.time()
332418
log.info('feature-extraction-start',
333419
dataset=dataset,
420+
features=features,
334421
)
335422
window_length = model_settings['window_length']
336423
features = extract_features(data,

0 commit comments

Comments
 (0)