@@ -198,6 +198,85 @@ def timebased_features(windows : list[pandas.DataFrame],
198198
199199 return df
200200
201+
def custom_features(windows : list[pandas.DataFrame],
        columns : list[str],
        executable : str = '',
        options : dict = {},
        input_option : str = '--input',
        output_option : str = '--output',
        serialization : str = 'csv') -> pandas.DataFrame:
    """
    Run a program (executable) to compute features

    All windows are concatenated into one table, which is written as CSV
    into a temporary directory. The executable is then run with the path
    of that file and the path where it must write its result, and the
    resulting CSV file is loaded and returned.

    :param windows: DataFrames to process. Must provide the columns
        acc_x, acc_y and acc_z
    :param columns: Currently ignored; a fixed set of columns is used
        (see the FIXME in the body)
    :param executable: Program to run
    :param options: Extra options. Each item is passed as `--key value`,
        with the value converted to a string
    :param input_option: Flag placed before the input path.
        If empty, the input path is passed as a positional argument
    :param output_option: Flag placed before the output path.
        If empty, the output path is passed as a positional argument
    :param serialization: File format used for the exchange.
        Only 'csv' is supported

    :returns: The table loaded from the output file of the program
    :raises subprocess.CalledProcessError: if the program exits with
        a non-zero return code
    :raises AssertionError: if the serialization is unsupported,
        or the output does not have the expected number of rows
    """

    assert serialization == 'csv' # TODO: also support .npy
    extension = serialization

    # NOTE: the default `options` dict is shared between calls,
    # which is safe only because it is read and never mutated here

    # Combine all windows into one table.
    # concat returns a new object, so the column assignments below
    # are not applied to the DataFrames passed in by the caller
    data = pandas.concat(windows)

    # FIXME: unhardcode
    # The `columns` argument is overridden, and the time/gyro columns
    # are zero-filled, replacing any values already present in the input
    data['time'] = 0
    data['gyro_x'] = 0
    data['gyro_y'] = 0
    data['gyro_z'] = 0
    columns = ['time', 'acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

    data = data[columns]

    log.debug('custom-features-start', columns=list(data.columns))

    # The temporary directory, with both files, is removed on exit
    with tempfile.TemporaryDirectory() as tempdir:
        data_path = os.path.join(tempdir, f'data.{extension}')
        features_path = os.path.join(tempdir, f'features.{extension}')

        # Persist the data
        data.to_csv(data_path, index=False)

        # Build arguments: executable, input, output, then other options
        args = [str(executable)]
        args += [input_option, data_path] if input_option else [data_path]
        args += [output_option, features_path] if output_option else [features_path]

        for key, value in options.items():
            # subprocess only accepts strings, so convert numbers etc.
            args += [f'--{key}', str(value)]

        cmd = ' '.join(args)
        try:
            # Capture stderr as well as stdout,
            # so that both are available for the error log below
            subprocess.run(args, check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            log.error('preprocessor-error',
                cmd=cmd, out=e.stdout, code=e.returncode, err=e.stderr)
            raise

        # Load output, before the temporary directory is removed
        out = pandas.read_csv(features_path)

    # NOTE(review): this requires one output row per input row, while the
    # post-condition below requires one output row per window. Both can only
    # hold when the total number of rows equals the number of windows.
    # Confirm what the executable produces, and drop the check that is wrong
    assert len(out) == len(data), (len(out), len(data))

    # TODO: add feature names
    df = out

    # post-conditions
    # one feature vector per window
    assert len(df) == len(windows), (len(df), len(windows))

    return df
278+
279+
201280def batched_iterator (iterable , batch_size ):
202281 """Yield lists of size batch_size from iterable"""
203282 iterator = iter (iterable )
@@ -230,6 +309,13 @@ def extract_features(sensordata : pandas.DataFrame,
230309 raise NotImplementedError
231310 elif features == 'timebased' :
232311 feature_extractor = lambda w : timebased_features (w , columns = columns )
312+ elif features == 'custom' :
313+
314+ # FIXME: unhardcode
315+ executable = '/home/jon/projects/emlearn/examples/motion_recognition/build/motion_preprocess'
316+ options = {}
317+
318+ feature_extractor = lambda w : custom_features (w , columns = columns , executable = executable , options = options )
233319 else :
234320 raise ValueError (f"Unsupported features: { features } " )
235321
@@ -331,6 +417,7 @@ def run_pipeline(run, hyperparameters, dataset,
331417 feature_extraction_start = time .time ()
332418 log .info ('feature-extraction-start' ,
333419 dataset = dataset ,
420+ features = features ,
334421 )
335422 window_length = model_settings ['window_length' ]
336423 features = extract_features (data ,
0 commit comments