emlearn · jonnor · Nov 23, 2025 · Nov 16, 2025 · Nov 16, 2025 · Nov 16, 2025
diff --git a/examples/har_trees/compute_features.py b/examples/har_trees/compute_features.py
@@ -7,15 +7,22 @@
 
 from timebased import calculate_features_xyz, DATA_TYPECODE, N_FEATURES
 
-def compute_dataset_features(data: npyfile.Reader,
+def compute_dataset_features(data: npyfile.Reader, window_length,
+        hop_length=None,
         skip_samples=0, limit_samples=None, verbose=0):
 
+    if hop_length is None:
+        hop_length = window_length
+
+    if hop_length <= 0 or window_length % hop_length != 0:
+        raise ValueError(f"hop_length must be a positive even divisor of window_length. Got window={window_length} hop={hop_length}")
+
+
     # Check that data is expected format
     shape = data.shape
-    assert len(shape) == 3, shape
-    n_samples, window_length, n_axes = shape
+    assert len(shape) == 2, shape
+    n_samples, n_axes = shape
     assert n_axes == 3, shape
-    #assert window_length == 128, shape
 
     # We expect data to be h/int16
     assert data.typecode == DATA_TYPECODE, data.typecode
@@ -26,26 +33,39 @@ def compute_dataset_features(data: npyfile.Reader,
     y_values = array.array(DATA_TYPECODE, (0 for _ in range(window_length)))
     z_values = array.array(DATA_TYPECODE, (0 for _ in range(window_length)))
 
-    chunk_size = window_length*n_axes
-    sample_counter = 0
+    chunk_size = hop_length*n_axes
+    window_counter = 0
+    start_idx = 0
 
     data_chunks = data.read_data_chunks(chunk_size, offset=chunk_size*skip_samples)
+
     for arr in data_chunks:
 
-        # process the data
+        # A read shorter than chunk_size can only be the trailing partial chunk.
+        # It cannot advance the window by a full hop, so it is dropped
+        if len(arr) < chunk_size:
+            continue
+
+        # Window was full, make room for more
+        if start_idx >= window_length:
+            overlap = window_length - hop_length
+            if overlap > 0:
+                x_values[:overlap] = x_values[hop_length:]
+                y_values[:overlap] = y_values[hop_length:]
+                z_values[:overlap] = z_values[hop_length:]
+            start_idx = overlap
+
+        # Copy the input data
         # De-interleave data from XYZ1 XYZ2... into separate continious X,Y,Z
-        for i in range(window_length):
+        for i in range(hop_length):
             x_values[i] = arr[(i*3)+0]
             y_values[i] = arr[(i*3)+1]
             z_values[i] = arr[(i*3)+2]
+        start_idx += hop_length
 
-        #print(x_values)
-        #print(y_values)
-        #print(z_values)
-
-        assert len(x_values) == window_length
-        assert len(y_values) == window_length
-        assert len(z_values) == window_length
+        # waiting for window to fill
+        if start_idx < window_length:
+            continue
 
         feature_calc_start = time.ticks_ms()
         features = calculate_features_xyz((x_values, y_values, z_values))
@@ -54,35 +74,47 @@ def compute_dataset_features(data: npyfile.Reader,
             print('feature-calc-end', duration)
 
         yield features
+        window_counter += 1
 
-        sample_counter += 1
-        if limit_samples is not None and sample_counter > limit_samples:
+        if limit_samples is not None and window_counter >= limit_samples:
             break
 
-def main():
+def parse():
+    """Parse the command-line arguments of the feature computation script."""
+    import argparse
+    parser = argparse.ArgumentParser(description='Compute features from NPY file')
+    parser.add_argument('--input', required=True, help='Input .npy file')
+    parser.add_argument('--output', required=True, help='Output .npy file')
+    parser.add_argument('--samplerate', type=int, default=None, help='Samplerate. Currently ignored')
+    parser.add_argument('--skip', type=int, default=0, help='Number of hop_length-sized chunks to skip at the start (default: 0)')
+    parser.add_argument('--limit', type=int, default=None, help='Maximum number of windows to process (default: None)')
+    parser.add_argument('--window_length', type=int, default=128, help='Window length (default: 128)')
+    parser.add_argument('--hop_length', type=int, default=None, help='Hop length (default: window_length)')
 
-    if len(sys.argv) != 3:
-        print('Usage: compute_features.py IN.npy OUT.npy')
+    args = parser.parse_args()
+    return args
 
-    _, in_path, out_path = sys.argv
+def main():
 
-    skip_samples = 0
-    limit_samples = None
+    args = parse()
 
     out_typecode = 'f'
-    n_features = N_FEATURES
-
+    n_features = N_FEATURES    
     features_array = array.array(out_typecode, (0 for _ in range(n_features)))
 
-    with npyfile.Reader(in_path) as data:
-        n_samples, window_length, n_axes = data.shape
+    with npyfile.Reader(args.input) as data:
+        n_samples, n_axes = data.shape
+        hop_length = args.window_length if args.hop_length is None else args.hop_length
+        n_windows = max(0, 1 + (n_samples - args.window_length) // hop_length)  # first window, plus one per further hop
 
-        out_shape = (n_samples, n_features)
-        with npyfile.Writer(out_path, out_shape, out_typecode) as out:
+        out_shape = (n_windows, n_features)
+        with npyfile.Writer(args.output, out_shape, out_typecode) as out:
 
             generator = compute_dataset_features(data,
-                skip_samples=skip_samples,
-                limit_samples=limit_samples,
+                window_length=args.window_length,
+                hop_length=args.hop_length,
+                skip_samples=args.skip,
+                limit_samples=args.limit,
             )
             for features in generator:
                 #print('features', len(features), features)

diff --git a/examples/har_trees/data/configurations/uci_har.yaml b/examples/har_trees/data/configurations/uci_har.yaml
@@ -5,6 +5,8 @@ data_columns:
   - acc_x
   - acc_y
   - acc_z
+features: 'custom'
+samplerate: 50
 classes:
   # - STAND_TO_LIE
   # - SIT_TO_LIE