Skip to content

Commit 1078572

Browse files
authored
Merge pull request #57 from emlearn/har-trees-c-support
har_trees: Generalize preprocessing support
2 parents f61f16f + 6613650 commit 1078572

4 files changed

Lines changed: 781 additions & 181 deletions

File tree

examples/har_trees/compute_features.py

Lines changed: 63 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,22 @@
77

88
from timebased import calculate_features_xyz, DATA_TYPECODE, N_FEATURES
99

10-
def compute_dataset_features(data: npyfile.Reader,
10+
def compute_dataset_features(data: npyfile.Reader, window_length,
11+
hop_length=None,
1112
skip_samples=0, limit_samples=None, verbose=0):
1213

14+
if hop_length is None:
15+
hop_length = window_length
16+
17+
if window_length % hop_length != 0:
18+
raise ValueError(f"hop_length must be an even divisor of window_length. Got window={window_length} hop={hop_length}")
19+
20+
1321
# Check that data is expected format
1422
shape = data.shape
15-
assert len(shape) == 3, shape
16-
n_samples, window_length, n_axes = shape
23+
assert len(shape) == 2, shape
24+
n_samples, n_axes = shape
1725
assert n_axes == 3, shape
18-
#assert window_length == 128, shape
1926

2027
# We expect data to be h/int16
2128
assert data.typecode == DATA_TYPECODE, data.typecode
@@ -26,26 +33,39 @@ def compute_dataset_features(data: npyfile.Reader,
2633
y_values = array.array(DATA_TYPECODE, (0 for _ in range(window_length)))
2734
z_values = array.array(DATA_TYPECODE, (0 for _ in range(window_length)))
2835

29-
chunk_size = window_length*n_axes
30-
sample_counter = 0
36+
chunk_size = hop_length*n_axes
37+
window_counter = 0
38+
start_idx = 0
3139

3240
data_chunks = data.read_data_chunks(chunk_size, offset=chunk_size*skip_samples)
41+
3342
for arr in data_chunks:
3443

35-
# process the data
44+
print('cc', len(arr))
45+
if len(arr) < chunk_size:
46+
# short read, last data piece, ignore
47+
continue
48+
49+
# Window was full, make room for more
50+
if start_idx >= window_length:
51+
overlap = window_length - hop_length
52+
if overlap > 0:
53+
x_values[:overlap] = x_values[hop_length:]
54+
y_values[:overlap] = y_values[hop_length:]
55+
z_values[:overlap] = z_values[hop_length:]
56+
start_idx = overlap
57+
58+
# Copy the input data
3659
# De-interleave data from XYZ1 XYZ2... into separate contiguous X,Y,Z
37-
for i in range(window_length):
60+
for i in range(hop_length):
3861
x_values[i] = arr[(i*3)+0]
3962
y_values[i] = arr[(i*3)+1]
4063
z_values[i] = arr[(i*3)+2]
64+
start_idx += hop_length
4165

42-
#print(x_values)
43-
#print(y_values)
44-
#print(z_values)
45-
46-
assert len(x_values) == window_length
47-
assert len(y_values) == window_length
48-
assert len(z_values) == window_length
66+
# waiting for window to fill
67+
if start_idx < window_length:
68+
continue
4969

5070
feature_calc_start = time.ticks_ms()
5171
features = calculate_features_xyz((x_values, y_values, z_values))
@@ -54,35 +74,47 @@ def compute_dataset_features(data: npyfile.Reader,
5474
print('feature-calc-end', duration)
5575

5676
yield features
77+
window_counter += 1
5778

58-
sample_counter += 1
59-
if limit_samples is not None and sample_counter > limit_samples:
79+
if limit_samples is not None and window_counter > limit_samples:
6080
break
6181

62-
def main():
82+
def parse():
    """Read the command-line options of the feature computation script.

    Returns the object produced by ``ArgumentParser.parse_args()``, carrying
    the attributes ``input``, ``output``, ``samplerate``, ``skip``, ``limit``,
    ``window_length`` and ``hop_length``. ``hop_length`` is ``None`` when the
    option is not given on the command line.
    """
    import argparse

    # (flag, keyword arguments), listed in the order they are registered
    options = (
        ('--input', dict(required=True, help='Input .npy file')),
        ('--output', dict(required=True, help='Output .npy file')),
        ('--samplerate', dict(type=int, default=None, help='Samplerate. Currently ignored')),
        ('--skip', dict(type=int, default=0, help='Number of samples to skip (default: 0)')),
        ('--limit', dict(type=int, default=None, help='Maximum number of samples to process (default: None)')),
        ('--window_length', dict(type=int, default=128, help='Window length (default: 128)')),
        ('--hop_length', dict(type=int, default=None, help='Hop length (default: window_length)')),
    )

    cli = argparse.ArgumentParser(description='Compute features from NPY file')
    for flag, settings in options:
        cli.add_argument(flag, **settings)

    return cli.parse_args()
6696

67-
_, in_path, out_path = sys.argv
97+
def main():
6898

69-
skip_samples = 0
70-
limit_samples = None
99+
args = parse()
71100

72101
out_typecode = 'f'
73-
n_features = N_FEATURES
74-
102+
n_features = N_FEATURES
75103
features_array = array.array(out_typecode, (0 for _ in range(n_features)))
76104

77-
with npyfile.Reader(in_path) as data:
78-
n_samples, window_length, n_axes = data.shape
105+
with npyfile.Reader(args.input) as data:
106+
n_samples, n_axes = data.shape
107+
108+
n_windows = (n_samples - args.window_length) // args.hop_length
79109

80-
out_shape = (n_samples, n_features)
81-
with npyfile.Writer(out_path, out_shape, out_typecode) as out:
110+
out_shape = (n_windows, n_features)
111+
with npyfile.Writer(args.output, out_shape, out_typecode) as out:
82112

83113
generator = compute_dataset_features(data,
84-
skip_samples=skip_samples,
85-
limit_samples=limit_samples,
114+
window_length=args.window_length,
115+
hop_length=args.hop_length,
116+
skip_samples=args.skip,
117+
limit_samples=args.limit,
86118
)
87119
for features in generator:
88120
#print('features', len(features), features)

examples/har_trees/data/configurations/uci_har.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ data_columns:
55
- acc_x
66
- acc_y
77
- acc_z
8+
features: 'custom'
9+
samplerate: 50
810
classes:
911
# - STAND_TO_LIE
1012
# - SIT_TO_LIE

0 commit comments

Comments
 (0)