77
88from timebased import calculate_features_xyz , DATA_TYPECODE , N_FEATURES
99
# compute_dataset_features: generator that reads interleaved XYZ values from
# `data` in chunks of hop_length samples, collects them into per-axis buffers of
# window_length samples, and yields the result of calculate_features_xyz() each
# time a full window is available.
# NOTE(review): this text is a diff rendering. Lines marked `-` appear to be the
# removed side and lines marked `+` the added side; the `@@` hunk headers hide
# some lines of the body (e.g. where x_values and duration are defined).
10- def compute_dataset_features (data : npyfile .Reader ,
10+ def compute_dataset_features (data : npyfile .Reader , window_length ,
11+ hop_length = None ,
1112 skip_samples = 0 , limit_samples = None , verbose = 0 ):
1213
# hop_length defaults to window_length, i.e. consecutive windows do not overlap.
14+ if hop_length is None :
15+ hop_length = window_length
16+
# NOTE(review): hop_length == 0 reaches this modulo and raises ZeroDivisionError
# instead of the ValueError below; negative values are not rejected either.
17+ if window_length % hop_length != 0 :
18+ raise ValueError (f"hop_length must be an even divisor of window_length. Got window={ window_length } hop={ hop_length } " )
19+
20+
1321 # Check that data is expected format
1422 shape = data .shape
15- assert len (shape ) == 3 , shape
16- n_samples , window_length , n_axes = shape
# The added side expects a flat 2-D (n_samples, n_axes) input; the removed side
# expected 3-D (n_samples, window_length, n_axes) input.
23+ assert len (shape ) == 2 , shape
24+ n_samples , n_axes = shape
1725 assert n_axes == 3 , shape
18- #assert window_length == 128, shape
1926
2027 # We expect data to be h/int16
2128 assert data .typecode == DATA_TYPECODE , data .typecode
@@ -26,26 +33,39 @@ def compute_dataset_features(data: npyfile.Reader,
# Per-axis buffers, each holding window_length values of DATA_TYPECODE.
2633 y_values = array .array (DATA_TYPECODE , (0 for _ in range (window_length )))
2734 z_values = array .array (DATA_TYPECODE , (0 for _ in range (window_length )))
2835
29- chunk_size = window_length * n_axes
30- sample_counter = 0
# One chunk now holds hop_length samples of n_axes interleaved values.
# start_idx tracks how many samples of the current window are filled.
36+ chunk_size = hop_length * n_axes
37+ window_counter = 0
38+ start_idx = 0
3139
# NOTE(review): the offset is chunk_size * skip_samples, so skip_samples is
# effectively counted in hop-sized chunks rather than single samples — confirm
# the unit that read_data_chunks expects for `offset`.
3240 data_chunks = data .read_data_chunks (chunk_size , offset = chunk_size * skip_samples )
41+
3342 for arr in data_chunks :
3443
35- # process the data
# NOTE(review): unconditional debug print on every chunk; looks like a leftover.
# Consider removing it or gating it on `verbose`.
44+ print ('cc' , len (arr ))
45+ if len (arr ) < chunk_size :
46+ # short read, last data piece, ignore
47+ continue
48+
49+ # Window was full, make room for more
# Moves the trailing (window_length - hop_length) values to the front of each
# buffer so they are reused as the start of the next window.
50+ if start_idx >= window_length :
51+ overlap = window_length - hop_length
52+ if overlap > 0 :
53+ x_values [:overlap ] = x_values [hop_length :]
54+ y_values [:overlap ] = y_values [hop_length :]
55+ z_values [:overlap ] = z_values [hop_length :]
56+ start_idx = overlap
57+
58+ # Copy the input data
3659 # De-interleave data from XYZ1 XYZ2... into separate continious X,Y,Z
37- for i in range (window_length ):
# NOTE(review): the loop body writes to index i, not start_idx + i, so every new
# hop lands at the start of the buffers. With hop_length < window_length this
# overwrites the overlap kept above and the rest of the window is never
# refreshed. The destination index probably should be start_idx + i — confirm
# and fix.
60+ for i in range (hop_length ):
3861 x_values [i ] = arr [(i * 3 )+ 0 ]
3962 y_values [i ] = arr [(i * 3 )+ 1 ]
4063 z_values [i ] = arr [(i * 3 )+ 2 ]
64+ start_idx += hop_length
4165
42- #print(x_values)
43- #print(y_values)
44- #print(z_values)
45-
46- assert len (x_values ) == window_length
47- assert len (y_values ) == window_length
48- assert len (z_values ) == window_length
66+ # waiting for window to fill
# No features are computed until window_length samples have been accumulated.
67+ if start_idx < window_length :
68+ continue
4969
5070 feature_calc_start = time .ticks_ms ()
5171 features = calculate_features_xyz ((x_values , y_values , z_values ))
@@ -54,35 +74,47 @@ def compute_dataset_features(data: npyfile.Reader,
5474 print ('feature-calc-end' , duration )
5575
5676 yield features
77+ window_counter += 1
5778
58- sample_counter += 1
59- if limit_samples is not None and sample_counter > limit_samples :
# NOTE(review): window_counter is incremented before this `>` comparison, so
# limit_samples + 1 windows are yielded before the break; `>=` would stop at
# exactly limit_samples. The limit counts windows, not samples.
79+ if limit_samples is not None and window_counter > limit_samples :
6080 break
6181
62- def main ():
def parse(argv=None):
    """Parse the command-line options of the feature computation script.

    Args:
        argv: Optional list of argument strings, without the program name.
            When None, the arguments are taken from ``sys.argv[1:]``.

    Returns:
        The parsed arguments object with the attributes ``input``, ``output``,
        ``samplerate``, ``skip``, ``limit``, ``window_length`` and
        ``hop_length``. ``hop_length`` is always an integer: when the option
        is not given it equals ``window_length``, as the help text documents.

    Raises:
        ValueError: If ``window_length`` or ``hop_length`` is not positive.
    """
    import argparse
    import sys

    if argv is None:
        argv = sys.argv[1:]
    argv = list(argv)

    parser = argparse.ArgumentParser(description='Compute features from NPY file')
    parser.add_argument('--input', required=True, help='Input .npy file')
    parser.add_argument('--output', required=True, help='Output .npy file')
    parser.add_argument('--samplerate', type=int, default=None, help='Samplerate. Currently ignored')
    parser.add_argument('--skip', type=int, default=0, help='Number of samples to skip (default: 0)')
    parser.add_argument('--limit', type=int, default=None, help='Maximum number of samples to process (default: None)')
    parser.add_argument('--window_length', type=int, default=128, help='Window length (default: 128)')
    parser.add_argument('--hop_length', type=int, default=None, help='Hop length (default: window_length)')

    args = parser.parse_args(argv)

    if args.hop_length is None:
        # Resolve the documented default here so callers can do arithmetic with
        # hop_length directly (a None value breaks integer division).
        # The arguments are parsed again instead of assigning to the result,
        # because the returned object may not be mutable on every argparse
        # implementation.
        args = parser.parse_args(argv + ['--hop_length', str(args.window_length)])

    if args.window_length <= 0 or args.hop_length <= 0:
        raise ValueError(
            f"window_length and hop_length must be positive. "
            f"Got window={args.window_length} hop={args.hop_length}"
        )

    return args
6696
67- _ , in_path , out_path = sys . argv
97+ def main ():
6898
69- skip_samples = 0
70- limit_samples = None
99+ args = parse ()
71100
72101 out_typecode = 'f'
73- n_features = N_FEATURES
74-
102+ n_features = N_FEATURES
75103 features_array = array .array (out_typecode , (0 for _ in range (n_features )))
76104
77- with npyfile .Reader (in_path ) as data :
78- n_samples , window_length , n_axes = data .shape
105+ with npyfile .Reader (args .input ) as data :
106+ n_samples , n_axes = data .shape
107+
108+ n_windows = (n_samples - args .window_length ) // args .hop_length
79109
80- out_shape = (n_samples , n_features )
81- with npyfile .Writer (out_path , out_shape , out_typecode ) as out :
110+ out_shape = (n_windows , n_features )
111+ with npyfile .Writer (args . output , out_shape , out_typecode ) as out :
82112
83113 generator = compute_dataset_features (data ,
84- skip_samples = skip_samples ,
85- limit_samples = limit_samples ,
114+ window_length = args .window_length ,
115+ hop_length = args .hop_length ,
116+ skip_samples = args .skip ,
117+ limit_samples = args .limit ,
86118 )
87119 for features in generator :
88120 #print('features', len(features), features)
0 commit comments