@@ -243,24 +243,31 @@ def generate_waveplot(
243243
244244 return waveplot
245245
def get_waveform_data_ms(waveform, sample_rate, hop_length=16, n_fft=512):
    """
    Return the per-hop min/max amplitude envelope of a waveform as raw numbers.

    The waveform is edge-padded by ``hop_length // 2`` samples on both sides
    and then cut into non-overlapping windows of ``hop_length`` samples; the
    minimum and maximum of every window form the envelope. This is intended
    to mirror the envelope that generate_waveplot draws, without producing a
    plot.

    Args:
        waveform: 1-D sequence/array of audio samples.
        sample_rate: Sampling rate in Hz, used to convert frames to time.
        hop_length: Samples per envelope window (and per time step).
        n_fft: FFT size forwarded to ``librosa.frames_to_time``; it shifts
            every timestamp by ``n_fft // 2`` samples.

    Returns:
        A list of ``(time_ms, min_val, max_val)`` tuples of plain Python
        floats (JSON-serializable), one per window:
          - time_ms: frame time in milliseconds.
          - min_val: lowest sample value in the window.
          - max_val: highest sample value in the window.
        Amplitudes are in the waveform's own units (typically [-1, 1] for
        audio loaded with librosa.load). An empty waveform yields ``[]``.

    Raises:
        ValueError: If ``hop_length`` is not positive.
    """
    if hop_length < 1:
        raise ValueError(f"hop_length must be a positive integer, got {hop_length!r}")

    waveform = np.asarray(waveform)
    if waveform.size == 0:
        # np.pad(mode='edge') cannot extend an empty axis; there is simply
        # no envelope to report.
        return []

    # Min/max envelope: pad by half a hop so window i is centred near
    # sample i * hop_length, then take every hop_length-th sliding window
    # (i.e. non-overlapping windows).
    temp = np.pad(waveform, hop_length // 2, mode='edge')
    views = np.lib.stride_tricks.sliding_window_view(temp, (hop_length,))[::hop_length]
    bin_mins = np.min(views, axis=1)
    bin_maxs = np.max(views, axis=1)

    # Frame index -> seconds -> milliseconds.
    # NOTE(review): passing n_fft makes librosa add an n_fft // 2 sample
    # offset to every timestamp, so frame 0 is not at 0 ms. Confirm this
    # matches the time base the callers subtract from (time_vec /
    # x_step_ms); the windows above are centred without that offset.
    times_s = librosa.frames_to_time(
        np.arange(len(bin_mins)), sr=sample_rate, hop_length=hop_length, n_fft=n_fft
    )
    times_ms = times_s * 1000
    return [(float(t), float(mn), float(mx)) for t, mn, mx in zip(times_ms, bin_mins, bin_maxs)]
265272
266273
@@ -326,7 +333,9 @@ def load_stft(
326333 else :
327334 waveplot = generate_waveplot (waveform , stft_db , hop_length = hop_length )
328335
329- waveform_ms = get_waveform_data_ms (waveform , sample_rate = sr , hop_length = hop_length )
336+ waveform_ms = get_waveform_data_ms (
337+ waveform , sample_rate = sr , hop_length = hop_length , n_fft = n_fft
338+ )
330339 # Estimate maximum frequency band containing data based on original sample rate
331340 # Only data up to this maximum band should be used when computing statistics
332341 max_band_idx = min ((int (np .where (bands < orig_sr / 2.02 )[0 ][- 1 ]), len (bands ) - 1 ))
@@ -1760,12 +1769,12 @@ def compute_wrapper(
17601769 if segment_waves :
17611770 segment_waveplot = waveform_ms [start + trim_begin : start + trim_end ]
17621771 segment_start_ms = (start + trim_begin ) * x_step_ms
1763- # Segment-relative time (0 at segment start) and enough precision for amplitude
1772+ # [time_ms, min_val, max_val]: time in ms (0 at segment start), amplitude in [-1, 1]
17641773 segment_waveplot = [
17651774 [
1766- round (float (t_ms ) - segment_start_ms , 3 ), # time ms relative to segment
1767- round (float (mn ), 6 ), # min amplitude (avoid rounding to 0)
1768- round (float (mx ), 6 ), # max amplitude
1775+ round (float (t_ms ) - segment_start_ms , 3 ),
1776+ round (float (mn ), 6 ),
1777+ round (float (mx ), 6 ),
17691778 ]
17701779 for t_ms , mn , mx in segment_waveplot
17711780 ]
0 commit comments