Skip to content

Commit 4c07aa4

Browse files
committed
modify
1 parent 67f5166 commit 4c07aa4

1 file changed

Lines changed: 26 additions & 17 deletions

File tree

batbot/spectrogram/__init__.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -243,24 +243,31 @@ def generate_waveplot(
243243

244244
return waveplot
245245

246-
def get_waveform_data_ms(waveform, sample_rate, hop_length=16):
246+
def get_waveform_data_ms(waveform, sample_rate, hop_length=16, n_fft=512):
247247
"""
248-
Returns a list of (time_ms, min_val, max_val) for each hop, suitable for
249-
drawing a waveform plot in external tools (e.g. JSON-serializable).
248+
Same envelope data as generate_waveplot, as raw numbers (no plot).
249+
250+
Returns a list of (time_ms, min_val, max_val) for each STFT-aligned hop:
251+
- time_ms: time in milliseconds (same time base as STFT / time_vec).
252+
- min_val: minimum amplitude in the hop (lowest sample in that window).
253+
- max_val: maximum amplitude in the hop (highest sample in that window).
254+
255+
Amplitude units are the same as the waveform: typically normalized in [-1, 1]
256+
when using librosa.load (same as the values generate_waveplot uses before
257+
scaling to pixel coordinates).
250258
"""
251-
# 1. Calculate min/max envelopes
259+
# Same min/max envelope as generate_waveplot (identical windowing)
252260
temp = np.pad(waveform, hop_length // 2, mode='edge')
253261
views = np.lib.stride_tricks.sliding_window_view(temp, (hop_length,))[::hop_length]
254-
255262
bin_mins = np.min(views, axis=1)
256263
bin_maxs = np.max(views, axis=1)
257-
258-
# 2. Calculate time in milliseconds
259-
# (index * hop_length) = total samples
260-
# (samples / sample_rate) = seconds
261-
# (seconds * 1000) = milliseconds
262-
times_ms = (np.arange(len(bin_mins)) * hop_length) / sample_rate * 1000
263-
# Keep full precision for min/max (do not round to int; waveform is typically in [-1, 1])
264+
265+
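# Time in ms via librosa.frames_to_time * 1000; because n_fft is passed, librosa adds an n_fft // 2 sample offset to each frame time (its convention for a non-centered STFT)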
266+
n_frames = len(bin_mins)
267+
times_s = librosa.frames_to_time(
268+
np.arange(n_frames), sr=sample_rate, hop_length=hop_length, n_fft=n_fft
269+
)
270+
times_ms = (times_s * 1000).astype(float)
264271
return [(float(t), float(mn), float(mx)) for t, mn, mx in zip(times_ms, bin_mins, bin_maxs)]
265272

266273

@@ -326,7 +333,9 @@ def load_stft(
326333
else:
327334
waveplot = generate_waveplot(waveform, stft_db, hop_length=hop_length)
328335

329-
waveform_ms = get_waveform_data_ms(waveform, sample_rate=sr, hop_length=hop_length)
336+
waveform_ms = get_waveform_data_ms(
337+
waveform, sample_rate=sr, hop_length=hop_length, n_fft=n_fft
338+
)
330339
# Estimate maximum frequency band containing data based on original sample rate
331340
# Only data up to this maximum band should be used when computing statistics
332341
max_band_idx = min((int(np.where(bands < orig_sr / 2.02)[0][-1]), len(bands) - 1))
@@ -1760,12 +1769,12 @@ def compute_wrapper(
17601769
if segment_waves:
17611770
segment_waveplot = waveform_ms[start + trim_begin : start + trim_end]
17621771
segment_start_ms = (start + trim_begin) * x_step_ms
1763-
# Segment-relative time (0 at segment start) and enough precision for amplitude
1772+
# [time_ms, min_val, max_val]: time in ms relative to segment_start_ms (3 decimals); min/max amplitude (6 decimals, typically within [-1, 1])
17641773
segment_waveplot = [
17651774
[
1766-
round(float(t_ms) - segment_start_ms, 3), # time ms relative to segment
1767-
round(float(mn), 6), # min amplitude (avoid rounding to 0)
1768-
round(float(mx), 6), # max amplitude
1775+
round(float(t_ms) - segment_start_ms, 3),
1776+
round(float(mn), 6),
1777+
round(float(mx), 6),
17691778
]
17701779
for t_ms, mn, mx in segment_waveplot
17711780
]

0 commit comments

Comments
 (0)