""" pyplots.ai
spectrogram-mel: Mel-Spectrogram for Audio Analysis
Library: bokeh 3.9.0 | Python 3.14.3
Quality: 92/100 | Created: 2026-03-11
"""

import numpy as np
from bokeh.io import export_png, output_file, save
from bokeh.models import (
    BasicTicker,
    BoxAnnotation,
    ColorBar,
    ColumnDataSource,
    FixedTicker,
    HoverTool,
    Label,
    LinearColorMapper,
    Span,
)
from bokeh.palettes import Magma256
from bokeh.plotting import figure
from scipy import signal


# Data - Synthesize a melody-like audio signal with multiple frequency components
np.random.seed(42)  # fixed seed keeps the added noise reproducible between runs
sample_rate = 22050
duration = 4.0
n_samples = int(sample_rate * duration)
t = np.linspace(0, duration, n_samples, endpoint=False)

# Melody notes as (start_s, end_s, fundamental_hz); consecutive notes overlap by 0.5 s
notes = [
    (0.0, 1.0, 261.63),  # C4
    (0.5, 1.5, 329.63),  # E4
    (1.0, 2.0, 392.00),  # G4
    (1.5, 2.5, 523.25),  # C5
    (2.0, 3.0, 440.00),  # A4
    (2.5, 3.5, 349.23),  # F4
    (3.0, 4.0, 293.66),  # D4
]

# (harmonic number, amplitude) pairs: fundamental plus three weaker overtones
harmonic_weights = ((1, 0.6), (2, 0.25), (3, 0.1), (4, 0.05))

# Linear attack/release ramp lengths in samples; the same for every note
attack = int(0.05 * sample_rate)
release = int(0.1 * sample_rate)

# Create a rich audio signal: each note is added only over its own time window
audio_signal = np.zeros(n_samples)
for start, end, freq in notes:
    mask = (t >= start) & (t < end)
    note_len = int(np.sum(mask))
    if note_len <= attack + release:
        # Too short to hold both ramps: the note contributes nothing.
        continue
    env = np.ones(note_len)
    env[:attack] = np.linspace(0, 1, attack)
    env[-release:] = np.linspace(1, 0, release)
    note_t = t[mask]
    tone = sum(weight * np.sin(2 * np.pi * harmonic * freq * note_t) for harmonic, weight in harmonic_weights)
    audio_signal[mask] += env * tone

# Low-level Gaussian noise, then peak-normalize so that max |x| == 1
audio_signal += 0.02 * np.random.randn(n_samples)
audio_signal = audio_signal / np.max(np.abs(audio_signal))

# Compute STFT: n_fft-sample segments, consecutive segments advance by hop_length samples
n_fft = 2048
hop_length = 512
frequencies, times, Zxx = signal.stft(audio_signal, fs=sample_rate, nperseg=n_fft, noverlap=n_fft - hop_length)
power_spectrum = np.abs(Zxx) ** 2

# Mel filterbank construction
n_mels = 128
f_min = 0.0
f_max = sample_rate / 2.0

# Hz -> mel via 2595 * log10(1 + f / 700); n_mels + 2 evenly spaced mel points give
# each band a left edge, a center and a right edge. hz_points is the inverse mapping.
mel_min = 2595.0 * np.log10(1.0 + f_min / 700.0)
mel_max = 2595.0 * np.log10(1.0 + f_max / 700.0)
mel_points = np.linspace(mel_min, mel_max, n_mels + 2)
hz_points = 700.0 * (10.0 ** (mel_points / 2595.0) - 1.0)

# FFT bin index associated with each mel point
bin_points = np.floor((n_fft + 1) * hz_points / sample_rate).astype(int)

# Build triangular filterbank (vectorized inner loop): row m rises from 0 at its left
# bin to 1 at its center bin, then falls back towards 0 at its right bin.
n_freqs = len(frequencies)
filterbank = np.zeros((n_mels, n_freqs))
band_bins = zip(bin_points[:-2], bin_points[1:-1], bin_points[2:])
for m, (f_left, f_center, f_right) in enumerate(band_bins):
    if f_center > f_left:
        rising = np.arange(f_left, f_center)
        filterbank[m, rising] = (rising - f_left) / (f_center - f_left)
    if f_right > f_center:
        falling = np.arange(f_center, f_right)
        filterbank[m, falling] = (f_right - falling) / (f_right - f_center)

# Project the power spectrum onto the mel bands and convert to dB
# (the 1e-10 offset keeps log10 finite where a band has zero power)
mel_spectrogram = filterbank @ power_spectrum
mel_spectrogram_db = 10.0 * np.log10(mel_spectrogram + 1e-10)

# Mel band edge frequencies for quad positioning on log scale
# (clamped to at least 1 Hz so every value is strictly positive)
mel_edge_freqs = np.maximum(hz_points, 1.0)

# Build quad data vectorized with np.repeat/np.tile
n_times = len(times)
time_step = times[1] - times[0] if n_times > 1 else hop_length / sample_rate

# Time-major layout: each time value is repeated for all n_mels bands, and the band
# edges are tiled once per time frame, matching the row order of mel_spectrogram_db.T.
time_grid = np.repeat(times, n_mels)
bottom_grid = np.tile(mel_edge_freqs[1 : n_mels + 1], n_times)
top_grid = np.tile(mel_edge_freqs[2 : n_mels + 2], n_times)
power_grid = mel_spectrogram_db.T.ravel()

# Colour range: 5th percentile up to the maximum, so the quietest 5% share the darkest colour
vmin = float(np.percentile(mel_spectrogram_db, 5))
vmax = float(mel_spectrogram_db.max())

# Map power to palette colors (index 0..255 into Magma256)
normalized = np.clip((power_grid - vmin) / (vmax - vmin), 0, 1)
color_indices = (normalized * 255).astype(int)
colors = [Magma256[i] for i in color_indices]

# One row per (time frame, mel band) quad; time_s / freq_hz are rounded copies for the hover readout
half_step = time_step / 2
source = ColumnDataSource(
    data={
        "left": time_grid - half_step,
        "right": time_grid + half_step,
        "bottom": bottom_grid,
        "top": top_grid,
        "power": power_grid,
        "color": colors,
        "time_s": np.round(time_grid, 3),
        "freq_hz": np.round((bottom_grid + top_grid) / 2, 1),
    }
)

# Plot: log-frequency axis, x-range padded by half a time step so the first and last
# quads are fully visible; no toolbar (tools are added explicitly further down)
p = figure(
    width=4800,
    height=2700,
    title="spectrogram-mel \u00b7 bokeh \u00b7 pyplots.ai",
    x_axis_label="Time (seconds)",
    y_axis_label="Frequency (Hz)",
    x_range=(times.min() - time_step / 2, times.max() + time_step / 2),
    y_range=(mel_edge_freqs[1], mel_edge_freqs[-1]),
    y_axis_type="log",
    tools="",
    toolbar_location=None,
)

# Render mel bands as quads, coloured per cell from the precomputed "color" column
p.quad(
    left="left",
    right="right",
    bottom="bottom",
    top="top",
    fill_color="color",
    line_color=None,
    source=source,
    level="image",
)

# Visual storytelling: annotate the C-major arpeggio rising pattern
arpeggio_box = BoxAnnotation(
    left=0.0, right=2.5, fill_alpha=0, line_color="#ffffff", line_alpha=0.45, line_width=3, line_dash="dashed"
)
p.add_layout(arpeggio_box)

arpeggio_label = Label(
    x=0.05,
    y=mel_edge_freqs[-1] * 0.85,
    text="C Major Arpeggio (C4 \u2192 E4 \u2192 G4 \u2192 C5)",
    text_font_size="22pt",
    text_color="#ffffff",
    text_alpha=0.85,
    text_font_style="italic",
)
p.add_layout(arpeggio_label)

# Mark octave fundamentals (C4, C5) with horizontal frequency guides
for freq, name in [(261.63, "C4"), (523.25, "C5")]:
    # Skip guides that would fall outside the plotted frequency range.
    if mel_edge_freqs[1] <= freq <= mel_edge_freqs[-1]:
        span = Span(
            location=freq, dimension="width", line_color="#ffffff", line_alpha=0.25, line_width=2, line_dash="dotted"
        )
        p.add_layout(span)
        # NOTE(review): this anchor is only 0.2 * time_step left of the right edge of
        # x_range, so the text presumably extends past the plot frame - confirm that the
        # white label is still legible where it lands.
        label = Label(
            x=times.max() + time_step * 0.3,
            y=freq,
            text=name,
            text_font_size="20pt",
            text_color="#ffffff",
            text_alpha=0.7,
            text_font_style="bold",
        )
        p.add_layout(label)

# Descending passage label
desc_label = Label(
    x=2.55,
    y=mel_edge_freqs[-1] * 0.85,
    text="Descending (A4 \u2192 F4 \u2192 D4)",
    text_font_size="22pt",
    text_color="#ffffff",
    text_alpha=0.65,
    text_font_style="italic",
)
p.add_layout(desc_label)

# HoverTool for interactive readout (reads the time_s / freq_hz / power columns of the quad source)
hover = HoverTool(
    tooltips=[("Time", "@time_s{0.000} s"), ("Frequency", "@freq_hz{0.0} Hz"), ("Power", "@power{0.0} dB")],
    point_policy="follow_mouse",
)
p.add_tools(hover)

# Colorbar: same palette and [vmin, vmax] range that were used to colour the quads
color_mapper = LinearColorMapper(palette=Magma256, low=vmin, high=vmax)
color_bar = ColorBar(
    color_mapper=color_mapper,
    ticker=BasicTicker(desired_num_ticks=8),
    label_standoff=24,
    border_line_color=None,
    location=(0, 0),
    title="Power (dB)",
    title_text_font_size="32pt",
    title_text_font_style="italic",
    major_label_text_font_size="24pt",
    major_label_text_color="#444444",
    width=70,
    padding=50,
    title_standoff=24,
)
p.add_layout(color_bar, "right")

# Y-axis tick labels at key mel band frequencies (only those inside the plotted range)
candidate_ticks = [50, 100, 200, 500, 1000, 2000, 4000, 8000]
lowest_hz, highest_hz = mel_edge_freqs[1], mel_edge_freqs[-1]
mel_tick_freqs = [f for f in candidate_ticks if lowest_hz <= f <= highest_hz]
p.yaxis.ticker = FixedTicker(ticks=mel_tick_freqs)

# Typography for 4800x2700 canvas
p.title.text_font_size = "42pt"
p.title.text_font_style = "bold"
p.title.text_color = "#333333"

# p.axis addresses the x and y axes together, so both receive identical settings
p.axis.axis_label_text_font_size = "32pt"
p.axis.major_label_text_font_size = "24pt"
p.axis.axis_label_text_font_style = "normal"
p.axis.axis_label_text_color = "#444444"
p.axis.major_label_text_color = "#555555"

# Axis styling
p.axis.axis_line_width = 3
p.axis.axis_line_color = "#555555"
p.axis.major_tick_line_width = 3
p.axis.major_tick_line_color = "#555555"
p.axis.minor_tick_line_color = None

# Grid - subtle styling (p.grid addresses the x and y grids together)
p.grid.grid_line_alpha = 0.12
p.grid.grid_line_dash = [6, 4]
p.grid.grid_line_color = "#888888"

# Background: near-black plot area, light border, explicit minimum margins
p.background_fill_color = "#000004"
p.border_fill_color = "#fafafa"
p.outline_line_color = "#333333"
p.outline_line_width = 2
p.min_border_right = 180
p.min_border_left = 130
p.min_border_bottom = 110
p.min_border_top = 80

# Save: static PNG first, then the same figure as an HTML page
export_png(p, filename="plot.png")

output_file("plot.html", title="spectrogram-mel \u00b7 bokeh \u00b7 pyplots.ai")
save(p)