|
| 1 | +""" pyplots.ai |
| 2 | +spectrogram-mel: Mel-Spectrogram for Audio Analysis |
| 3 | +Library: altair 6.0.0 | Python 3.14.3 |
| 4 | +Quality: 91/100 | Created: 2026-03-11 |
| 5 | +""" |
| 6 | + |
| 7 | +import altair as alt |
| 8 | +import numpy as np |
| 9 | +import pandas as pd |
| 10 | + |
| 11 | + |
# Data - deterministic synthetic test signal: harmonic sweep + pulsed tone + chirp + noise
np.random.seed(42)
sample_rate = 22050
duration = 4.0
n_samples = int(sample_rate * duration)
t = np.linspace(0, duration, n_samples, endpoint=False)

# Sweep whose instantaneous frequency decays as 1200 * exp(-0.35 t) Hz.
# The running sum divided by the sample rate integrates that frequency into a
# phase measured in cycles; integer multiples of it give the 2nd/3rd harmonics.
sweep_cycles = np.cumsum(1200 * np.exp(-0.35 * t)) / sample_rate
fundamental = 0.6 * np.sin(2 * np.pi * sweep_cycles)
second_harmonic = 0.3 * np.sin(2 * np.pi * 2 * sweep_cycles)
third_harmonic = 0.15 * np.sin(2 * np.pi * 3 * sweep_cycles)

# 440 Hz (A4) tone whose amplitude pulses between 0 and 1 at 2.5 Hz
pulse_envelope = 0.5 * (1 + np.sin(2 * np.pi * 2.5 * t))
pulsed_tone = 0.4 * pulse_envelope * np.sin(2 * np.pi * 440 * t)

# Rising chirp, active only for 1.5 s < t < 2.5 s; the mask zeroes both the
# frequency that is integrated and the resulting amplitude outside the burst
in_burst = (t > 1.5) & (t < 2.5)
burst_cycles = np.cumsum(in_burst * (2000 + 3000 * (t - 1.5))) / sample_rate
chirp_burst = 0.35 * in_burst * np.sin(2 * np.pi * burst_cycles)

# Low-level Gaussian noise floor (reproducible thanks to the fixed seed above)
noise_floor = 0.05 * np.random.randn(n_samples)

# Components are summed left to right in the order they were defined
signal = fundamental + second_harmonic + third_harmonic + pulsed_tone + chirp_burst + noise_floor
| 36 | + |
# Compute STFT - Hann-windowed frames of n_fft samples taken every hop_length
# samples; only frames that fit entirely inside the signal are used (no padding)
n_fft = 2048
hop_length = 512
window = np.hanning(n_fft)
n_freq_bins = n_fft // 2 + 1
n_frames = 1 + (n_samples - n_fft) // hop_length

# One column of squared rFFT magnitudes per frame -> shape (n_freq_bins, n_frames)
frame_starts = range(0, n_frames * hop_length, hop_length)
stft_power = np.stack(
    [np.abs(np.fft.rfft(signal[begin : begin + n_fft] * window)) ** 2 for begin in frame_starts],
    axis=1,
)
| 50 | + |
# Mel filter bank - n_mels triangular filters whose corner frequencies are
# evenly spaced on the mel scale between 0 Hz and the Nyquist frequency
n_mels = 128
f_max = sample_rate / 2.0

mel_max = 2595.0 * np.log10(1.0 + f_max / 700.0)
mel_edges = np.linspace(0, mel_max, n_mels + 2)
hz_edges = 700.0 * (10.0 ** (mel_edges / 2595.0) - 1.0)
fft_freqs = np.linspace(0, f_max, n_freq_bins)

# Filter k uses hz_edges[k], hz_edges[k + 1], hz_edges[k + 2] as its lower,
# centre and upper corner. Corners are shaped (n_mels, 1) and bin frequencies
# (1, n_freq_bins) so every filter is evaluated at every bin in one broadcast.
lower = hz_edges[:-2, np.newaxis]
center = hz_edges[1:-1, np.newaxis]
upper = hz_edges[2:, np.newaxis]
bin_hz = fft_freqs[np.newaxis, :]

# The corners are strictly increasing, so neither denominator can be zero
rising = (bin_hz - lower) / (center - lower)
falling = (upper - bin_hz) / (upper - center)
on_rise = (bin_hz >= lower) & (bin_hz <= center)
on_fall = (bin_hz > center) & (bin_hz <= upper)

# Weight ramps 0 -> 1 up to the centre, 1 -> 0 after it, and is 0 elsewhere
filterbank = np.where(on_rise, rising, np.where(on_fall, falling, 0.0))
| 69 | + |
# Apply mel filter and convert to dB - power is floored at 1e-10 before the
# logarithm, then expressed relative to the loudest cell and floored at -80 dB
mel_spec = np.clip(filterbank @ stft_power, 1e-10, None)
mel_spec_db = 10.0 * np.log10(mel_spec)
mel_spec_db = np.clip(mel_spec_db - mel_spec_db.max(), -80.0, None)
| 76 | + |
# Keep every mel band (so low frequencies stay finely resolved) and thin out
# only the time axis to limit the number of rectangles that have to be drawn
frame_step = 2
time_idx = np.arange(0, n_frames, frame_step)
mel_idx = np.arange(0, n_mels)

# Each kept frame is drawn from its start time and is frame_step hops wide
time_sec = time_idx * hop_length / sample_rate
time_width = frame_step * hop_length / sample_rate

# Frequency extent of each band's rectangle.
# A triangular filter spans two mel steps, so using its full support
# (hz_edges[k] .. hz_edges[k + 2]) makes neighbouring rectangles overlap by
# half; rows drawn later then hide the upper half of earlier ones and every
# band appears below its centre frequency. Instead, split the axis at the
# mel-scale midpoints between consecutive corner frequencies: band k is then
# centred (in mel) on its filter's centre hz_edges[k + 1] and tiles the axis
# without overlap.
band_bounds_mel = 0.5 * (mel_edges[:-1] + mel_edges[1:])
band_bounds_hz = 700.0 * (10.0 ** (band_bounds_mel / 2595.0) - 1.0)
# Stretch the outermost tiles to 0 Hz and Nyquist so no strip is left unpainted
band_bounds_hz[0] = hz_edges[0]
band_bounds_hz[-1] = hz_edges[-1]
# Lower bounds are clamped to 20 Hz so they remain positive on a log axis
band_lo = np.maximum(band_bounds_hz[mel_idx], 20)
band_hi = band_bounds_hz[mel_idx + 1]

# Build dataframe with explicit rectangle bounds, one row per (band, frame)
# cell in band-major order: per-frame columns are tiled across bands and
# per-band columns are repeated across frames
n_cols = len(time_idx)
df = pd.DataFrame(
    {
        "t1": np.tile(np.round(time_sec, 4), n_mels),
        "t2": np.tile(np.round(time_sec + time_width, 4), n_mels),
        "f1": np.repeat(np.round(band_lo, 1), n_cols),
        "f2": np.repeat(np.round(band_hi, 1), n_cols),
        "dB": np.round(mel_spec_db[np.ix_(mel_idx, time_idx)], 1).ravel(),
    }
)
| 103 | + |
# Annotation labels for key audio features (data storytelling).
# Each anchor is placed on the feature it names, using the formulas the signal
# is synthesized with, so the marker and label actually indicate that feature.
annotations = pd.DataFrame(
    [
        # Sweep fundamental at t = 0.6 s: 1200 * exp(-0.35 * 0.6) ~ 973 Hz
        {"x": 0.6, "y": 973, "label": "Harmonic Sweep"},
        # Chirp at t = 2.2 s: 2000 + 3000 * (2.2 - 1.5) = 4100 Hz
        {"x": 2.2, "y": 4100, "label": "Chirp Burst"},
        # 440 Hz tone at t = 3.3 s, where the 2.5 Hz pulse envelope peaks
        # (peaks fall at t = 0.1 + 0.4 k; at t = 3.5 s the envelope is zero)
        {"x": 3.3, "y": 440, "label": "440 Hz Tone"},
    ]
)
| 112 | + |
# Main spectrogram layer - one rectangle per row of df, spanning t1..t2 on a
# linear time axis and f1..f2 on a log frequency axis, coloured by its dB value

# Styling shared by both axes
axis_style = {
    "labelFontSize": 18,
    "titleFontSize": 22,
    "titlePadding": 14,
    "domainColor": "#444444",
    "tickColor": "#444444",
    "labelColor": "#333333",
    "titleColor": "#222222",
    "tickSize": 6,
}

time_axis = alt.Axis(values=[0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0], **axis_style)

freq_axis = alt.Axis(
    values=[50, 100, 200, 500, 1000, 2000, 5000, 10000],
    # Ticks from 1000 Hz upward are labelled in kilohertz ("1k", "2k", ...)
    labelExpr="datum.value >= 1000 ? format(datum.value / 1000, '.0f') + 'k' : format(datum.value, '.0f')",
    **axis_style,
)

power_legend = alt.Legend(
    title="Power (dB)",
    titleFontSize=18,
    labelFontSize=16,
    gradientLength=480,
    gradientThickness=18,
    titlePadding=10,
    offset=14,
    direction="vertical",
    titleColor="#222222",
    labelColor="#333333",
)

hover_fields = [
    alt.Tooltip("t1:Q", title="Time (s)", format=".2f"),
    alt.Tooltip("f1:Q", title="Freq low (Hz)", format=".0f"),
    alt.Tooltip("f2:Q", title="Freq high (Hz)", format=".0f"),
    alt.Tooltip("dB:Q", title="Power (dB)", format=".1f"),
]

spectrogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X("t1:Q", title="Time (s)", scale=alt.Scale(domain=[0, duration], nice=False), axis=time_axis),
        x2="t2:Q",
        y=alt.Y(
            "f1:Q",
            title="Frequency (Hz)",
            scale=alt.Scale(type="log", domain=[20, 11025], nice=False),
            axis=freq_axis,
        ),
        y2="f2:Q",
        color=alt.Color("dB:Q", scale=alt.Scale(scheme="inferno", domain=[-80, 0]), legend=power_legend),
        tooltip=hover_fields,
    )
)
| 177 | + |
# Both annotation layers read the same table and share the same x/y position
annotation_base = alt.Chart(annotations).encode(x="x:Q", y="y:Q")

# Annotation text layer: bold white label with a dark outline, shifted
# right and slightly up from its anchor
annotation_labels = annotation_base.mark_text(
    fontSize=16,
    fontWeight="bold",
    color="#ffffff",
    strokeWidth=3,
    stroke="#1a1a2e",
    align="left",
    dx=10,
    dy=-6,
).encode(text="label:N")

# Small filled right-pointing triangle drawn at each anchor
annotation_marks = annotation_base.mark_point(
    shape="triangle-right",
    size=150,
    color="#ffffff",
    strokeWidth=2,
    stroke="#1a1a2e",
    filled=True,
)
| 193 | + |
# Layer composition: spectrogram at the bottom, then markers, then labels
chart = (
    alt.layer(spectrogram, annotation_marks, annotation_labels)
    .properties(
        width=1400,
        height=800,
        title=alt.Title(
            "spectrogram-mel · altair · pyplots.ai",
            subtitle="Mel-scaled power spectrogram of a synthesized signal — frequency sweep with harmonics, pulsed 440 Hz tone, and chirp burst",
            fontSize=28,
            subtitleFontSize=17,
            subtitleColor="#555555",
            anchor="start",
            offset=20,
            color="#111111",
        ),
        padding={"left": 24, "right": 24, "top": 24, "bottom": 20},
    )
    # configure() installs a brand-new config object, so it has to come BEFORE
    # the configure_*() calls; placed after them it would discard the axis and
    # view settings below.
    .configure(font="Helvetica Neue, Helvetica, Arial, sans-serif", background="#fafafa")
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
)
| 216 | + |
# Save - static PNG at 3x scale first, then the interactive HTML page
for target, options in (("plot.png", {"scale_factor": 3.0}), ("plot.html", {})):
    chart.save(target, **options)
0 commit comments