""" pyplots.ai
spectrogram-mel: Mel-Spectrogram for Audio Analysis
Library: plotnine 0.15.3 | Python 3.14.3
Quality: 90/100 | Created: 2026-03-11
"""

import numpy as np
import pandas as pd
from plotnine import (
    aes,
    coord_cartesian,
    element_blank,
    element_rect,
    element_text,
    geom_raster,
    geom_segment,
    geom_text,
    ggplot,
    guide_colorbar,
    guides,
    labs,
    scale_fill_gradientn,
    scale_x_continuous,
    scale_y_continuous,
    theme,
    theme_minimal,
)
from scipy.signal import stft


# Data - synthesize a 3-second audio signal with speech-like frequency components
np.random.seed(42)  # fixed seed so the noise term (and therefore the image) is reproducible
sample_rate = 22050
duration = 3.0
n_samples = int(sample_rate * duration)
t = np.linspace(0, duration, n_samples, endpoint=False)

# Build a rich audio signal: fundamental + harmonics with time-varying amplitude.
# Every partial is expressed as a multiple of `fundamental` so the harmonic
# structure stays consistent if the base pitch is changed.
fundamental = 220
signal = (
    # fundamental, slowly decaying
    0.6 * np.sin(2 * np.pi * fundamental * t) * np.exp(-0.3 * t)
    # 2x partial with a 1.5 Hz tremolo
    + 0.4 * np.sin(2 * np.pi * (2 * fundamental) * t) * (0.5 + 0.5 * np.sin(2 * np.pi * 1.5 * t))
    # 4x partial, decaying faster than the fundamental
    + 0.3 * np.sin(2 * np.pi * (4 * fundamental) * t) * np.exp(-0.5 * t)
    # 6x partial, fading linearly to zero at the end of the clip
    + 0.2 * np.sin(2 * np.pi * (6 * fundamental) * t) * (1 - t / duration)
    # 15x partial, short-lived high-frequency component
    + 0.15 * np.sin(2 * np.pi * (15 * fundamental) * t) * np.exp(-1.0 * t)
    # decaying Gaussian noise
    + 0.1 * np.random.randn(n_samples) * np.exp(-0.8 * t)
)

# Add a frequency sweep (chirp) from 500 to 4000 Hz in the middle section
chirp_mask = (t > 0.8) & (t < 2.0)
chirp_freq = 500 + (4000 - 500) * (t[chirp_mask] - 0.8) / 1.2
# Phase is the running integral of the instantaneous frequency; the cumulative
# sum divided by the sample rate is its discrete approximation.
signal[chirp_mask] += 0.35 * np.sin(2 * np.pi * np.cumsum(chirp_freq) / sample_rate)

# STFT: 2048-sample segments advanced by 512 samples between frames
n_fft = 2048
hop_length = 512
_, time_bins, Zxx = stft(signal, fs=sample_rate, nperseg=n_fft, noverlap=n_fft - hop_length)
power_spec = np.abs(Zxx) ** 2  # (n_freq, n_frames) power per STFT bin


def _hz_to_mel(hz):
    """Map a frequency in Hz onto the mel scale."""
    return 2595.0 * np.log10(1.0 + hz / 700.0)


def _mel_to_hz(mel):
    """Inverse of ``_hz_to_mel``: map mel values back to Hz."""
    return 700.0 * (10.0 ** (mel / 2595.0) - 1.0)


# Mel filterbank
n_mels = 128
freq_bins = np.linspace(0, sample_rate / 2, power_spec.shape[0])

# n_mels triangular filters need n_mels + 2 edge points, spaced evenly in mel
mel_low = _hz_to_mel(0.0)
mel_high = _hz_to_mel(sample_rate / 2)
mel_points = np.linspace(mel_low, mel_high, n_mels + 2)
hz_points = _mel_to_hz(mel_points)

# Vectorized mel filterbank using numpy broadcasting
lower = hz_points[:-2, np.newaxis]  # (n_mels, 1)
center = hz_points[1:-1, np.newaxis]  # (n_mels, 1)
upper = hz_points[2:, np.newaxis]  # (n_mels, 1)
freqs = freq_bins[np.newaxis, :]  # (1, n_freq)

# np.where evaluates both branches before selecting, so the `!=` guards alone
# do not prevent a division by a zero-width edge from being computed; errstate
# keeps such a case silent while the guard still selects 0.0 for it.
with np.errstate(divide="ignore", invalid="ignore"):
    rising = np.where(
        (freqs >= lower) & (freqs <= center) & (center != lower),
        (freqs - lower) / (center - lower),
        0.0,
    )
    falling = np.where(
        (freqs > center) & (freqs <= upper) & (upper != center),
        (upper - freqs) / (upper - center),
        0.0,
    )
filterbank = rising + falling  # (n_mels, n_freq) triangular weights

# Apply mel filterbank and convert to dB
mel_spec = filterbank @ power_spec
mel_spec_db = 10 * np.log10(np.maximum(mel_spec, 1e-10))  # floor avoids log10(0)
mel_spec_db -= mel_spec_db.max()  # normalise so the loudest cell sits at 0 dB

# Build long-form DataFrame with evenly-spaced mel band indices for smooth raster
mel_center_freqs = hz_points[1:-1]  # centre frequency (Hz) of each mel filter
band_index = np.arange(n_mels)
time_grid, mel_idx_grid = np.meshgrid(time_bins, band_index)

df = pd.DataFrame(
    {
        "Time (s)": time_grid.ravel(),
        "mel_band": mel_idx_grid.ravel(),
        "Power (dB)": mel_spec_db.ravel(),
    }
)

# Y-axis tick positions: map Hz values to mel band indices.
# Ticks above the Nyquist frequency are dropped.
nyquist = sample_rate / 2
y_ticks_hz = [f for f in (128, 256, 512, 1024, 2048, 4096, 8000) if f <= nyquist]
# Convert Hz to (fractional) mel band index via interpolation
y_ticks_band = np.interp(y_ticks_hz, mel_center_freqs, band_index)


# Annotation data — grammar-of-graphics approach: data-driven geom layers.
# Band positions of the fundamental and of the 4x partial (880 Hz for a 220 Hz base).
f0_band = float(np.interp(fundamental, mel_center_freqs, band_index))
h3_band = float(np.interp(4 * fundamental, mel_center_freqs, band_index))

df_labels = pd.DataFrame(
    {"x": [2.85, 2.85], "y": [f0_band, h3_band], "label": ["F\u2080", "3rd"], "color": ["#fcffa4", "#fb9b06"]}
)
# Faint horizontal guide lines spanning the full clip at each annotated band
df_reflines = pd.DataFrame(
    {"x": [0.0, 0.0], "xend": [duration, duration], "y": [f0_band, h3_band], "yend": [f0_band, h3_band]}
)

# Plot — geom_raster for smooth spectrogram, data-driven geom_text/geom_segment for annotations

# Dark-to-bright colour ramp for the power scale; the annotation colours below
# reuse its two brightest tones so labels and guide lines match the heatmap.
inferno_colors = [
    "#000004",
    "#1b0c41",
    "#4a0c6b",
    "#781c6d",
    "#a52c60",
    "#cf4446",
    "#ed6925",
    "#fb9b06",
    "#f7d13d",
    "#fcffa4",
]

plot = (
    ggplot(df, aes(x="Time (s)", y="mel_band", fill="Power (dB)"))
    + geom_raster(interpolate=True)
    + scale_fill_gradientn(colors=inferno_colors, name="Power (dB)")
    + guides(fill=guide_colorbar(nbin=256, display="raster"))
    # Label for the fundamental (row 0 of df_labels)
    + geom_text(
        aes(x="x", y="y", label="label"),
        data=df_labels.iloc[[0]],
        inherit_aes=False,
        color="#fcffa4",
        size=11,
        ha="right",
        fontweight="bold",
        alpha=0.85,
    )
    # Label for the 880 Hz partial (row 1 of df_labels), smaller and fainter
    + geom_text(
        aes(x="x", y="y", label="label"),
        data=df_labels.iloc[[1]],
        inherit_aes=False,
        color="#fb9b06",
        size=9,
        ha="right",
        alpha=0.7,
    )
    # Guide line at the fundamental's band
    + geom_segment(
        aes(x="x", xend="xend", y="y", yend="yend"),
        data=df_reflines.iloc[[0]],
        inherit_aes=False,
        color="#fcffa4",
        alpha=0.15,
        size=0.4,
    )
    # Guide line at the 880 Hz partial's band
    + geom_segment(
        aes(x="x", xend="xend", y="y", yend="yend"),
        data=df_reflines.iloc[[1]],
        inherit_aes=False,
        color="#fb9b06",
        alpha=0.12,
        size=0.3,
    )
    + scale_x_continuous(expand=(0, 0))
    # Breaks are mel band indices; labels show the corresponding Hz values
    + scale_y_continuous(breaks=y_ticks_band.tolist(), labels=[str(f) for f in y_ticks_hz], expand=(0, 0))
    + coord_cartesian(ylim=(0, n_mels - 1))
    + labs(x="Time (s)", y="Frequency (Hz)", title="spectrogram-mel \u00b7 plotnine \u00b7 pyplots.ai")
    + theme_minimal()
    + theme(
        figure_size=(16, 9),
        text=element_text(family="sans-serif"),
        plot_title=element_text(size=24, ha="center", weight="bold", color="#e0e0e0", margin={"b": 8}),
        axis_title_x=element_text(size=20, color="#cccccc", margin={"t": 10}),
        axis_title_y=element_text(size=20, color="#cccccc", margin={"r": 8}),
        axis_text_x=element_text(size=16, color="#aaaaaa"),
        axis_text_y=element_text(size=16, color="#aaaaaa"),
        legend_title=element_text(size=16, weight="bold", color="#cccccc"),
        legend_text=element_text(size=14, color="#aaaaaa"),
        legend_position="right",
        legend_key_height=60,
        legend_key_width=14,
        panel_grid_major=element_blank(),
        panel_grid_minor=element_blank(),
        panel_background=element_rect(fill="#000004", color="none"),
        plot_background=element_rect(fill="#0e0e1a", color="none"),
        plot_margin=0.02,
    )
)

plot.save("plot.png", dpi=300, verbose=False)