""" pyplots.ai
spectrogram-mel: Mel-Spectrogram for Audio Analysis
Library: seaborn 0.13.2 | Python 3.14.3
Quality: 93/100 | Created: 2026-03-11
"""

import os
import sys


# Avoid local seaborn.py shadowing the real seaborn package: temporarily drop
# this script's directory from sys.path while the third-party imports resolve.
_script_dir = os.path.dirname(os.path.abspath(__file__))
_script_dir_removed = _script_dir in sys.path
if _script_dir_removed:
    sys.path.remove(_script_dir)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.signal import stft


# Put the script directory back only if it was actually removed above, so the
# shim never adds a path entry that was not there to begin with.
if _script_dir_removed:
    sys.path.insert(0, _script_dir)

# Seaborn theming for distinctive look: "dark" style with custom dark
# axes/figure backgrounds and light text, label and tick colors.
sns.set_theme(
    style="dark",
    rc={
        "axes.facecolor": "#1a1a2e",
        "figure.facecolor": "#0f0f1a",
        "text.color": "#e0e0e0",
        "axes.labelcolor": "#e0e0e0",
        "xtick.color": "#c0c0c0",
        "ytick.color": "#c0c0c0",
    },
)
# "talk" plotting context with fonts scaled by a further 1.1
sns.set_context("talk", font_scale=1.1)

# Data
np.random.seed(42)  # fixed seed so the additive noise is reproducible
sample_rate = 22050
duration = 4.0
n_fft = 2048
hop_length = 512
n_mels = 128
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

# Synthesize audio: melody with harmonics and percussive transients
note_names = ["C4", "E4", "G4", "C5", "A4", "F4", "D4", "C4"]
freqs_melody = [261.6, 329.6, 392.0, 523.3, 440.0, 349.2, 293.7, 261.6]
segment_len = len(t) // len(freqs_melody)
audio = np.zeros_like(t)
for i, freq in enumerate(freqs_melody):
    start = i * segment_len
    # The last note absorbs any samples left over by the integer division.
    end = start + segment_len if i < len(freqs_melody) - 1 else len(t)
    seg_t = t[start:end]
    # Slow decay across the whole note; 1e-9 guards against a zero-length span.
    envelope = np.exp(-2.0 * (seg_t - seg_t[0]) / (seg_t[-1] - seg_t[0] + 1e-9))
    # Fast decay that confines the 5th-harmonic burst to the note onset.
    onset_env = np.exp(-80.0 * (seg_t - seg_t[0]))
    audio[start:end] = (
        0.6 * np.sin(2 * np.pi * freq * seg_t)
        + 0.3 * np.sin(2 * np.pi * 2 * freq * seg_t)
        + 0.1 * np.sin(2 * np.pi * 3 * freq * seg_t)
    ) * envelope + 0.15 * onset_env * np.sin(2 * np.pi * 5 * freq * seg_t)
audio += 0.02 * np.random.randn(len(audio))

# Compute STFT (window of n_fft samples, advancing hop_length samples per frame)
freqs_stft, times_stft, Zxx = stft(audio, fs=sample_rate, nperseg=n_fft, noverlap=n_fft - hop_length)
power_spectrum = np.abs(Zxx) ** 2

# Mel filterbank: n_mels + 2 points equally spaced on the mel scale between
# f_min and f_max, converted back to Hz and then to STFT bin indices.
f_min, f_max = 0.0, sample_rate / 2.0
mel_min = 2595.0 * np.log10(1.0 + f_min / 700.0)
mel_max = 2595.0 * np.log10(1.0 + f_max / 700.0)
mel_points = np.linspace(mel_min, mel_max, n_mels + 2)
hz_points = 700.0 * (10.0 ** (mel_points / 2595.0) - 1.0)
bin_indices = np.floor((n_fft + 1) * hz_points / sample_rate).astype(int)

# Each row is one triangular filter: it rises from 0 at f_left towards f_center
# and falls from 1 at f_center towards f_right (f_right itself is left at 0).
filterbank = np.zeros((n_mels, len(freqs_stft)))
for m in range(1, n_mels + 1):
    f_left, f_center, f_right = bin_indices[m - 1], bin_indices[m], bin_indices[m + 1]
    if f_center > f_left:  # skip a degenerate (zero-width) rising slope
        filterbank[m - 1, f_left:f_center] = (np.arange(f_left, f_center) - f_left) / (f_center - f_left)
    if f_right > f_center:  # skip a degenerate (zero-width) falling slope
        filterbank[m - 1, f_center:f_right] = (f_right - np.arange(f_center, f_right)) / (f_right - f_center)

# Apply mel filterbank and convert to dB (floored at 1e-10 to avoid log of zero),
# then shift so the loudest cell sits at 0 dB.
mel_spec = filterbank @ power_spectrum
mel_spec_db = 10 * np.log10(np.maximum(mel_spec, 1e-10))
mel_spec_db -= mel_spec_db.max()

# Build DataFrame for seaborn heatmap (flip so low freq at bottom)
# Center frequency (Hz) of each mel band: the interior mel points, i.e. without
# the two outer edge points.
mel_center_freqs = 700.0 * (10.0 ** (mel_points[1:-1] / 2595.0) - 1.0)
mel_spec_flipped = mel_spec_db[::-1]

# Rows are flipped mel bands, columns are STFT frames; both use plain integer labels.
df_spec = pd.DataFrame(mel_spec_flipped, index=np.arange(n_mels), columns=np.arange(mel_spec_flipped.shape[1]))

# Waveform DataFrame for seaborn lineplot (downsample for display)
step = 80  # keep every 80th sample
wave_df = pd.DataFrame({"Time (s)": t[::step], "Amplitude": audio[::step]})

# Use seaborn-specific 'mako' palette (not available in plain matplotlib)
cmap_colors = sns.color_palette("mako", as_cmap=True)

# Plot: two-panel layout — waveform + mel-spectrogram
# The waveform strip gets 1/6 of the height, the spectrogram the remaining 5/6.
fig, (ax_wave, ax_spec) = plt.subplots(2, 1, figsize=(16, 9), height_ratios=[1, 5], gridspec_kw={"hspace": 0.06})

# Top panel: waveform using seaborn lineplot
sns.lineplot(data=wave_df, x="Time (s)", y="Amplitude", ax=ax_wave, color="#ffcc66", linewidth=0.6, alpha=0.85)
ax_wave.fill_between(wave_df["Time (s)"], wave_df["Amplitude"], alpha=0.15, color="#ffcc66")
ax_wave.set_xlim(0, duration)
ax_wave.set_ylabel("Amp.", fontsize=16, labelpad=8)
# No x label, tick labels or tick marks on this panel.
ax_wave.set_xlabel("")
ax_wave.set_xticklabels([])
ax_wave.tick_params(axis="y", labelsize=13, length=3)
ax_wave.tick_params(axis="x", length=0)
ax_wave.set_title(
    "spectrogram-mel \u00b7 seaborn \u00b7 pyplots.ai", fontsize=24, fontweight="bold", pad=14, color="#ffffff"
)
sns.despine(ax=ax_wave, bottom=True, left=False)
for side in ("top", "left", "right"):
    ax_wave.spines[side].set_edgecolor("#444466")
for sp in ax_wave.spines.values():
    sp.set_linewidth(0.8)

# Note boundary lines on waveform
for i in range(1, len(freqs_melody)):
    boundary_time = i * segment_len / sample_rate
    ax_wave.axvline(x=boundary_time, color="#ffffff", alpha=0.12, linewidth=0.8, linestyle="--")

# Note labels on waveform panel, centered in each note's time span.
# The label height is read once, before any text is added.
label_y = ax_wave.get_ylim()[1] * 0.85
for i, name in enumerate(note_names):
    mid_time = (i + 0.5) * segment_len / sample_rate
    ax_wave.text(
        mid_time,
        label_y,
        name,
        ha="center",
        va="top",
        fontsize=15,
        color="#ffcc66",
        fontweight="bold",
        alpha=0.9,
    )

# Bottom panel: mel-spectrogram heatmap
sns.heatmap(
    df_spec,
    ax=ax_spec,
    cmap=cmap_colors,
    vmin=-80,
    vmax=0,
    cbar_kws={"label": "Power (dB)", "pad": 0.015, "aspect": 30, "shrink": 0.92},
    xticklabels=False,
    yticklabels=False,
    rasterized=True,
)
# NOTE(review): the colorbar presumably takes its space from ax_spec only, which
# would leave this panel narrower than the waveform panel so the two time axes
# do not line up exactly — confirm in the rendered figure.


def _frame_x(seconds):
    """Return the heatmap x-coordinate at the center of the STFT frame nearest *seconds*."""
    # Heatmap cell j spans [j, j + 1], so its center is j + 0.5.
    return np.argmin(np.abs(times_stft - seconds)) + 0.5


def _band_y(freq_hz):
    """Return the heatmap y-coordinate at the center of the mel band nearest *freq_hz*."""
    # Rows were flipped when building the heatmap data, hence n_mels - 1 - index.
    return n_mels - 1 - np.argmin(np.abs(mel_center_freqs - freq_hz)) + 0.5


# X-axis: time ticks every 0.5 s up to the signal duration
x_tick_seconds = np.arange(0, duration + 0.25, 0.5)
x_tick_positions = [_frame_x(s) for s in x_tick_seconds]
ax_spec.set_xticks(x_tick_positions)
ax_spec.set_xticklabels([f"{s:.1f}" for s in x_tick_seconds])

# Y-axis: Hz labels at key mel band positions (flipped coordinates)
tick_freqs = [100, 200, 500, 1000, 2000, 4000, 8000]
tick_positions_y = [_band_y(freq) for freq in tick_freqs]
tick_labels_y = [f"{freq // 1000}k Hz" if freq >= 1000 else f"{freq} Hz" for freq in tick_freqs]

ax_spec.set_yticks(tick_positions_y)
ax_spec.set_yticklabels(tick_labels_y)

# Colorbar refinement
cbar = ax_spec.collections[0].colorbar
cbar.ax.tick_params(labelsize=14, colors="#c0c0c0")
cbar.set_label("Power (dB)", fontsize=18, color="#e0e0e0")
cbar.outline.set_edgecolor("#444466")
cbar.outline.set_linewidth(0.8)

# Note boundary lines on spectrogram
for i in range(1, len(freqs_melody)):
    boundary_time = i * segment_len / sample_rate
    ax_spec.axvline(x=_frame_x(boundary_time), color="#ffffff", alpha=0.12, linewidth=0.8, linestyle="--")

# Harmonic annotations on the last note (C4) to show overtone series
last_note_mid = (len(freqs_melody) - 0.5) * segment_len / sample_rate
x_anno = _frame_x(last_note_mid)
for h, label in [(1, "f\u2080"), (2, "2f\u2080"), (3, "3f\u2080")]:
    # Use the last note's own fundamental so marker and annotated note agree.
    y_pos = _band_y(freqs_melody[-1] * h)
    ax_spec.plot(x_anno, y_pos, marker="<", color="#ffcc66", markersize=7, alpha=0.9)
    ax_spec.text(
        x_anno + 3,  # place the label a few frames to the right of the marker
        y_pos,
        label,
        fontsize=14,
        color="#ffcc66",
        fontweight="bold",
        alpha=0.95,
        va="center",
        ha="left",
        bbox={"boxstyle": "round,pad=0.15", "facecolor": "#1a1a2e", "edgecolor": "none", "alpha": 0.7},
    )

# Style refinement
ax_spec.set_xlabel("Time (s)", fontsize=20, labelpad=10)
ax_spec.set_ylabel("Frequency (mel scale)", fontsize=20, labelpad=10)
ax_spec.tick_params(axis="both", labelsize=16, length=4, width=0.8)

# Use seaborn despine on spectrogram and style remaining spines
sns.despine(ax=ax_spec, top=True, right=True)
for side in ("bottom", "left"):
    ax_spec.spines[side].set_edgecolor("#444466")
    ax_spec.spines[side].set_linewidth(0.8)

# Save: write the figure to plot.png at 300 dpi with a tight bounding box,
# passing the figure's own face color explicitly.
plt.savefig("plot.png", dpi=300, bbox_inches="tight", facecolor=fig.get_facecolor())