""" pyplots.ai
spectrogram-mel: Mel-Spectrogram for Audio Analysis
Library: letsplot 4.9.0 | Python 3.14.3
Quality: 90/100 | Created: 2026-03-11
"""

import numpy as np
import pandas as pd
from lets_plot import *


# Configure lets-plot's HTML output once, before any plot is built.
LetsPlot.setup_html()

# Data
np.random.seed(42)
sample_rate = 22050
duration = 3.0
n_samples = int(sample_rate * duration)
t = np.linspace(0, duration, n_samples, endpoint=False)

# Synthesize a richer melody: C4, E4, G4, C5 with harmonics and vibrato
melody_freqs = [261.6, 329.6, 392.0, 523.3]
note_names = ["C4", "E4", "G4", "C5"]
# (harmonic number, relative amplitude) pairs layered over each fundamental;
# every overtone is further scaled by 1 / harmonic when it is mixed in.
overtones = [(2, 0.25), (3, 0.15), (4, 0.08), (5, 0.05)]
audio_signal = np.zeros(n_samples)
for i, freq in enumerate(melody_freqs):
    # Each note occupies an equal, consecutive share of the clip.
    start = int(i * n_samples / len(melody_freqs))
    end = int((i + 1) * n_samples / len(melody_freqs))
    segment_t = t[start:end]
    # Smooth fade-in / fade-out so notes start and end at zero amplitude.
    envelope = np.sin(np.linspace(0, np.pi, end - start)) ** 1.5
    # Add slight vibrato for realism: +/-0.5 % pitch deviation at 5.5 Hz.
    vibrato = 1 + 0.005 * np.sin(2 * np.pi * 5.5 * segment_t)
    # Phase is the running integral of the instantaneous frequency
    # (freq * vibrato). Multiplying vibrato straight into 2*pi*freq*t instead
    # makes the pitch swing grow with elapsed time (tens of percent by the
    # last note) rather than staying at the intended 0.5 %.
    phase = 2 * np.pi * freq * np.cumsum(vibrato) / sample_rate
    audio_signal[start:end] += 0.5 * envelope * np.sin(phase)
    for harmonic, amplitude in overtones:
        audio_signal[start:end] += (amplitude / harmonic) * envelope * np.sin(harmonic * phase)

# Low-level white noise floor (reproducible through the seed above).
audio_signal += 0.015 * np.random.randn(n_samples)

# STFT via numpy
n_fft = 2048
hop_length = 512
n_mels = 128

window = np.hanning(n_fft)
# Only frames that fit entirely inside the signal are analysed (no padding).
n_frames = 1 + (n_samples - n_fft) // hop_length
# Rows: one-sided FFT bins, columns: frames. Holds power (|X|^2), not magnitude.
stft_matrix = np.zeros((n_fft // 2 + 1, n_frames))
for frame_idx in range(n_frames):
    start_sample = frame_idx * hop_length
    frame = audio_signal[start_sample : start_sample + n_fft] * window
    stft_matrix[:, frame_idx] = np.abs(np.fft.rfft(frame)) ** 2

# Stamp each frame at the centre of its analysis window. A frame covers
# n_fft samples starting at frame_idx * hop_length, so labelling it with its
# first sample would shift the whole spectrogram n_fft / 2 samples early.
times = (np.arange(n_frames) * hop_length + n_fft / 2) / sample_rate
frequencies = np.fft.rfftfreq(n_fft, 1.0 / sample_rate)

# Mel filter bank
# Hz -> mel uses mel = 2595 * log10(1 + hz / 700); the band edges are spaced
# evenly in mel between 0 Hz and the Nyquist frequency.
mel_low = 2595 * np.log10(1 + 0 / 700)
mel_high = 2595 * np.log10(1 + (sample_rate / 2) / 700)
# n_mels + 2 points: every filter needs a left edge, a centre and a right edge.
mel_points = np.linspace(mel_low, mel_high, n_mels + 2)
hz_points = 700 * (10 ** (mel_points / 2595) - 1)
fft_bins = np.floor((n_fft + 1) * hz_points / sample_rate).astype(int)

n_bins = len(frequencies)
mel_filterbank = np.zeros((n_mels, n_bins))
# Consecutive (left, centre, right) bin triples define one triangular filter each.
for band, (f_left, f_center, f_right) in enumerate(zip(fft_bins[:-2], fft_bins[1:-1], fft_bins[2:])):
    rising = np.arange(f_left, min(f_center, n_bins))
    falling = np.arange(f_center, min(f_right, n_bins))
    # A slope is skipped when its two edges fall on the same FFT bin.
    if rising.size:
        mel_filterbank[band, rising] = (rising - f_left) / (f_center - f_left)
    if falling.size:
        mel_filterbank[band, falling] = (f_right - falling) / (f_right - f_center)

mel_spec = mel_filterbank @ stft_matrix
# The small offset keeps log10 finite for bands that carry no energy.
mel_spec_db = 10 * np.log10(mel_spec + 1e-10)

# Clip dB range to emphasize musical content and reduce noise
db_min = -10.0
db_max = float(np.max(mel_spec_db))
mel_spec_db = np.clip(mel_spec_db, db_min, db_max)

# Mel band center frequencies in Hz (for tooltip display)
# The triangular filters are centred on the interior points of the
# (n_mels + 2)-point mel grid, so those points are the band centres. An
# n_mels-point linspace over the full range drifts by up to one band.
mel_center_hz = hz_points[1:-1]

# Thin the grid when it grows large so the tile layer stays manageable;
# both steps fall back to 1 (no thinning) for small inputs.
time_step = max(1, len(times) // 300)
mel_step = max(1, n_mels // 128)
times_ds = times[::time_step]
mel_indices_ds = np.arange(0, n_mels, mel_step)
mel_spec_ds = mel_spec_db[::mel_step, ::time_step]

# Map mel indices to Hz for tooltip display
mel_hz_ds = mel_center_hz[mel_indices_ds]

# Build DataFrame: one row per (mel band, time frame) tile, in row-major
# order of the (band, frame) grid so all four columns line up.
time_grid, mel_idx_grid = np.meshgrid(times_ds, mel_indices_ds)
hz_grid = np.broadcast_to(mel_hz_ds[:, None], mel_spec_ds.shape)
df = pd.DataFrame(
    {
        "Time (s)": time_grid.ravel(),
        "Mel Band": mel_idx_grid.ravel(),
        "Power (dB)": mel_spec_ds.ravel(),
        "Freq (Hz)": hz_grid.ravel(),
    }
)

def hz_to_mel(hz):
    """Convert a frequency in Hz to mel (2595 * log10(1 + hz / 700))."""
    return 2595 * np.log10(1 + hz / 700)


# Y-axis breaks: map Hz values to mel band indices
label_hz = [100, 200, 500, 1000, 2000, 5000, 10000]
label_mel_vals = [hz_to_mel(f) for f in label_hz]
# Band m is centred on interior point m + 1 of the (n_mels + 2)-point mel
# grid, so interpolate against those centres; an n_mels-point linspace over
# the full range would place ticks and labels up to one band off.
mel_range = mel_points[1:-1]
band_indices = np.arange(n_mels)
label_indices = [float(np.interp(mv, mel_range, band_indices)) for mv in label_mel_vals]
label_strs = ["100", "200", "500", "1k", "2k", "5k", "10k"]

# Note annotation positions (mel band index for each fundamental), placed at
# the temporal midpoint of each note's equal share of the clip.
note_annotations = [
    {
        "x": (i + 0.5) * duration / len(melody_freqs),
        "y": float(np.interp(hz_to_mel(freq), mel_range, band_indices)),
        "label": name,
    }
    for i, (freq, name) in enumerate(zip(melody_freqs, note_names, strict=True))
]

df_notes = pd.DataFrame(note_annotations)

# Custom dark color scheme for spectrogram
bg_color = "#1a1a2e"
panel_color = "#16213e"
text_color = "#e0e0e0"
grid_color = "#2a2a4a"
muted_text_color = "#b0b0b0"  # tick and legend labels, dimmer than titles

# Hover tooltip for each tile: time and frequency on one line, power below.
tile_tooltips = (
    layer_tooltips()
    .title("Mel Spectrogram")
    .line("@{Time (s)}s | @{Freq (Hz)} Hz")
    .line("Power|@{Power (dB)} dB")
    .format("Time (s)", ".2f")
    .format("Freq (Hz)", ".0f")
    .format("Power (dB)", ".1f")
    .min_width(180)
)

# Overrides layered on top of the darcula flavor (added after it in the chain).
dark_theme = theme(
    plot_title=element_text(size=24, face="bold", color=text_color),
    plot_background=element_rect(fill=bg_color),
    panel_background=element_rect(fill=panel_color),
    axis_title=element_text(size=20, color=text_color),
    axis_text=element_text(size=16, color=muted_text_color),
    axis_line=element_blank(),
    axis_ticks=element_line(color=grid_color, size=0.5),
    panel_grid_major_x=element_line(color=grid_color, size=0.3),
    panel_grid_major_y=element_line(color=grid_color, size=0.3),
    panel_grid_minor=element_blank(),
    legend_title=element_text(size=16, color=text_color),
    legend_text=element_text(size=14, color=muted_text_color),
    legend_background=element_rect(fill=bg_color, color=bg_color),
    plot_margin=[40, 20, 20, 20],
)

# Plot with polished dark theme and lets-plot distinctive features
plot = (
    ggplot(df, aes(x="Time (s)", y="Mel Band", fill="Power (dB)"))
    + geom_tile(tooltips=tile_tooltips)
    # Note names drawn at each fundamental, from their own small data frame.
    + geom_text(aes(x="x", y="y", label="label"), data=df_notes, color="#ffffff", size=14, fontface="bold", alpha=0.85)
    + scale_fill_viridis(option="inferno", name="Power\n(dB)", limits=[db_min, db_max])
    # The y data are mel band indices; the breaks relabel them in Hz.
    + scale_y_continuous(breaks=label_indices, labels=label_strs, expand=[0, 0])
    + scale_x_continuous(expand=[0, 0])
    + labs(x="Time (s)", y="Frequency (Hz)", title="spectrogram-mel · letsplot · pyplots.ai")
    + ggsize(1600, 900)
    + flavor_darcula()
    + dark_theme
)

# Save
# Static PNG, exported with scale=3 (presumably 3x the 1600x900 plot size —
# confirm against the ggsave documentation).
ggsave(plot, "plot.png", path=".", scale=3)
# Interactive HTML version, which keeps the hover tooltips.
ggsave(plot, "plot.html", path=".")