Skip to content

Commit 51cba9f

Browse files
feat(seaborn): implement spectrogram-mel (#4765)
## Implementation: `spectrogram-mel` - seaborn

Implements the **seaborn** version of `spectrogram-mel`.

**File:** `plots/spectrogram-mel/implementations/seaborn.py`
**Parent Issue:** #4672

---

:robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/22970857219)*

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 4727962 commit 51cba9f

File tree

2 files changed

+463
-0
lines changed

2 files changed

+463
-0
lines changed
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
""" pyplots.ai
spectrogram-mel: Mel-Spectrogram for Audio Analysis
Library: seaborn 0.13.2 | Python 3.14.3
Quality: 93/100 | Created: 2026-03-11
"""
6+
7+
import os
import sys


# Avoid local seaborn.py shadowing the real seaborn package: this script's own
# directory is taken off sys.path while the third-party imports are resolved.
_script_dir = os.path.dirname(os.path.abspath(__file__))
if _script_dir in sys.path:
    sys.path.remove(_script_dir)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.signal import stft


# Put the script directory back at the front of sys.path once the imports are done.
sys.path.insert(0, _script_dir)
24+
25+
# Seaborn theming for distinctive look: dark style with custom background and
# text colours, then the "talk" context with slightly enlarged fonts.
sns.set_theme(
    style="dark",
    rc={
        "axes.facecolor": "#1a1a2e",
        "figure.facecolor": "#0f0f1a",
        "text.color": "#e0e0e0",
        "axes.labelcolor": "#e0e0e0",
        "xtick.color": "#c0c0c0",
        "ytick.color": "#c0c0c0",
    },
)
sns.set_context("talk", font_scale=1.1)
38+
39+
# Data
np.random.seed(42)  # fixed seed so the added noise is reproducible
sample_rate = 22050
duration = 4.0
n_fft = 2048
hop_length = 512
n_mels = 128
# Sample times in seconds; endpoint=False keeps the spacing at exactly 1 / sample_rate.
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

# Synthesize audio: melody with harmonics and percussive transients
note_names = ["C4", "E4", "G4", "C5", "A4", "F4", "D4", "C4"]
freqs_melody = [261.6, 329.6, 392.0, 523.3, 440.0, 349.2, 293.7, 261.6]
segment_len = len(t) // len(freqs_melody)
audio = np.zeros_like(t)
for i, freq in enumerate(freqs_melody):
    start = i * segment_len
    # The last note absorbs any samples left over by the integer division.
    end = start + segment_len if i < len(freqs_melody) - 1 else len(t)
    seg_t = t[start:end]
    elapsed = seg_t - seg_t[0]  # time since the start of this note
    # Slow decay across the whole note; the 1e-9 guards against a zero-length span.
    envelope = np.exp(-2.0 * elapsed / (seg_t[-1] - seg_t[0] + 1e-9))
    # Fast decay used for the short transient at the note onset.
    onset_env = np.exp(-80.0 * elapsed)
    audio[start:end] = (
        0.6 * np.sin(2 * np.pi * freq * seg_t)
        + 0.3 * np.sin(2 * np.pi * 2 * freq * seg_t)
        + 0.1 * np.sin(2 * np.pi * 3 * freq * seg_t)
    ) * envelope + 0.15 * onset_env * np.sin(2 * np.pi * 5 * freq * seg_t)
audio += 0.02 * np.random.randn(len(audio))

# Compute STFT
freqs_stft, times_stft, Zxx = stft(audio, fs=sample_rate, nperseg=n_fft, noverlap=n_fft - hop_length)
power_spectrum = np.abs(Zxx) ** 2

# Mel filterbank: n_mels triangular filters whose edges are equally spaced on the
# mel scale between f_min and f_max, then mapped back to Hz and to STFT bin indices.
f_min, f_max = 0.0, sample_rate / 2.0
mel_min = 2595.0 * np.log10(1.0 + f_min / 700.0)
mel_max = 2595.0 * np.log10(1.0 + f_max / 700.0)
mel_points = np.linspace(mel_min, mel_max, n_mels + 2)
hz_points = 700.0 * (10.0 ** (mel_points / 2595.0) - 1.0)
bin_indices = np.floor((n_fft + 1) * hz_points / sample_rate).astype(int)

filterbank = np.zeros((n_mels, len(freqs_stft)))
for m in range(n_mels):
    f_left, f_center, f_right = bin_indices[m : m + 3]
    # Rising edge: 0 at f_left, approaching 1 just below f_center.
    if f_center > f_left:
        rising_bins = np.arange(f_left, f_center)
        filterbank[m, f_left:f_center] = (rising_bins - f_left) / (f_center - f_left)
    # Falling edge: 1 at f_center, decreasing towards f_right (exclusive).
    if f_right > f_center:
        falling_bins = np.arange(f_center, f_right)
        filterbank[m, f_center:f_right] = (f_right - falling_bins) / (f_right - f_center)

# Apply mel filterbank and convert to dB
mel_spec = filterbank @ power_spectrum
mel_spec_db = 10 * np.log10(np.maximum(mel_spec, 1e-10))  # floor avoids log10(0)
mel_spec_db -= mel_spec_db.max()  # normalise so the loudest cell is 0 dB

# Build DataFrame for seaborn heatmap (flip so low freq at bottom)
mel_center_freqs = hz_points[1:-1]  # centre frequency (Hz) of each mel band
mel_spec_flipped = mel_spec_db[::-1]

df_spec = pd.DataFrame(mel_spec_flipped, index=np.arange(n_mels), columns=np.arange(mel_spec_flipped.shape[1]))

# Waveform DataFrame for seaborn lineplot (downsample for display)
step = 80
wave_df = pd.DataFrame({"Time (s)": t[::step], "Amplitude": audio[::step]})
102+
103+
# Use seaborn-specific 'mako' palette (not available in plain matplotlib)
cmap_colors = sns.color_palette("mako", as_cmap=True)

# Plot: two-panel layout — waveform + mel-spectrogram
fig, (ax_wave, ax_spec) = plt.subplots(2, 1, figsize=(16, 9), height_ratios=[1, 5], gridspec_kw={"hspace": 0.06})

# Top panel: waveform using seaborn lineplot
sns.lineplot(data=wave_df, x="Time (s)", y="Amplitude", ax=ax_wave, color="#ffcc66", linewidth=0.6, alpha=0.85)
ax_wave.fill_between(wave_df["Time (s)"], wave_df["Amplitude"], alpha=0.15, color="#ffcc66")
ax_wave.set_xlim(0, duration)
ax_wave.set_ylabel("Amp.", fontsize=16, labelpad=8)
# The time axis is labelled on the spectrogram panel below, so the waveform
# panel shows neither an x label nor x tick marks/labels.
ax_wave.set_xlabel("")
ax_wave.tick_params(axis="y", labelsize=13, length=3)
ax_wave.tick_params(axis="x", length=0, labelbottom=False)
ax_wave.set_title(
    "spectrogram-mel \u00b7 seaborn \u00b7 pyplots.ai", fontsize=24, fontweight="bold", pad=14, color="#ffffff"
)
sns.despine(ax=ax_wave, bottom=True, left=False)
ax_wave.spines["top"].set_edgecolor("#444466")
ax_wave.spines["left"].set_edgecolor("#444466")
ax_wave.spines["right"].set_edgecolor("#444466")
for sp in ax_wave.spines.values():
    sp.set_linewidth(0.8)
127+
128+
# Note boundary lines on waveform (one faint dashed line between consecutive notes)
for i in range(1, len(freqs_melody)):
    boundary_time = i * segment_len / sample_rate
    ax_wave.axvline(x=boundary_time, color="#ffffff", alpha=0.12, linewidth=0.8, linestyle="--")

# Note labels on waveform panel, centred in time over each note segment
label_y = ax_wave.get_ylim()[1] * 0.85  # just below the top of the panel
for i, name in enumerate(note_names):
    mid_time = (i + 0.5) * segment_len / sample_rate
    ax_wave.text(
        mid_time,
        label_y,
        name,
        ha="center",
        va="top",
        fontsize=15,
        color="#ffcc66",
        fontweight="bold",
        alpha=0.9,
    )
147+
148+
# Bottom panel: mel-spectrogram heatmap
# Colour range spans -80..0 dB; mel_spec_db was normalised so its maximum is 0 dB.
# Default tick labels are disabled here; custom time/frequency ticks are set afterwards.
sns.heatmap(
    df_spec,
    ax=ax_spec,
    cmap=cmap_colors,
    vmin=-80,
    vmax=0,
    cbar_kws={"label": "Power (dB)", "pad": 0.015, "aspect": 30, "shrink": 0.92},
    xticklabels=False,
    yticklabels=False,
    rasterized=True,
)
160+
161+
# X-axis: time ticks every 0.5 s across the clip; each tick is placed at the
# STFT frame (heatmap column) whose time is closest to the requested second.
x_tick_seconds = np.arange(0, duration + 0.25, 0.5)
x_tick_positions = [np.argmin(np.abs(times_stft - s)) for s in x_tick_seconds]
ax_spec.set_xticks(x_tick_positions)
ax_spec.set_xticklabels([f"{s:.1f}" for s in x_tick_seconds])

# Y-axis: Hz labels at key mel band positions (flipped coordinates)
tick_freqs = [100, 200, 500, 1000, 2000, 4000, 8000]
tick_positions_y = []
tick_labels_y = []
for freq in tick_freqs:
    # Nearest mel band to the requested frequency; rows were flipped for the
    # heatmap, so band idx is drawn at row n_mels - 1 - idx.
    idx = np.argmin(np.abs(mel_center_freqs - freq))
    tick_positions_y.append(n_mels - 1 - idx)
    tick_labels_y.append(f"{freq // 1000}k Hz" if freq >= 1000 else f"{freq} Hz")

ax_spec.set_yticks(tick_positions_y)
ax_spec.set_yticklabels(tick_labels_y)
178+
179+
# Colorbar refinement: fetch the colorbar attached to the heatmap's first
# collection and restyle its ticks, label and outline to match the dark theme.
cbar = ax_spec.collections[0].colorbar
cbar.ax.tick_params(labelsize=14, colors="#c0c0c0")
cbar.set_label("Power (dB)", fontsize=18, color="#e0e0e0")
cbar.outline.set_edgecolor("#444466")
cbar.outline.set_linewidth(0.8)
185+
186+
# Note boundary lines on spectrogram; the heatmap x axis is in STFT frame
# indices, so each boundary time is mapped to its nearest frame first.
for i in range(1, len(freqs_melody)):
    boundary_time = i * segment_len / sample_rate
    x_pos = np.argmin(np.abs(times_stft - boundary_time))
    ax_spec.axvline(x=x_pos, color="#ffffff", alpha=0.12, linewidth=0.8, linestyle="--")
191+
192+
# Harmonic annotations on the last note (C4) to show overtone series
last_note_mid = (len(freqs_melody) - 0.5) * segment_len / sample_rate
x_anno = np.argmin(np.abs(times_stft - last_note_mid))
for h, label in [(1, "f\u2080"), (2, "2f\u2080"), (3, "3f\u2080")]:
    # h-th harmonic of the last note's fundamental.
    freq_h = freqs_melody[-1] * h
    mel_idx = np.argmin(np.abs(mel_center_freqs - freq_h))
    y_pos = n_mels - 1 - mel_idx  # rows are flipped in the heatmap
    ax_spec.plot(x_anno, y_pos, marker="<", color="#ffcc66", markersize=7, alpha=0.9)
    ax_spec.text(
        x_anno + 3,
        y_pos,
        label,
        fontsize=14,
        color="#ffcc66",
        fontweight="bold",
        alpha=0.95,
        va="center",
        ha="left",
        bbox={"boxstyle": "round,pad=0.15", "facecolor": "#1a1a2e", "edgecolor": "none", "alpha": 0.7},
    )
212+
213+
# Style refinement
ax_spec.set_xlabel("Time (s)", fontsize=20, labelpad=10)
ax_spec.set_ylabel("Frequency (mel scale)", fontsize=20, labelpad=10)
ax_spec.tick_params(axis="both", labelsize=16, length=4, width=0.8)

# Use seaborn despine on spectrogram and style remaining spines
sns.despine(ax=ax_spec, top=True, right=True)
for side in ("bottom", "left"):
    ax_spec.spines[side].set_edgecolor("#444466")
    ax_spec.spines[side].set_linewidth(0.8)

# Save; the figure's face colour is passed explicitly so the saved image uses it.
plt.savefig("plot.png", dpi=300, bbox_inches="tight", facecolor=fig.get_facecolor())

0 commit comments

Comments
 (0)