Skip to content

Commit 1195e29

Browse files
feat(plotnine): implement spectrogram-mel (#4762)
## Implementation: `spectrogram-mel` - plotnine

Implements the **plotnine** version of `spectrogram-mel`.

**File:** `plots/spectrogram-mel/implementations/plotnine.py`
**Parent Issue:** #4672

---

:robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/22970857471)*

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 227cdc3 commit 1195e29

File tree

2 files changed

+425
-0
lines changed

2 files changed

+425
-0
lines changed
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
""" pyplots.ai
2+
spectrogram-mel: Mel-Spectrogram for Audio Analysis
3+
Library: plotnine 0.15.3 | Python 3.14.3
4+
Quality: 90/100 | Created: 2026-03-11
5+
"""
6+
7+
import numpy as np
8+
import pandas as pd
9+
from plotnine import (
10+
aes,
11+
coord_cartesian,
12+
element_blank,
13+
element_rect,
14+
element_text,
15+
geom_raster,
16+
geom_segment,
17+
geom_text,
18+
ggplot,
19+
guide_colorbar,
20+
guides,
21+
labs,
22+
scale_fill_gradientn,
23+
scale_x_continuous,
24+
scale_y_continuous,
25+
theme,
26+
theme_minimal,
27+
)
28+
from scipy.signal import stft
29+
30+
31+
# Data - synthesize a 3-second audio signal with speech-like frequency components
np.random.seed(42)  # fixed seed keeps the noise term (and therefore the image) reproducible
sample_rate = 22050  # samples per second
duration = 3.0  # seconds
n_samples = int(sample_rate * duration)
t = np.linspace(0, duration, n_samples, endpoint=False)

# Build a rich audio signal: fundamental + harmonics with time-varying amplitude.
# Each term is a sinusoid multiplied by its own amplitude envelope.
fundamental = 220
signal = (
    # 220 Hz fundamental, slow exponential decay
    0.6 * np.sin(2 * np.pi * fundamental * t) * np.exp(-0.3 * t)
    # 440 Hz partial with a 1.5 Hz amplitude modulation
    + 0.4 * np.sin(2 * np.pi * 440 * t) * (0.5 + 0.5 * np.sin(2 * np.pi * 1.5 * t))
    # 880 Hz partial, faster exponential decay
    + 0.3 * np.sin(2 * np.pi * 880 * t) * np.exp(-0.5 * t)
    # 1320 Hz partial, linear fade to zero at the end of the clip
    + 0.2 * np.sin(2 * np.pi * 1320 * t) * (1 - t / duration)
    # 3300 Hz partial, fastest exponential decay
    + 0.15 * np.sin(2 * np.pi * 3300 * t) * np.exp(-1.0 * t)
    # Gaussian noise with a decaying envelope
    + 0.1 * np.random.randn(n_samples) * np.exp(-0.8 * t)
)

# Add a frequency sweep (chirp) from 500 to 4000 Hz in the middle section
chirp_start, chirp_end = 0.8, 2.0  # seconds; the sweep is active strictly between these
chirp_f0, chirp_f1 = 500, 4000  # Hz at the start / end of the sweep
chirp_mask = (t > chirp_start) & (t < chirp_end)
# Instantaneous frequency rises linearly with time across the masked window
chirp_freq = chirp_f0 + (chirp_f1 - chirp_f0) * (t[chirp_mask] - chirp_start) / (chirp_end - chirp_start)
# The phase is the running sum of the instantaneous frequency over the samples
# (cumsum / sample_rate), so the sinusoid follows chirp_freq rather than a fixed pitch.
signal[chirp_mask] += 0.35 * np.sin(2 * np.pi * np.cumsum(chirp_freq) / sample_rate)
53+
54+
# STFT
n_fft = 2048
hop_length = 512
# stft takes the overlap between segments, not the hop, hence n_fft - hop_length.
# The returned frequency axis is discarded; an equivalent axis is rebuilt below.
_, time_bins, Zxx = stft(signal, fs=sample_rate, nperseg=n_fft, noverlap=n_fft - hop_length)
power_spec = np.abs(Zxx) ** 2

# Mel filterbank
n_mels = 128
# One frequency per STFT row, evenly spaced from 0 Hz to the Nyquist frequency
freq_bins = np.linspace(0, sample_rate / 2, power_spec.shape[0])

# Hz -> mel: m = 2595 * log10(1 + f / 700), evaluated at 0 Hz and at Nyquist
mel_low = 2595.0 * np.log10(1.0 + 0 / 700.0)
mel_high = 2595.0 * np.log10(1.0 + (sample_rate / 2) / 700.0)
# n_mels triangles need n_mels + 2 edge points (each triangle uses three consecutive points)
mel_points = np.linspace(mel_low, mel_high, n_mels + 2)
# mel -> Hz (inverse of the formula above)
hz_points = 700.0 * (10.0 ** (mel_points / 2595.0) - 1.0)

# Vectorized mel filterbank using numpy broadcasting
lower = hz_points[:-2, np.newaxis]  # (n_mels, 1)
center = hz_points[1:-1, np.newaxis]  # (n_mels, 1)
upper = hz_points[2:, np.newaxis]  # (n_mels, 1)
freqs = freq_bins[np.newaxis, :]  # (1, n_freq)

# Rising edge of each triangle: 0 at `lower`, 1 at `center`.
# The `center != lower` term zeroes the weight for a zero-width edge.
rising = np.where(
    (freqs >= lower) & (freqs <= center) & (center != lower),
    (freqs - lower) / (center - lower),
    0.0,
)
# Falling edge: 1 just above `center`, 0 at `upper`; same zero-width guard.
falling = np.where(
    (freqs > center) & (freqs <= upper) & (upper != center),
    (upper - freqs) / (upper - center),
    0.0,
)
filterbank = rising + falling  # (n_mels, n_freq)

# Apply mel filterbank and convert to dB
mel_spec = filterbank @ power_spec  # (n_mels, n_time)
# Clamp at 1e-10 before the log so empty bands give -100 dB instead of -inf
mel_spec_db = 10 * np.log10(np.maximum(mel_spec, 1e-10))
# Reference everything to the loudest cell: the maximum becomes 0 dB
mel_spec_db -= mel_spec_db.max()
83+
84+
# Build long-form DataFrame with evenly-spaced mel band indices for smooth raster
# Centre frequency (Hz) of each mel band: the interior mel points converted back to Hz
mel_center_freqs = 700.0 * (10.0 ** (mel_points[1:-1] / 2595.0) - 1.0)
band_indices = np.arange(n_mels)  # 0 .. n_mels - 1, used as the raster's y coordinate
# meshgrid yields (n_mels, n_time) grids, matching the layout of mel_spec_db
time_grid, mel_idx_grid = np.meshgrid(time_bins, band_indices)

df = pd.DataFrame(
    {
        "Time (s)": time_grid.ravel(),
        "mel_band": mel_idx_grid.ravel(),
        "Power (dB)": mel_spec_db.ravel(),
    }
)

# Y-axis tick positions: map Hz values to mel band indices
y_ticks_hz = [128, 256, 512, 1024, 2048, 4096, 8000]
# Drop any tick above the Nyquist frequency
y_ticks_hz = [f for f in y_ticks_hz if f <= sample_rate / 2]
# Convert Hz to mel band index via interpolation
y_ticks_band = np.interp(y_ticks_hz, mel_center_freqs, band_indices)


# Annotation data — grammar-of-graphics approach: data-driven geom layers
# Fractional band index of the fundamental (taken from `fundamental`, so the
# annotation stays on the tone the signal actually contains) and of the 880 Hz partial.
f0_band = float(np.interp(fundamental, mel_center_freqs, band_indices))
h3_band = float(np.interp(880, mel_center_freqs, band_indices))

# One row per label; x = 2.85 s places the text near the right edge of the 3 s clip
df_labels = pd.DataFrame(
    {
        "x": [2.85, 2.85],
        "y": [f0_band, h3_band],
        "label": ["F\u2080", "3rd"],
        "color": ["#fcffa4", "#fb9b06"],
    }
)
# One row per horizontal reference line, spanning the full clip duration
df_reflines = pd.DataFrame(
    {
        "x": [0.0, 0.0],
        "xend": [duration, duration],
        "y": [f0_band, h3_band],
        "yend": [f0_band, h3_band],
    }
)
107+
108+
# Plot — geom_raster for smooth spectrogram, data-driven geom_text/geom_segment for annotations
plot = (
    ggplot(df, aes(x="Time (s)", y="mel_band", fill="Power (dB)"))
    + geom_raster(interpolate=True)
    # Sequential colour ramp from near-black through purple and orange to pale yellow
    + scale_fill_gradientn(
        colors=[
            "#000004",
            "#1b0c41",
            "#4a0c6b",
            "#781c6d",
            "#a52c60",
            "#cf4446",
            "#ed6925",
            "#fb9b06",
            "#f7d13d",
            "#fcffa4",
        ],
        name="Power (dB)",
    )
    + guides(fill=guide_colorbar(nbin=256, display="raster"))
    # Label for the fundamental (first row of df_labels)
    + geom_text(
        aes(x="x", y="y", label="label"),
        data=df_labels.iloc[[0]],
        inherit_aes=False,
        color="#fcffa4",
        size=11,
        ha="right",
        fontweight="bold",
        alpha=0.85,
    )
    # Label for the 880 Hz partial (second row of df_labels), smaller and fainter
    + geom_text(
        aes(x="x", y="y", label="label"),
        data=df_labels.iloc[[1]],
        inherit_aes=False,
        color="#fb9b06",
        size=9,
        ha="right",
        alpha=0.7,
    )
    # Faint horizontal reference line at the fundamental's band
    + geom_segment(
        aes(x="x", xend="xend", y="y", yend="yend"),
        data=df_reflines.iloc[[0]],
        inherit_aes=False,
        color="#fcffa4",
        alpha=0.15,
        size=0.4,
    )
    # Faint horizontal reference line at the 880 Hz partial's band
    + geom_segment(
        aes(x="x", xend="xend", y="y", yend="yend"),
        data=df_reflines.iloc[[1]],
        inherit_aes=False,
        color="#fb9b06",
        alpha=0.12,
        size=0.3,
    )
    + scale_x_continuous(expand=(0, 0))
    # The y data are mel band indices; ticks sit at the band positions of round
    # Hz values and are labelled in Hz.
    + scale_y_continuous(breaks=y_ticks_band.tolist(), labels=[str(f) for f in y_ticks_hz], expand=(0, 0))
    + coord_cartesian(ylim=(0, n_mels - 1))
    + labs(x="Time (s)", y="Frequency (Hz)", title="spectrogram-mel \u00b7 plotnine \u00b7 pyplots.ai")
    + theme_minimal()
    # Dark theme: light text on a near-black panel and dark navy figure background
    + theme(
        figure_size=(16, 9),
        text=element_text(family="sans-serif"),
        plot_title=element_text(size=24, ha="center", weight="bold", color="#e0e0e0", margin={"b": 8}),
        axis_title_x=element_text(size=20, color="#cccccc", margin={"t": 10}),
        axis_title_y=element_text(size=20, color="#cccccc", margin={"r": 8}),
        axis_text_x=element_text(size=16, color="#aaaaaa"),
        axis_text_y=element_text(size=16, color="#aaaaaa"),
        legend_title=element_text(size=16, weight="bold", color="#cccccc"),
        legend_text=element_text(size=14, color="#aaaaaa"),
        legend_position="right",
        legend_key_height=60,
        legend_key_width=14,
        panel_grid_major=element_blank(),
        panel_grid_minor=element_blank(),
        panel_background=element_rect(fill="#000004", color="none"),
        plot_background=element_rect(fill="#0e0e1a", color="none"),
        plot_margin=0.02,
    )
)

# Write the figure to plot.png in the working directory
plot.save("plot.png", dpi=300, verbose=False)

0 commit comments

Comments
 (0)