Skip to content

Commit 032aff1

Browse files
Merge branch 'main' into implementation/spectrogram-mel/plotnine
2 parents 929ac6e + 227cdc3 commit 032aff1

12 files changed

Lines changed: 2628 additions & 0 deletions

File tree

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
""" pyplots.ai
2+
spectrogram-mel: Mel-Spectrogram for Audio Analysis
3+
Library: altair 6.0.0 | Python 3.14.3
4+
Quality: 91/100 | Created: 2026-03-11
5+
"""
6+
7+
import altair as alt
8+
import numpy as np
9+
import pandas as pd
10+
11+
# Data - synthesize a rich audio signal with melody and harmonics.
# The only random component (the noise floor at the end of this section) is
# drawn from NumPy's global generator, which is seeded here for repeatability.
np.random.seed(42)
sample_rate = 22050  # samples per second
duration = 4.0  # seconds
n_samples = int(sample_rate * duration)
t = np.linspace(0, duration, n_samples, endpoint=False)

# Descending frequency sweep from 1200 Hz to 300 Hz with harmonics.
# The instantaneous frequency 1200 * exp(-0.35 * t) is accumulated with a
# running sum and divided by the sample rate, so sweep_freq is the phase in
# cycles; the 2nd and 3rd harmonics reuse it with integer multipliers.
sweep_freq = np.cumsum(1200 * np.exp(-0.35 * t)) / sample_rate
signal = 0.6 * np.sin(2 * np.pi * sweep_freq)
signal += 0.3 * np.sin(2 * np.pi * 2 * sweep_freq)
signal += 0.15 * np.sin(2 * np.pi * 3 * sweep_freq)

# Pulsed tone at 440 Hz (A4) with amplitude modulation.
# The envelope oscillates between 0 and 1 at 2.5 Hz.
envelope = 0.5 * (1 + np.sin(2 * np.pi * 2.5 * t))
signal += 0.4 * envelope * np.sin(2 * np.pi * 440 * t)

# High-frequency chirp burst in the middle section.
# Inside the 1.5 s < t < 2.5 s window the instantaneous frequency rises
# linearly from 2000 Hz to 5000 Hz; the mask zeroes both the phase increment
# and the amplitude outside that window.
chirp_mask = (t > 1.5) & (t < 2.5)
chirp_phase = np.cumsum(chirp_mask * (2000 + 3000 * (t - 1.5))) / sample_rate
signal += 0.35 * chirp_mask * np.sin(2 * np.pi * chirp_phase)

# Subtle noise floor
signal += 0.05 * np.random.randn(n_samples)
36+
# Compute STFT
# Short-time power spectrum of `signal`: Hann-windowed frames of n_fft samples
# taken every hop_length samples, one column of stft_power per frame.
n_fft = 2048
hop_length = 512
window = np.hanning(n_fft)
n_freq_bins = n_fft // 2 + 1  # length of the rfft output for n_fft real samples
n_frames = 1 + (n_samples - n_fft) // hop_length  # only full frames, no padding

stft_power = np.zeros((n_freq_bins, n_frames))
for i in range(n_frames):
    start = i * hop_length
    frame = signal[start : start + n_fft] * window
    spectrum = np.fft.rfft(frame)
    # Squared magnitude = power per frequency bin for this frame.
    stft_power[:, i] = np.abs(spectrum) ** 2
50+
# Mel filter bank
# Builds n_mels triangular filters over the FFT bin frequencies. Band edges are
# spaced evenly on the mel axis, using mel = 2595 * log10(1 + f / 700) and its
# inverse, between 0 Hz and half the sample rate.
n_mels = 128
f_max = sample_rate / 2.0

mel_max = 2595.0 * np.log10(1.0 + f_max / 700.0)
# n_mels + 2 edges: filter i uses edges i (start), i + 1 (peak), i + 2 (end).
mel_edges = np.linspace(0, mel_max, n_mels + 2)
hz_edges = 700.0 * (10.0 ** (mel_edges / 2595.0) - 1.0)
fft_freqs = np.linspace(0, f_max, n_freq_bins)

filterbank = np.zeros((n_mels, n_freq_bins))
for i in range(n_mels):
    lo, mid, hi = hz_edges[i], hz_edges[i + 1], hz_edges[i + 2]
    up_slope = (fft_freqs >= lo) & (fft_freqs <= mid)
    dn_slope = (fft_freqs > mid) & (fft_freqs <= hi)
    # Rising edge: weight goes from 0 at `lo` to 1 at `mid`.
    # The guards skip a zero-width slope and so avoid division by zero.
    if mid > lo:
        filterbank[i, up_slope] = (fft_freqs[up_slope] - lo) / (mid - lo)
    # Falling edge: weight goes from 1 at `mid` down to 0 at `hi`.
    if hi > mid:
        filterbank[i, dn_slope] = (hi - fft_freqs[dn_slope]) / (hi - mid)
69+
# Apply mel filter and convert to dB
# Rows of the result are mel bands, columns are STFT frames.
mel_spec = filterbank @ stft_power
# Floor the power before the logarithm so that empty cells do not yield -inf.
mel_spec = np.maximum(mel_spec, 1e-10)
mel_spec_db = 10.0 * np.log10(mel_spec)
# Reference the scale to the loudest cell (0 dB) and clip everything below
# -80 dB, giving a fixed [-80, 0] range.
mel_spec_db -= mel_spec_db.max()
mel_spec_db = np.maximum(mel_spec_db, -80.0)
76+
# Use ALL mel bins (no subsampling) to fix blockiness at low frequencies
# Only subsample time frames to keep data manageable
frame_step = 2
time_idx = np.arange(0, n_frames, frame_step)
mel_idx = np.arange(0, n_mels)

# Start time of each kept frame, and the width of one rectangle in seconds
# (frame_step hops, so consecutive rectangles tile the time axis).
time_sec = time_idx * hop_length / sample_rate
time_width = frame_step * hop_length / sample_rate

# Build dataframe with explicit rectangle bounds: one row per
# (mel band, kept time frame) cell, ordered band-major then by time.
# The time bounds are identical for every band and the frequency bounds are
# identical for every frame, so both are computed once up front.
time_bounds = [(round(float(sec), 4), round(float(sec) + time_width, 4)) for sec in time_sec]
# The lower bound is clamped to 20 (Hz) so it stays positive.
# NOTE(review): each rectangle spans the filter's full support
# (hz_edges[mi] .. hz_edges[mi + 2]), so neighbouring bands overlap by one
# mel step — confirm this is intended rather than hz_edges[mi + 1].
band_bounds = [(round(max(float(hz_edges[mi]), 20), 1), round(float(hz_edges[mi + 2]), 1)) for mi in mel_idx]

rows = [
    {
        "t1": t1,
        "t2": t2,
        "f1": f1,
        "f2": f2,
        "dB": round(float(mel_spec_db[mi, ti]), 1),
    }
    for mi, (f1, f2) in zip(mel_idx, band_bounds)
    for ti, (t1, t2) in zip(time_idx, time_bounds)
]

df = pd.DataFrame(rows)
103+
# Annotation labels for key audio features (data storytelling)
# x is in the same unit as the time axis (s) and y as the frequency axis (Hz).
# NOTE(review): the anchors are offset from the features they name (e.g. the
# "440 Hz Tone" anchor sits at y=350) — presumably so that marker and label do
# not cover the feature; confirm against the rendered plot.
annotations = pd.DataFrame(
    [
        {"x": 0.6, "y": 1200, "label": "Harmonic Sweep"},
        {"x": 2.2, "y": 6500, "label": "Chirp Burst"},
        {"x": 3.5, "y": 350, "label": "440 Hz Tone"},
    ]
)
112+
# Main spectrogram layer
# df carries one row per (mel band, kept time frame) cell, which is more than
# Altair's default 5000-row limit for inline data. Lift the limit so that
# serialising the chart cannot fail with MaxRowsError.
alt.data_transformers.disable_max_rows()

# Styling shared by the time and frequency axes.
axis_style = {
    "labelFontSize": 18,
    "titleFontSize": 22,
    "titlePadding": 14,
    "domainColor": "#444444",
    "tickColor": "#444444",
    "labelColor": "#333333",
    "titleColor": "#222222",
    "tickSize": 6,
}

# Each row is drawn as a rectangle from (t1, f1) to (t2, f2), coloured by dB.
spectrogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X(
            "t1:Q",
            title="Time (s)",
            scale=alt.Scale(domain=[0, duration], nice=False),
            axis=alt.Axis(values=[0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0], **axis_style),
        ),
        x2="t2:Q",
        y=alt.Y(
            "f1:Q",
            title="Frequency (Hz)",
            # Log frequency axis; the domain must stay strictly positive.
            scale=alt.Scale(type="log", domain=[20, 11025], nice=False),
            axis=alt.Axis(
                values=[50, 100, 200, 500, 1000, 2000, 5000, 10000],
                # Render ticks at or above 1000 as "1k", "2k", ...
                labelExpr="datum.value >= 1000 ? format(datum.value / 1000, '.0f') + 'k' : format(datum.value, '.0f')",
                **axis_style,
            ),
        ),
        y2="f2:Q",
        color=alt.Color(
            "dB:Q",
            scale=alt.Scale(scheme="inferno", domain=[-80, 0]),
            legend=alt.Legend(
                title="Power (dB)",
                titleFontSize=18,
                labelFontSize=16,
                gradientLength=480,
                gradientThickness=18,
                titlePadding=10,
                offset=14,
                direction="vertical",
                titleColor="#222222",
                labelColor="#333333",
            ),
        ),
        tooltip=[
            alt.Tooltip("t1:Q", title="Time (s)", format=".2f"),
            alt.Tooltip("f1:Q", title="Freq low (Hz)", format=".0f"),
            alt.Tooltip("f2:Q", title="Freq high (Hz)", format=".0f"),
            alt.Tooltip("dB:Q", title="Power (dB)", format=".1f"),
        ],
    )
)
177+
# Annotation text layer for data storytelling emphasis
# White bold text with a dark outline, left-aligned and nudged right/up
# (dx=10, dy=-6) from each (x, y) anchor in `annotations`.
annotation_labels = (
    alt.Chart(annotations)
    .mark_text(
        fontSize=16,
        fontWeight="bold",
        color="#ffffff",
        strokeWidth=3,
        stroke="#1a1a2e",
        align="left",
        dx=10,
        dy=-6,
    )
    .encode(x="x:Q", y="y:Q", text="label:N")
)

# Small arrow markers pointing to features
# Filled right-pointing triangles placed exactly at the annotation anchors.
annotation_marks = (
    alt.Chart(annotations)
    .mark_point(
        shape="triangle-right",
        size=150,
        color="#ffffff",
        strokeWidth=2,
        stroke="#1a1a2e",
        filled=True,
    )
    .encode(x="x:Q", y="y:Q")
)
193+
# Layer composition: spectrogram + annotations
# Layers are listed bottom to top: rectangles first, then the markers, then
# the labels. Chart-wide configuration is applied to the layered chart.
chart = (
    alt.layer(spectrogram, annotation_marks, annotation_labels)
    .properties(
        width=1400,
        height=800,
        title=alt.Title(
            "spectrogram-mel · altair · pyplots.ai",
            subtitle="Mel-scaled power spectrogram of a synthesized signal — frequency sweep with harmonics, pulsed 440 Hz tone, and chirp burst",
            fontSize=28,
            subtitleFontSize=17,
            subtitleColor="#555555",
            anchor="start",
            offset=20,
            color="#111111",
        ),
        padding={"left": 24, "right": 24, "top": 24, "bottom": 20},
    )
    # No grid lines and no border around the plotting area.
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
    .configure(font="Helvetica Neue, Helvetica, Arial, sans-serif", background="#fafafa")
)
216+
# Save
# Write a static PNG rendered at 3x scale and an HTML version of the chart
# into the current working directory.
chart.save("plot.png", scale_factor=3.0)
chart.save("plot.html")

0 commit comments

Comments
 (0)