Skip to content

Commit 7ad42ba

Browse files
Merge branch 'main' into implementation/spectrogram-mel/bokeh
2 parents 76cb57a + 51cba9f commit 7ad42ba

File tree

10 files changed

+2102
-0
lines changed

10 files changed

+2102
-0
lines changed
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
""" pyplots.ai
2+
spectrogram-mel: Mel-Spectrogram for Audio Analysis
3+
Library: altair 6.0.0 | Python 3.14.3
4+
Quality: 91/100 | Created: 2026-03-11
5+
"""
6+
7+
import altair as alt
8+
import numpy as np
9+
import pandas as pd
10+
11+
12+
# Data - synthesize a rich audio signal with melody and harmonics
13+
np.random.seed(42)
14+
sample_rate = 22050
15+
duration = 4.0
16+
n_samples = int(sample_rate * duration)
17+
t = np.linspace(0, duration, n_samples, endpoint=False)
18+
19+
# Descending frequency sweep from 1200 Hz to 300 Hz with harmonics
20+
sweep_freq = np.cumsum(1200 * np.exp(-0.35 * t)) / sample_rate
21+
signal = 0.6 * np.sin(2 * np.pi * sweep_freq)
22+
signal += 0.3 * np.sin(2 * np.pi * 2 * sweep_freq)
23+
signal += 0.15 * np.sin(2 * np.pi * 3 * sweep_freq)
24+
25+
# Pulsed tone at 440 Hz (A4) with amplitude modulation
26+
envelope = 0.5 * (1 + np.sin(2 * np.pi * 2.5 * t))
27+
signal += 0.4 * envelope * np.sin(2 * np.pi * 440 * t)
28+
29+
# High-frequency chirp burst in the middle section
30+
chirp_mask = (t > 1.5) & (t < 2.5)
31+
chirp_phase = np.cumsum(chirp_mask * (2000 + 3000 * (t - 1.5))) / sample_rate
32+
signal += 0.35 * chirp_mask * np.sin(2 * np.pi * chirp_phase)
33+
34+
# Subtle noise floor
35+
signal += 0.05 * np.random.randn(n_samples)
36+
37+
# Compute STFT
38+
n_fft = 2048
39+
hop_length = 512
40+
window = np.hanning(n_fft)
41+
n_freq_bins = n_fft // 2 + 1
42+
n_frames = 1 + (n_samples - n_fft) // hop_length
43+
44+
stft_power = np.zeros((n_freq_bins, n_frames))
45+
for i in range(n_frames):
46+
start = i * hop_length
47+
frame = signal[start : start + n_fft] * window
48+
spectrum = np.fft.rfft(frame)
49+
stft_power[:, i] = np.abs(spectrum) ** 2
50+
51+
# Mel filter bank
52+
n_mels = 128
53+
f_max = sample_rate / 2.0
54+
55+
mel_max = 2595.0 * np.log10(1.0 + f_max / 700.0)
56+
mel_edges = np.linspace(0, mel_max, n_mels + 2)
57+
hz_edges = 700.0 * (10.0 ** (mel_edges / 2595.0) - 1.0)
58+
fft_freqs = np.linspace(0, f_max, n_freq_bins)
59+
60+
filterbank = np.zeros((n_mels, n_freq_bins))
61+
for i in range(n_mels):
62+
lo, mid, hi = hz_edges[i], hz_edges[i + 1], hz_edges[i + 2]
63+
up_slope = (fft_freqs >= lo) & (fft_freqs <= mid)
64+
dn_slope = (fft_freqs > mid) & (fft_freqs <= hi)
65+
if mid > lo:
66+
filterbank[i, up_slope] = (fft_freqs[up_slope] - lo) / (mid - lo)
67+
if hi > mid:
68+
filterbank[i, dn_slope] = (hi - fft_freqs[dn_slope]) / (hi - mid)
69+
70+
# Apply mel filter and convert to dB
71+
mel_spec = filterbank @ stft_power
72+
mel_spec = np.maximum(mel_spec, 1e-10)
73+
mel_spec_db = 10.0 * np.log10(mel_spec)
74+
mel_spec_db -= mel_spec_db.max()
75+
mel_spec_db = np.maximum(mel_spec_db, -80.0)
76+
77+
# Use ALL mel bins (no subsampling) to fix blockiness at low frequencies
78+
# Only subsample time frames to keep data manageable
79+
frame_step = 2
80+
time_idx = np.arange(0, n_frames, frame_step)
81+
mel_idx = np.arange(0, n_mels)
82+
83+
time_sec = time_idx * hop_length / sample_rate
84+
time_width = frame_step * hop_length / sample_rate
85+
86+
# Build dataframe with explicit rectangle bounds
87+
rows = []
88+
for mi in mel_idx:
89+
freq_lo = float(hz_edges[mi])
90+
freq_hi = float(hz_edges[mi + 2])
91+
for ti_pos, ti in enumerate(time_idx):
92+
rows.append(
93+
{
94+
"t1": round(float(time_sec[ti_pos]), 4),
95+
"t2": round(float(time_sec[ti_pos]) + time_width, 4),
96+
"f1": round(max(freq_lo, 20), 1),
97+
"f2": round(freq_hi, 1),
98+
"dB": round(float(mel_spec_db[mi, ti]), 1),
99+
}
100+
)
101+
102+
df = pd.DataFrame(rows)
103+
104+
# Annotation labels for key audio features (data storytelling)
105+
annotations = pd.DataFrame(
106+
[
107+
{"x": 0.6, "y": 1200, "label": "Harmonic Sweep"},
108+
{"x": 2.2, "y": 6500, "label": "Chirp Burst"},
109+
{"x": 3.5, "y": 350, "label": "440 Hz Tone"},
110+
]
111+
)
112+
113+
# Main spectrogram layer
114+
spectrogram = (
115+
alt.Chart(df)
116+
.mark_rect()
117+
.encode(
118+
x=alt.X(
119+
"t1:Q",
120+
title="Time (s)",
121+
scale=alt.Scale(domain=[0, duration], nice=False),
122+
axis=alt.Axis(
123+
labelFontSize=18,
124+
titleFontSize=22,
125+
titlePadding=14,
126+
values=[0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0],
127+
domainColor="#444444",
128+
tickColor="#444444",
129+
labelColor="#333333",
130+
titleColor="#222222",
131+
tickSize=6,
132+
),
133+
),
134+
x2="t2:Q",
135+
y=alt.Y(
136+
"f1:Q",
137+
title="Frequency (Hz)",
138+
scale=alt.Scale(type="log", domain=[20, 11025], nice=False),
139+
axis=alt.Axis(
140+
labelFontSize=18,
141+
titleFontSize=22,
142+
titlePadding=14,
143+
values=[50, 100, 200, 500, 1000, 2000, 5000, 10000],
144+
domainColor="#444444",
145+
tickColor="#444444",
146+
labelColor="#333333",
147+
titleColor="#222222",
148+
tickSize=6,
149+
labelExpr="datum.value >= 1000 ? format(datum.value / 1000, '.0f') + 'k' : format(datum.value, '.0f')",
150+
),
151+
),
152+
y2="f2:Q",
153+
color=alt.Color(
154+
"dB:Q",
155+
scale=alt.Scale(scheme="inferno", domain=[-80, 0]),
156+
legend=alt.Legend(
157+
title="Power (dB)",
158+
titleFontSize=18,
159+
labelFontSize=16,
160+
gradientLength=480,
161+
gradientThickness=18,
162+
titlePadding=10,
163+
offset=14,
164+
direction="vertical",
165+
titleColor="#222222",
166+
labelColor="#333333",
167+
),
168+
),
169+
tooltip=[
170+
alt.Tooltip("t1:Q", title="Time (s)", format=".2f"),
171+
alt.Tooltip("f1:Q", title="Freq low (Hz)", format=".0f"),
172+
alt.Tooltip("f2:Q", title="Freq high (Hz)", format=".0f"),
173+
alt.Tooltip("dB:Q", title="Power (dB)", format=".1f"),
174+
],
175+
)
176+
)
177+
178+
# Annotation text layer for data storytelling emphasis
179+
annotation_labels = (
180+
alt.Chart(annotations)
181+
.mark_text(
182+
fontSize=16, fontWeight="bold", color="#ffffff", strokeWidth=3, stroke="#1a1a2e", align="left", dx=10, dy=-6
183+
)
184+
.encode(x="x:Q", y="y:Q", text="label:N")
185+
)
186+
187+
# Small arrow markers pointing to features
188+
annotation_marks = (
189+
alt.Chart(annotations)
190+
.mark_point(shape="triangle-right", size=150, color="#ffffff", strokeWidth=2, stroke="#1a1a2e", filled=True)
191+
.encode(x="x:Q", y="y:Q")
192+
)
193+
194+
# Layer composition: spectrogram + annotations
195+
chart = (
196+
alt.layer(spectrogram, annotation_marks, annotation_labels)
197+
.properties(
198+
width=1400,
199+
height=800,
200+
title=alt.Title(
201+
"spectrogram-mel · altair · pyplots.ai",
202+
subtitle="Mel-scaled power spectrogram of a synthesized signal — frequency sweep with harmonics, pulsed 440 Hz tone, and chirp burst",
203+
fontSize=28,
204+
subtitleFontSize=17,
205+
subtitleColor="#555555",
206+
anchor="start",
207+
offset=20,
208+
color="#111111",
209+
),
210+
padding={"left": 24, "right": 24, "top": 24, "bottom": 20},
211+
)
212+
.configure_axis(grid=False)
213+
.configure_view(strokeWidth=0)
214+
.configure(font="Helvetica Neue, Helvetica, Arial, sans-serif", background="#fafafa")
215+
)
216+
217+
# Save
218+
chart.save("plot.png", scale_factor=3.0)
219+
chart.save("plot.html")
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
""" pyplots.ai
2+
spectrogram-mel: Mel-Spectrogram for Audio Analysis
3+
Library: letsplot 4.9.0 | Python 3.14.3
4+
Quality: 90/100 | Created: 2026-03-11
5+
"""
6+
7+
import numpy as np
8+
import pandas as pd
9+
from lets_plot import *
10+
11+
12+
LetsPlot.setup_html()
13+
14+
# Data
15+
np.random.seed(42)
16+
sample_rate = 22050
17+
duration = 3.0
18+
n_samples = int(sample_rate * duration)
19+
t = np.linspace(0, duration, n_samples, endpoint=False)
20+
21+
# Synthesize a richer melody: C4, E4, G4, C5 with harmonics and vibrato
22+
melody_freqs = [261.6, 329.6, 392.0, 523.3]
23+
note_names = ["C4", "E4", "G4", "C5"]
24+
audio_signal = np.zeros(n_samples)
25+
for i, freq in enumerate(melody_freqs):
26+
start = int(i * n_samples / len(melody_freqs))
27+
end = int((i + 1) * n_samples / len(melody_freqs))
28+
segment_t = t[start:end]
29+
envelope = np.sin(np.linspace(0, np.pi, end - start)) ** 1.5
30+
# Add slight vibrato for realism
31+
vibrato = 1 + 0.005 * np.sin(2 * np.pi * 5.5 * segment_t)
32+
audio_signal[start:end] += 0.5 * envelope * np.sin(2 * np.pi * freq * vibrato * segment_t)
33+
for harmonic, amplitude in [(2, 0.25), (3, 0.15), (4, 0.08), (5, 0.05)]:
34+
audio_signal[start:end] += (
35+
(amplitude / harmonic) * envelope * np.sin(2 * np.pi * freq * harmonic * vibrato * segment_t)
36+
)
37+
38+
audio_signal += 0.015 * np.random.randn(n_samples)
39+
40+
# STFT via numpy
41+
n_fft = 2048
42+
hop_length = 512
43+
n_mels = 128
44+
45+
window = np.hanning(n_fft)
46+
n_frames = 1 + (n_samples - n_fft) // hop_length
47+
stft_matrix = np.zeros((n_fft // 2 + 1, n_frames))
48+
for frame_idx in range(n_frames):
49+
start_sample = frame_idx * hop_length
50+
frame = audio_signal[start_sample : start_sample + n_fft] * window
51+
spectrum = np.fft.rfft(frame)
52+
stft_matrix[:, frame_idx] = np.abs(spectrum) ** 2
53+
54+
times = np.arange(n_frames) * hop_length / sample_rate
55+
frequencies = np.fft.rfftfreq(n_fft, 1.0 / sample_rate)
56+
57+
# Mel filter bank
58+
mel_low = 2595 * np.log10(1 + 0 / 700)
59+
mel_high = 2595 * np.log10(1 + (sample_rate / 2) / 700)
60+
mel_points = np.linspace(mel_low, mel_high, n_mels + 2)
61+
hz_points = 700 * (10 ** (mel_points / 2595) - 1)
62+
fft_bins = np.floor((n_fft + 1) * hz_points / sample_rate).astype(int)
63+
64+
mel_filterbank = np.zeros((n_mels, len(frequencies)))
65+
for m in range(1, n_mels + 1):
66+
f_left = fft_bins[m - 1]
67+
f_center = fft_bins[m]
68+
f_right = fft_bins[m + 1]
69+
for k in range(f_left, min(f_center, len(frequencies))):
70+
if f_center != f_left:
71+
mel_filterbank[m - 1, k] = (k - f_left) / (f_center - f_left)
72+
for k in range(f_center, min(f_right, len(frequencies))):
73+
if f_right != f_center:
74+
mel_filterbank[m - 1, k] = (f_right - k) / (f_right - f_center)
75+
76+
mel_spec = mel_filterbank @ stft_matrix
77+
mel_spec_db = 10 * np.log10(mel_spec + 1e-10)
78+
79+
# Clip dB range to emphasize musical content and reduce noise
80+
db_min = -10.0
81+
db_max = float(np.max(mel_spec_db))
82+
mel_spec_db = np.clip(mel_spec_db, db_min, db_max)
83+
84+
# Mel band center frequencies in Hz (for y-axis labels)
85+
mel_center_hz = 700 * (10 ** (np.linspace(mel_low, mel_high, n_mels) / 2595) - 1)
86+
87+
# Higher resolution downsampling for smoother tiles
88+
time_step = max(1, len(times) // 300)
89+
mel_step = max(1, n_mels // 128)
90+
times_ds = times[::time_step]
91+
mel_indices_ds = np.arange(0, n_mels, mel_step)
92+
mel_spec_ds = mel_spec_db[::mel_step][:, ::time_step]
93+
94+
# Map mel indices to Hz for tooltip display
95+
mel_hz_ds = mel_center_hz[mel_indices_ds]
96+
97+
# Build DataFrame
98+
time_grid, mel_idx_grid = np.meshgrid(times_ds, mel_indices_ds)
99+
hz_grid = np.broadcast_to(mel_hz_ds[:, None], mel_spec_ds.shape)
100+
df = pd.DataFrame(
101+
{
102+
"Time (s)": time_grid.flatten(),
103+
"Mel Band": mel_idx_grid.flatten(),
104+
"Power (dB)": mel_spec_ds.flatten(),
105+
"Freq (Hz)": hz_grid.flatten(),
106+
}
107+
)
108+
109+
# Y-axis breaks: map Hz values to mel band indices
110+
label_hz = [100, 200, 500, 1000, 2000, 5000, 10000]
111+
label_mel_vals = [2595 * np.log10(1 + f / 700) for f in label_hz]
112+
mel_range = np.linspace(mel_low, mel_high, n_mels)
113+
label_indices = [float(np.interp(mv, mel_range, np.arange(n_mels))) for mv in label_mel_vals]
114+
label_strs = ["100", "200", "500", "1k", "2k", "5k", "10k"]
115+
116+
# Note annotation positions (mel band index for each fundamental)
117+
note_annotations = []
118+
for i, (freq, name) in enumerate(zip(melody_freqs, note_names, strict=True)):
119+
mel_val = 2595 * np.log10(1 + freq / 700)
120+
mel_idx = float(np.interp(mel_val, mel_range, np.arange(n_mels)))
121+
mid_time = (i + 0.5) * duration / len(melody_freqs)
122+
note_annotations.append({"x": mid_time, "y": mel_idx, "label": name})
123+
124+
df_notes = pd.DataFrame(note_annotations)
125+
126+
# Custom dark color scheme for spectrogram
127+
bg_color = "#1a1a2e"
128+
panel_color = "#16213e"
129+
text_color = "#e0e0e0"
130+
grid_color = "#2a2a4a"
131+
132+
# Plot with polished dark theme and lets-plot distinctive features
133+
plot = (
134+
ggplot(df, aes(x="Time (s)", y="Mel Band", fill="Power (dB)"))
135+
+ geom_tile(
136+
tooltips=layer_tooltips()
137+
.title("Mel Spectrogram")
138+
.line("@{Time (s)}s | @{Freq (Hz)} Hz")
139+
.line("Power|@{Power (dB)} dB")
140+
.format("Time (s)", ".2f")
141+
.format("Freq (Hz)", ".0f")
142+
.format("Power (dB)", ".1f")
143+
.min_width(180)
144+
)
145+
+ geom_text(aes(x="x", y="y", label="label"), data=df_notes, color="#ffffff", size=14, fontface="bold", alpha=0.85)
146+
+ scale_fill_viridis(option="inferno", name="Power\n(dB)", limits=[db_min, db_max])
147+
+ scale_y_continuous(breaks=label_indices, labels=label_strs, expand=[0, 0])
148+
+ scale_x_continuous(expand=[0, 0])
149+
+ labs(x="Time (s)", y="Frequency (Hz)", title="spectrogram-mel · letsplot · pyplots.ai")
150+
+ ggsize(1600, 900)
151+
+ flavor_darcula()
152+
+ theme(
153+
plot_title=element_text(size=24, face="bold", color=text_color),
154+
plot_background=element_rect(fill=bg_color),
155+
panel_background=element_rect(fill=panel_color),
156+
axis_title=element_text(size=20, color=text_color),
157+
axis_text=element_text(size=16, color="#b0b0b0"),
158+
axis_line=element_blank(),
159+
axis_ticks=element_line(color=grid_color, size=0.5),
160+
panel_grid_major_x=element_line(color=grid_color, size=0.3),
161+
panel_grid_major_y=element_line(color=grid_color, size=0.3),
162+
panel_grid_minor=element_blank(),
163+
legend_title=element_text(size=16, color=text_color),
164+
legend_text=element_text(size=14, color="#b0b0b0"),
165+
legend_background=element_rect(fill=bg_color, color=bg_color),
166+
plot_margin=[40, 20, 20, 20],
167+
)
168+
)
169+
170+
# Save
171+
ggsave(plot, "plot.png", path=".", scale=3)
172+
ggsave(plot, "plot.html", path=".")

0 commit comments

Comments
 (0)