diff --git a/plots/spectrogram-mel/implementations/plotnine.py b/plots/spectrogram-mel/implementations/plotnine.py
new file mode 100644
index 0000000000..76126b4801
--- /dev/null
+++ b/plots/spectrogram-mel/implementations/plotnine.py
@@ -0,0 +1,192 @@
+""" pyplots.ai
+spectrogram-mel: Mel-Spectrogram for Audio Analysis
+Library: plotnine 0.15.3 | Python 3.14.3
+Quality: 90/100 | Created: 2026-03-11
+"""
+
+import numpy as np
+import pandas as pd
+from plotnine import (
+    aes,
+    coord_cartesian,
+    element_blank,
+    element_rect,
+    element_text,
+    geom_raster,
+    geom_segment,
+    geom_text,
+    ggplot,
+    guide_colorbar,
+    guides,
+    labs,
+    scale_fill_gradientn,
+    scale_x_continuous,
+    scale_y_continuous,
+    theme,
+    theme_minimal,
+)
+from scipy.signal import stft
+
+
+# Data - synthesize a 3-second audio signal with speech-like frequency components
+np.random.seed(42)
+sample_rate = 22050
+duration = 3.0
+n_samples = int(sample_rate * duration)
+t = np.linspace(0, duration, n_samples, endpoint=False)
+
+# Build a rich audio signal: fundamental + harmonics with time-varying amplitude.
+# Every partial is an integer multiple of the 220 Hz fundamental; the multiple is the harmonic number.
+fundamental = 220
+signal = (
+    0.6 * np.sin(2 * np.pi * fundamental * t) * np.exp(-0.3 * t)  # F0, slow decay
+    + 0.4 * np.sin(2 * np.pi * 440 * t) * (0.5 + 0.5 * np.sin(2 * np.pi * 1.5 * t))  # 2nd harmonic, tremolo
+    + 0.3 * np.sin(2 * np.pi * 880 * t) * np.exp(-0.5 * t)  # 4th harmonic
+    + 0.2 * np.sin(2 * np.pi * 1320 * t) * (1 - t / duration)  # 6th harmonic, linear fade
+    + 0.15 * np.sin(2 * np.pi * 3300 * t) * np.exp(-1.0 * t)  # 15th harmonic, fast decay
+    + 0.1 * np.random.randn(n_samples) * np.exp(-0.8 * t)  # decaying broadband noise
+)
+
+# Add a frequency sweep (chirp) from 500 to 4000 Hz in the middle section
+chirp_mask = (t > 0.8) & (t < 2.0)
+chirp_freq = 500 + (4000 - 500) * (t[chirp_mask] - 0.8) / 1.2
+# Phase is the running integral of the instantaneous frequency (cumulative sum / sample_rate)
+signal[chirp_mask] += 0.35 * np.sin(2 * np.pi * np.cumsum(chirp_freq) / sample_rate)
+
+# STFT - reuse the frequency axis scipy returns rather than rebuilding it with linspace
+n_fft = 2048
+hop_length = 512
+freq_bins, time_bins, Zxx = stft(signal, fs=sample_rate, nperseg=n_fft, noverlap=n_fft - hop_length)
+power_spec = np.abs(Zxx) ** 2
+
+# Mel filterbank - band edges evenly spaced on the mel scale between 0 Hz and Nyquist
+n_mels = 128
+
+mel_low = 2595.0 * np.log10(1.0 + 0 / 700.0)
+mel_high = 2595.0 * np.log10(1.0 + (sample_rate / 2) / 700.0)
+mel_points = np.linspace(mel_low, mel_high, n_mels + 2)
+hz_points = 700.0 * (10.0 ** (mel_points / 2595.0) - 1.0)
+
+# Vectorized mel filterbank using numpy broadcasting
+lower = hz_points[:-2, np.newaxis]  # (n_mels, 1)
+center = hz_points[1:-1, np.newaxis]  # (n_mels, 1)
+upper = hz_points[2:, np.newaxis]  # (n_mels, 1)
+freqs = freq_bins[np.newaxis, :]  # (1, n_freq)
+
+rising = np.where((freqs >= lower) & (freqs <= center) & (center != lower), (freqs - lower) / (center - lower), 0.0)
+falling = np.where((freqs > center) & (freqs <= upper) & (upper != center), (upper - freqs) / (upper - center), 0.0)
+filterbank = rising + falling
+
+# Apply mel filterbank and convert to dB (floor avoids log10(0); peak is shifted to 0 dB)
+mel_spec = filterbank @ power_spec
+mel_spec_db = 10 * np.log10(np.maximum(mel_spec, 1e-10))
+mel_spec_db -= mel_spec_db.max()
+
+# Build long-form DataFrame with evenly-spaced mel band indices for smooth raster
+band_index = np.arange(n_mels)
+mel_center_freqs = hz_points[1:-1]  # triangle centres in Hz, used for Hz -> band lookups
+time_grid, mel_idx_grid = np.meshgrid(time_bins, band_index)
+
+df = pd.DataFrame({"Time (s)": time_grid.ravel(), "mel_band": mel_idx_grid.ravel(), "Power (dB)": mel_spec_db.ravel()})
+
+# Y-axis tick positions: map Hz values to mel band indices
+y_ticks_hz = [128, 256, 512, 1024, 2048, 4096, 8000]
+y_ticks_hz = [f for f in y_ticks_hz if f <= sample_rate / 2]
+# Convert Hz to mel band index via interpolation
+y_ticks_band = np.interp(y_ticks_hz, mel_center_freqs, band_index)
+
+
+# Annotation data — grammar-of-graphics approach: data-driven geom layers.
+# 880 Hz = 4 x 220 Hz is the 4th harmonic; a 3rd harmonic (660 Hz) is not part of the signal.
+f0_color = "#fcffa4"
+h4_color = "#fb9b06"
+f0_band = float(np.interp(fundamental, mel_center_freqs, band_index))
+h4_band = float(np.interp(4 * fundamental, mel_center_freqs, band_index))
+
+df_labels = pd.DataFrame({"x": [2.85, 2.85], "y": [f0_band, h4_band], "label": ["F\u2080", "4th"]})
+df_reflines = pd.DataFrame(
+    {"x": [0.0, 0.0], "xend": [duration, duration], "y": [f0_band, h4_band], "yend": [f0_band, h4_band]}
+)
+
+# Plot — geom_raster for smooth spectrogram, data-driven geom_text/geom_segment for annotations
+plot = (
+    ggplot(df, aes(x="Time (s)", y="mel_band", fill="Power (dB)"))
+    + geom_raster(interpolate=True)
+    + scale_fill_gradientn(
+        colors=[
+            "#000004",
+            "#1b0c41",
+            "#4a0c6b",
+            "#781c6d",
+            "#a52c60",
+            "#cf4446",
+            "#ed6925",
+            "#fb9b06",
+            "#f7d13d",
+            "#fcffa4",
+        ],
+        name="Power (dB)",
+    )
+    + guides(fill=guide_colorbar(nbin=256, display="raster"))
+    + geom_text(
+        aes(x="x", y="y", label="label"),
+        data=df_labels.iloc[[0]],
+        inherit_aes=False,
+        color=f0_color,
+        size=11,
+        ha="right",
+        fontweight="bold",
+        alpha=0.85,
+    )
+    + geom_text(
+        aes(x="x", y="y", label="label"),
+        data=df_labels.iloc[[1]],
+        inherit_aes=False,
+        color=h4_color,
+        size=9,
+        ha="right",
+        alpha=0.7,
+    )
+    + geom_segment(
+        aes(x="x", xend="xend", y="y", yend="yend"),
+        data=df_reflines.iloc[[0]],
+        inherit_aes=False,
+        color=f0_color,
+        alpha=0.15,
+        size=0.4,
+    )
+    + geom_segment(
+        aes(x="x", xend="xend", y="y", yend="yend"),
+        data=df_reflines.iloc[[1]],
+        inherit_aes=False,
+        color=h4_color,
+        alpha=0.12,
+        size=0.3,
+    )
+    + scale_x_continuous(expand=(0, 0))
+    + scale_y_continuous(breaks=y_ticks_band.tolist(), labels=[str(f) for f in y_ticks_hz], expand=(0, 0))
+    + coord_cartesian(ylim=(0, n_mels - 1))
+    + labs(x="Time (s)", y="Frequency (Hz)", title="spectrogram-mel \u00b7 plotnine \u00b7 pyplots.ai")
+    + theme_minimal()
+    + theme(
+        figure_size=(16, 9),
+        text=element_text(family="sans-serif"),
+        plot_title=element_text(size=24, ha="center", weight="bold", color="#e0e0e0", margin={"b": 8}),
+        axis_title_x=element_text(size=20, color="#cccccc", margin={"t": 10}),
+        axis_title_y=element_text(size=20, color="#cccccc", margin={"r": 8}),
+        axis_text_x=element_text(size=16, color="#aaaaaa"),
+        axis_text_y=element_text(size=16, color="#aaaaaa"),
+        legend_title=element_text(size=16, weight="bold", color="#cccccc"),
+        legend_text=element_text(size=14, color="#aaaaaa"),
+        legend_position="right",
+        legend_key_height=60,
+        legend_key_width=14,
+        panel_grid_major=element_blank(),
+        panel_grid_minor=element_blank(),
+        panel_background=element_rect(fill="#000004", color="none"),
+        plot_background=element_rect(fill="#0e0e1a", color="none"),
+        plot_margin=0.02,
+    )
+)
+
+plot.save("plot.png", dpi=300, verbose=False)
diff --git a/plots/spectrogram-mel/metadata/plotnine.yaml b/plots/spectrogram-mel/metadata/plotnine.yaml new file mode 100644 index 0000000000..f991c2cdc2 --- /dev/null +++ b/plots/spectrogram-mel/metadata/plotnine.yaml @@ -0,0 +1,236 @@ +library: plotnine +specification_id: spectrogram-mel +created: '2026-03-11T19:40:09Z' +updated: '2026-03-11T20:10:00Z' +generated_by: claude-opus-4-5-20251101 +workflow_run: 22970857471 +issue: 4672 +python_version: 3.14.3 +library_version: 0.15.3 +preview_url: https://storage.googleapis.com/pyplots-images/plots/spectrogram-mel/plotnine/plot.png +preview_thumb: https://storage.googleapis.com/pyplots-images/plots/spectrogram-mel/plotnine/plot_thumb.png +preview_html: null +quality_score: 90 +review: + strengths: + - Cohesive dark theme design with panel background matching colormap floor and harmonized + outer background + - Rich synthesized audio data with harmonics, chirp sweep, and amplitude decay creating + a visually compelling spectrogram + - Clean vectorized mel filterbank implementation avoiding librosa dependency + - Data-driven annotations using separate DataFrames following grammar-of-graphics + principles + - Full spec compliance with all required features correctly implemented + weaknesses: + - Library Mastery remains the main gap — implementation uses plotnine correctly + but does not leverage truly distinctive features like stat transformations or + custom geoms + - Annotation reference lines are extremely subtle (alpha 0.12-0.15), nearly invisible + in the 
final render + image_description: The plot displays a mel-spectrogram on a fully dark-themed canvas. + The x-axis shows time (0-3 seconds) and the y-axis shows mel-scaled frequency + with Hz labels at key mel band edges (128, 256, 512, 1024, 2048, 4096, 8000 Hz). + The colormap is a custom inferno-like gradient progressing from very dark black/purple + (low power ~-60 dB) through magenta and orange to bright yellow (0 dB). Clear + horizontal bands of energy are visible at harmonic frequencies (~220, 440, 880 + Hz), with lower harmonics being brightest and decaying over time. A prominent + chirp sweep arcs from approximately 500 Hz at t=0.8s up to ~4000 Hz at t=2.0s, + creating a striking curved bright line that serves as the main visual focal point. + Two data-driven annotations label the fundamental frequency (F0 in bright yellow) + and the 880 Hz harmonic (in orange), with very subtle reference lines at those frequencies. + A tall colorbar on the right is labeled Power (dB). The title reads spectrogram-mel + plotnine pyplots.ai in bold light gray. The panel background (#000004) matches + the colormap floor, and the outer plot background (#0e0e1a) creates a cohesive + dark theme throughout. 
+ criteria_checklist: + visual_quality: + score: 29 + max: 30 + items: + - id: VQ-01 + name: Text Legibility + score: 8 + max: 8 + passed: true + comment: 'All font sizes explicitly set: title=24pt, axis titles=20pt, tick + labels=16pt, legend title=16pt, legend text=14pt' + - id: VQ-02 + name: No Overlap + score: 6 + max: 6 + passed: true + comment: All text fully readable, y-axis labels well-spaced, annotations avoid + collision + - id: VQ-03 + name: Element Visibility + score: 6 + max: 6 + passed: true + comment: geom_raster with interpolate=True produces smooth gap-free spectrogram + - id: VQ-04 + name: Color Accessibility + score: 4 + max: 4 + passed: true + comment: Custom inferno-like sequential colormap is perceptually uniform and + colorblind-safe + - id: VQ-05 + name: Layout & Canvas + score: 3 + max: 4 + passed: true + comment: Good 16:9 proportions with cohesive dark background, slight right-side + weight from colorbar + - id: VQ-06 + name: Axis Labels & Title + score: 2 + max: 2 + passed: true + comment: Time (s) and Frequency (Hz) with units, correct title format + design_excellence: + score: 15 + max: 20 + items: + - id: DE-01 + name: Aesthetic Sophistication + score: 6 + max: 8 + passed: true + comment: Custom 10-color gradient, dark panel matching colormap floor, cohesive + dark outer background, text color hierarchy + - id: DE-02 + name: Visual Refinement + score: 5 + max: 6 + passed: true + comment: Grid removed, dark theme throughout, generous legend key, custom + margins. 
Reference lines nearly invisible + - id: DE-03 + name: Data Storytelling + score: 4 + max: 6 + passed: true + comment: F0 and 3rd harmonic annotations guide viewer, chirp sweep creates + focal point, time-varying decay adds narrative + spec_compliance: + score: 15 + max: 15 + items: + - id: SC-01 + name: Plot Type + score: 5 + max: 5 + passed: true + comment: Correct mel-spectrogram with STFT, mel filterbank, and dB conversion + - id: SC-02 + name: Required Features + score: 4 + max: 4 + passed: true + comment: 'All spec features present: dB scale, sequential colormap, mel y-axis + with Hz labels, colorbar, correct parameters' + - id: SC-03 + name: Data Mapping + score: 3 + max: 3 + passed: true + comment: X=time, Y=mel frequency with Hz labels, fill=power in dB + - id: SC-04 + name: Title & Legend + score: 3 + max: 3 + passed: true + comment: Correct title format, colorbar labeled Power (dB) + data_quality: + score: 15 + max: 15 + items: + - id: DQ-01 + name: Feature Coverage + score: 6 + max: 6 + passed: true + comment: Rich signal with fundamental, harmonics, chirp sweep, noise, time-varying + amplitudes + - id: DQ-02 + name: Realistic Context + score: 5 + max: 5 + passed: true + comment: 220 Hz fundamental with natural harmonics is realistic for speech/music + analysis + - id: DQ-03 + name: Appropriate Scale + score: 4 + max: 4 + passed: true + comment: Standard 22050 Hz sample rate, 3-second duration, standard STFT parameters + code_quality: + score: 10 + max: 10 + items: + - id: CQ-01 + name: KISS Structure + score: 3 + max: 3 + passed: true + comment: Clean linear flow with no functions or classes + - id: CQ-02 + name: Reproducibility + score: 2 + max: 2 + passed: true + comment: np.random.seed(42) set + - id: CQ-03 + name: Clean Imports + score: 2 + max: 2 + passed: true + comment: All imports used, scipy.signal.stft appropriate + - id: CQ-04 + name: Code Elegance + score: 2 + max: 2 + passed: true + comment: Vectorized mel filterbank, data-driven 
annotations are elegant + - id: CQ-05 + name: Output & API + score: 1 + max: 1 + passed: true + comment: Saves as plot.png with dpi=300, current API + library_mastery: + score: 6 + max: 10 + items: + - id: LM-01 + name: Idiomatic Usage + score: 4 + max: 5 + passed: true + comment: Strong grammar-of-graphics composition with data-driven annotations + via separate DataFrames + - id: LM-02 + name: Distinctive Features + score: 2 + max: 5 + passed: false + comment: geom_raster(interpolate=True) and guide_colorbar are plotnine-specific + but not truly distinctive + verdict: APPROVED +impl_tags: + dependencies: + - scipy + techniques: + - colorbar + - annotations + - layer-composition + patterns: + - data-generation + - matrix-construction + dataprep: + - binning + styling: + - dark-theme + - custom-colormap + - alpha-blending