|
1 | 1 | """ pyplots.ai |
2 | 2 | density-basic: Basic Density Plot |
3 | | -Library: altair 6.0.0 | Python 3.13.11 |
4 | | -Quality: 92/100 | Created: 2025-12-23 |
| 3 | +Library: altair 6.0.0 | Python 3.14.3 |
| 4 | +Quality: 91/100 | Updated: 2026-02-23 |
5 | 5 | """ |
6 | 6 |
|
7 | 7 | import altair as alt |
8 | 8 | import numpy as np |
9 | 9 | import pandas as pd |
10 | 10 |
|
11 | 11 |
|
12 | | -# Data - bimodal distribution to demonstrate density estimation capability |
| 12 | +# Data - bimodal distribution showing two student groups with distinct performance |
13 | 13 | np.random.seed(42) |
14 | 14 | values = np.concatenate( |
15 | 15 | [ |
16 | | - np.random.normal(loc=35, scale=8, size=200), # First peak - test scores group A |
17 | | - np.random.normal(loc=65, scale=10, size=150), # Second peak - test scores group B |
| 16 | + np.random.normal(loc=38, scale=7, size=200), # Group A — foundational course |
| 17 | + np.random.normal(loc=72, scale=8, size=150), # Group B — advanced course |
18 | 18 | ] |
19 | 19 | ) |
| 20 | +values = np.clip(values, 5, 100) # Keep within realistic test score range |
20 | 21 |
|
21 | 22 | df = pd.DataFrame({"Test Score": values}) |
22 | 23 |
|
23 | | -# Create density plot using transform_density with tooltips |
24 | | -chart = ( |
| 24 | +# Peak annotation positions: x is each group's mean, y is a hard-coded label height (retune if the data or bandwidth change) |
| 25 | +peaks = pd.DataFrame( |
| 26 | + {"Test Score": [38, 72], "density": [0.032, 0.021], "label": ["Foundational Course", "Advanced Course"]} |
| 27 | +) |
| 28 | + |
| 29 | +# Nearest-point selection driving the hover readout; it only has an effect in the interactive HTML export, not the static PNG |
| 30 | +nearest = alt.selection_point(nearest=True, on="pointerover", fields=["Test Score"], empty=False) |
| 31 | + |
| 32 | +# Density curve with filled area |
| 33 | +density = ( |
25 | 34 | alt.Chart(df) |
26 | | - .transform_density( |
27 | | - "Test Score", |
28 | | - as_=["Test Score", "density"], |
29 | | - bandwidth=5, # Smoothing parameter for KDE |
30 | | - ) |
31 | | - .mark_area( |
32 | | - opacity=0.7, |
33 | | - color="#306998", # Python Blue |
34 | | - line={"color": "#306998", "strokeWidth": 3}, |
35 | | - ) |
| 35 | + .transform_density("Test Score", as_=["Test Score", "density"], bandwidth=4) |
| 36 | + .mark_area(opacity=0.45, color="#306998", line={"color": "#1e4d6e", "strokeWidth": 2.5}) |
36 | 37 | .encode( |
37 | | - x=alt.X("Test Score:Q", title="Test Score (points)", axis=alt.Axis(labelFontSize=18, titleFontSize=22)), |
38 | | - y=alt.Y("density:Q", title="Probability Density", axis=alt.Axis(labelFontSize=18, titleFontSize=22)), |
| 38 | + x=alt.X( |
| 39 | + "Test Score:Q", |
| 40 | + title="Test Score (points)", |
| 41 | + scale=alt.Scale(domain=[15, 100]), |
| 42 | + axis=alt.Axis(labelFontSize=18, titleFontSize=22, tickCount=10, grid=False), |
| 43 | + ), |
| 44 | + y=alt.Y( |
| 45 | + "density:Q", title="Probability Density", axis=alt.Axis(labelFontSize=18, titleFontSize=22, format=".3f") |
| 46 | + ), |
39 | 47 | tooltip=[ |
40 | 48 | alt.Tooltip("Test Score:Q", title="Score", format=".1f"), |
41 | 49 | alt.Tooltip("density:Q", title="Density", format=".4f"), |
42 | 50 | ], |
43 | 51 | ) |
44 | | - .properties(width=1600, height=900, title=alt.Title(text="density-basic · altair · pyplots.ai", fontSize=28)) |
45 | | - .configure_view(strokeWidth=0) |
46 | | - .configure_axis( |
47 | | - gridColor="#cccccc", |
48 | | - gridOpacity=0.3, # Subtle grid lines |
| 52 | +) |
| 53 | + |
| 54 | +# Invisible points on density curve driving nearest-point selection |
| 55 | +hover_points = ( |
| 56 | + alt.Chart(df) |
| 57 | + .transform_density("Test Score", as_=["Test Score", "density"], bandwidth=4) |
| 58 | + .mark_point(opacity=0) |
| 59 | + .encode(x="Test Score:Q", y="density:Q") |
| 60 | + .add_params(nearest) |
| 61 | +) |
| 62 | + |
| 63 | +# Hover dot — point on the density curve, visible only at the sample nearest the cursor |
| 64 | +hover_dot = ( |
| 65 | + alt.Chart(df) |
| 66 | + .transform_density("Test Score", as_=["Test Score", "density"], bandwidth=4) |
| 67 | + .mark_point(size=80, filled=True, color="#1e4d6e") |
| 68 | + .encode(x="Test Score:Q", y="density:Q", opacity=alt.condition(nearest, alt.value(1), alt.value(0))) |
| 69 | +) |
| 70 | + |
| 71 | +# Peak annotations — label the two distribution modes |
| 72 | +annotations = ( |
| 73 | + alt.Chart(peaks) |
| 74 | + .mark_text(fontSize=16, fontWeight="bold", color="#1e4d6e", dy=-18) |
| 75 | + .encode(x="Test Score:Q", y="density:Q", text="label:N") |
| 76 | +) |
| 77 | + |
| 78 | +# Rug plot — tick marks showing individual observations at density=0 |
| 79 | +rug = ( |
| 80 | + alt.Chart(df) |
| 81 | + .mark_tick(color="#306998", opacity=0.4, thickness=1.5, size=18) |
| 82 | + .encode(x=alt.X("Test Score:Q"), y=alt.Y(datum=0)) |
| 83 | +) |
| 84 | + |
| 85 | +# Combine layers |
| 86 | +chart = ( |
| 87 | + alt.layer(density, rug, annotations, hover_points, hover_dot) |
| 88 | + .properties( |
| 89 | + width=1600, |
| 90 | + height=900, |
| 91 | + title=alt.Title( |
| 92 | + text="density-basic · altair · pyplots.ai", |
| 93 | + subtitle="Kernel density estimation of test scores across two course levels", |
| 94 | + fontSize=28, |
| 95 | + subtitleFontSize=16, |
| 96 | + subtitleColor="#666666", |
| 97 | + ), |
49 | 98 | ) |
| 99 | + .configure_view(strokeWidth=0) |
| 100 | + .configure_axis(gridColor="#e0e0e0", gridOpacity=0.15, gridDash=[4, 4], domainColor="#888888") |
50 | 101 | ) |
51 | 102 |
|
52 | 103 | # Save as PNG at 3x scale (1600 x 900 plot area -> 4800 x 2700 px, plus padding for axes and title) |
53 | 104 | chart.save("plot.png", scale_factor=3.0) |
54 | 105 |
|
55 | | -# Save as interactive HTML |
56 | | -chart.interactive().save("plot.html") |
| 106 | +# Save as interactive HTML with selection-driven hover readout |
| 107 | +chart.save("plot.html") |
0 commit comments