|
| 1 | +""" pyplots.ai |
| 2 | +scatter-matrix: Scatter Plot Matrix |
| 3 | +Library: altair 6.0.0 | Python 3.13.11 |
| 4 | +Quality: 85/100 | Created: 2025-12-26 |
| 5 | +""" |
| 6 | + |
| 7 | +import altair as alt |
| 8 | +import numpy as np |
| 9 | +import pandas as pd |
| 10 | + |
| 11 | + |
# Data - synthetic, Iris-like measurements (not the real Iris dataset).
# The global NumPy seed is fixed so every run produces the same points.
np.random.seed(42)

# Number of flowers simulated for each species
n_per_species = 50

# (mean, standard deviation) of the normal distribution each measurement is
# drawn from, per species. Insertion order matters: it fixes both the column
# order of the DataFrame and the order in which random numbers are consumed.
species_profiles = {
    # Setosa - smaller flowers
    "Setosa": {
        "Sepal Length (cm)": (5.0, 0.35),
        "Sepal Width (cm)": (3.4, 0.38),
        "Petal Length (cm)": (1.5, 0.17),
        "Petal Width (cm)": (0.25, 0.1),
    },
    # Versicolor - medium flowers
    "Versicolor": {
        "Sepal Length (cm)": (5.9, 0.52),
        "Sepal Width (cm)": (2.8, 0.31),
        "Petal Length (cm)": (4.3, 0.47),
        "Petal Width (cm)": (1.3, 0.2),
    },
    # Virginica - larger flowers
    "Virginica": {
        "Sepal Length (cm)": (6.6, 0.64),
        "Sepal Width (cm)": (3.0, 0.32),
        "Petal Length (cm)": (5.5, 0.55),
        "Petal Width (cm)": (2.0, 0.27),
    },
}

# One record per flower. Values are drawn species by species, flower by
# flower, measurement by measurement, so the seeded random stream is consumed
# in a fixed, reproducible order.
data = [
    {
        **{column: np.random.normal(mean, sd) for column, (mean, sd) in profile.items()},
        "Species": species,
    }
    for species, profile in species_profiles.items()
    for _ in range(n_per_species)
]

df = pd.DataFrame(data)
| 56 | + |
# Quantitative columns shown in the matrix; the same list drives rows and columns
variables = ["Sepal Length (cm)", "Sepal Width (cm)", "Petal Length (cm)", "Petal Width (cm)"]

# Fixed species -> colour mapping (blue, orange, green) so the colours do not
# depend on the order in which species appear in the data
species_palette = {"Setosa": "#306998", "Versicolor": "#E69F00", "Virginica": "#009E73"}
color_scale = alt.Scale(domain=list(species_palette), range=list(species_palette.values()))

# Font sizes shared by the x and y axes of every panel
axis_fonts = {"labelFontSize": 18, "titleFontSize": 22}

# Each panel takes its x field from the repeated column and its y field from
# the repeated row
x_encoding = alt.X(alt.repeat("column"), type="quantitative", axis=alt.Axis(**axis_fonts))
y_encoding = alt.Y(alt.repeat("row"), type="quantitative", axis=alt.Axis(**axis_fonts))

species_legend = alt.Legend(
    title="Species",
    titleFontSize=28,
    labelFontSize=24,
    symbolSize=400,
    orient="right",
    titlePadding=15,
    labelPadding=10,
)
color_encoding = alt.Color("Species:N", scale=color_scale, legend=species_legend)

# Hovering a point shows its species plus the two values plotted in that panel
hover_fields = [
    "Species:N",
    alt.Tooltip(alt.repeat("column"), type="quantitative"),
    alt.Tooltip(alt.repeat("row"), type="quantitative"),
]

# Single-panel template: one 320x320 scatter plot of semi-transparent circles
panel = alt.Chart(df).mark_circle(size=100, opacity=0.7)
panel = panel.encode(x=x_encoding, y=y_encoding, color=color_encoding, tooltip=hover_fields)
panel = panel.properties(width=320, height=320)

# Scatter plot matrix (SPLOM): repeat the template over every row/column pair
# of variables using Altair's declarative repeat()
scatter_matrix = panel.repeat(row=variables, column=variables)
| 93 | + |
# Centered title placed above the whole matrix
matrix_title = alt.Title(text="scatter-matrix · altair · pyplots.ai", fontSize=36, anchor="middle", offset=25)

# Attach the title, then apply chart-wide configuration:
# lighter grid lines and no border stroke around each panel
chart = scatter_matrix.properties(title=matrix_title)
chart = chart.configure_axis(gridOpacity=0.3)
chart = chart.configure_view(strokeWidth=0)

# Export targets, written in this order:
#   plot.png  - static image; scale_factor=3 gives ~3840x3840 for square
#               output close to the 3600x3600 target
#   plot.html - interactive version
exports = (
    ("plot.png", {"scale_factor": 3.0}),
    ("plot.html", {}),
)
for filename, save_options in exports:
    chart.save(filename, **save_options)
0 commit comments