|
| 1 | +""" pyplots.ai |
| 2 | +scatter-matrix-interactive: Interactive Scatter Plot Matrix (SPLOM) |
| 3 | +Library: letsplot 4.8.2 | Python 3.13.11 |
| 4 | +Quality: 78/100 | Created: 2026-01-10 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from lets_plot import ( |
| 10 | + LetsPlot, |
| 11 | + aes, |
| 12 | + element_blank, |
| 13 | + element_text, |
| 14 | + geom_histogram, |
| 15 | + geom_point, |
| 16 | + ggbunch, |
| 17 | + ggplot, |
| 18 | + ggsave, |
| 19 | + ggsize, |
| 20 | + ggtitle, |
| 21 | + labs, |
| 22 | + layer_tooltips, |
| 23 | + scale_color_manual, |
| 24 | + scale_fill_manual, |
| 25 | + theme, |
| 26 | + theme_minimal, |
| 27 | +) |
| 28 | + |
| 29 | + |
# Initialise lets-plot before any plot object is built.
# NOTE(review): setup_html() selects HTML-based output; the script only writes
# a PNG at the end, so confirm this call is still needed for the export path.
LetsPlot.setup_html()
| 31 | + |
# Data - synthetic iris-like dataset (4 numeric variables, 150 points).
# The global NumPy RNG is seeded so every run produces the same figure.
np.random.seed(42)

# Number of simulated plants per species.
n_per_species = 50

# Lower bound applied to every measurement in the final table. The draws below
# are unbounded normals, so a small-mean column (e.g. Setosa petal width,
# mean 0.25 / sd 0.10) can otherwise produce zero or negative lengths.
_MIN_MEASUREMENT_CM = 0.05


def _correlated_species(name, base, params):
    """Build one species table whose measurements share a latent factor.

    Each column is computed as ``mean + loading * base + noise`` where the
    noise is a fresh normal draw with standard deviation ``noise_sd``.
    Columns are generated in the iteration order of ``params`` so the
    sequence of random draws is deterministic under a fixed seed.

    Args:
        name: Label stored in the "Species" column for every row.
        base: 1-D array holding the latent per-plant factor.
        params: Mapping of column name -> ``(mean, loading, noise_sd)``.

    Returns:
        DataFrame with one column per entry in ``params`` plus "Species".
    """
    columns = {}
    for column, (mean, loading, noise_sd) in params.items():
        noise = np.random.normal(0, noise_sd, len(base))
        columns[column] = mean + loading * base + noise
    columns["Species"] = name
    return pd.DataFrame(columns)


# Species A (small flowers): lower values, columns drawn independently.
species_a = pd.DataFrame(
    {
        "Sepal Length (cm)": np.random.normal(5.0, 0.35, n_per_species),
        "Sepal Width (cm)": np.random.normal(3.4, 0.38, n_per_species),
        "Petal Length (cm)": np.random.normal(1.5, 0.17, n_per_species),
        "Petal Width (cm)": np.random.normal(0.25, 0.10, n_per_species),
        "Species": "Setosa",
    }
)

# Species B (medium flowers): intermediate values with correlation.
base_b = np.random.normal(0, 1, n_per_species)
species_b = _correlated_species(
    "Versicolor",
    base_b,
    {
        "Sepal Length (cm)": (5.9, 0.5, 0.2),
        "Sepal Width (cm)": (2.8, 0.3, 0.2),
        "Petal Length (cm)": (4.3, 0.5, 0.3),
        "Petal Width (cm)": (1.3, 0.2, 0.15),
    },
)

# Species C (large flowers): higher values with strong correlation.
base_c = np.random.normal(0, 1, n_per_species)
species_c = _correlated_species(
    "Virginica",
    base_c,
    {
        "Sepal Length (cm)": (6.6, 0.6, 0.3),
        "Sepal Width (cm)": (3.0, 0.3, 0.25),
        "Petal Length (cm)": (5.6, 0.6, 0.3),
        "Petal Width (cm)": (2.0, 0.3, 0.2),
    },
)

# Stack the three species into one table with a fresh 0..149 index.
df = pd.concat([species_a, species_b, species_c], ignore_index=True)

# Clamp only the combined table (the per-species frames keep their raw draws)
# so that no plotted measurement is zero or negative.
_numeric_columns = df.select_dtypes(include="number").columns
df[_numeric_columns] = df[_numeric_columns].clip(lower=_MIN_MEASUREMENT_CM)
| 74 | + |
# Columns shown in the scatter matrix, each paired with the abbreviated label
# used for axis titles (shorter text avoids truncation in the small panels).
_axis_labels = {
    "Sepal Length (cm)": "Sepal Len.",
    "Sepal Width (cm)": "Sepal Wid.",
    "Petal Length (cm)": "Petal Len.",
    "Petal Width (cm)": "Petal Wid.",
}
variables = list(_axis_labels)
short_labels = list(_axis_labels.values())
n = len(variables)  # the matrix is n x n

# Colour palette - accessible colours with good contrast on a white background.
colors = ["#306998", "#E67E22", "#16A085"]
| 82 | + |
# Create the individual panels of the n x n matrix (row-major order).


def _axis_title(visible):
    """Return a size-16 axis-title element if ``visible``, else a blank one."""
    if visible:
        return element_text(size=16)
    return element_blank()


def _panel_theme(show_x_title, show_y_title):
    """Theme overrides shared by every panel: no legend, tight margins."""
    return theme(
        axis_title_x=_axis_title(show_x_title),
        axis_title_y=_axis_title(show_y_title),
        axis_text=element_text(size=13),
        legend_position="none",
        plot_margin=[5, 5, 5, 5],
    )


def _matrix_panel(row, col):
    """Build the panel at (row, col): y is variables[row], x is variables[col].

    Axis titles appear only on the outer edge of the matrix: x titles on the
    bottom row, y titles in the left column. Diagonal panels never get a y
    title.
    """
    var_y, var_x = variables[row], variables[col]
    on_bottom_row = row == n - 1
    on_left_column = col == 0
    x_title = short_labels[col] if on_bottom_row else ""

    if row == col:
        # Diagonal: per-species histograms overlaid on one another.
        layers = (
            ggplot(df, aes(x=var_x, fill="Species"))
            + geom_histogram(alpha=0.7, bins=15, position="identity")
            + scale_fill_manual(values=colors)
        )
        axis_names = labs(x=x_title, y="")
        panel_theme = _panel_theme(on_bottom_row, False)
    else:
        # Off-diagonal: scatter plot whose tooltip lists species and both values.
        hover = (
            layer_tooltips()
            .line("Species: @Species")
            .line(f"{var_x}: @{{{var_x}}}")
            .line(f"{var_y}: @{{{var_y}}}")
        )
        y_title = short_labels[row] if on_left_column else ""
        layers = (
            ggplot(df, aes(x=var_x, y=var_y, color="Species", fill="Species"))
            + geom_point(size=4, alpha=0.7, shape=21, tooltips=hover)
            + scale_color_manual(values=colors)
            + scale_fill_manual(values=colors)
        )
        axis_names = labs(x=x_title, y=y_title)
        panel_theme = _panel_theme(on_bottom_row, on_left_column)

    return layers + axis_names + theme_minimal() + panel_theme


plots = [_matrix_panel(row, col) for row in range(n) for col in range(n)]
| 137 | + |
# Layout fractions for the ggbunch regions (n x n grid).
# A band of equal height is reserved for the title at the top and for the
# legend at the bottom; the remaining height is divided evenly among n rows.
title_height = legend_height = 0.06
grid_height = 1.0 - title_height - legend_height
cell_size = grid_height / n
| 144 | + |
# Title strip: a plot that carries only the figure title (parts are separated
# by a middle-dot character). Per the original author's note, lets-plot needs
# at least one geom layer, so a single fully transparent point is drawn.
title_df = pd.DataFrame({"x": [0], "y": [0]})

# Every axis/grid element is blanked so that only the title text remains.
_title_theme = theme(
    plot_title=element_text(size=32, hjust=0.5),
    **{
        part: element_blank()
        for part in ("axis_line", "axis_text", "axis_ticks", "axis_title", "panel_grid")
    },
)

title_plot = (
    ggplot(title_df, aes(x="x", y="y"))
    + geom_point(alpha=0)
    + ggtitle("scatter-matrix-interactive \u00b7 letsplot \u00b7 pyplots.ai")
    + theme_minimal()
    + _title_theme
)
| 161 | + |
# Legend strip: a separate small plot (one point per species) whose only job
# is to provide a horizontal legend with its own text sizes.
_species_names = ["Setosa", "Versicolor", "Virginica"]
legend_df = pd.DataFrame({"x": [1, 2, 3], "y": [0] * 3, "Species": _species_names})

# Legend placement/typography plus blanked axes and grid.
_legend_theme = theme(
    legend_position="bottom",
    legend_direction="horizontal",
    legend_title=element_text(size=18),
    legend_text=element_text(size=16),
    **{
        part: element_blank()
        for part in ("axis_line", "axis_text", "axis_ticks", "axis_title", "panel_grid")
    },
)

legend_plot = (
    ggplot(legend_df, aes(x="x", y="y", color="Species", fill="Species"))
    + geom_point(size=6, shape=21, alpha=0.8)
    + scale_color_manual(values=colors)
    + scale_fill_manual(values=colors)
    + theme_minimal()
    + _legend_theme
)
| 182 | + |
# Final layout: title strip, then the matrix panels, then the legend strip.
# The plot list and the region list below must stay in the same order.
final_plots = [title_plot, *plots, legend_plot]

# Each region is (x, y, width, height, dx, dy) in fractions of the canvas.
# Matrix panels are laid out row by row, below the title band, and shifted
# 0.02 to the right of the left edge.
_matrix_regions = [
    (col * cell_size + 0.02, title_height + row * cell_size, cell_size, cell_size, 0, 0)
    for row in range(n)
    for col in range(n)
]

final_regions = [
    (0, 0, 1, title_height, 0, 0),  # title band across the full width, at the top
    *_matrix_regions,
    (0.25, 1.0 - legend_height, 0.5, legend_height, 0, 0),  # legend, bottom centre
]
| 204 | + |
# Assemble all sub-plots into one figure on a square 1200 x 1200 canvas.
_bunch = ggbunch(final_plots, final_regions)
final_plot = _bunch + ggsize(1200, 1200)

# Write the figure to ./plot.png (PNG only); scale=3 is forwarded to ggsave.
ggsave(final_plot, "plot.png", path=".", scale=3)
0 commit comments