""" pyplots.ai
andrews-curves: Andrews Curves for Multivariate Data
Library: pygal 3.1.0 | Python 3.13.11
Quality: 88/100 | Created: 2025-12-31
"""

import numpy as np
import pygal
from pygal.style import Style


# Generate synthetic Iris-like data (4 features, 3 species).
# The seed is fixed so the sampled measurements (and the plot) are reproducible.
np.random.seed(42)

# Simulate sepal length, sepal width, petal length, petal width for 3 species.
# Each species is a (50, 4) array: one row per flower, one column per feature.
# Species 1: Setosa - small petals, medium sepals
setosa = np.column_stack(
    [
        np.random.normal(5.0, 0.35, 50),  # sepal length
        np.random.normal(3.4, 0.38, 50),  # sepal width
        np.random.normal(1.5, 0.17, 50),  # petal length
        np.random.normal(0.2, 0.10, 50),  # petal width
    ]
)

# Species 2: Versicolor - medium petals and sepals
versicolor = np.column_stack(
    [
        np.random.normal(5.9, 0.52, 50),  # sepal length
        np.random.normal(2.8, 0.31, 50),  # sepal width
        np.random.normal(4.3, 0.47, 50),  # petal length
        np.random.normal(1.3, 0.20, 50),  # petal width
    ]
)

# Species 3: Virginica - large petals and sepals
virginica = np.column_stack(
    [
        np.random.normal(6.6, 0.64, 50),  # sepal length
        np.random.normal(3.0, 0.32, 50),  # sepal width
        np.random.normal(5.5, 0.55, 50),  # petal length
        np.random.normal(2.0, 0.27, 50),  # petal width
    ]
)

# Combine data: X holds all 150 rows, y holds the matching species index (0, 1, 2)
X = np.vstack([setosa, versicolor, virginica])
y = np.array([0] * 50 + [1] * 50 + [2] * 50)
species_names = ["Setosa", "Versicolor", "Virginica"]

# Normalize variables (z-score standardization) so that no single feature
# dominates the shape of the curves purely because of its scale.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X_scaled = (X - X_mean) / X_std

# Andrews curve function: f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t) + x4*sin(2t) + ...
# evaluated on 100 evenly spaced points over [-pi, pi].
t_values = np.linspace(-np.pi, np.pi, 100)

# Colors for 3 species - colorblind-safe palette (blue, orange, purple)
species_colors = ("#306998", "#E67E22", "#9B59B6")
n_curves_per_species = 15

# Custom style for large canvas with increased font sizes for readability
custom_style = Style(
    background="white",
    plot_background="white",
    foreground="#333333",
    foreground_strong="#333333",
    foreground_subtle="#666666",
    colors=species_colors,
    title_font_size=96,
    label_font_size=64,
    major_label_font_size=56,
    legend_font_size=64,
    value_font_size=48,
    stroke_width=2,
    opacity=0.4,
    opacity_hover=0.8,
    tooltip_font_size=48,
)

# Create XY chart with interactive features
chart = pygal.XY(
    width=4800,
    height=2700,
    style=custom_style,
    title="andrews-curves · pygal · pyplots.ai",
    x_title="t (radians)",
    y_title="f(t)",
    show_dots=False,
    stroke_style={"width": 2},
    # Break the stroke at None values. Several curves share one series below,
    # separated by None; without this option pygal skips the None and joins the
    # end of one curve (t = pi) to the start of the next (t = -pi) with a
    # straight line across the plot.
    allow_interruptions=True,
    show_x_guides=True,
    show_y_guides=True,
    legend_at_bottom=True,
    legend_at_bottom_columns=3,
    legend_box_size=32,
    truncate_legend=-1,
    tooltip_border_radius=10,
    js=["https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"],
)

# Plot curves for each species - group all curves into single series per species
for species_idx in range(3):
    species_mask = y == species_idx
    species_data = X_scaled[species_mask]
    original_data = X[species_mask]

    # Sample curves per species for clarity (drawing all 50 would overplot)
    indices = np.random.choice(len(species_data), n_curves_per_species, replace=False)

    # Collect all points for this species into a single series
    all_points = []
    for curve_num, idx in enumerate(indices):
        row = species_data[idx]
        orig = original_data[idx]
        # Andrews transform: f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t) + x4*sin(2t)
        curve_values = (
            row[0] / np.sqrt(2)
            + row[1] * np.sin(t_values)
            + row[2] * np.cos(t_values)
            + row[3] * np.sin(2 * t_values)
        )
        # Tooltip shows the unscaled measurements, which are easier to read
        # than the standardized values used to compute the curve.
        tooltip = (
            f"{species_names[species_idx]}: Sepal {orig[0]:.1f}×{orig[1]:.1f}cm, Petal {orig[2]:.1f}×{orig[3]:.1f}cm"
        )
        # Convert NumPy scalars to plain floats before handing them to pygal
        points = [
            {"value": (float(t), float(v)), "label": tooltip} for t, v in zip(t_values, curve_values, strict=True)
        ]
        all_points.extend(points)
        # Add None to create a break between curves (discontinuity);
        # this relies on allow_interruptions=True set on the chart.
        if curve_num < len(indices) - 1:
            all_points.append(None)

    # Add single series per species - clean legend with only 3 entries
    chart.add(species_names[species_idx], all_points, show_dots=False)

# Save outputs
chart.render_to_file("plot.html")
chart.render_to_png("plot.png")