Skip to content

Commit 8410700

Browse files
Merge branch 'main' into implementation/chernoff-basic/seaborn
2 parents ff32cae + d8228a6 commit 8410700

27 files changed

Lines changed: 3723 additions & 1 deletion

File tree

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
<img src="app/public/logo.svg" alt="pyplots.ai" width="250">
22

3+
**[pyplots.ai](https://pyplots.ai)**
4+
35
[![Python 3.13+](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/)
46
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
57
[![Tests](https://github.com/MarkusNeusinger/pyplots/actions/workflows/ci-tests.yml/badge.svg?branch=main)](https://github.com/MarkusNeusinger/pyplots/actions/workflows/ci-tests.yml)
@@ -205,7 +207,7 @@ MIT License - see [LICENSE](LICENSE) file for details.
205207

206208
<div align="center">
207209

208-
**Made with ❤️ by the data science community**
210+
**Built by [Markus Neusinger](https://linkedin.com/in/markus-neusinger/)**
209211

210212
[⭐ Star us on GitHub](https://github.com/MarkusNeusinger/pyplots)[🐛 Report Bug](https://github.com/MarkusNeusinger/pyplots/issues)[💡 Request Feature](https://github.com/MarkusNeusinger/pyplots/issues)
211213

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
""" pyplots.ai
2+
andrews-curves: Andrews Curves for Multivariate Data
3+
Library: letsplot 4.8.2 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-31
5+
"""
6+
7+
import numpy as np
8+
import pandas as pd
9+
from lets_plot import (
10+
LetsPlot,
11+
aes,
12+
element_text,
13+
geom_line,
14+
ggplot,
15+
ggsize,
16+
labs,
17+
scale_color_manual,
18+
scale_x_continuous,
19+
theme,
20+
theme_minimal,
21+
)
22+
from lets_plot.export import ggsave
23+
from sklearn.datasets import load_iris
24+
from sklearn.preprocessing import StandardScaler
25+
26+
27+
LetsPlot.setup_html()

# Load the Iris measurements together with their class labels
iris = load_iris()
X = iris.data
y = iris.target
feature_names = iris.feature_names
target_names = iris.target_names

# Standardize every feature so that no single variable dominates the curves
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create DataFrame with normalized features and species
df_features = pd.DataFrame(X_scaled, columns=feature_names)
df_features["species"] = [target_names[i] for i in y]

# Andrews curves transformation
# f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t) + x4*sin(2t) + x5*cos(2t) + ...
t_values = np.linspace(-np.pi, np.pi, 200)
n_obs, n_features = X_scaled.shape

# curve_matrix[r, c] holds f(t_values[c]) for observation r. Each Fourier term
# is added for all observations at once instead of looping in Python over
# every (observation, t) pair.
curve_matrix = np.repeat(X_scaled[:, [0]] / np.sqrt(2), t_values.size, axis=1)
for i in range(1, n_features):
    harmonic = (i + 1) // 2  # features 1, 2 -> 1; features 3, 4 -> 2; ...
    basis = np.sin if i % 2 == 1 else np.cos  # odd index -> sin, even -> cos
    curve_matrix += np.outer(X_scaled[:, i], basis(harmonic * t_values))

# Long format, one row per (observation, t) pair, ordered observation by
# observation so that each group forms one continuous line.
df_curves = pd.DataFrame(
    {
        "t": np.tile(t_values, n_obs),
        "y": curve_matrix.ravel(),
        "observation": np.repeat(df_features.index.to_numpy(), t_values.size),
        "species": np.repeat(df_features["species"].to_numpy(), t_values.size),
    }
)

# Define colors for species - Python blue, yellow, and a third color
species_colors = {"setosa": "#306998", "versicolor": "#FFD43B", "virginica": "#DC2626"}

# Create plot
plot = (
    ggplot(df_curves, aes(x="t", y="y", group="observation", color="species"))
    + geom_line(alpha=0.4, size=0.8)
    # Pass the mapping itself so each species is tied to its colour by name
    # rather than by the order in which categories happen to appear.
    + scale_color_manual(values=species_colors)
    + scale_x_continuous(breaks=[-np.pi, -np.pi / 2, 0, np.pi / 2, np.pi], labels=["-π", "-π/2", "0", "π/2", "π"])
    + labs(
        x="Parameter t (radians)",
        y="Fourier Function Value",
        title="andrews-curves · letsplot · pyplots.ai",
        color="Species",
    )
    + theme_minimal()
    + theme(
        axis_title=element_text(size=20),
        axis_text=element_text(size=16),
        plot_title=element_text(size=24),
        legend_title=element_text(size=18),
        legend_text=element_text(size=16),
        legend_position="right",
    )
    + ggsize(1600, 900)
)

# Save PNG (scale 3x to get 4800 × 2700 px)
ggsave(plot, "plot.png", path=".", scale=3)

# Save HTML for interactivity
ggsave(plot, "plot.html", path=".")
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
""" pyplots.ai
2+
andrews-curves: Andrews Curves for Multivariate Data
3+
Library: plotly 6.5.0 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-31
5+
"""
6+
7+
import numpy as np
8+
import plotly.graph_objects as go
9+
from sklearn.datasets import load_iris
10+
11+
12+
# Data - Iris measurements (150 samples, 4 features) and their class labels
iris = load_iris()
X, y = iris.data, iris.target
species_names = ["Setosa", "Versicolor", "Virginica"]
colors = ["#306998", "#FFD43B", "#E74C3C"]  # Python Blue, Python Yellow, Red

# Z-score each feature column so that no single variable dominates the curves
feature_means = X.mean(axis=0)
feature_stds = X.std(axis=0)
X_normalized = (X - feature_means) / feature_stds

# Parameter grid on which every Andrews curve is evaluated:
# f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t) + x4*sin(2t) + x5*cos(2t) + ...
t = np.linspace(start=-np.pi, stop=np.pi, num=200)
25+
26+
27+
def andrews_curve(x, t_vals):
    """Transform a single observation to Andrews curve values.

    Evaluates the finite Fourier series

        f(t) = x[0]/sqrt(2) + x[1]*sin(t) + x[2]*cos(t) + x[3]*sin(2t) + x[4]*cos(2t) + ...

    Args:
        x: One observation as a non-empty 1-D sequence of numeric feature values.
        t_vals: Parameter values at which to evaluate the curve (any array-like).

    Returns:
        Float ndarray with the same shape as ``t_vals`` holding f(t) for each t.
    """
    coeffs = np.asarray(x, dtype=float)
    # Coerce to an array first: with a plain list, ``freq * t_vals`` would repeat
    # the list instead of scaling its values.
    t_arr = np.asarray(t_vals, dtype=float)

    # Constant term; the remaining features add alternating sin/cos harmonics.
    curve = np.full(t_arr.shape, coeffs[0] / np.sqrt(2))
    for i, coeff in enumerate(coeffs[1:], start=1):
        freq = (i + 1) // 2  # features 1, 2 -> 1; features 3, 4 -> 2; ...
        basis = np.sin if i % 2 == 1 else np.cos
        curve += coeff * basis(freq * t_arr)
    return curve
38+
39+
40+
# Create figure
fig = go.Figure()

# One trace per observation; all traces of a species share colour and legend group
for species_idx, (species_label, species_color) in enumerate(zip(species_names, colors, strict=True)):
    members = X_normalized[y == species_idx]

    for curve_no, observation in enumerate(members):
        trace = go.Scatter(
            x=t,
            y=andrews_curve(observation, t),
            mode="lines",
            line={"color": species_color, "width": 2},
            opacity=0.4,
            name=species_label,
            legendgroup=species_label,
            # Only the first curve of each species contributes a legend entry
            showlegend=curve_no == 0,
            hovertemplate=f"{species_label}<br>t: %{{x:.2f}}<br>f(t): %{{y:.2f}}<extra></extra>",
        )
        fig.add_trace(trace)

# Styling shared by both axes: tick font, grid lines and zero line
axis_style = {
    "tickfont": {"size": 18},
    "gridcolor": "rgba(128, 128, 128, 0.3)",
    "gridwidth": 1,
    "zeroline": True,
    "zerolinecolor": "rgba(128, 128, 128, 0.5)",
    "zerolinewidth": 1,
}
pi_tick_positions = [-np.pi, -np.pi / 2, 0, np.pi / 2, np.pi]
pi_tick_labels = ["-π", "-π/2", "0", "π/2", "π"]

# Update layout for 4800x2700 canvas
fig.update_layout(
    title={
        "text": "Iris Dataset · andrews-curves · plotly · pyplots.ai",
        "font": {"size": 32},
        "x": 0.5,
        "xanchor": "center",
    },
    xaxis={
        "title": {"text": "Parameter t (radians)", "font": {"size": 24}},
        **axis_style,
        "range": [-np.pi, np.pi],
        "tickvals": pi_tick_positions,
        "ticktext": pi_tick_labels,
    },
    yaxis={
        "title": {"text": "f(t) (normalized units)", "font": {"size": 24}},
        **axis_style,
    },
    template="plotly_white",
    legend={
        "font": {"size": 20},
        "x": 0.98,
        "y": 0.98,
        "xanchor": "right",
        "yanchor": "top",
        "bgcolor": "rgba(255, 255, 255, 0.8)",
        "bordercolor": "rgba(128, 128, 128, 0.3)",
        "borderwidth": 1,
    },
    margin={"l": 100, "r": 80, "t": 100, "b": 80},
    plot_bgcolor="white",
    paper_bgcolor="white",
)

# Save as PNG (1600 x 900 at scale 3 -> 4800 x 2700 px)
fig.write_image("plot.png", width=1600, height=900, scale=3)

# Save interactive HTML version
fig.write_html("plot.html", include_plotlyjs=True, full_html=True)
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
""" pyplots.ai
2+
andrews-curves: Andrews Curves for Multivariate Data
3+
Library: pygal 3.1.0 | Python 3.13.11
4+
Quality: 88/100 | Created: 2025-12-31
5+
"""
6+
7+
import numpy as np
8+
import pygal
9+
from pygal.style import Style
10+
11+
12+
# Generate synthetic Iris-like data (4 features, 3 species)
np.random.seed(42)

# Simulate sepal length, sepal width, petal length, petal width for 3 species
# Species 1: Setosa - small petals, medium sepals
setosa = np.column_stack(
    [
        np.random.normal(5.0, 0.35, 50),  # sepal length
        np.random.normal(3.4, 0.38, 50),  # sepal width
        np.random.normal(1.5, 0.17, 50),  # petal length
        np.random.normal(0.2, 0.10, 50),  # petal width
    ]
)

# Species 2: Versicolor - medium petals and sepals
versicolor = np.column_stack(
    [
        np.random.normal(5.9, 0.52, 50),  # sepal length
        np.random.normal(2.8, 0.31, 50),  # sepal width
        np.random.normal(4.3, 0.47, 50),  # petal length
        np.random.normal(1.3, 0.20, 50),  # petal width
    ]
)

# Species 3: Virginica - large petals and sepals
virginica = np.column_stack(
    [
        np.random.normal(6.6, 0.64, 50),  # sepal length
        np.random.normal(3.0, 0.32, 50),  # sepal width
        np.random.normal(5.5, 0.55, 50),  # petal length
        np.random.normal(2.0, 0.27, 50),  # petal width
    ]
)

# Combine data
X = np.vstack([setosa, versicolor, virginica])
y = np.array([0] * 50 + [1] * 50 + [2] * 50)
species_names = ["Setosa", "Versicolor", "Virginica"]

# Normalize variables (z-score standardization)
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X_scaled = (X - X_mean) / X_std

# Andrews curve function: f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t) + x4*sin(2t) + ...
t_values = np.linspace(-np.pi, np.pi, 100)

# Colors for 3 species - colorblind-safe palette (blue, orange, purple)
species_colors = ("#306998", "#E67E22", "#9B59B6")
n_curves_per_species = 15

# Custom style for large canvas with increased font sizes for readability
custom_style = Style(
    background="white",
    plot_background="white",
    foreground="#333333",
    foreground_strong="#333333",
    foreground_subtle="#666666",
    colors=species_colors,
    title_font_size=96,
    label_font_size=64,
    major_label_font_size=56,
    legend_font_size=64,
    value_font_size=48,
    stroke_width=2,
    opacity=0.4,
    opacity_hover=0.8,
    tooltip_font_size=48,
)

# Create XY chart with interactive features
chart = pygal.XY(
    width=4800,
    height=2700,
    style=custom_style,
    title="andrews-curves · pygal · pyplots.ai",
    x_title="t (radians)",
    y_title="f(t)",
    show_dots=False,
    stroke_style={"width": 2},
    show_x_guides=True,
    show_y_guides=True,
    legend_at_bottom=True,
    legend_at_bottom_columns=3,
    legend_box_size=32,
    truncate_legend=-1,
    tooltip_border_radius=10,
    # Needed for the None separators added below to split a series into separate
    # curves; without it pygal draws one continuous path through all the points.
    allow_interruptions=True,
    js=["https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"],
)

# Plot curves for each species - group all curves into single series per species
for species_idx in range(3):
    species_mask = y == species_idx
    species_data = X_scaled[species_mask]
    original_data = X[species_mask]

    # Sample curves per species for clarity
    indices = np.random.choice(len(species_data), n_curves_per_species, replace=False)

    # Collect all points for this species into a single series
    all_points = []
    for curve_num, idx in enumerate(indices):
        row = species_data[idx]
        orig = original_data[idx]
        # Andrews transform: f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t) + x4*sin(2t)
        curve_values = (
            row[0] / np.sqrt(2) + row[1] * np.sin(t_values) + row[2] * np.cos(t_values) + row[3] * np.sin(2 * t_values)
        )
        # Tooltip reports the un-normalized measurements of the sampled observation
        tooltip = (
            f"{species_names[species_idx]}: Sepal {orig[0]:.1f}×{orig[1]:.1f}cm, Petal {orig[2]:.1f}×{orig[3]:.1f}cm"
        )
        points = [
            {"value": (float(t), float(v)), "label": tooltip} for t, v in zip(t_values, curve_values, strict=True)
        ]
        all_points.extend(points)
        # Add None to create a break between curves (relies on allow_interruptions=True)
        if curve_num < len(indices) - 1:
            all_points.append(None)

    # Add single series per species - clean legend with only 3 entries
    chart.add(species_names[species_idx], all_points, show_dots=False)

# Save outputs
chart.render_to_file("plot.html")
chart.render_to_png("plot.png")
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
library: letsplot
2+
specification_id: andrews-curves
3+
created: '2025-12-31T21:36:48Z'
4+
updated: '2025-12-31T21:39:19Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20627522568
7+
issue: 2859
8+
python_version: 3.13.11
9+
library_version: 4.8.2
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/andrews-curves/letsplot/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/andrews-curves/letsplot/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/andrews-curves/letsplot/plot.html
13+
quality_score: 91
14+
review:
15+
strengths:
16+
- Excellent implementation of Andrews curves Fourier transformation algorithm
17+
- Proper normalization of data using StandardScaler before transformation
18+
- Good use of transparency (alpha=0.4) for overlapping curves as recommended in
19+
spec
20+
- Clear cluster separation visible between species
21+
- Proper use of π symbols in x-axis tick labels
22+
- Correct title format following pyplots.ai convention
23+
- Clean, readable code structure
24+
- Both PNG and HTML outputs generated for interactivity
25+
weaknesses:
26+
- No visible grid lines to aid visual reference (theme_minimal may have removed
27+
them, but subtle grid would help)
28+
- Missing explicit random seed (though data is deterministic from sklearn, good
29+
practice to include)

0 commit comments

Comments
 (0)