""" pyplots.ai
elbow-curve: Elbow Curve for K-Means Clustering
Library: altair 6.0.0 | Python 3.13.11
Quality: 92/100 | Created: 2025-12-26
"""

import altair as alt
import numpy as np
import pandas as pd


# Data - simulate K-means inertia values with a realistic decay curve.
# Seeding NumPy's global RNG keeps the jitter below reproducible across runs.
np.random.seed(42)
k_values = list(range(1, 12))

# Realistic inertia: a sharp drop for small k, then diminishing returns.
# Modelled as exponential decay towards an asymptote of 200, plus uniform
# noise in [-50, 50) drawn once per k, in ascending k order.
base_inertia = 5000
inertia = [
    # The 150 floor is a safety net only: with an asymptote of 200 and noise
    # no lower than -50, the left-hand term already stays above 150.
    max(base_inertia * np.exp(-0.35 * (k - 1)) + 200 + np.random.uniform(-50, 50), 150)
    for k in k_values
]

# Mark optimal k (elbow point at k=4); the value is chosen by hand, not computed.
optimal_k = 4

# Column names double as the axis titles of the chart built from this frame.
df = pd.DataFrame(
    {
        "Number of Clusters (k)": k_values,
        "Inertia (Within-Cluster Sum of Squares)": inertia,
    }
)

# Base layer: the inertia curve drawn as one continuous line.
line = (
    alt.Chart(df)
    .mark_line(color="#306998", strokeWidth=4)
    .encode(
        x=alt.X(
            "Number of Clusters (k):Q",
            # Pad the domain by half a unit on each side so the first and
            # last k do not sit on the plot edge.
            scale=alt.Scale(domain=[k_values[0] - 0.5, k_values[-1] + 0.5]),
            # One tick per evaluated k; derived from k_values rather than
            # hard-coded so the axis follows the data.
            axis=alt.Axis(tickCount=len(k_values), values=k_values),
        ),
        y=alt.Y(
            "Inertia (Within-Cluster Sum of Squares):Q",
            # Start at zero and leave 10% headroom above the largest value.
            scale=alt.Scale(domain=[0, max(inertia) * 1.1]),
        ),
    )
)

# Marker layer: one filled point per k, with a tooltip showing both columns.
points = (
    alt.Chart(df)
    .mark_point(size=300, color="#306998", filled=True)
    .encode(
        x="Number of Clusters (k):Q",
        y="Inertia (Within-Cluster Sum of Squares):Q",
        tooltip=["Number of Clusters (k)", "Inertia (Within-Cluster Sum of Squares)"],
    )
)

# Highlight the elbow point (optimal k).
# elbow_df holds only the row(s) whose k equals optimal_k.
elbow_df = df[df["Number of Clusters (k)"] == optimal_k]
elbow_point = (
    alt.Chart(elbow_df)
    # Larger, contrasting marker with an outline in the line colour.
    .mark_point(size=600, color="#FFD43B", filled=True, stroke="#306998", strokeWidth=3)
    .encode(
        x="Number of Clusters (k):Q",
        y="Inertia (Within-Cluster Sum of Squares):Q",
    )
)

# Annotation for the elbow point, built from elbow_df (defined just above).
elbow_text = (
    alt.Chart(elbow_df)
    # dx/dy offset the label up and to the right of the highlighted marker.
    .mark_text(
        align="left",
        baseline="bottom",
        dx=15,
        dy=-15,
        fontSize=20,
        fontWeight="bold",
        color="#306998",
    )
    .encode(
        x="Number of Clusters (k):Q",
        y="Inertia (Within-Cluster Sum of Squares):Q",
        # Constant label text; it is not read from a data column.
        text=alt.value(f"Optimal k = {optimal_k}"),
    )
)

# Combine layers; later operands of `+` are drawn on top of earlier ones.
chart = (
    (line + points + elbow_point + elbow_text)
    .properties(
        width=1600,
        height=900,
        title=alt.Title("elbow-curve · altair · pyplots.ai", fontSize=28, anchor="middle"),
    )
    .configure_axis(labelFontSize=18, titleFontSize=22, gridColor="#CCCCCC", gridOpacity=0.3)
    # Remove the border drawn around the plotting area.
    .configure_view(strokeWidth=0)
)

# Save outputs: a PNG rendered at 3x scale and an HTML version.
chart.save("plot.png", scale_factor=3.0)
chart.save("plot.html")
0 commit comments