Skip to content

Commit c66242f

Browse files
feat(matplotlib): implement parallel-categories-basic (#2527)
## Implementation: `parallel-categories-basic` - matplotlib Implements the **matplotlib** version of `parallel-categories-basic`. **File:** `plots/parallel-categories-basic/implementations/matplotlib.py` --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20585400230)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 2c1408a commit c66242f

2 files changed

Lines changed: 248 additions & 0 deletions

File tree

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
""" pyplots.ai
2+
parallel-categories-basic: Basic Parallel Categories Plot
3+
Library: matplotlib 3.10.8 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-30
5+
"""
6+
7+
import matplotlib.patches as mpatches
8+
import matplotlib.pyplot as plt
9+
import numpy as np
10+
import pandas as pd
11+
from matplotlib.path import Path
12+
13+
14+
# Data: synthetic customer purchase flow (Channel -> Category -> Outcome).
# Each dimension lists its categories once; the same lists feed the random
# draws below, so the sampled values always belong to the declared categories.
dim_categories = {
    "Channel": ["Online", "Store", "Mobile"],
    "Category": ["Electronics", "Clothing", "Home", "Sports"],
    "Outcome": ["Purchased", "Returned", "Abandoned"],
}
dimensions = list(dim_categories)

# Sampling probability of every category, aligned with the lists above
probabilities = {
    "Channel": [0.4, 0.35, 0.25],
    "Category": [0.3, 0.25, 0.25, 0.2],
    "Outcome": [0.6, 0.15, 0.25],
}

# Fixed seed keeps the generated data reproducible.  The dimensions are drawn
# in declaration order (Channel, Category, Outcome).
np.random.seed(42)
n_samples = 500
samples = {
    dim: np.random.choice(dim_categories[dim], size=n_samples, p=probabilities[dim])
    for dim in dimensions
}
channels = samples["Channel"]
categories = samples["Category"]
outcomes = samples["Outcome"]

df = pd.DataFrame({"Channel": channels, "Category": categories, "Outcome": outcomes})

# One colour per category of the first dimension (Channel)
colors = {"Online": "#1F77B4", "Store": "#FF7F0E", "Mobile": "#2CA02C"}
35+
36+
# Horizontal layout: one evenly spaced x position per dimension on [0, 1]
n_dims = len(dimensions)
x_positions = np.linspace(0.0, 1.0, num=n_dims)

# Half-width of a category bar; bars span x - dim_width .. x + dim_width
dim_width = 0.08

# 16:9 drawing canvas
fig, ax = plt.subplots(figsize=(16, 9))
43+
44+
# Vertical layout of the category bars.
#
# For every dimension the categories are stacked bottom-to-top in the order
# given by `dim_categories`.  Each bar's height is its share of the
# observations, scaled to the space left between y_start and y_end once the
# gaps between neighbouring bars have been reserved.
category_positions = {}  # dim -> {category: (y_bottom, y_top)}
category_heights = {}  # dim -> {category: share of observations}

y_start = 0.05
y_end = 0.95
gap = 0.02
available_height = y_end - y_start

for dim in dimensions:
    cats = dim_categories[dim]
    counts = df[dim].value_counts()
    total = counts.sum()

    # Share of observations per category (0 for a category that never occurs)
    shares = {cat: counts.get(cat, 0) / total for cat in cats}

    # Height that remains for the bars after the inter-bar gaps
    usable_height = available_height - gap * (len(cats) - 1)

    spans = {}
    cursor = y_start
    for cat in cats:
        bar_height = shares[cat] * usable_height
        spans[cat] = (cursor, cursor + bar_height)
        cursor += bar_height + gap

    category_positions[dim] = spans
    category_heights[dim] = shares
73+
74+
# Draw ribbons between consecutive dimensions.
#
# Every flow between two neighbouring categories is split by the value of the
# first dimension (Channel), so each ribbon is coloured by the channel of the
# observations it actually carries and therefore agrees with the legend.
# Ribbons are stacked in the order of `dim_categories` -- the same order in
# which the bars are stacked -- so flows leave and enter each bar without
# avoidable crossings.
color_dim = dimensions[0]
fallback_color = "#306998"  # used only if a channel has no entry in `colors`

# Outline of one ribbon: bottom curve left-to-right, right edge, top curve
# right-to-left, then close back to the starting corner.
ribbon_codes = [
    Path.MOVETO,
    Path.CURVE4,
    Path.CURVE4,
    Path.CURVE4,
    Path.LINETO,
    Path.CURVE4,
    Path.CURVE4,
    Path.CURVE4,
    Path.CLOSEPOLY,
]

for i in range(n_dims - 1):
    dim1 = dimensions[i]
    dim2 = dimensions[i + 1]
    x1 = x_positions[i] + dim_width  # right edge of the source bars
    x2 = x_positions[i + 1] - dim_width  # left edge of the target bars

    # Bezier control points at 40% and 60% of the gap give the S-shaped curve
    x_ctrl1 = x1 + (x2 - x1) * 0.4
    x_ctrl2 = x1 + (x2 - x1) * 0.6

    # Observations per (channel, source category, target category).  For the
    # first pair of dimensions the channel and the source category coincide.
    flow_counts = {}
    for key in zip(df[color_dim], df[dim1], df[dim2]):
        flow_counts[key] = flow_counts.get(key, 0) + 1

    # Per-category totals and bar extents do not change inside the ribbon loop
    left_totals = df[dim1].value_counts()
    right_totals = df[dim2].value_counts()
    left_bands = category_positions[dim1]
    right_bands = category_positions[dim2]

    # Next free y position inside each bar, starting at the bar's bottom
    current_y_left = {cat: bottom for cat, (bottom, _) in left_bands.items()}
    current_y_right = {cat: bottom for cat, (bottom, _) in right_bands.items()}

    # Non-empty flows, ordered by source bar, then target bar, then channel
    ordered_flows = [
        (channel, cat1, cat2, flow_counts[(channel, cat1, cat2)])
        for cat1 in dim_categories[dim1]
        for cat2 in dim_categories[dim2]
        for channel in dim_categories[color_dim]
        if (channel, cat1, cat2) in flow_counts
    ]

    for channel, cat1, cat2, count in ordered_flows:
        left_bottom, left_top = left_bands[cat1]
        right_bottom, right_top = right_bands[cat2]

        # Ribbon thickness at each end is the flow's share of the bar it touches
        y1_bottom = current_y_left[cat1]
        y1_top = y1_bottom + (count / left_totals[cat1]) * (left_top - left_bottom)
        y2_bottom = current_y_right[cat2]
        y2_top = y2_bottom + (count / right_totals[cat2]) * (right_top - right_bottom)

        vertices = [
            (x1, y1_bottom),  # Start bottom left
            (x_ctrl1, y1_bottom),  # Control point 1
            (x_ctrl2, y2_bottom),  # Control point 2
            (x2, y2_bottom),  # End bottom right
            (x2, y2_top),  # End top right
            (x_ctrl2, y2_top),  # Control point 3
            (x_ctrl1, y1_top),  # Control point 4
            (x1, y1_top),  # Start top left
            (x1, y1_bottom),  # Close path
        ]

        patch = mpatches.PathPatch(
            Path(vertices, ribbon_codes),
            facecolor=colors.get(channel, fallback_color),
            edgecolor="white",
            linewidth=0.5,
            alpha=0.6,
        )
        ax.add_patch(patch)

        # The next ribbon touching these bars starts where this one ended
        current_y_left[cat1] = y1_top
        current_y_right[cat2] = y2_top
171+
172+
# Category bars: one dark rectangle per category, centred on the x position of
# its dimension, with the category name written in its middle.
bar_style = {"facecolor": "#2C3E50", "edgecolor": "white", "linewidth": 2}
bar_label_style = {"ha": "center", "va": "center", "fontsize": 14, "fontweight": "bold", "color": "white"}

for x, dim in zip(x_positions, dimensions):
    for cat, (bottom, top) in category_positions[dim].items():
        rect = mpatches.Rectangle((x - dim_width, bottom), dim_width * 2, top - bottom, **bar_style)
        ax.add_patch(rect)
        ax.text(x, (bottom + top) / 2, cat, **bar_label_style)

# Dimension names, placed just above the top of the bars
header_style = {"ha": "center", "va": "bottom", "fontsize": 20, "fontweight": "bold", "color": "#2C3E50"}

for x, dim in zip(x_positions, dimensions):
    ax.text(x, 1.02, dim, **header_style)
195+
196+
# Frame the drawing: leave a margin around the unit square that holds the
# bars, use the same scale on both axes, and hide ticks and spines.
ax.set(xlim=(-0.15, 1.15), ylim=(-0.05, 1.15))
ax.set_aspect("equal")
ax.set_axis_off()

# Title
title_style = {"fontsize": 24, "fontweight": "bold", "pad": 20, "color": "#2C3E50"}
ax.set_title("parallel-categories-basic · matplotlib · pyplots.ai", **title_style)
206+
207+
# Legend: one swatch per channel, using the same transparency as the ribbons
legend_patches = [mpatches.Patch(color=shade, alpha=0.6, label=channel) for channel, shade in colors.items()]
ax.legend(
    handles=legend_patches,
    title="Channel",
    loc="lower right",
    bbox_to_anchor=(1.12, 0.0),
    fontsize=16,
    title_fontsize=18,
    framealpha=0.9,
)

# Write the finished figure to disk on a white background
plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight", facecolor="white")
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
library: matplotlib
2+
specification_id: parallel-categories-basic
3+
created: '2025-12-30T00:04:38Z'
4+
updated: '2025-12-30T00:22:19Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20585400230
7+
issue: 0
8+
python_version: 3.13.11
9+
library_version: 3.10.8
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/matplotlib/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/matplotlib/plot_thumb.png
12+
preview_html: null
13+
quality_score: 91
14+
review:
15+
strengths:
16+
- Excellent implementation of parallel categories using matplotlib low-level Path
17+
API with smooth bezier curve ribbons
18+
- Clean visual design with dark navy category bars and white text labels
19+
- Proper ribbon width proportional to observation counts showing flow patterns
20+
- Good color coding by source channel with appropriate transparency for overlapping
21+
ribbons
22+
- Realistic e-commerce customer journey scenario (Channel → Category → Outcome)
23+
- Title follows exact required format
24+
weaknesses:
25+
- Online (blue) and Mobile (lighter blue) colors in legend are too similar and could
26+
be confused; recommend using more distinct colors like green for Mobile
27+
- The second-hop ribbon coloring logic uses mode() which may not accurately trace
28+
back the original channel for all flows

0 commit comments

Comments
 (0)