Skip to content

Commit f5fd726

Browse files
feat(bokeh): implement parallel-categories-basic (#2533)
## Implementation: `parallel-categories-basic` - bokeh Implements the **bokeh** version of `parallel-categories-basic`. **File:** `plots/parallel-categories-basic/implementations/bokeh.py` --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20585401180)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 527fef1 commit f5fd726

2 files changed

Lines changed: 311 additions & 0 deletions

File tree

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
""" pyplots.ai
2+
parallel-categories-basic: Basic Parallel Categories Plot
3+
Library: bokeh 3.8.1 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-30
5+
"""
6+
7+
import numpy as np
8+
import pandas as pd
9+
from bokeh.io import export_png, save
10+
from bokeh.models import ColumnDataSource, Label
11+
from bokeh.plotting import figure
12+
from bokeh.resources import CDN
13+
14+
# Data - Product purchase journey: Channel -> Category -> Outcome
np.random.seed(42)

# Levels of each dimension, listed in the order they are sampled from
channels = ["Online", "Store", "Mobile"]
categories = ["Electronics", "Clothing", "Home"]
outcomes = ["Purchased", "Returned", "Exchanged"]

# Sampling weights. Each list is aligned with the level list it is drawn from
# (channels / categories / outcomes respectively).
channel_probs = [0.45, 0.35, 0.20]
# Category probabilities vary by channel
category_probs_by_channel = {
    "Online": [0.5, 0.3, 0.2],
    "Store": [0.2, 0.5, 0.3],
    "Mobile": [0.6, 0.25, 0.15],
}
# Outcome probabilities vary by category
outcome_probs_by_category = {
    "Electronics": [0.7, 0.2, 0.1],
    "Clothing": [0.6, 0.25, 0.15],
    "Home": [0.85, 0.1, 0.05],
}

# Draw 500 journeys. The three draws per journey happen in a fixed order
# (channel, category, outcome) so the seeded random stream is consumed the
# same way on every run.
data = []
for _ in range(500):
    channel = np.random.choice(channels, p=channel_probs)
    category = np.random.choice(categories, p=category_probs_by_channel[channel])
    outcome = np.random.choice(outcomes, p=outcome_probs_by_category[category])
    data.append({"Channel": channel, "Category": category, "Outcome": outcome})

df = pd.DataFrame(data)
44+
# Dimensions in left-to-right plot order, with the ordered levels of each
dimensions = ["Channel", "Category", "Outcome"]
dim_values = {"Channel": channels, "Category": categories, "Outcome": outcomes}

# One row per distinct Channel/Category/Outcome path, with its frequency
path_sizes = df.groupby(["Channel", "Category", "Outcome"]).size()
path_counts = path_sizes.reset_index(name="count")

# Horizontal placement: consecutive dimensions are 1.5 data units apart
x_positions = {name: index * 1.5 for index, name in enumerate(dimensions)}

# Number of observations; used elsewhere in this script as the denominator
# that turns counts into fractions, so each dimension's stacked column
# spans 0 to 1 on the y axis.
total_count = len(df)
58+
# Build category positions for each dimension.
# For every dimension the categories are stacked bottom-to-top in the order
# given by dim_values; each category's height is its share of all rows, so the
# stack of one dimension adds up to 1.
dim_cat_positions = {}
for dim in dimensions:
    # Count occurrences of each category (same computation for every dimension)
    counts = df[dim].value_counts()

    positions = {}
    y_current = 0
    for cat in dim_values[dim]:
        # A level that never occurs in the data gets a zero-height box
        count = counts.get(cat, 0)
        height = count / total_count
        positions[cat] = {"y_start": y_current, "height": height, "y_end": y_current + height}
        y_current += height
    dim_cat_positions[dim] = positions
76+
# Create ribbons connecting categories between adjacent dimensions.
# Every aggregated path contributes one ribbon per pair of neighbouring
# dimensions; each ribbon is stored as the vertex list of a closed polygon.
ribbon_patches_x = []
ribbon_patches_y = []
ribbon_colors = []

# Color by first dimension (Channel)
channel_colors = {
    "Online": "#306998",  # Python Blue
    "Store": "#FFD43B",  # Python Yellow
    "Mobile": "#4DAF4A",  # Green
}

# Vertical space already consumed inside each category box, per dimension
running_positions = {dim: dict.fromkeys(dim_values[dim], 0) for dim in dimensions}

# Curve parameter shared by every ribbon edge (does not depend on the path)
num_curve_points = 20
t = np.linspace(0, 1, num_curve_points)

# Process each unique path
for _, row in path_counts.iterrows():
    count = row["count"]
    ribbon_height = count / total_count

    # Get color based on first dimension
    color = channel_colors[row["Channel"]]

    # Create ribbons between each pair of adjacent dimensions
    for dim1, dim2 in zip(dimensions, dimensions[1:]):
        cat1 = row[dim1]
        cat2 = row[dim2]

        # Get x positions
        x1 = x_positions[dim1]
        x2 = x_positions[dim2]

        # Get y positions: box base plus what earlier paths already occupy
        y1_base = dim_cat_positions[dim1][cat1]["y_start"]
        y1_start = y1_base + running_positions[dim1][cat1]
        y1_end = y1_start + ribbon_height

        y2_base = dim_cat_positions[dim2][cat2]["y_start"]
        y2_start = y2_base + running_positions[dim2][cat2]
        y2_end = y2_start + ribbon_height

        # Both inner control points of the cubic Bezier sit at the horizontal
        # midpoint, so each edge leaves and arrives horizontally.
        x_mid = (x1 + x2) / 2

        # Top edge: bezier from (x1, y1_end) to (x2, y2_end)
        top_x = x1 * (1 - t) ** 3 + 3 * x_mid * t * (1 - t) ** 2 + 3 * x_mid * t**2 * (1 - t) + x2 * t**3
        top_y = y1_end * (1 - t) ** 3 + 3 * y1_end * t * (1 - t) ** 2 + 3 * y2_end * t**2 * (1 - t) + y2_end * t**3

        # Bottom edge: bezier from (x2, y2_start) to (x1, y1_start) (reversed)
        bottom_x = x2 * (1 - t) ** 3 + 3 * x_mid * t * (1 - t) ** 2 + 3 * x_mid * t**2 * (1 - t) + x1 * t**3
        bottom_y = (
            y2_start * (1 - t) ** 3 + 3 * y2_start * t * (1 - t) ** 2 + 3 * y1_start * t**2 * (1 - t) + y1_start * t**3
        )

        # Combine to form closed polygon: top edge left-to-right, then the
        # bottom edge right-to-left
        patch_x = np.concatenate([top_x, bottom_x])
        patch_y = np.concatenate([top_y, bottom_y])

        ribbon_patches_x.append(patch_x.tolist())
        ribbon_patches_y.append(patch_y.tolist())
        ribbon_colors.append(color)

    # Advance the offsets only after ALL segments of this path are built, so
    # the ribbon entering a middle-dimension box and the ribbon leaving it use
    # the same vertical slot.
    for dim in dimensions:
        cat = row[dim]
        running_positions[dim][cat] += ribbon_height
165+
# Create figure: a 4800x2700 canvas without toolbar or interactive tools.
# The data ranges are fixed explicitly rather than auto-ranged.
p = figure(
    title="parallel-categories-basic · bokeh · pyplots.ai",
    width=4800,
    height=2700,
    x_range=(-0.7, 4.0),
    y_range=(-0.05, 1.15),
    toolbar_location=None,
    tools="",
)
176+
# Draw ribbons.
# All ribbons share one data source and one patches glyph; the per-ribbon
# colour comes from the "color" column. Patches are listed in the order the
# ribbons were generated.
ribbon_source = ColumnDataSource(
    data={
        "x": ribbon_patches_x,
        "y": ribbon_patches_y,
        "color": ribbon_colors,
    }
)
p.patches(
    xs="x",
    ys="y",
    source=ribbon_source,
    fill_color="color",
    fill_alpha=0.6,
    line_color="color",
    line_alpha=0.8,
    line_width=0.5,
)
190+
# Draw category boxes: one dark rectangle per category in every dimension,
# each accompanied by a text label centred vertically on the box.
box_width = 0.12
for dim in dimensions:
    x = x_positions[dim]
    box_left = x - box_width / 2
    box_right = x + box_width / 2
    # Labels go to the right of the last dimension and to the left of all
    # others.
    label_on_right = dim == dimensions[-1]

    for cat in dim_values[dim]:
        pos = dim_cat_positions[dim][cat]
        y_low = pos["y_start"]
        y_high = pos["y_end"]

        # Rectangle corners, starting bottom-left and walking counter-clockwise
        source = ColumnDataSource(
            data={
                "x": [[box_left, box_right, box_right, box_left]],
                "y": [[y_low, y_low, y_high, y_high]],
            }
        )
        p.patches(xs="x", ys="y", source=source, fill_color="#333333", fill_alpha=0.9, line_color="white", line_width=2)

        # Category label, offset 0.05 data units away from the box edge
        y_mid = (y_low + y_high) / 2
        if label_on_right:
            label_x, align = box_right + 0.05, "left"
        else:
            label_x, align = box_left - 0.05, "right"

        label = Label(
            x=label_x,
            y=y_mid,
            text=cat,
            text_font_size="28pt",
            text_color="#333333",
            text_align=align,
            text_baseline="middle",
        )
        p.add_layout(label)
225+
# Add dimension labels at the top: a bold, centred heading above each column,
# anchored by its bottom edge at y = 1.08.
header_style = {
    "text_font_size": "36pt",
    "text_color": "#333333",
    "text_font_style": "bold",
    "text_align": "center",
    "text_baseline": "bottom",
}
for dim in dimensions:
    x = x_positions[dim]
    label = Label(x=x, y=1.08, text=dim, **header_style)
    p.add_layout(label)
240+
# Add legend manually.
# Entries are derived from the channel list and the ribbon colour map so the
# legend cannot drift out of sync with the colours actually drawn.
legend_items = [(name, channel_colors[name]) for name in channels]
legend_y = 0.92  # vertical centre of the first entry; later entries step down by 0.1
lx = 3.35  # horizontal centre of every colour swatch
for i, (name, color) in enumerate(legend_items):
    # Legend box: 0.10 wide and 0.06 tall in data units, centred on (lx, ly)
    ly = legend_y - i * 0.1
    source = ColumnDataSource(
        data={"x": [[lx - 0.05, lx + 0.05, lx + 0.05, lx - 0.05]], "y": [[ly - 0.03, ly - 0.03, ly + 0.03, ly + 0.03]]}
    )
    p.patches(xs="x", ys="y", source=source, fill_color=color, fill_alpha=0.8, line_color="#333333", line_width=2)
    # Legend label, placed to the right of the swatch
    label = Label(
        x=lx + 0.1,
        y=ly,
        text=name,
        text_font_size="24pt",
        text_color="#333333",
        text_align="left",
        text_baseline="middle",
    )
    p.add_layout(label)
263+
# Title styling
p.title.align = "center"
p.title.text_color = "#333333"
p.title.text_font_size = "48pt"

# Parallel categories don't use traditional axes: hide both axes, both grids
# and the plot outline.
for guide in (p.xaxis, p.yaxis, p.xgrid, p.ygrid):
    guide.visible = False
p.outline_line_color = None

# Same light grey for the plot area and the surrounding border
canvas_color = "#FAFAFA"
p.background_fill_color = canvas_color
p.border_fill_color = canvas_color

# Static output
export_png(p, filename="plot.png")

# Interactive output: HTML page that loads BokehJS from the CDN
save(p, filename="plot.html", resources=CDN, title="Parallel Categories Plot")
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
library: bokeh
2+
specification_id: parallel-categories-basic
3+
created: '2025-12-30T00:05:32Z'
4+
updated: '2025-12-30T00:13:57Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20585401180
7+
issue: 0
8+
python_version: 3.13.11
9+
library_version: 3.8.1
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/bokeh/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/bokeh/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/parallel-categories-basic/bokeh/plot.html
13+
quality_score: 91
14+
review:
15+
strengths:
16+
- Excellent implementation of parallel categories from first principles using Bokeh
17+
patches API
18+
- Smooth bezier curves for ribbons create professional appearance
19+
- Color scheme (Python blue/yellow + green) is distinctive and colorblind-accessible
20+
- Realistic product journey data with conditional probabilities creates meaningful
21+
flow patterns
22+
- Both PNG and HTML outputs provided for static and interactive use
23+
- Text sizes appropriately scaled for 4800x2700 canvas
24+
weaknesses:
25+
- Some thinner ribbons representing less common paths are harder to trace visually
26+
- Legend placement in upper right creates slight visual imbalance with the main
27+
chart

0 commit comments

Comments
 (0)