|
| 1 | +""" pyplots.ai |
| 2 | +parallel-categories-basic: Basic Parallel Categories Plot |
| 3 | +Library: bokeh 3.8.1 | Python 3.13.11 |
| 4 | +Quality: 91/100 | Created: 2025-12-30 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from bokeh.io import export_png, save |
| 10 | +from bokeh.models import ColumnDataSource, Label |
| 11 | +from bokeh.plotting import figure |
| 12 | +from bokeh.resources import CDN |
| 13 | + |
| 14 | + |
# Data - product purchase journey: Channel -> Category -> Outcome.
# Seeding NumPy's global RNG makes the synthetic sample reproducible.
np.random.seed(42)

channels = ["Online", "Store", "Mobile"]
categories = ["Electronics", "Clothing", "Home"]
outcomes = ["Purchased", "Returned", "Exchanged"]

# Sampling weights. Every weight list is ordered like the list it samples from.
channel_probs = [0.45, 0.35, 0.20]
category_probs_by_channel = {
    "Online": [0.5, 0.3, 0.2],
    "Store": [0.2, 0.5, 0.3],
}
category_probs_otherwise = [0.6, 0.25, 0.15]  # any channel not listed above
outcome_probs_by_category = {
    "Electronics": [0.7, 0.2, 0.1],
    "Clothing": [0.6, 0.25, 0.15],
}
outcome_probs_otherwise = [0.85, 0.1, 0.05]  # any category not listed above

# Draw 500 journeys. Each journey consumes the random stream in the fixed
# order channel -> category -> outcome; the category weights depend on the
# drawn channel and the outcome weights depend on the drawn category.
data = []
for _ in range(500):
    channel = np.random.choice(channels, p=channel_probs)
    category = np.random.choice(
        categories,
        p=category_probs_by_channel.get(channel, category_probs_otherwise),
    )
    outcome = np.random.choice(
        outcomes,
        p=outcome_probs_by_category.get(category, outcome_probs_otherwise),
    )
    data.append({"Channel": channel, "Category": category, "Outcome": outcome})

# One row per simulated journey.
df = pd.DataFrame(data)
| 44 | + |
# The categorical dimensions, in the order their axes appear left to right.
dimensions = ["Channel", "Category", "Outcome"]

# Number of records for every distinct Channel/Category/Outcome combination,
# flattened back into a regular frame with the size stored in a "count" column.
path_sizes = df.groupby(dimensions).size()
path_counts = path_sizes.reset_index(name="count")

# Display order of the values within each dimension.
dim_values = dict(zip(dimensions, (channels, categories, outcomes)))

# Horizontal position of each dimension's axis: 1.5 data units apart.
x_positions = {name: index * 1.5 for index, name in enumerate(dimensions)}

# Total record count; box and ribbon heights are expressed as fractions of it.
total_count = len(df)
| 58 | + |
# Build the stacked box layout for every dimension.
#
# dim_cat_positions[dim][cat] is a dict with:
#   "y_start" - lower edge of the category's box
#   "height"  - the category's share of all records (count / total_count)
#   "y_end"   - upper edge of the box (y_start + height)
# Boxes are stacked upward from 0 in the order given by dim_values[dim].
dim_cat_positions = {}
for dim in dimensions:
    # Per-category record counts for this dimension. The same computation
    # applies to every dimension, so no special case is needed for the first.
    counts = df[dim].value_counts()

    positions = {}
    y_current = 0
    for cat in dim_values[dim]:
        # A category that never occurs in the data gets a zero-height box.
        height = counts.get(cat, 0) / total_count
        positions[cat] = {"y_start": y_current, "height": height, "y_end": y_current + height}
        y_current += height
    dim_cat_positions[dim] = positions
| 76 | + |
# Accumulators for the ribbon polygons that connect adjacent dimensions.
# Entry k of each list describes ribbon k: its x outline, y outline and colour.
ribbon_patches_x, ribbon_patches_y, ribbon_colors = [], [], []

# Ribbons are coloured by their first-dimension (Channel) value.
channel_colors = dict(
    Online="#306998",  # Python Blue
    Store="#FFD43B",  # Python Yellow
    Mobile="#4DAF4A",  # Green
)

# How much of each category box has already been filled by ribbons,
# keyed first by dimension and then by category; everything starts at 0.
running_positions = {dim: {cat: 0 for cat in dim_values[dim]} for dim in dimensions}
| 92 | + |
# Build one ribbon polygon per (path, adjacent-dimension pair).
#
# Every row of path_counts is one complete Channel/Category/Outcome path.
# Its ribbon height is the path's share of all records. On each axis the
# ribbon starts at the category box's lower edge plus whatever earlier paths
# have already filled in that box (running_positions).
#
# Appends to ribbon_patches_x, ribbon_patches_y and ribbon_colors, and
# advances running_positions by each path's height.

# The curve parameter and the cubic Bernstein weights are the same for every
# ribbon, so they are computed once instead of once per ribbon.
num_curve_points = 20
t = np.linspace(0, 1, num_curve_points)
w0 = (1 - t) ** 3
w1 = 3 * t * (1 - t) ** 2
w2 = 3 * t**2 * (1 - t)
w3 = t**3

for _, row in path_counts.iterrows():
    ribbon_height = row["count"] / total_count

    # Colour follows the first dimension (Channel).
    color = channel_colors[row["Channel"]]

    # One ribbon between each pair of neighbouring dimensions.
    for dim1, dim2 in zip(dimensions, dimensions[1:]):
        cat1, cat2 = row[dim1], row[dim2]
        x1, x2 = x_positions[dim1], x_positions[dim2]

        # Both inner control points sit at the horizontal midpoint and keep
        # the y of their own end, so the curve leaves and enters horizontally.
        x_mid = (x1 + x2) / 2

        # Vertical extent of the ribbon on the left (1) and right (2) axis.
        y1_start = dim_cat_positions[dim1][cat1]["y_start"] + running_positions[dim1][cat1]
        y1_end = y1_start + ribbon_height
        y2_start = dim_cat_positions[dim2][cat2]["y_start"] + running_positions[dim2][cat2]
        y2_end = y2_start + ribbon_height

        # Top edge: cubic Bezier from (x1, y1_end) to (x2, y2_end).
        top_x = x1 * w0 + x_mid * w1 + x_mid * w2 + x2 * w3
        top_y = y1_end * w0 + y1_end * w1 + y2_end * w2 + y2_end * w3

        # Bottom edge: cubic Bezier back from (x2, y2_start) to (x1, y1_start),
        # so top followed by bottom traces one closed outline.
        bottom_x = x2 * w0 + x_mid * w1 + x_mid * w2 + x1 * w3
        bottom_y = y2_start * w0 + y2_start * w1 + y1_start * w2 + y1_start * w3

        ribbon_patches_x.append(np.concatenate([top_x, bottom_x]).tolist())
        ribbon_patches_y.append(np.concatenate([top_y, bottom_y]).tolist())
        ribbon_colors.append(color)

    # Advance the fill offsets only after all of this path's ribbons are
    # built: the offsets must be identical on both sides of a middle axis so
    # that the incoming and outgoing ribbon of the path line up.
    for dim in dimensions:
        running_positions[dim][row[dim]] += ribbon_height
| 165 | + |
# Create the figure: a 4800x2700 canvas with fixed data ranges and no toolbar.
figure_options = {
    "width": 4800,
    "height": 2700,
    "title": "parallel-categories-basic · bokeh · pyplots.ai",
    "x_range": (-0.7, 4.0),
    "y_range": (-0.05, 1.15),
    "tools": "",
    "toolbar_location": None,
}
p = figure(**figure_options)
| 176 | + |
# Draw all ribbons with one vectorised patches glyph.
# Each row of the data source is one ribbon: its x outline, its y outline and
# its colour. Fill and outline colours are read per patch from the
# "ribbon_color" column, so a single source and renderer replace the
# per-ribbon source/renderer pairs while patches are still drawn in list order.
ribbon_source = ColumnDataSource(
    data={
        "x": ribbon_patches_x,
        "y": ribbon_patches_y,
        "ribbon_color": ribbon_colors,
    }
)
p.patches(
    xs="x",
    ys="y",
    source=ribbon_source,
    fill_color="ribbon_color",
    fill_alpha=0.6,
    line_color="ribbon_color",
    line_alpha=0.8,
    line_width=0.5,
)
| 190 | + |
# Category boxes: one dark rectangle per category on every dimension axis,
# each with a text label beside it.
box_width = 0.12
half_width = box_width / 2
last_dimension = dimensions[-1]

for dim in dimensions:
    x = x_positions[dim]
    left_edge = x - half_width
    right_edge = x + half_width

    # Labels go to the right of the last axis and to the left of all others.
    if dim == last_dimension:
        label_x, align = right_edge + 0.05, "left"
    else:
        label_x, align = left_edge - 0.05, "right"

    for cat in dim_values[dim]:
        pos = dim_cat_positions[dim][cat]
        bottom, top = pos["y_start"], pos["y_end"]

        # Rectangle spanning the category's vertical extent.
        box_source = ColumnDataSource(
            data={
                "x": [[left_edge, right_edge, right_edge, left_edge]],
                "y": [[bottom, bottom, top, top]],
            }
        )
        p.patches(
            xs="x",
            ys="y",
            source=box_source,
            fill_color="#333333",
            fill_alpha=0.9,
            line_color="white",
            line_width=2,
        )

        # Category name, vertically centred on the box.
        p.add_layout(
            Label(
                x=label_x,
                y=(bottom + top) / 2,
                text=cat,
                text_font_size="28pt",
                text_color="#333333",
                text_align=align,
                text_baseline="middle",
            )
        )
| 225 | + |
# Dimension headers: the bold dimension name centred above each axis at y=1.08.
header_style = {
    "text_font_size": "36pt",
    "text_color": "#333333",
    "text_font_style": "bold",
    "text_align": "center",
    "text_baseline": "bottom",
}
for dim_name in dimensions:
    header = Label(x=x_positions[dim_name], y=1.08, text=dim_name, **header_style)
    p.add_layout(header)
| 240 | + |
# Manual legend: one colour swatch plus a text label per channel.
# The entries are taken from channel_colors (in its insertion order) instead
# of a second hard-coded list, so the legend cannot drift from the ribbons.
legend_items = list(channel_colors.items())
legend_x = 3.35  # horizontal centre of the swatches
legend_y = 0.92  # vertical centre of the first swatch
for i, (name, color) in enumerate(legend_items):
    # Each further entry sits 0.1 data units below the previous one.
    ly = legend_y - i * 0.1

    # Swatch: a 0.1 x 0.06 rectangle centred on (legend_x, ly).
    source = ColumnDataSource(
        data={
            "x": [[legend_x - 0.05, legend_x + 0.05, legend_x + 0.05, legend_x - 0.05]],
            "y": [[ly - 0.03, ly - 0.03, ly + 0.03, ly + 0.03]],
        }
    )
    p.patches(xs="x", ys="y", source=source, fill_color=color, fill_alpha=0.8, line_color="#333333", line_width=2)

    # Channel name to the right of the swatch, vertically centred on it.
    label = Label(
        x=legend_x + 0.1,
        y=ly,
        text=name,
        text_font_size="24pt",
        text_color="#333333",
        text_align="left",
        text_baseline="middle",
    )
    p.add_layout(label)
| 263 | + |
# Title: large, dark grey, centred.
p.title.align = "center"
p.title.text_color = "#333333"
p.title.text_font_size = "48pt"

# A parallel categories plot has no conventional axes, so hide every axis
# and grid line as well as the plot outline.
p.axis.visible = False
p.grid.visible = False
p.outline_line_color = None

# Plot area and surrounding border share the same off-white fill.
p.background_fill_color = p.border_fill_color = "#FAFAFA"
| 279 | + |
# Output files, written relative to the current working directory.
png_path = "plot.png"
html_path = "plot.html"

# Static PNG render of the figure.
export_png(p, filename=png_path)

# HTML version of the same figure, loading BokehJS from the CDN.
save(p, filename=html_path, resources=CDN, title="Parallel Categories Plot")
0 commit comments