|
""" pyplots.ai
parallel-categories-basic: Basic Parallel Categories Plot
Library: letsplot 4.8.2 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-30
"""
| 6 | + |
| 7 | +import pandas as pd |
| 8 | +from lets_plot import ( |
| 9 | + LetsPlot, |
| 10 | + aes, |
| 11 | + element_blank, |
| 12 | + element_text, |
| 13 | + geom_polygon, |
| 14 | + geom_rect, |
| 15 | + geom_text, |
| 16 | + ggplot, |
| 17 | + ggsize, |
| 18 | + labs, |
| 19 | + scale_fill_manual, |
| 20 | + scale_x_continuous, |
| 21 | + scale_y_continuous, |
| 22 | + theme, |
| 23 | + theme_minimal, |
| 24 | +) |
| 25 | +from lets_plot.export import ggsave |
| 26 | + |
| 27 | + |
# One-time Lets-Plot setup call; it runs before any plot object is created.
LetsPlot.setup_html()
| 29 | + |
# Customer journey data with multiple categorical dimensions
# Dimensions: Channel (acquisition), Product Category, Purchase Size, Outcome
# Each row is (channel, product, size, outcome, count).
data = [
    # Online channel journeys
    ("Online", "Electronics", "Large", "Completed", 45),
    ("Online", "Electronics", "Small", "Completed", 32),
    ("Online", "Electronics", "Large", "Abandoned", 18),
    ("Online", "Electronics", "Small", "Abandoned", 12),
    ("Online", "Clothing", "Large", "Completed", 28),
    ("Online", "Clothing", "Small", "Completed", 55),
    ("Online", "Clothing", "Large", "Abandoned", 8),
    ("Online", "Clothing", "Small", "Abandoned", 15),
    ("Online", "Home", "Large", "Completed", 22),
    ("Online", "Home", "Small", "Completed", 18),
    ("Online", "Home", "Large", "Abandoned", 10),
    ("Online", "Home", "Small", "Abandoned", 7),
    # Store channel journeys
    ("Store", "Electronics", "Large", "Completed", 35),
    ("Store", "Electronics", "Small", "Completed", 20),
    ("Store", "Electronics", "Large", "Abandoned", 5),
    ("Store", "Electronics", "Small", "Abandoned", 3),
    ("Store", "Clothing", "Large", "Completed", 40),
    ("Store", "Clothing", "Small", "Completed", 65),
    ("Store", "Clothing", "Large", "Abandoned", 4),
    ("Store", "Clothing", "Small", "Abandoned", 6),
    ("Store", "Home", "Large", "Completed", 30),
    ("Store", "Home", "Small", "Completed", 25),
    ("Store", "Home", "Large", "Abandoned", 3),
    ("Store", "Home", "Small", "Abandoned", 2),
    # Mobile channel journeys
    ("Mobile", "Electronics", "Large", "Completed", 25),
    ("Mobile", "Electronics", "Small", "Completed", 42),
    ("Mobile", "Electronics", "Large", "Abandoned", 22),
    ("Mobile", "Electronics", "Small", "Abandoned", 18),
    ("Mobile", "Clothing", "Large", "Completed", 15),
    ("Mobile", "Clothing", "Small", "Completed", 48),
    ("Mobile", "Clothing", "Large", "Abandoned", 10),
    ("Mobile", "Clothing", "Small", "Abandoned", 20),
    ("Mobile", "Home", "Large", "Completed", 12),
    ("Mobile", "Home", "Small", "Completed", 22),
    ("Mobile", "Home", "Large", "Abandoned", 8),
    ("Mobile", "Home", "Small", "Abandoned", 12),
]

# Dimensions in display order (left to right) and, per dimension, its
# categories in stacking order.
dimensions = ["Channel", "Product", "Size", "Outcome"]
categories = {
    "Channel": ["Online", "Store", "Mobile"],
    "Product": ["Electronics", "Clothing", "Home"],
    "Size": ["Large", "Small"],
    "Outcome": ["Completed", "Abandoned"],
}

# Colors for the first dimension (Channel) - used to color ribbons
channel_colors = {"Online": "#306998", "Store": "#27AE60", "Mobile": "#FFD43B"}

# Per-dimension totals: dimension_totals[dimension][category] -> summed count.
dimension_totals = {dim: {} for dim in dimensions}
for channel, product, size, outcome, count in data:
    # Key the row by dimension name so each total is looked up by name rather
    # than by tuple position.
    row = {"Channel": channel, "Product": product, "Size": size, "Outcome": outcome}
    for dim, totals in dimension_totals.items():
        totals[row[dim]] = totals.get(row[dim], 0) + count

# Grand total of all journey counts (last field of every row).
total_flow = sum(row[-1] for row in data)
| 95 | + |
# Layout parameters - increased spacing for better label readability.
# x_positions holds one column centre per dimension, in `dimensions` order.
x_positions = [0.10, 0.37, 0.63, 0.90]
node_width = 0.030
node_gap = 0.035

# Calculate node positions for all dimensions (flat structure).
# node_positions[i] maps each category of dimensions[i] to its vertical extent
# ("y0", "y1") and column centre ("x").
node_positions = []
for dim_idx, dim in enumerate(dimensions):
    positions = {}
    # Every column starts stacking at y = 0.10.
    y_offset = 0.10
    for cat in categories[dim]:
        # A node's height is its share of the total flow, scaled so that a full
        # column of nodes occupies 0.72 of the y-range (gaps come on top).
        height = dimension_totals[dim].get(cat, 0) / total_flow * 0.72
        positions[cat] = {"y0": y_offset, "y1": y_offset + height, "x": x_positions[dim_idx]}
        y_offset += height + node_gap
    node_positions.append(positions)
| 112 | + |
# Build flow polygons between adjacent dimensions.
# flow_data collects one row per polygon vertex; rows sharing a "flow_id"
# form one ribbon.
flow_data = []

# Each ribbon edge is sampled at n_points + 1 evenly spaced steps. The step and
# easing tables are identical for every ribbon, so they are built once.
n_points = 30
curve_steps = [i / n_points for i in range(n_points + 1)]
# Smoothstep easing (3t^2 - 2t^3): goes from 0 to 1 with zero slope at both
# ends, so the ribbon edges are flat where they meet the nodes.
curve_ease = [t * t * (3 - 2 * t) for t in curve_steps]

# Channel order used as the last sort key (the order the colours are declared).
channel_order = list(channel_colors)

# Name each row's fields once so any dimension can be looked up by name.
journeys = [
    ({"Channel": channel, "Product": product, "Size": size, "Outcome": outcome}, count)
    for channel, product, size, outcome, count in data
]

# Process each pair of adjacent dimensions
for dim_from_idx in range(len(dimensions) - 1):
    dim_to_idx = dim_from_idx + 1
    dim_from = dimensions[dim_from_idx]
    dim_to = dimensions[dim_to_idx]
    from_order = categories[dim_from]
    to_order = categories[dim_to]

    # Aggregate counts per (from category, to category, originating channel).
    # The channel stays in the key so each ribbon can be filled by channel.
    flow_counts = {}
    for values, count in journeys:
        key = (values[dim_from], values[dim_to], values["Channel"])
        flow_counts[key] = flow_counts.get(key, 0) + count

    # Running offsets: how much of each node's height is already occupied.
    from_offsets = dict.fromkeys(from_order, 0)
    to_offsets = dict.fromkeys(to_order, 0)

    from_positions = node_positions[dim_from_idx]
    to_positions = node_positions[dim_to_idx]

    # Ribbons span the gap between the facing edges of the two node columns.
    x_left = x_positions[dim_from_idx] + node_width / 2
    x_right = x_positions[dim_to_idx] - node_width / 2
    x_edge = [x_left + t * (x_right - x_left) for t in curve_steps]

    # Sort flows for consistent ordering: by source category, then target
    # category, then channel.
    sorted_flows = sorted(
        flow_counts.items(),
        key=lambda item: (
            from_order.index(item[0][0]),
            to_order.index(item[0][1]),
            channel_order.index(item[0][2]),
        ),
    )

    for (from_cat, to_cat, source_channel), count in sorted_flows:
        # Share of the total flow, scaled by 0.72 of the y-range.
        flow_height = count / total_flow * 0.72

        src_y0 = from_positions[from_cat]["y0"] + from_offsets[from_cat]
        src_y1 = src_y0 + flow_height
        from_offsets[from_cat] += flow_height

        tgt_y0 = to_positions[to_cat]["y0"] + to_offsets[to_cat]
        tgt_y1 = tgt_y0 + flow_height
        to_offsets[to_cat] += flow_height

        # Closed outline: top edge left-to-right, then bottom edge right-to-left.
        y_top = [src_y1 + ease * (tgt_y1 - src_y1) for ease in curve_ease]
        y_bottom = [src_y0 + ease * (tgt_y0 - src_y0) for ease in curve_ease]
        x_polygon = x_edge + x_edge[::-1]
        y_polygon = y_top + y_bottom[::-1]

        flow_id = f"d{dim_from_idx}_{from_cat}_{to_cat}_{source_channel}"
        # Both coordinate lists are built to the same length; strict=True turns
        # any future mismatch into an error instead of silently dropping points.
        flow_data.extend(
            {"x": x, "y": y, "flow_id": flow_id, "channel": source_channel, "from_cat": from_cat, "to_cat": to_cat}
            for x, y in zip(x_polygon, y_polygon, strict=True)
        )

df_flows = pd.DataFrame(flow_data)
| 193 | + |
# Build node rectangles
def _node_rect(dim, cat, pos):
    """Return one rectangle row for a category node centred on pos["x"]."""
    half_width = node_width / 2
    return {
        "xmin": pos["x"] - half_width,
        "xmax": pos["x"] + half_width,
        "ymin": pos["y0"],
        "ymax": pos["y1"],
        "category": cat,
        "dimension": dim,
    }


# One row per (dimension, category), in dimension order then category order.
node_rects = [
    _node_rect(dim, cat, node_positions[dim_idx][cat])
    for dim_idx, dim in enumerate(dimensions)
    for cat in categories[dim]
]

df_nodes = pd.DataFrame(node_rects)
| 211 | + |
# Build labels: one row per text label, tagged "header" or "category".

# Dimension headers at top, one per column, with hjust 0.5.
labels = [
    {"x": x_positions[col], "y": 0.96, "label": dim, "type": "header", "hjust": 0.5}
    for col, dim in enumerate(dimensions)
]

# Category labels with counts, anchored beside each node at its vertical middle.
last_idx = len(dimensions) - 1
for dim_idx, dim in enumerate(dimensions):
    # The first column labels go on the left and the last column on the right;
    # the columns in between alternate (even index left, odd index right).
    place_left = dim_idx == 0 or (dim_idx != last_idx and dim_idx % 2 == 0)

    for cat in categories[dim]:
        pos = node_positions[dim_idx][cat]
        total = dimension_totals[dim][cat]

        # The anchor sits 0.02 beyond the node edge; hjust is stored as 1 for
        # left-side labels and 0 for right-side labels.
        if place_left:
            x_label, hjust = pos["x"] - node_width / 2 - 0.02, 1
        else:
            x_label, hjust = pos["x"] + node_width / 2 + 0.02, 0

        labels.append(
            {
                "x": x_label,
                "y": (pos["y0"] + pos["y1"]) / 2,
                "label": f"{cat} ({total})",
                "type": "category",
                "hjust": hjust,
            }
        )

df_labels = pd.DataFrame(labels)
| 250 | + |
# Create the plot. Layers are built individually and then added in drawing
# order: ribbons, node rectangles, header text, category text.

# Ribbons: one polygon per flow_id, filled by originating channel.
ribbon_layer = geom_polygon(
    aes(x="x", y="y", group="flow_id", fill="channel"), data=df_flows, alpha=0.5, color="white", size=0.08
)

# Category nodes drawn over the ribbon ends.
node_layer = geom_rect(
    aes(xmin="xmin", xmax="xmax", ymin="ymin", ymax="ymax"),
    data=df_nodes,
    fill="#2C3E50",
    color="#1A252F",
    size=1.2,
)

# Dimension names above each column.
header_layer = geom_text(
    aes(x="x", y="y", label="label"),
    data=df_labels[df_labels["type"] == "header"],
    size=18,
    hjust=0.5,
    fontface="bold",
    color="#1A1A1A",
)

# NOTE(review): df_labels has a per-row "hjust" column, but this layer neither
# maps it nor passes an hjust argument, so the category labels are drawn with
# geom_text's default alignment (presumably centred on the anchor) instead of
# the left/right alignment stored in the data. Confirm whether that is intended.
category_layer = geom_text(
    aes(x="x", y="y", label="label"), data=df_labels[df_labels["type"] == "category"], size=14, color="#333333"
)

# Fill colours for the three channels, taken from channel_colors.
fill_scale = scale_fill_manual(
    values={name: channel_colors[name] for name in ("Online", "Store", "Mobile")},
    name="Acquisition Channel",
)

# Hide axes and grid: the coordinates are layout-only and carry no meaning.
plot_theme = theme(
    plot_title=element_text(size=26, face="bold"),
    axis_title=element_blank(),
    axis_text=element_blank(),
    axis_ticks=element_blank(),
    panel_grid=element_blank(),
    legend_text=element_text(size=16),
    legend_title=element_text(size=18, face="bold"),
    legend_position="bottom",
)

plot = (
    ggplot()
    + ribbon_layer
    + node_layer
    + header_layer
    + category_layer
    + fill_scale
    + labs(title="parallel-categories-basic · letsplot · pyplots.ai")
    + theme_minimal()
    + plot_theme
    + scale_x_continuous(limits=[-0.02, 1.02])
    + scale_y_continuous(limits=[-0.02, 1.02])
    + ggsize(1600, 900)
)
| 299 | + |
# Save as PNG (scale 3x for 4800 × 2700 px); path="." writes to the current
# working directory.
ggsave(plot, "plot.png", path=".", scale=3)

# Save as HTML for interactivity (same directory).
ggsave(plot, "plot.html", path=".")
0 commit comments