|
| 1 | +""" pyplots.ai |
| 2 | +parallel-categories-basic: Basic Parallel Categories Plot |
| 3 | +Library: pygal 3.1.0 | Python 3.13.11 |
| 4 | +Quality: 90/100 | Created: 2025-12-30 |
| 5 | +""" |
| 6 | + |
| 7 | +import cairosvg |
| 8 | +import numpy as np |
| 9 | +import pygal |
| 10 | +from pygal.style import Style |
| 11 | + |
| 12 | + |
# Seed NumPy's global RNG once for reproducibility.
# NOTE: nothing in this section draws random numbers; the single call is kept
# so the script's module-level side effect is unchanged. The original seeded
# twice and created an unused `flows` dict; both redundancies are removed.
np.random.seed(42)

# Data: product journey from category through channel to outcome.
# Each entry names one parallel axis; axes are laid out in this order.
categories = ["Category", "Channel", "Payment", "Outcome"]

# Allowed values for each dimension, in the order they are stacked on the axis.
dimension_values = {
    "Category": ["Electronics", "Clothing", "Home & Garden", "Sports"],
    "Channel": ["Online", "Store", "Mobile App"],
    "Payment": ["Credit Card", "Debit Card", "Digital Wallet"],
    "Outcome": ["Completed", "Returned", "Cancelled"],
}

# Flow data: number of observations for each complete path.
# Key structure: (category, channel, payment, outcome) -> count
base_counts = {
    # Electronics patterns - high online, good completion
    ("Electronics", "Online", "Credit Card", "Completed"): 450,
    ("Electronics", "Online", "Credit Card", "Returned"): 85,
    ("Electronics", "Online", "Digital Wallet", "Completed"): 280,
    ("Electronics", "Online", "Digital Wallet", "Returned"): 45,
    ("Electronics", "Store", "Credit Card", "Completed"): 320,
    ("Electronics", "Store", "Debit Card", "Completed"): 180,
    ("Electronics", "Mobile App", "Digital Wallet", "Completed"): 220,
    ("Electronics", "Mobile App", "Digital Wallet", "Cancelled"): 75,
    ("Electronics", "Online", "Credit Card", "Cancelled"): 40,
    # Clothing patterns - balanced channels, higher returns
    ("Clothing", "Online", "Credit Card", "Completed"): 380,
    ("Clothing", "Online", "Credit Card", "Returned"): 120,
    ("Clothing", "Online", "Debit Card", "Completed"): 190,
    ("Clothing", "Online", "Debit Card", "Returned"): 65,
    ("Clothing", "Store", "Credit Card", "Completed"): 410,
    ("Clothing", "Store", "Debit Card", "Completed"): 250,
    ("Clothing", "Store", "Debit Card", "Returned"): 40,
    ("Clothing", "Mobile App", "Digital Wallet", "Completed"): 175,
    ("Clothing", "Mobile App", "Credit Card", "Completed"): 130,
    ("Clothing", "Online", "Digital Wallet", "Cancelled"): 45,
    # Home & Garden - more store visits
    ("Home & Garden", "Store", "Credit Card", "Completed"): 380,
    ("Home & Garden", "Store", "Debit Card", "Completed"): 290,
    ("Home & Garden", "Store", "Debit Card", "Returned"): 55,
    ("Home & Garden", "Online", "Credit Card", "Completed"): 210,
    ("Home & Garden", "Online", "Credit Card", "Returned"): 40,
    ("Home & Garden", "Online", "Digital Wallet", "Completed"): 145,
    ("Home & Garden", "Mobile App", "Digital Wallet", "Completed"): 95,
    # Sports - mobile-friendly, good completion
    ("Sports", "Mobile App", "Digital Wallet", "Completed"): 260,
    ("Sports", "Mobile App", "Credit Card", "Completed"): 185,
    ("Sports", "Online", "Credit Card", "Completed"): 295,
    ("Sports", "Online", "Debit Card", "Completed"): 175,
    ("Sports", "Store", "Credit Card", "Completed"): 220,
    ("Sports", "Store", "Debit Card", "Completed"): 165,
    ("Sports", "Store", "Debit Card", "Returned"): 30,
}

# Colors for the first dimension (Category); these also tint the ribbons.
# The palette is intended to be colorblind-safe.
category_colors = {
    "Electronics": "#306998",  # Python Blue
    "Clothing": "#FFD43B",  # Python Yellow
    "Home & Garden": "#4ECDC4",  # Teal
    "Sports": "#E17055",  # Coral
}

# Secondary colors for the remaining dimensions - distinct from category colors.
dimension_colors = {
    "Channel": {"Online": "#7B68EE", "Store": "#20B2AA", "Mobile App": "#FF69B4"},
    "Payment": {"Credit Card": "#9370DB", "Debit Card": "#3CB371", "Digital Wallet": "#FF6347"},
    "Outcome": {"Completed": "#32CD32", "Returned": "#FFA500", "Cancelled": "#DC143C"},
}
| 89 | + |
# Pygal theme: white canvas and plot area, dark-grey text, large title font.
custom_style = Style(
    title_font_size=72,
    foreground_subtle="#666666",
    foreground_strong="#333333",
    foreground="#333333",
    plot_background="white",
    background="white",
)

# Options for a bare XY chart: guides, labels, legend, dots and strokes are
# all switched off so pygal contributes little beyond the canvas and title.
chart_options = {
    "width": 4800,
    "height": 2700,
    "style": custom_style,
    "title": "parallel-categories-basic · pygal · pyplots.ai",
    "show_legend": False,
    "show_x_guides": False,
    "show_y_guides": False,
    "show_x_labels": False,
    "show_y_labels": False,
    "dots_size": 0,
    "stroke": False,
    "range": (0, 100),
    "xrange": (0, 100),
}
chart = pygal.XY(**chart_options)

# One placeholder point keeps pygal from showing its "No data" message.
chart.add("", [(50, 50)])

# Decode the rendered output so the markup can be edited as text.
svg_bytes = chart.render()
base_svg = svg_bytes.decode("utf-8")
| 122 | + |
# SVG coordinate mapping: pixel margins around the plotting area of the
# 4800x2700 canvas.
margin_left = 450
margin_right = 350
margin_top = 350
margin_bottom = 250
chart_width = 4800 - margin_left - margin_right
chart_height = 2700 - margin_top - margin_bottom

# One vertical axis per dimension, spread evenly across the plotting width.
# max(1, ...) keeps a single-dimension plot from dividing by zero; that lone
# axis then sits at the left margin.
n_dims = len(categories)
x_positions = [margin_left + i * chart_width / max(1, n_dims - 1) for i in range(n_dims)]
bar_width = 120
gap_ratio = 0.05  # Share of the axis height reserved for gaps between categories

# Total observation count of every category value, per dimension:
# {dim_idx: {category: total}}
dim_totals = {
    dim_idx: {
        cat: sum(count for path, count in base_counts.items() if path[dim_idx] == cat)
        for cat in dimension_values[dim_name]
    }
    for dim_idx, dim_name in enumerate(categories)
}

# The gap budget and the height left for bars are the same for every axis.
total_gap = gap_ratio * chart_height
available_height = chart_height - total_gap

# Vertical extent and x position of each node:
# {(dim_idx, category): (y_top, y_bottom, x)}
node_positions = {}

for dim_idx, dim_name in enumerate(categories):
    x = x_positions[dim_idx]
    dim_total = sum(dim_totals[dim_idx].values())
    n_cats = len(dimension_values[dim_name])
    # The gap budget is split over the n_cats - 1 spaces between bars.
    gap_size = total_gap / (n_cats - 1) if n_cats > 1 else 0

    # Stack the bars downwards from the top margin; each bar's height is
    # proportional to its share of the dimension total.
    y_top = margin_top
    for cat in dimension_values[dim_name]:
        height = (dim_totals[dim_idx][cat] / dim_total) * available_height if dim_total > 0 else 0
        y_bottom = y_top + height
        node_positions[(dim_idx, cat)] = (y_top, y_bottom, x)
        y_top = y_bottom + gap_size
| 165 | + |
# Build SVG elements: everything custom is collected in one <g> group.
parallel_svg = '<g id="parallel-categories">'

# Draw nodes (category bars) for each dimension, plus the axis title above.
for dim_idx, dim_name in enumerate(categories):
    x = x_positions[dim_idx]

    for cat in dimension_values[dim_name]:
        y_top, y_bottom, _ = node_positions[(dim_idx, cat)]
        height = y_bottom - y_top

        # Bars thinner than one pixel are not drawn.
        if height < 1:
            continue

        # First dimension uses the category palette; the others use their
        # own per-dimension palette.
        if dim_idx == 0:
            fill_color = category_colors[cat]
        else:
            fill_color = dimension_colors[dim_name][cat]

        parallel_svg += f'''
    <rect x="{x - bar_width / 2:.0f}" y="{y_top:.0f}" width="{bar_width:.0f}" height="{height:.0f}"
          fill="{fill_color}" stroke="white" stroke-width="2" opacity="0.9"/>'''

    # Escape XML special characters before embedding the name as text.
    # "&" must be replaced first so the entities added afterwards are not
    # escaped a second time. (The original replaced "&" with "&", a no-op.)
    dim_name_escaped = dim_name.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    parallel_svg += f'''
    <text x="{x:.0f}" y="{margin_top - 60:.0f}" text-anchor="middle"
          font-size="48" font-weight="bold" font-family="DejaVu Sans, sans-serif"
          fill="#333333">{dim_name_escaped}</text>'''
| 196 | + |
# Add a text label for every category on every axis.
for dim_idx, dim_name in enumerate(categories):
    x = x_positions[dim_idx]
    for cat in dimension_values[dim_name]:
        y_top, y_bottom, _ = node_positions[(dim_idx, cat)]
        y_center = (y_top + y_bottom) / 2
        height = y_bottom - y_top

        # Horizontal placement depends on which axis the label belongs to.
        if dim_idx == 0:  # Left side - outside bar
            label_x = x - bar_width / 2 - 20
            anchor = "end"
        elif dim_idx == n_dims - 1:  # Right side - outside bar
            label_x = x + bar_width / 2 + 20
            anchor = "start"
        else:  # Middle dimensions - centred on the axis
            label_x = x
            anchor = "middle"

        # Font size scales with bar height but is clamped to 28..36 px.
        font_size = max(28, min(36, height * 0.35))

        # Escape XML special characters; a raw "&" (as in "Home & Garden")
        # would make the SVG malformed. "&" is replaced first so the
        # entities added afterwards are not escaped a second time.
        cat_escaped = cat.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

        if dim_idx in (0, n_dims - 1):
            # Side labels - vertically centred next to the bar
            parallel_svg += f'''
    <text x="{label_x:.0f}" y="{y_center:.0f}" text-anchor="{anchor}"
          font-size="{font_size:.0f}" font-family="DejaVu Sans, sans-serif"
          fill="#333333" dominant-baseline="middle">{cat_escaped}</text>'''
        else:
            # Middle dimension labels - just below each bar segment
            label_y = y_bottom + 35
            parallel_svg += f'''
    <text x="{label_x:.0f}" y="{label_y:.0f}" text-anchor="{anchor}"
          font-size="{font_size:.0f}" font-family="DejaVu Sans, sans-serif"
          fill="#333333">{cat_escaped}</text>'''
| 235 | + |
# Running vertical cursors that record where the next ribbon attaches on
# each node, keyed by (dim_idx, category). Both start at the node's top edge.
source_offsets = {}  # For outgoing flows
target_offsets = {}  # For incoming flows

for dim_idx in range(n_dims):
    for cat in dimension_values[categories[dim_idx]]:
        node_top = node_positions[(dim_idx, cat)][0]
        source_offsets[(dim_idx, cat)] = node_top
        target_offsets[(dim_idx, cat)] = node_top

# Draw flows between each pair of consecutive dimensions.
for dim_idx in range(n_dims - 1):
    dim1_name = categories[dim_idx]
    dim2_name = categories[dim_idx + 1]
    x0 = x_positions[dim_idx]
    x1 = x_positions[dim_idx + 1]

    # Ribbon endpoints sit on the facing edges of the two bars; the Bezier
    # control points lie 40% and 60% of the way across. These depend only on
    # the axis pair, so they are computed once per pair rather than per ribbon.
    band_x0 = x0 + bar_width / 2
    band_x1 = x1 - bar_width / 2
    cx0 = band_x0 + 0.4 * (band_x1 - band_x0)
    cx1 = band_x0 + 0.6 * (band_x1 - band_x0)

    # Aggregate path counts into (source, target, first-dimension category);
    # the first-dimension category is kept so each ribbon can be colored by it.
    flow_aggregates = {}
    for path, count in base_counts.items():
        key = (path[dim_idx], path[dim_idx + 1], path[0])
        flow_aggregates[key] = flow_aggregates.get(key, 0) + count

    # Sort by axis order of the source, then of the target, so ribbons stack
    # in the same order as the bars. Rank tables replace repeated list.index
    # scans; ties keep their insertion order because sorted() is stable.
    source_rank = {cat: rank for rank, cat in enumerate(dimension_values[dim1_name])}
    target_rank = {cat: rank for rank, cat in enumerate(dimension_values[dim2_name])}
    sorted_flows = sorted(
        flow_aggregates.items(),
        key=lambda item: (source_rank[item[0][0]], target_rank[item[0][1]]),
    )

    # Draw each flow
    for (source_cat, target_cat, first_cat), flow_value in sorted_flows:
        if flow_value <= 0:
            continue

        source_y_top, source_y_bottom, _ = node_positions[(dim_idx, source_cat)]
        target_y_top, target_y_bottom, _ = node_positions[(dim_idx + 1, target_cat)]

        source_dim_total = dim_totals[dim_idx][source_cat]
        target_dim_total = dim_totals[dim_idx + 1][target_cat]

        # Ribbon thickness at each end is the flow's share of that node's height.
        source_height = (
            (flow_value / source_dim_total) * (source_y_bottom - source_y_top) if source_dim_total > 0 else 0
        )
        target_height = (
            (flow_value / target_dim_total) * (target_y_bottom - target_y_top) if target_dim_total > 0 else 0
        )

        # Attach directly below the ribbons already placed on each node.
        y0_top = source_offsets[(dim_idx, source_cat)]
        y0_bottom = y0_top + source_height
        y1_top = target_offsets[(dim_idx + 1, target_cat)]
        y1_bottom = y1_top + target_height

        # Closed outline: top edge left-to-right, down the target side,
        # bottom edge right-to-left, then back to the start.
        path_d = (
            f"M {band_x0:.0f},{y0_top:.0f} "
            f"C {cx0:.0f},{y0_top:.0f} {cx1:.0f},{y1_top:.0f} {band_x1:.0f},{y1_top:.0f} "
            f"L {band_x1:.0f},{y1_bottom:.0f} "
            f"C {cx1:.0f},{y1_bottom:.0f} {cx0:.0f},{y0_bottom:.0f} {band_x0:.0f},{y0_bottom:.0f} "
            f"Z"
        )

        # Color by first category
        ribbon_color = category_colors[first_cat]

        parallel_svg += f'''
    <path d="{path_d}" fill="{ribbon_color}" fill-opacity="0.4" stroke="none"/>'''

        # Advance the cursors past the ribbon just drawn.
        source_offsets[(dim_idx, source_cat)] = y0_bottom
        target_offsets[(dim_idx + 1, target_cat)] = y1_bottom
| 320 | + |
# Add legend for categories: one swatch plus label per first-dimension
# category, laid out in a row 100 px below the plotting area.
legend_x = margin_left
legend_y = chart_height + margin_top + 100
legend_spacing = 400

for idx, (cat, color) in enumerate(category_colors.items()):
    lx = legend_x + idx * legend_spacing
    # Escape XML special characters; a raw "&" (as in "Home & Garden") would
    # make the SVG malformed. "&" is replaced first so the entities added
    # afterwards are not escaped a second time.
    cat_escaped = cat.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    parallel_svg += f'''
    <rect x="{lx:.0f}" y="{legend_y:.0f}" width="50" height="50" fill="{color}" stroke="none"/>
    <text x="{lx + 70:.0f}" y="{legend_y + 38:.0f}" text-anchor="start"
          font-size="40" font-family="DejaVu Sans, sans-serif" fill="#333333">{cat_escaped}</text>'''

# Add subtitle, anchored at x=2400 and placed 200 px below the plotting area.
parallel_svg += f'''
    <text x="2400" y="{chart_height + margin_top + 200:.0f}" text-anchor="middle"
          font-size="36" font-style="italic" font-family="DejaVu Sans, sans-serif"
          fill="#666666">Customer Purchase Journey Flows by Product Category</text>'''

# Close the custom group.
parallel_svg += "\n</g>"
| 341 | + |
# Insert the custom elements before the closing </svg> tag of pygal's output.
svg_with_parallel = base_svg.replace("</svg>", f"{parallel_svg}\n</svg>")

# Save SVG. The markup contains non-ASCII text (the "·" in the title), so the
# encoding is pinned to UTF-8 instead of relying on the platform default;
# this also matches the bytes handed to cairosvg below.
with open("plot.svg", "w", encoding="utf-8") as f:
    f.write(svg_with_parallel)

# Render to PNG
cairosvg.svg2png(bytestring=svg_with_parallel.encode("utf-8"), write_to="plot.png")

# Save HTML for interactive version. The page embeds plot.svg by relative
# path, and declares its charset so the non-ASCII title is decoded as UTF-8.
with open("plot.html", "w", encoding="utf-8") as f:
    f.write("""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>parallel-categories-basic · pygal · pyplots.ai</title>
    <style>
        body { margin: 0; padding: 20px; background: #f5f5f5; font-family: sans-serif; }
        .container { max-width: 100%; margin: 0 auto; }
        object { width: 100%; height: auto; }
    </style>
</head>
<body>
    <div class="container">
        <object type="image/svg+xml" data="plot.svg">
            Parallel categories diagram not supported
        </object>
    </div>
</body>
</html>""")
0 commit comments