|
1 | | -""" pyplots.ai |
| 1 | +""" anyplot.ai |
2 | 2 | hexbin-map-geographic: Hexagonal Binning Map |
3 | | -Library: pygal 3.1.0 | Python 3.13.11 |
4 | | -Quality: 75/100 | Created: 2026-01-20 |
| 3 | +Library: pygal 3.1.0 | Python 3.13.13 |
| 4 | +Quality: 84/100 | Updated: 2026-05-27 |
5 | 5 | """ |
6 | 6 |
|
7 | | -# Fix module name conflict (this file is named pygal.py) |
| 7 | +import math |
| 8 | +import os |
| 9 | +import re |
8 | 10 | import sys |
9 | 11 | from collections import defaultdict |
10 | 12 |
|
11 | 13 |
|
| 14 | +# Fix module name conflict (this file is named pygal.py) |
12 | 15 | _cwd = sys.path[0] if sys.path and sys.path[0] else None |
13 | 16 | if _cwd: |
14 | 17 | sys.path.remove(_cwd) |
15 | 18 |
|
| 19 | +import cairosvg # noqa: E402 |
16 | 20 | import numpy as np # noqa: E402 |
17 | 21 | import pygal # noqa: E402 |
18 | 22 | from pygal.style import Style # noqa: E402 |
|
21 | 25 | if _cwd: |
22 | 26 | sys.path.insert(0, _cwd) |
23 | 27 |
|
24 | | -# Data - Simulated NYC taxi pickup locations (Manhattan area) |
| 28 | +# Theme tokens |
| 29 | +THEME = os.getenv("ANYPLOT_THEME", "light") |
| 30 | +PAGE_BG = "#FAF8F1" if THEME == "light" else "#1A1A17" |
| 31 | +INK = "#1A1A17" if THEME == "light" else "#F0EFE8" |
| 32 | +INK_MUTED = "#6B6A63" if THEME == "light" else "#A8A79F" |
| 33 | + |
| 34 | +# imprint_seq: #009E73 → #4467A3 (5 evenly-spaced stops, low → high density) |
| 35 | +seq_stops = ("#009E73", "#11907F", "#22838B", "#337597", "#4467A3") |
| 36 | +n_density_bins = 5 |
| 37 | + |
| 38 | +# Data — NYC taxi pickup locations (Manhattan) |
25 | 39 | np.random.seed(42) |
26 | 40 | n_points = 5000 |
27 | | - |
28 | | -# NYC Manhattan bounds (approximately) |
29 | 41 | lat_min, lat_max = 40.70, 40.82 |
30 | 42 | lon_min, lon_max = -74.02, -73.93 |
31 | 43 |
|
32 | | -# Create multiple hotspots (Midtown, Lower Manhattan, Upper East Side) |
33 | | -# Cluster 1: Midtown (Times Square area) |
34 | 44 | c1_lat = np.random.normal(40.758, 0.015, n_points // 3) |
35 | 45 | c1_lon = np.random.normal(-73.985, 0.01, n_points // 3) |
36 | | -c1_vals = np.random.exponential(25, n_points // 3) # Fare amounts |
| 46 | +c1_vals = np.random.exponential(25, n_points // 3) |
37 | 47 |
|
38 | | -# Cluster 2: Lower Manhattan (Financial District) |
39 | 48 | c2_lat = np.random.normal(40.710, 0.012, n_points // 3) |
40 | 49 | c2_lon = np.random.normal(-74.010, 0.008, n_points // 3) |
41 | | -c2_vals = np.random.exponential(35, n_points // 3) # Higher fares downtown |
| 50 | +c2_vals = np.random.exponential(35, n_points // 3) |
42 | 51 |
|
43 | | -# Cluster 3: Upper East Side |
44 | 52 | c3_lat = np.random.normal(40.775, 0.018, n_points // 3) |
45 | 53 | c3_lon = np.random.normal(-73.960, 0.012, n_points // 3) |
46 | 54 | c3_vals = np.random.exponential(20, n_points // 3) |
47 | 55 |
|
48 | | -# Combine all points |
49 | | -lat = np.concatenate([c1_lat, c2_lat, c3_lat]) |
50 | | -lon = np.concatenate([c1_lon, c2_lon, c3_lon]) |
| 56 | +lat = np.clip(np.concatenate([c1_lat, c2_lat, c3_lat]), lat_min, lat_max) |
| 57 | +lon = np.clip(np.concatenate([c1_lon, c2_lon, c3_lon]), lon_min, lon_max) |
51 | 58 | values = np.concatenate([c1_vals, c2_vals, c3_vals]) |
52 | 59 |
|
53 | | -# Clip to bounds |
54 | | -lat = np.clip(lat, lat_min, lat_max) |
55 | | -lon = np.clip(lon, lon_min, lon_max) |
56 | | - |
57 | | -# Compute hexagonal binning with aggregation (inline - KISS principle) |
| 60 | +# Hexagonal binning with count and fare aggregation |
58 | 61 | gridsize = 25 |
59 | | -x = np.asarray(lon) |
60 | | -y = np.asarray(lat) |
61 | | - |
62 | | -# Compute data bounds |
63 | | -x_min, x_max = x.min(), x.max() |
64 | | -y_min = y.min() |
65 | | - |
66 | | -# Hexagon dimensions |
67 | | -x_range = x_max - x_min |
68 | | -hex_width = x_range / gridsize |
| 62 | +x_arr = np.asarray(lon) |
| 63 | +y_arr = np.asarray(lat) |
| 64 | +x_min_v, x_max_v = x_arr.min(), x_arr.max() |
| 65 | +y_min_v = y_arr.min() |
| 66 | +hex_width = (x_max_v - x_min_v) / gridsize |
69 | 67 | hex_height = hex_width * np.sqrt(3) / 2 |
70 | 68 |
|
71 | | -# Convert points to hex grid coordinates with value aggregation |
72 | | -bins = defaultdict(lambda: {"count": 0, "sum": 0.0, "values": []}) |
73 | | - |
74 | | -for xi, yi, vi in zip(x, y, values, strict=True): |
75 | | - col = (xi - x_min) / hex_width |
# Aggregate every pickup into its hexagonal cell: per-cell point count and fare sum.
# Cells are keyed by (col_idx, row_idx); odd columns are shifted up by half a row.
bins = defaultdict(lambda: {"count": 0, "sum": 0.0})
for xi, yi, vi in zip(x_arr, y_arr, values, strict=True):
    # Round to the column first, then derive the stagger from that same column.
    # Using the parity of the truncated position here would disagree with
    # `col_idx % 2` (used when cell centres are rebuilt) whenever the fractional
    # part is >= 0.5, which puts the point in a cell that is not its nearest one.
    col_idx = int(round((xi - x_min_v) / hex_width))
    row_offset = (col_idx % 2) * 0.5
    row_idx = int(round((yi - y_min_v) / hex_height - row_offset))
    cell = bins[(col_idx, row_idx)]
    cell["count"] += 1
    cell["sum"] += vi
82 | | - bins[(col_idx, row_idx)]["values"].append(vi) |
83 | 78 |
|
84 | | -# Convert bin indices back to coordinates with full statistics |
85 | 79 | hex_data = [] |
86 | | - |
87 | 80 | for (col_idx, row_idx), data in bins.items(): |
88 | | - cx = x_min + col_idx * hex_width |
| 81 | + cx = x_min_v + col_idx * hex_width |
89 | 82 | row_offset = (col_idx % 2) * 0.5 |
90 | | - cy = y_min + (row_idx + row_offset) * hex_height |
| 83 | + cy = y_min_v + (row_idx + row_offset) * hex_height |
91 | 84 | count = data["count"] |
92 | 85 | total = data["sum"] |
93 | 86 | mean = total / count if count > 0 else 0 |
94 | 87 | hex_data.append({"lon": cx, "lat": cy, "count": count, "sum": total, "mean": mean}) |
95 | 88 |
|
96 | | -# Extract arrays for plotting |
97 | | -hex_lon = np.array([h["lon"] for h in hex_data]) |
98 | | -hex_lat = np.array([h["lat"] for h in hex_data]) |
99 | 89 | counts = np.array([h["count"] for h in hex_data]) |
100 | 90 |
|
101 | | -# Get count statistics for binning - use percentile-based bins for better distribution |
102 | | -count_min, count_max = counts.min(), counts.max() |
103 | | -count_range = count_max - count_min if count_max > count_min else 1 |
| 91 | +# Percentile-based bin edges for balanced distribution |
| 92 | +bin_edges = np.percentile(counts, [0, 20, 40, 60, 80, 100]) |
| 93 | +for i in range(1, len(bin_edges)): |
| 94 | + if bin_edges[i] <= bin_edges[i - 1]: |
| 95 | + bin_edges[i] = bin_edges[i - 1] + 1 |
| 96 | + |
| 97 | +# Shortened labels to prevent legend truncation |
| 98 | +bin_labels = [ |
| 99 | + f"Low ({int(bin_edges[0])}–{int(bin_edges[1])})", |
| 100 | + f"Med-Low ({int(bin_edges[1])}–{int(bin_edges[2])})", |
| 101 | + f"Medium ({int(bin_edges[2])}–{int(bin_edges[3])})", |
| 102 | + f"Med-High ({int(bin_edges[3])}–{int(bin_edges[4])})", |
| 103 | + f"High ({int(bin_edges[4])}+)", |
| 104 | +] |
104 | 105 |
|
105 | | -# Simplified Manhattan coastline (approximate outline) |
| 106 | +# Geographic outlines (Manhattan island + waterways) |
106 | 107 | manhattan_outline = [ |
107 | 108 | (-74.020, 40.700), |
108 | 109 | (-74.010, 40.705), |
|
121 | 122 | (-74.015, 40.720), |
122 | 123 | (-74.020, 40.700), |
123 | 124 | ] |
124 | | - |
125 | | -# Hudson River (west boundary approximation) |
126 | 125 | hudson_river = [ |
127 | 126 | (-74.035, 40.690), |
128 | 127 | (-74.025, 40.705), |
|
132 | 131 | (-73.985, 40.810), |
133 | 132 | (-73.970, 40.835), |
134 | 133 | ] |
135 | | - |
136 | | -# East River (east boundary approximation) |
137 | 134 | east_river = [ |
138 | 135 | (-73.935, 40.695), |
139 | 136 | (-73.940, 40.720), |
|
143 | 140 | (-73.920, 40.835), |
144 | 141 | ] |
145 | 142 |
|
146 | | -# Custom style - YlOrRd colormap for density (5 levels) |
147 | | -# Boundary lines use darker colors for visibility |
| 143 | +title = "hexbin-map-geographic · python · pygal · anyplot.ai" |
| 144 | + |
148 | 145 | custom_style = Style( |
149 | | - background="white", |
150 | | - plot_background="#E8F4F8", # Light water blue |
151 | | - foreground="#333333", |
152 | | - foreground_strong="#111111", |
153 | | - foreground_subtle="#666666", |
154 | | - guide_stroke_color="transparent", # Hide grid lines completely |
155 | | - colors=( |
156 | | - # 3 boundary/river colors (darker for visibility) |
157 | | - "#555555", |
158 | | - "#446688", |
159 | | - "#446688", |
160 | | - # 5 density levels (YlOrRd colormap - darker for PNG visibility) |
161 | | - "#FFFFB2", # Very low - light yellow |
162 | | - "#FECC5C", # Low - yellow |
163 | | - "#FD8D3C", # Medium - orange |
164 | | - "#F03B20", # High - red-orange |
165 | | - "#BD0026", # Very high - dark red |
166 | | - ), |
167 | | - opacity=0.85, # Higher opacity for better visibility |
| 146 | + background=PAGE_BG, |
| 147 | + plot_background=PAGE_BG, |
| 148 | + foreground=INK, |
| 149 | + foreground_strong=INK, |
| 150 | + foreground_subtle=INK_MUTED, |
| 151 | + guide_stroke_color="transparent", |
| 152 | + colors=(INK_MUTED, INK_MUTED, INK_MUTED) + seq_stops, |
| 153 | + opacity=0.85, |
168 | 154 | opacity_hover=0.95, |
169 | | - title_font_size=72, |
170 | | - label_font_size=48, |
171 | | - major_label_font_size=42, |
172 | | - legend_font_size=40, |
| 155 | + title_font_size=66, |
| 156 | + label_font_size=56, |
| 157 | + major_label_font_size=44, |
| 158 | + legend_font_size=44, |
173 | 159 | value_font_size=36, |
174 | | - tooltip_font_size=36, |
175 | | - stroke_width=3, # Thicker lines for boundaries |
| 160 | + stroke_width=2.5, |
176 | 161 | ) |
177 | 162 |
|
178 | | -# Create XY chart - disable grid for clean map background |
179 | 163 | chart = pygal.XY( |
180 | | - width=4800, |
181 | | - height=2700, |
| 164 | + width=3200, |
| 165 | + height=1800, |
182 | 166 | style=custom_style, |
183 | | - title="hexbin-map-geographic · pygal · pyplots.ai", |
| 167 | + title=title, |
184 | 168 | x_title="Longitude (°)", |
185 | 169 | y_title="Latitude (°)", |
186 | 170 | show_legend=True, |
|
189 | 173 | legend_box_size=28, |
190 | 174 | stroke=False, |
191 | 175 | dots_size=30, |
192 | | - show_x_guides=False, # Hide x grid lines |
193 | | - show_y_guides=False, # Hide y grid lines |
| 176 | + show_x_guides=False, |
| 177 | + show_y_guides=False, |
194 | 178 | explicit_size=True, |
195 | 179 | print_values=False, |
196 | | - xrange=(lon_min - 0.015, lon_max + 0.015), # Tighter margins |
| 180 | + xrange=(lon_min - 0.015, lon_max + 0.015), |
197 | 181 | range=(lat_min - 0.008, lat_max + 0.008), |
198 | 182 | ) |
199 | 183 |
|
200 | | -# Add geographic boundaries as background lines with thicker stroke |
| 184 | +# Geographic boundaries (excluded from legend via None label) |
201 | 185 | chart.add(None, manhattan_outline, stroke=True, dots_size=0, show_dots=False, fill=False, stroke_width=4) |
202 | 186 | chart.add(None, hudson_river, stroke=True, dots_size=0, show_dots=False, fill=False, stroke_width=3) |
203 | 187 | chart.add(None, east_river, stroke=True, dots_size=0, show_dots=False, fill=False, stroke_width=3) |
204 | 188 |
|
205 | | -# Use percentile-based bin edges to ensure all 5 bins have data |
206 | | -n_bins = 5 |
207 | | -percentiles = [0, 20, 40, 60, 80, 100] |
208 | | -bin_edges = np.percentile(counts, percentiles) |
209 | | -# Ensure edges are strictly increasing |
210 | | -for i in range(1, len(bin_edges)): |
211 | | - if bin_edges[i] <= bin_edges[i - 1]: |
212 | | - bin_edges[i] = bin_edges[i - 1] + 1 |
213 | | - |
214 | | -# Create legend labels showing density scale |
215 | | -bin_labels = [ |
216 | | - f"Low ({int(bin_edges[0])}-{int(bin_edges[1])})", |
217 | | - f"Medium-Low ({int(bin_edges[1])}-{int(bin_edges[2])})", |
218 | | - f"Medium ({int(bin_edges[2])}-{int(bin_edges[3])})", |
219 | | - f"Medium-High ({int(bin_edges[3])}-{int(bin_edges[4])})", |
220 | | - f"High ({int(bin_edges[4])}+)", |
221 | | -] |
222 | | - |
223 | | -# Create series for each density level with rich tooltips |
224 | | -series_data = [[] for _ in range(n_bins)] |
| 189 | +# Density bins with size encoding (larger hexagons = more pickups) |
| 190 | +series_data = [[] for _ in range(n_density_bins)] |
225 | 191 | for h in hex_data: |
226 | 192 | hx, hy = h["lon"], h["lat"] |
227 | 193 | count = h["count"] |
228 | 194 | total = h["sum"] |
229 | 195 | mean = h["mean"] |
230 | | - # Assign to bin based on percentile edges |
231 | 196 | bin_idx = 0 |
232 | | - for i in range(1, n_bins): |
| 197 | + for i in range(1, n_density_bins): |
233 | 198 | if count >= bin_edges[i]: |
234 | 199 | bin_idx = i |
235 | | - # Rich tooltip with cell statistics as spec requires |
236 | | - tooltip = ( |
237 | | - f"Count: {count} pickups | Fares: ${total:.0f} total, ${mean:.2f} avg | Coords: ({hy:.4f}°N, {abs(hx):.4f}°W)" |
238 | | - ) |
239 | | - point = {"value": (float(hx), float(hy)), "label": tooltip} |
240 | | - series_data[bin_idx].append(point) |
241 | | - |
242 | | -# Add ALL series (even empty ones) to ensure complete legend |
243 | | -# Using larger dot sizes for density visualization |
| 200 | + tooltip = f"Count: {count} | Fares: ${total:.0f} total, ${mean:.2f} avg | ({hy:.4f}°N, {abs(hx):.4f}°W)" |
| 201 | + series_data[bin_idx].append({"value": (float(hx), float(hy)), "label": tooltip}) |
| 202 | + |
244 | 203 | dot_sizes = [26, 34, 44, 54, 66] |
245 | | -for i in range(n_bins): |
246 | | - # Add dummy point off-screen if series is empty to show in legend |
| 204 | +for i in range(n_density_bins): |
247 | 205 | if not series_data[i]: |
248 | | - # Add invisible point outside plot range to ensure legend appears |
249 | 206 | series_data[i].append({"value": (-99, 0), "label": "No data"}) |
250 | 207 | chart.add(bin_labels[i], series_data[i], dots_size=dot_sizes[i]) |
251 | 208 |
|
252 | | -# Save PNG (primary output) |
253 | | -chart.render_to_png("plot.png") |
254 | | - |
255 | | -# Save interactive HTML with CSS hexagon styling |
256 | | -# Use CSS clip-path to transform circles into hexagons in interactive view |
257 | | -html_content = f"""<!DOCTYPE html> |
258 | | -<html> |
259 | | -<head> |
260 | | - <meta charset="utf-8"> |
261 | | - <title>hexbin-map-geographic - pygal</title> |
262 | | - <style> |
263 | | - body {{ margin: 0; display: flex; justify-content: center; align-items: center; |
264 | | - min-height: 100vh; background: #f5f5f5; }} |
265 | | - .chart {{ max-width: 100%; height: auto; }} |
266 | | - /* Transform dots into hexagons using CSS clip-path */ |
267 | | - .dot {{ |
268 | | - clip-path: polygon(50% 0%, 100% 25%, 100% 75%, 50% 100%, 0% 75%, 0% 25%); |
269 | | - }} |
270 | | - </style> |
271 | | -</head> |
272 | | -<body> |
273 | | - <figure class="chart"> |
274 | | - {chart.render(is_unicode=True)} |
275 | | - </figure> |
276 | | -</body> |
277 | | -</html> |
278 | | -""" |
279 | 209 |
|
280 | | -with open("plot.html", "w", encoding="utf-8") as f: |
281 | | - f.write(html_content) |
def circles_to_hexagons(svg_text):
    """Post-process SVG: replace circular dot markers with flat-top hexagonal polygons.

    Every self-closing ``<circle .../>`` tag that carries numeric ``cx``, ``cy``
    and ``r`` attributes is rewritten as a ``<polygon>`` whose six vertices lie
    on the original circle. All other attributes of the tag are kept as they are.
    Tags that lack one of the three attributes, or whose value is not a plain
    number (for example ``"50%"``), are returned unchanged.

    Args:
        svg_text: SVG markup as a ``str``.

    Returns:
        The markup with each convertible circle replaced by a polygon.
    """
    # Flat-top orientation: vertices at 0°, 60°, ..., 300°, so the top and bottom
    # of the hexagon are edges. Starting at 30° instead would give pointy-top
    # hexagons, which do not fit a grid whose odd columns are staggered vertically.
    unit_vertices = [(math.cos(math.radians(60 * k)), math.sin(math.radians(60 * k))) for k in range(6)]

    def attr(name, trailing=""):
        # The lookbehind stops e.g. ``data-r`` or ``xlink:r`` being read as ``r``.
        return re.compile(rf'(?<![\w:.-]){name}="([^"]*)"{trailing}')

    cx_re, cy_re, r_re = attr("cx"), attr("cy"), attr("r")
    # Removal variants also swallow the whitespace that followed the attribute.
    drop_cx, drop_cy = attr("cx", r"\s*"), attr("cy", r"\s*")

    def replace_one(m):
        tag = m.group(0)
        found = [pattern.search(tag) for pattern in (cx_re, cy_re, r_re)]
        if not all(found):
            return tag
        try:
            cx, cy, r = (float(hit.group(1)) for hit in found)
        except ValueError:
            # Percentages, units or empty values: leave the marker as a circle.
            return tag
        pts = " ".join(f"{cx + r * ux:.2f},{cy + r * uy:.2f}" for ux, uy in unit_vertices)
        poly = tag.replace("<circle", "<polygon", 1)
        poly = drop_cx.sub("", poly, count=1)
        poly = drop_cy.sub("", poly, count=1)
        # Function replacement: the points string is inserted literally, and the
        # ``r`` attribute's position in the tag is reused for ``points``.
        return r_re.sub(lambda _m: f'points="{pts}"', poly, count=1)

    return re.sub(r"<circle\b[^>]*/>", replace_one, svg_text)
| 235 | + |
| 236 | + |
| 237 | +# Render SVG, convert circle markers to hexagonal polygons, then produce PNG |
| 238 | +svg_bytes = chart.render() |
| 239 | +svg_str = svg_bytes.decode("utf-8") |
| 240 | +modified_svg = circles_to_hexagons(svg_str) |
| 241 | +modified_bytes = modified_svg.encode("utf-8") |
| 242 | + |
| 243 | +cairosvg.svg2png(bytestring=modified_bytes, write_to=f"plot-{THEME}.png") |
| 244 | + |
| 245 | +with open(f"plot-{THEME}.html", "wb") as f: |
| 246 | + f.write(modified_bytes) |
0 commit comments