Skip to content

Commit c3a3ffd

Browse files
github-actions[bot]claudeMarkusNeusinger
authored
feat(pygal): implement hexbin-map-geographic (#7746)
## Implementation: `hexbin-map-geographic` - python/pygal Implements the **python/pygal** version of `hexbin-map-geographic`. **File:** `plots/hexbin-map-geographic/implementations/python/pygal.py` **Parent Issue:** #3767 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/anyplot/actions/runs/26513744161)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Markus Neusinger <2921697+MarkusNeusinger@users.noreply.github.com>
1 parent 4dd441f commit c3a3ffd

2 files changed

Lines changed: 295 additions & 279 deletions

File tree

Lines changed: 116 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
1-
""" pyplots.ai
1+
""" anyplot.ai
22
hexbin-map-geographic: Hexagonal Binning Map
3-
Library: pygal 3.1.0 | Python 3.13.11
4-
Quality: 75/100 | Created: 2026-01-20
3+
Library: pygal 3.1.0 | Python 3.13.13
4+
Quality: 84/100 | Updated: 2026-05-27
55
"""
66

7-
# Fix module name conflict (this file is named pygal.py)
7+
import math
8+
import os
9+
import re
810
import sys
911
from collections import defaultdict
1012

1113

14+
# Fix module name conflict (this file is named pygal.py)
1215
_cwd = sys.path[0] if sys.path and sys.path[0] else None
1316
if _cwd:
1417
sys.path.remove(_cwd)
1518

19+
import cairosvg # noqa: E402
1620
import numpy as np # noqa: E402
1721
import pygal # noqa: E402
1822
from pygal.style import Style # noqa: E402
@@ -21,88 +25,85 @@
2125
if _cwd:
2226
sys.path.insert(0, _cwd)
2327

24-
# Data - Simulated NYC taxi pickup locations (Manhattan area)
28+
# Theme tokens
29+
THEME = os.getenv("ANYPLOT_THEME", "light")
30+
PAGE_BG = "#FAF8F1" if THEME == "light" else "#1A1A17"
31+
INK = "#1A1A17" if THEME == "light" else "#F0EFE8"
32+
INK_MUTED = "#6B6A63" if THEME == "light" else "#A8A79F"
33+
34+
# imprint_seq: #009E73 → #4467A3 (5 evenly-spaced stops, low → high density)
35+
seq_stops = ("#009E73", "#11907F", "#22838B", "#337597", "#4467A3")
36+
n_density_bins = 5
37+
38+
# Data — NYC taxi pickup locations (Manhattan)
2539
np.random.seed(42)
2640
n_points = 5000
27-
28-
# NYC Manhattan bounds (approximately)
2941
lat_min, lat_max = 40.70, 40.82
3042
lon_min, lon_max = -74.02, -73.93
3143

32-
# Create multiple hotspots (Midtown, Lower Manhattan, Upper East Side)
33-
# Cluster 1: Midtown (Times Square area)
3444
c1_lat = np.random.normal(40.758, 0.015, n_points // 3)
3545
c1_lon = np.random.normal(-73.985, 0.01, n_points // 3)
36-
c1_vals = np.random.exponential(25, n_points // 3) # Fare amounts
46+
c1_vals = np.random.exponential(25, n_points // 3)
3747

38-
# Cluster 2: Lower Manhattan (Financial District)
3948
c2_lat = np.random.normal(40.710, 0.012, n_points // 3)
4049
c2_lon = np.random.normal(-74.010, 0.008, n_points // 3)
41-
c2_vals = np.random.exponential(35, n_points // 3) # Higher fares downtown
50+
c2_vals = np.random.exponential(35, n_points // 3)
4251

43-
# Cluster 3: Upper East Side
4452
c3_lat = np.random.normal(40.775, 0.018, n_points // 3)
4553
c3_lon = np.random.normal(-73.960, 0.012, n_points // 3)
4654
c3_vals = np.random.exponential(20, n_points // 3)
4755

48-
# Combine all points
49-
lat = np.concatenate([c1_lat, c2_lat, c3_lat])
50-
lon = np.concatenate([c1_lon, c2_lon, c3_lon])
56+
lat = np.clip(np.concatenate([c1_lat, c2_lat, c3_lat]), lat_min, lat_max)
57+
lon = np.clip(np.concatenate([c1_lon, c2_lon, c3_lon]), lon_min, lon_max)
5158
values = np.concatenate([c1_vals, c2_vals, c3_vals])
5259

53-
# Clip to bounds
54-
lat = np.clip(lat, lat_min, lat_max)
55-
lon = np.clip(lon, lon_min, lon_max)
56-
57-
# Compute hexagonal binning with aggregation (inline - KISS principle)
60+
# Hexagonal binning with count and fare aggregation
5861
gridsize = 25
59-
x = np.asarray(lon)
60-
y = np.asarray(lat)
61-
62-
# Compute data bounds
63-
x_min, x_max = x.min(), x.max()
64-
y_min = y.min()
65-
66-
# Hexagon dimensions
67-
x_range = x_max - x_min
68-
hex_width = x_range / gridsize
62+
x_arr = np.asarray(lon)
63+
y_arr = np.asarray(lat)
64+
x_min_v, x_max_v = x_arr.min(), x_arr.max()
65+
y_min_v = y_arr.min()
66+
hex_width = (x_max_v - x_min_v) / gridsize
6967
hex_height = hex_width * np.sqrt(3) / 2
7068

71-
# Convert points to hex grid coordinates with value aggregation
72-
bins = defaultdict(lambda: {"count": 0, "sum": 0.0, "values": []})
73-
74-
for xi, yi, vi in zip(x, y, values, strict=True):
75-
col = (xi - x_min) / hex_width
69+
bins = defaultdict(lambda: {"count": 0, "sum": 0.0})
70+
for xi, yi, vi in zip(x_arr, y_arr, values, strict=True):
71+
col = (xi - x_min_v) / hex_width
7672
row_offset = (int(col) % 2) * 0.5
77-
row = (yi - y_min) / hex_height - row_offset
73+
row = (yi - y_min_v) / hex_height - row_offset
7874
col_idx = int(round(col))
7975
row_idx = int(round(row))
8076
bins[(col_idx, row_idx)]["count"] += 1
8177
bins[(col_idx, row_idx)]["sum"] += vi
82-
bins[(col_idx, row_idx)]["values"].append(vi)
8378

84-
# Convert bin indices back to coordinates with full statistics
8579
hex_data = []
86-
8780
for (col_idx, row_idx), data in bins.items():
88-
cx = x_min + col_idx * hex_width
81+
cx = x_min_v + col_idx * hex_width
8982
row_offset = (col_idx % 2) * 0.5
90-
cy = y_min + (row_idx + row_offset) * hex_height
83+
cy = y_min_v + (row_idx + row_offset) * hex_height
9184
count = data["count"]
9285
total = data["sum"]
9386
mean = total / count if count > 0 else 0
9487
hex_data.append({"lon": cx, "lat": cy, "count": count, "sum": total, "mean": mean})
9588

96-
# Extract arrays for plotting
97-
hex_lon = np.array([h["lon"] for h in hex_data])
98-
hex_lat = np.array([h["lat"] for h in hex_data])
9989
counts = np.array([h["count"] for h in hex_data])
10090

101-
# Get count statistics for binning - use percentile-based bins for better distribution
102-
count_min, count_max = counts.min(), counts.max()
103-
count_range = count_max - count_min if count_max > count_min else 1
91+
# Percentile-based bin edges for balanced distribution
92+
bin_edges = np.percentile(counts, [0, 20, 40, 60, 80, 100])
93+
for i in range(1, len(bin_edges)):
94+
if bin_edges[i] <= bin_edges[i - 1]:
95+
bin_edges[i] = bin_edges[i - 1] + 1
96+
97+
# Shortened labels to prevent legend truncation
98+
bin_labels = [
99+
f"Low ({int(bin_edges[0])}-{int(bin_edges[1])})",
100+
f"Med-Low ({int(bin_edges[1])}-{int(bin_edges[2])})",
101+
f"Medium ({int(bin_edges[2])}-{int(bin_edges[3])})",
102+
f"Med-High ({int(bin_edges[3])}-{int(bin_edges[4])})",
103+
f"High ({int(bin_edges[4])}+)",
104+
]
104105

105-
# Simplified Manhattan coastline (approximate outline)
106+
# Geographic outlines (Manhattan island + waterways)
106107
manhattan_outline = [
107108
(-74.020, 40.700),
108109
(-74.010, 40.705),
@@ -121,8 +122,6 @@
121122
(-74.015, 40.720),
122123
(-74.020, 40.700),
123124
]
124-
125-
# Hudson River (west boundary approximation)
126125
hudson_river = [
127126
(-74.035, 40.690),
128127
(-74.025, 40.705),
@@ -132,8 +131,6 @@
132131
(-73.985, 40.810),
133132
(-73.970, 40.835),
134133
]
135-
136-
# East River (east boundary approximation)
137134
east_river = [
138135
(-73.935, 40.695),
139136
(-73.940, 40.720),
@@ -143,44 +140,31 @@
143140
(-73.920, 40.835),
144141
]
145142

146-
# Custom style - YlOrRd colormap for density (5 levels)
147-
# Boundary lines use darker colors for visibility
143+
title = "hexbin-map-geographic · python · pygal · anyplot.ai"
144+
148145
custom_style = Style(
149-
background="white",
150-
plot_background="#E8F4F8", # Light water blue
151-
foreground="#333333",
152-
foreground_strong="#111111",
153-
foreground_subtle="#666666",
154-
guide_stroke_color="transparent", # Hide grid lines completely
155-
colors=(
156-
# 3 boundary/river colors (darker for visibility)
157-
"#555555",
158-
"#446688",
159-
"#446688",
160-
# 5 density levels (YlOrRd colormap - darker for PNG visibility)
161-
"#FFFFB2", # Very low - light yellow
162-
"#FECC5C", # Low - yellow
163-
"#FD8D3C", # Medium - orange
164-
"#F03B20", # High - red-orange
165-
"#BD0026", # Very high - dark red
166-
),
167-
opacity=0.85, # Higher opacity for better visibility
146+
background=PAGE_BG,
147+
plot_background=PAGE_BG,
148+
foreground=INK,
149+
foreground_strong=INK,
150+
foreground_subtle=INK_MUTED,
151+
guide_stroke_color="transparent",
152+
colors=(INK_MUTED, INK_MUTED, INK_MUTED) + seq_stops,
153+
opacity=0.85,
168154
opacity_hover=0.95,
169-
title_font_size=72,
170-
label_font_size=48,
171-
major_label_font_size=42,
172-
legend_font_size=40,
155+
title_font_size=66,
156+
label_font_size=56,
157+
major_label_font_size=44,
158+
legend_font_size=44,
173159
value_font_size=36,
174-
tooltip_font_size=36,
175-
stroke_width=3, # Thicker lines for boundaries
160+
stroke_width=2.5,
176161
)
177162

178-
# Create XY chart - disable grid for clean map background
179163
chart = pygal.XY(
180-
width=4800,
181-
height=2700,
164+
width=3200,
165+
height=1800,
182166
style=custom_style,
183-
title="hexbin-map-geographic · pygal · pyplots.ai",
167+
title=title,
184168
x_title="Longitude (°)",
185169
y_title="Latitude (°)",
186170
show_legend=True,
@@ -189,93 +173,74 @@
189173
legend_box_size=28,
190174
stroke=False,
191175
dots_size=30,
192-
show_x_guides=False, # Hide x grid lines
193-
show_y_guides=False, # Hide y grid lines
176+
show_x_guides=False,
177+
show_y_guides=False,
194178
explicit_size=True,
195179
print_values=False,
196-
xrange=(lon_min - 0.015, lon_max + 0.015), # Tighter margins
180+
xrange=(lon_min - 0.015, lon_max + 0.015),
197181
range=(lat_min - 0.008, lat_max + 0.008),
198182
)
199183

200-
# Add geographic boundaries as background lines with thicker stroke
184+
# Geographic boundaries (excluded from legend via None label)
201185
chart.add(None, manhattan_outline, stroke=True, dots_size=0, show_dots=False, fill=False, stroke_width=4)
202186
chart.add(None, hudson_river, stroke=True, dots_size=0, show_dots=False, fill=False, stroke_width=3)
203187
chart.add(None, east_river, stroke=True, dots_size=0, show_dots=False, fill=False, stroke_width=3)
204188

205-
# Use percentile-based bin edges to ensure all 5 bins have data
206-
n_bins = 5
207-
percentiles = [0, 20, 40, 60, 80, 100]
208-
bin_edges = np.percentile(counts, percentiles)
209-
# Ensure edges are strictly increasing
210-
for i in range(1, len(bin_edges)):
211-
if bin_edges[i] <= bin_edges[i - 1]:
212-
bin_edges[i] = bin_edges[i - 1] + 1
213-
214-
# Create legend labels showing density scale
215-
bin_labels = [
216-
f"Low ({int(bin_edges[0])}-{int(bin_edges[1])})",
217-
f"Medium-Low ({int(bin_edges[1])}-{int(bin_edges[2])})",
218-
f"Medium ({int(bin_edges[2])}-{int(bin_edges[3])})",
219-
f"Medium-High ({int(bin_edges[3])}-{int(bin_edges[4])})",
220-
f"High ({int(bin_edges[4])}+)",
221-
]
222-
223-
# Create series for each density level with rich tooltips
224-
series_data = [[] for _ in range(n_bins)]
189+
# Density bins with size encoding (larger hexagons = more pickups)
190+
series_data = [[] for _ in range(n_density_bins)]
225191
for h in hex_data:
226192
hx, hy = h["lon"], h["lat"]
227193
count = h["count"]
228194
total = h["sum"]
229195
mean = h["mean"]
230-
# Assign to bin based on percentile edges
231196
bin_idx = 0
232-
for i in range(1, n_bins):
197+
for i in range(1, n_density_bins):
233198
if count >= bin_edges[i]:
234199
bin_idx = i
235-
# Rich tooltip with cell statistics as spec requires
236-
tooltip = (
237-
f"Count: {count} pickups | Fares: ${total:.0f} total, ${mean:.2f} avg | Coords: ({hy:.4f}°N, {abs(hx):.4f}°W)"
238-
)
239-
point = {"value": (float(hx), float(hy)), "label": tooltip}
240-
series_data[bin_idx].append(point)
241-
242-
# Add ALL series (even empty ones) to ensure complete legend
243-
# Using larger dot sizes for density visualization
200+
tooltip = f"Count: {count} | Fares: ${total:.0f} total, ${mean:.2f} avg | ({hy:.4f}°N, {abs(hx):.4f}°W)"
201+
series_data[bin_idx].append({"value": (float(hx), float(hy)), "label": tooltip})
202+
244203
dot_sizes = [26, 34, 44, 54, 66]
245-
for i in range(n_bins):
246-
# Add dummy point off-screen if series is empty to show in legend
204+
for i in range(n_density_bins):
247205
if not series_data[i]:
248-
# Add invisible point outside plot range to ensure legend appears
249206
series_data[i].append({"value": (-99, 0), "label": "No data"})
250207
chart.add(bin_labels[i], series_data[i], dots_size=dot_sizes[i])
251208

252-
# Save PNG (primary output)
253-
chart.render_to_png("plot.png")
254-
255-
# Save interactive HTML with CSS hexagon styling
256-
# Use CSS clip-path to transform circles into hexagons in interactive view
257-
html_content = f"""<!DOCTYPE html>
258-
<html>
259-
<head>
260-
<meta charset="utf-8">
261-
<title>hexbin-map-geographic - pygal</title>
262-
<style>
263-
body {{ margin: 0; display: flex; justify-content: center; align-items: center;
264-
min-height: 100vh; background: #f5f5f5; }}
265-
.chart {{ max-width: 100%; height: auto; }}
266-
/* Transform dots into hexagons using CSS clip-path */
267-
.dot {{
268-
clip-path: polygon(50% 0%, 100% 25%, 100% 75%, 50% 100%, 0% 75%, 0% 25%);
269-
}}
270-
</style>
271-
</head>
272-
<body>
273-
<figure class="chart">
274-
{chart.render(is_unicode=True)}
275-
</figure>
276-
</body>
277-
</html>
278-
"""
279209

280-
with open("plot.html", "w", encoding="utf-8") as f:
281-
f.write(html_content)
210+
def circles_to_hexagons(svg_text):
    """Replace self-closing ``<circle .../>`` elements in an SVG string with hexagons.

    Every ``<circle .../>`` that carries numeric ``cx``, ``cy`` and ``r``
    attributes is rewritten as a ``<polygon>`` whose six vertices lie on the
    circle of radius ``r`` around ``(cx, cy)``. The vertices sit at 30°, 90°,
    150°, 210°, 270° and 330°, i.e. one vertex points straight up and one
    straight down (pointy-top orientation). All other attributes of the
    element are kept in place.

    Circles that lack one of the three attributes, or whose value is not a
    plain number (for example ``cx="50%"``), are returned unchanged instead of
    aborting the whole conversion.

    Args:
        svg_text: SVG document (or fragment) as a string.

    Returns:
        The SVG text with matching circles replaced by polygons.
    """
    # The lookbehind keeps hyphenated/namespaced attributes such as
    # ``data-r`` or ``foo:cx`` from being mistaken for the geometry attributes.
    cx_re = re.compile(r'(?<![\w:.-])cx="([^"]*)"\s*')
    cy_re = re.compile(r'(?<![\w:.-])cy="([^"]*)"\s*')
    r_re = re.compile(r'(?<![\w:.-])r="([^"]*)"')

    def replace_one(m):
        tag = m.group(0)
        found = (cx_re.search(tag), cy_re.search(tag), r_re.search(tag))
        if not all(found):
            return tag
        try:
            cx, cy, r = (float(hit.group(1)) for hit in found)
        except ValueError:
            # Empty, percentage or unit-suffixed values: leave the circle as is.
            return tag

        angles = [math.radians(60 * k + 30) for k in range(6)]
        pts = " ".join(
            f"{cx + r * math.cos(a):.2f},{cy + r * math.sin(a):.2f}" for a in angles
        )

        poly = tag.replace("<circle", "<polygon", 1)
        poly = cx_re.sub("", poly)
        poly = cy_re.sub("", poly)
        # A callable replacement avoids any backslash/group processing of pts.
        return r_re.sub(lambda _: f'points="{pts}"', poly)

    return re.sub(r"<circle\b[^>]*/>", replace_one, svg_text)
235+
236+
237+
# Render SVG, convert circle markers to hexagonal polygons, then produce PNG
238+
svg_bytes = chart.render()
239+
svg_str = svg_bytes.decode("utf-8")
240+
modified_svg = circles_to_hexagons(svg_str)
241+
modified_bytes = modified_svg.encode("utf-8")
242+
243+
cairosvg.svg2png(bytestring=modified_bytes, write_to=f"plot-{THEME}.png")
244+
245+
with open(f"plot-{THEME}.html", "wb") as f:
246+
f.write(modified_bytes)

0 commit comments

Comments
 (0)