This repository was archived by the owner on Apr 9, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpointing.py
More file actions
226 lines (179 loc) · 6.26 KB
/
pointing.py
File metadata and controls
226 lines (179 loc) · 6.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
"""Parse Molmo 2 pointing responses and draw overlays on images."""
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
# Molmo 2 reports positions on a normalized grid: both axes run from 0 to 1000.
COORD_SCALE: int = 1000

# Matches one <points coords="...">label</points> element.
#   group(1) -> raw contents of the coords attribute
#   group(2) -> label text between the tags (DOTALL lets it span lines)
_POINTS_RE = re.compile(r'<points\s+coords="([^"]+)">(.*?)</points>', flags=re.DOTALL)

# RGB palette used to tell multiple points apart.
POINT_COLORS: list[tuple[int, int, int]] = [
    (255, 59, 48),    # red
    (0, 122, 255),    # blue
    (52, 199, 89),    # green
    (255, 149, 0),    # orange
    (175, 82, 222),   # purple
    (255, 45, 85),    # pink
    (90, 200, 250),   # cyan
    (255, 204, 0),    # yellow
]
@dataclass
class Point:
    """One location reported by the model, tagged with its group's label."""

    x: float     # horizontal position on the 0–1000 grid
    y: float     # vertical position on the 0–1000 grid
    label: str   # label text of the <points> element this point came from
    index: int   # ordinal of the point within its group
@dataclass
class PointGroup:
    """The points taken from a single <points> element, with that element's label."""

    points: list[Point]   # points belonging to this group
    label: str            # label text shared by the whole group
def parse_points(text: str) -> list[PointGroup]:
    """Extract all point groups from a Molmo 2 response.

    Format: <points coords="1 1 X Y [N X Y ...]">label</points>
    - First point: flag flag X Y (4 numbers)
    - Subsequent: index X Y (3 numbers)
    - Coordinates are in 0–1000 space.

    Args:
        text: Raw response text; may contain any number of <points> elements.

    Returns:
        One PointGroup per well-formed <points> element, in order of
        appearance. An element is skipped when its coords attribute holds
        fewer than four numbers or any token that is not an integer. A
        trailing incomplete "index X Y" triple is ignored.
    """
    groups: list[PointGroup] = []
    for match in _POINTS_RE.finditer(text):
        label = match.group(2).strip()
        try:
            nums = [int(token) for token in match.group(1).split()]
        except ValueError:
            # The input is free-form model output: one malformed group must
            # not abort parsing of the groups around it.
            continue
        if len(nums) < 4:
            continue

        # First point: skip the 2 prefix flags, take x, y.
        points = [Point(x=nums[2], y=nums[3], label=label, index=1)]

        # Subsequent points come as (index, x, y) triples starting at offset 4.
        # The embedded index (nums[start]) is not used; points are numbered
        # by their position in the list instead.
        for ordinal, start in enumerate(range(4, len(nums) - 2, 3), start=2):
            points.append(
                Point(x=nums[start + 1], y=nums[start + 2], label=label, index=ordinal)
            )

        groups.append(PointGroup(points=points, label=label))
    return groups
def has_points(text: str) -> bool:
    """Return True when *text* contains at least one <points> element."""
    first_hit = _POINTS_RE.search(text)
    return first_hit is not None
def strip_points(text: str) -> str:
    """Replace every <points> element with its label text and trim the result."""
    # \2 is the label captured between the opening and closing tags.
    without_tags = _POINTS_RE.sub(r"\2", text)
    return without_tags.strip()
# Bold sans-serif fonts tried, in order, for the number drawn inside a marker.
_LABEL_FONT_PATHS = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
    "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
)


def _load_label_font(font_size: int):
    """Return the best available font for marker numbers, or None.

    Tries each path in _LABEL_FONT_PATHS, then Pillow's default font at
    *font_size*. None means "let Pillow pick its built-in default".
    """
    for font_path in _LABEL_FONT_PATHS:
        if not Path(font_path).exists():
            continue
        try:
            return ImageFont.truetype(font_path, font_size)
        except (OSError, ImportError, ValueError):
            # Unreadable file, Pillow built without FreeType, or an invalid
            # size: keep going instead of giving up on the first failure.
            continue
    try:
        # Pillow >= 10.1 can render its bundled default font at any size,
        # which keeps the number legible after the supersampled downscale.
        return ImageFont.load_default(size=font_size)
    except (TypeError, OSError, ImportError, ValueError):
        # Older Pillow (no ``size`` keyword) or no FreeType support.
        return None


def _make_marker(
    color: tuple[int, int, int],
    radius: int,
    label: str | None,
    *,
    scale: int = 4,
) -> Image.Image:
    """Render a single anti-aliased point marker via supersampling.

    Draws at *scale*× resolution then downscales with LANCZOS for smooth edges.

    Args:
        color: RGB fill of the main circle.
        radius: Radius of the main circle in final (downscaled) pixels.
        label: Text drawn in the centre of the marker; None or "" draws none.
        scale: Supersampling factor used while drawing.

    Returns:
        A square RGBA image with a transparent background, 2 * (radius + 4)
        pixels on each side.
    """
    sr = radius * scale  # supersampled radius
    pad = 4 * scale      # room around the circle for the border and glow
    size = (sr + pad) * 2
    marker = Image.new("RGBA", (size, size), (0, 0, 0, 0))
    draw = ImageDraw.Draw(marker)
    cx = cy = size // 2

    # Soft outer glow. It must reach past the white border ring drawn next
    # (radius sr + 3 * scale); a smaller disc would be completely painted
    # over and never show. Filling the whole canvas disc leaves a thin
    # translucent rim outside the border.
    draw.ellipse([0, 0, size - 1, size - 1], fill=(0, 0, 0, 50))

    # White border ring
    border = 3 * scale
    draw.ellipse(
        [cx - sr - border, cy - sr - border,
         cx + sr + border, cy + sr + border],
        fill=(255, 255, 255, 240),
    )

    # Main colored circle
    draw.ellipse(
        [cx - sr, cy - sr, cx + sr, cy + sr],
        fill=(*color, 230),
    )

    # Draw number label
    if label:
        font = _load_label_font(int(sr * 1.3))

        # Dark shadow for contrast (anchor="mm" = middle-middle centering)
        off = 2 * scale
        for ox, oy in [(1, 1), (-1, 1), (1, -1), (-1, -1)]:
            draw.text(
                (cx + ox * off, cy + oy * off),
                label,
                fill=(0, 0, 0, 120),
                font=font,
                anchor="mm",
            )
        # White text, centered on the marker
        draw.text((cx, cy), label, fill=(255, 255, 255, 255), font=font, anchor="mm")

    # Downscale to target size with LANCZOS for smooth anti-aliasing
    final_size = size // scale
    return marker.resize((final_size, final_size), Image.LANCZOS)
def draw_points_on_image(
    image_path: str,
    groups: list[PointGroup],
    *,
    dot_radius: int | None = None,
    output_path: str | None = None,
) -> tuple[str, str]:
    """Draw colored point markers on an image and save it as a JPEG.

    Args:
        image_path: Path of the image to annotate.
        groups: Point groups whose coordinates are in 0–1000 space.
        dot_radius: Marker radius in pixels. When None it is derived from the
            image size (1/50 of the shorter side, at least 10).
        output_path: Where to write the result. When None the file is written
            next to the input as "<input stem>_pointed.jpg".

    Returns:
        (output_path, caption). The caption is the bare label for a single
        point, "📍 label (N points)" when all points share one label, and
        otherwise one numbered "📍 i. label" line per point. It is an empty
        string when there are no points.
    """
    # The context manager releases the file handle as soon as convert() has
    # copied the pixel data into a new image.
    with Image.open(image_path) as source:
        img = source.convert("RGBA")
    w, h = img.size

    # Auto-scale dot size based on image dimensions
    if dot_radius is None:
        dot_radius = max(10, min(w, h) // 50)

    all_points = [(pt, group.label) for group in groups for pt in group.points]
    show_numbers = len(all_points) > 1

    for i, (pt, _label) in enumerate(all_points):
        color = POINT_COLORS[i % len(POINT_COLORS)]

        # Convert from 0–1000 to pixel coordinates, clamped inside the image
        px = max(0, min(w - 1, int(pt.x / COORD_SCALE * w)))
        py = max(0, min(h - 1, int(pt.y / COORD_SCALE * h)))

        number = str(i + 1) if show_numbers else None
        marker = _make_marker(color, dot_radius, number)
        mw, mh = marker.size

        # Paste marker centered on the point, using its own alpha as the mask
        img.paste(marker, (px - mw // 2, py - mh // 2), marker)

    # Build caption
    unique_labels = list(dict.fromkeys(label for _, label in all_points))
    if len(all_points) == 1:
        caption = unique_labels[0]
    elif len(unique_labels) == 1:
        caption = f"📍 {unique_labels[0]} ({len(all_points)} points)"
    else:
        caption = "\n".join(
            f"📍 {i + 1}. {label}" for i, (_, label) in enumerate(all_points)
        )

    # Save as RGB JPEG
    if output_path is None:
        # Only the final path component is rewritten, so a dot in a directory
        # name (or a leading "./") cannot be mistaken for the extension.
        source_path = Path(image_path)
        output_path = str(source_path.with_name(f"{source_path.stem}_pointed.jpg"))
    img.convert("RGB").save(output_path, "JPEG", quality=92)
    return output_path, caption