Skip to content

Commit 33a8e0f

Browse files
feat: add Ramer-Douglas-Peucker polyline simplification algorithm (#14372)
* feat: add Ramer-Douglas-Peucker polyline simplification algorithm * Use descriptive parameter names * Update geometry/ramer_douglas_peucker.py * Update geometry/ramer_douglas_peucker.py * Update ramer_douglas_peucker.py * Update ramer_douglas_peucker.py * Update ramer_douglas_peucker.py --------- Co-authored-by: John Law <johnlaw.po@gmail.com>
1 parent 144ef9c commit 33a8e0f

1 file changed

Lines changed: 184 additions & 0 deletions

File tree

geometry/ramer_douglas_peucker.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""
2+
Ramer-Douglas-Peucker polyline simplification algorithm.
3+
4+
Given a sequence of 2-D points and a tolerance epsilon, the algorithm
5+
reduces the number of points while preserving the overall shape of the curve.
6+
7+
Time complexity: O(n log n) average, O(n²) worst case
8+
Space complexity: O(n)
9+
10+
References:
11+
https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import math
17+
18+
19+
def _euclidean_distance(
20+
point_a: tuple[float, float],
21+
point_b: tuple[float, float],
22+
) -> float:
23+
"""Return the Euclidean distance between two 2-D points.
24+
25+
>>> _euclidean_distance((0.0, 0.0), (3.0, 4.0))
26+
5.0
27+
>>> _euclidean_distance((1.0, 1.0), (1.0, 1.0))
28+
0.0
29+
"""
30+
return math.hypot(point_b[0] - point_a[0], point_b[1] - point_a[1])
31+
32+
33+
def _perpendicular_distance(
34+
point: tuple[float, float],
35+
line_start: tuple[float, float],
36+
line_end: tuple[float, float],
37+
) -> float:
38+
"""Return the distance from *point* to the line **segment** between
39+
*line_start* and *line_end*.
40+
41+
When the perpendicular projection of *point* onto the infinite line falls
42+
within the segment, this equals the perpendicular distance to that line.
43+
When the projection falls outside the segment, the distance to the nearest
44+
endpoint is returned instead (projection clamped to [0, 1]).
45+
46+
This is the correct distance measure for the Ramer-Douglas-Peucker
47+
algorithm: using the infinite-line distance can incorrectly discard points
48+
whose projection lies beyond a segment endpoint.
49+
50+
>>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0))
51+
4.0
52+
>>> # order of line_start and line_end does not affect the result
53+
>>> _perpendicular_distance((4.0, 0.0), (0.0, 3.0), (0.0, 0.0))
54+
4.0
55+
>>> _perpendicular_distance((4.0, 1.0), (0.0, 1.0), (0.0, 4.0))
56+
4.0
57+
>>> _perpendicular_distance((2.0, 1.0), (-2.0, 1.0), (-2.0, 4.0))
58+
4.0
59+
>>> # projection falls outside the segment; distance to nearest endpoint
60+
>>> round(_perpendicular_distance((0.0, 2.0), (1.0, 0.0), (3.0, 0.0)), 6)
61+
2.236068
62+
"""
63+
px, py = point
64+
ax, ay = line_start
65+
bx, by = line_end
66+
dx, dy = bx - ax, by - ay
67+
seg_len_sq = dx * dx + dy * dy
68+
if seg_len_sq == 0.0:
69+
# line_start and line_end coincide; fall back to point-to-point distance
70+
return _euclidean_distance(point, line_start)
71+
# Project point onto the segment line, then clamp t to [0, 1] so the
72+
# nearest point is always on the segment rather than the infinite line.
73+
t = max(0.0, min(1.0, ((px - ax) * dx + (py - ay) * dy) / seg_len_sq))
74+
nearest_x = ax + t * dx
75+
nearest_y = ay + t * dy
76+
return math.hypot(px - nearest_x, py - nearest_y)
77+
78+
79+
def ramer_douglas_peucker(
80+
pts: list[tuple[float, float]],
81+
epsilon: float,
82+
) -> list[tuple[float, float]]:
83+
"""Simplify a polyline using the Ramer-Douglas-Peucker algorithm.
84+
85+
Given a sequence of 2-D points and a maximum allowable deviation
86+
*epsilon* (>= 0), returns a simplified list of points such that no
87+
discarded point is farther than *epsilon* from the simplified polyline.
88+
89+
Parameters
90+
----------
91+
pts:
92+
Ordered sequence of ``(x, y)`` points describing the polyline.
93+
epsilon:
94+
Maximum allowable distance of any discarded point from the
95+
simplified polyline. Must be non-negative.
96+
97+
Returns
98+
-------
99+
list[tuple[float, float]]
100+
Simplified list of ``(x, y)`` points. The first and last points of
101+
*pts* are always preserved.
102+
103+
Raises
104+
------
105+
ValueError
106+
If *epsilon* is negative.
107+
108+
References
109+
----------
110+
https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm
111+
112+
Examples
113+
--------
114+
>>> ramer_douglas_peucker([], epsilon=1.0)
115+
[]
116+
>>> ramer_douglas_peucker([(0.0, 0.0)], epsilon=1.0)
117+
[(0.0, 0.0)]
118+
>>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.0)], epsilon=1.0)
119+
[(0.0, 0.0), (1.0, 0.0)]
120+
>>> # middle point is within epsilon - it is discarded
121+
>>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.1), (2.0, 0.0)], epsilon=0.5)
122+
[(0.0, 0.0), (2.0, 0.0)]
123+
>>> # middle point exceeds epsilon - it is kept
124+
>>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)], epsilon=0.5)
125+
[(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)]
126+
>>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.5), (2.0, 0.0)], epsilon=-1.0)
127+
Traceback (most recent call last):
128+
...
129+
ValueError: epsilon must be non-negative, got -1.0
130+
"""
131+
if epsilon < 0:
132+
msg = f"epsilon must be non-negative, got {epsilon!r}"
133+
raise ValueError(msg)
134+
135+
if len(pts) < 3:
136+
return list(pts)
137+
138+
# ---------------------------------------------------------------------------
139+
# Iterative, stack-based implementation.
140+
#
141+
# The naive recursive approach copies sublists at every level via slicing
142+
# (pts[:max_index+1] / pts[max_index:]), which is O(n) per call and makes
143+
# the overall algorithm O(n²) in memory even for well-balanced splits. An
144+
# explicit stack operating on index ranges avoids all copying and also
145+
# eliminates the risk of hitting Python's recursion limit for long polylines.
146+
# ---------------------------------------------------------------------------
147+
n = len(pts)
148+
149+
# keep[i] is True when pts[i] must appear in the output.
150+
keep: list[bool] = [False] * n
151+
keep[0] = True
152+
keep[-1] = True
153+
154+
# Stack of (start_index, end_index) pairs still to be examined.
155+
stack: list[tuple[int, int]] = [(0, n - 1)]
156+
157+
while stack:
158+
start, end = stack.pop()
159+
if end - start < 2:
160+
# Only one interior candidate at most; nothing to split further.
161+
continue
162+
163+
# Find the interior point with the greatest distance to the segment.
164+
max_dist = 0.0
165+
max_index = start
166+
for i in range(start + 1, end):
167+
dist = _perpendicular_distance(pts[i], pts[start], pts[end])
168+
if dist > max_dist:
169+
max_dist = dist
170+
max_index = i
171+
172+
if max_dist > epsilon:
173+
keep[max_index] = True
174+
stack.append((start, max_index))
175+
stack.append((max_index, end))
176+
# else: all interior points are within epsilon; discard them all.
177+
178+
return [pts[i] for i in range(n) if keep[i]]
179+
180+
181+
if __name__ == "__main__":
182+
import doctest
183+
184+
doctest.testmod()

0 commit comments

Comments
 (0)