Skip to content

Commit aef48a9

Browse files
author
Robert Sachunsky
committed
repair: add params spread / spread_level, update/improve docs
1 parent 20c943a commit aef48a9

2 files changed

Lines changed: 101 additions & 31 deletions

File tree

ocrd_segment/ocrd-tool.json

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@
1515
],
1616
"steps": ["layout/segmentation/region"],
1717
"parameters": {
18+
"sanitize": {
19+
"type": "boolean",
20+
"default": false,
21+
"description": "Shrink each region such that its coordinates become the minimal concave hull of its binary foreground. (Assumes that a perfect binarization is available.)"
22+
},
23+
"sanitize_padding": {
24+
"type": "number",
25+
"format": "integer",
26+
"minimum": 1,
27+
"default": 5,
28+
"description": "When shrinking a region, enlarge the resulting hull by this amount of pixels in each direction."
29+
},
1830
"simplify": {
1931
"type": "number",
2032
"format": "float",
@@ -35,17 +47,17 @@
3547
"default": 0.90,
3648
"description": "When merging a region or line almost contained in another, require at least this ratio of area is shared with the other."
3749
},
38-
"sanitize": {
39-
"type": "boolean",
40-
"default": false,
41-
"description": "Shrink each region such that its coordinates become the minimal concave hull of its binary foreground. (Assumes that a perfect binarization is available.)"
42-
},
43-
"sanitize_padding": {
50+
"spread": {
4451
"type": "number",
4552
"format": "integer",
46-
"minimum": 1,
47-
"default": 5,
48-
"description": "When shrinking a region, enlarge the resulting hull by this amount of pixels in each direction."
53+
"default": 0,
54+
"description": "After all other steps, enlarge segments by this many pixels into the background."
55+
},
56+
"spread_level": {
57+
"type": "string",
58+
"enum": ["page", "table", "region", "line", "word"],
59+
"default": "region",
60+
"description": "Hierarchy level spread operates on"
4961
}
5062
}
5163
},

ocrd_segment/repair.py

Lines changed: 80 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from ocrd_modelfactory import page_from_file
2424
from ocrd_models.ocrd_page import (
2525
PageType,
26+
BorderType,
2627
TextRegionType,
2728
to_xml
2829
)
@@ -55,18 +56,18 @@ def __init__(self, *args, **kwargs):
5556

5657
def process(self):
5758
"""Perform generic post-processing of page segmentation with Shapely and OpenCV.
58-
59+
5960
Open and deserialize PAGE input files and their respective images,
6061
then validate syntax and semantics, checking for invalid or inconsistent
6162
segmentation. Fix invalidities by simplifying and/or re-ordering polygon paths.
6263
Fix inconsistencies by shrinking segment polygons to their parents. Log
6364
errors that cannot be repaired automatically.
64-
65+
6566
Next, if ``simplify`` is non-zero, then for each segment (top-level page or
6667
recursive region, line, word, glyph), simplify the polygon points up to that
6768
precision, while preserving its topology and parent-child consistency.
6869
(This will usually reduce the number of points.)
69-
70+
7071
\b
7172
Next, if ``plausibilize``, then for each segment (top-level page or recursive region)
7273
which contains any text regions, try to find all pairs of such regions in it that
@@ -88,14 +89,21 @@ def process(self):
8889
(a fraction of more than ``plausibilize_merge_min_overlap``),
8990
then the one line can be merged into the other.
9091
* If another overlap, and
91-
- if either line's centroid is in the other,
92+
- if either line's centroid is in the other,
9293
then the smaller line can be merged into the larger,
9394
- otherwise the smaller line can be subtracted from the larger.
9495
Apply those repairs and update the reading order.
95-
96-
Furthermore, if ``sanitize``, then for each text region, update
97-
the coordinates to become the minimal convex hull of its constituent
98-
text lines. (But consider running ocrd-segment-project instead.)
96+
97+
Next, if ``spread`` is non-zero, then enlarge each ``spread_level`` segment
98+
by this many pixels (without causing additional overlap between neighbours).
99+
100+
However, if ``sanitize``, then as a first step (prior to everything else
101+
including repairs), for each text region, update the coordinates to become
102+
the minimal convex hull of its binary foreground. (So in contrast to
103+
ocrd-segment-project, this ignores constituent lines. It uses the binarized
104+
image and generates a tight outline properly contained within the old
105+
region outline, as if extended by ``sanitize_padding``. If ``spread`` is
106+
non-zero and ``spread_level=region``, then this still applies to the result.)
99107
100108
Finally, produce new output files by serialising the resulting hierarchy.
101109
"""
@@ -188,6 +196,8 @@ def process(self):
188196
# delete/merge/split redundant text regions (or its text lines)
189197
if self.parameter['plausibilize']:
190198
self.plausibilize_page(page, page_id)
199+
if self.parameter['spread']:
200+
self.spread_segments(page, page_id)
191201

192202
self.workspace.add_file(
193203
ID=file_id,
@@ -216,7 +226,7 @@ def simplify_page(self, page, page_id):
216226
ensure_consistent(region, at_parent=True)
217227
if page.get_Border() is not None:
218228
ensure_consistent(page)
219-
229+
220230
def plausibilize_page(self, page, page_id):
221231
ro = page.get_ReadingOrder()
222232
if ro:
@@ -286,18 +296,50 @@ def plausibilize_page(self, page, page_id):
286296
marked_for_deletion,
287297
marked_for_merging,
288298
marked_for_splitting)
289-
299+
300+
def spread_segments(self, page, page_id):
    """Enlarge all segments of the configured hierarchy level.

    Reads the parameters ``spread`` (distance in pixels) and ``spread_level``,
    collects the sibling segments of that level, and passes each group of
    siblings to the module-level function ``spread_segments``:

    \b
    - ``page``: the Border alone (nothing happens if there is none)
    - ``table``: the text regions (cells) of each top-level table region
    - ``region``: all top-level regions of the page together
    - ``line``: the text lines of each top-level text region
    - ``word``: the words of each text line
    - any other value: the glyphs of each word

    ``page_id`` is accepted for symmetry with the other page-wise methods,
    but not used here.
    """
    level = self.parameter['spread_level']
    distance = self.parameter['spread']
    if level == 'page':
        border = page.get_Border()
        if border is not None:
            spread_segments([border], distance)
    elif level == 'table':
        for table in page.get_TableRegion():
            spread_segments(table.get_TextRegion(), distance)
    elif level == 'region':
        spread_segments(page.get_AllRegions(depth=1), distance)
    else:
        # below region level, only text regions have anything to descend into
        text_regions = [region for region in page.get_AllRegions(depth=1)
                        if isinstance(region, TextRegionType)]
        for region in text_regions:
            lines = region.get_TextLine()
            if level == 'line':
                spread_segments(lines, distance)
            else:
                for line in lines:
                    words = line.get_Word()
                    if level == 'word':
                        spread_segments(words, distance)
                    else:
                        for word in words:
                            spread_segments(word.get_Glyph(), distance)
331+
290332
def _compare_segments(seg1, seg2, poly1, poly2, marked_for_deletion, marked_for_merging, min_overlap, page_id):
291333
"""Determine redundancies in a pair of regions/lines
292-
334+
293335
\b
294336
For segments ``seg1`` (with coordinates ``poly1``) and ``seg2`` (with coordinates ``poly2``),
295337
- if their coordinates are nearly identical, then just mark ``seg2`` for deletion
296338
- if either properly contains the other, then mark the other for deletion
297339
- if they overlap, then mark the most overlapped side in favour of the other – unless
298340
- the union is larger than the sum (i.e. covers area outside of both) and
299341
- the intersection is smaller than ``min_overlap`` fraction of either side
300-
342+
301343
Return whether something else besides deletion must be done about the redundancy,
302344
i.e. true iff they overlap, but neither side could be marked for deletion.
303345
"""
@@ -347,7 +389,7 @@ def _compare_segments(seg1, seg2, poly1, poly2, marked_for_deletion, marked_for_
347389

348390
def _merge_segments(seg, superseg, poly, superpoly, segpolys, reading_order):
349391
"""Merge one segment into another and update reading order refs.
350-
392+
351393
\b
352394
Given a region/line ``seg`` that should be dissolved into a
353395
region/line ``superseg``, update the latter's
@@ -432,17 +474,17 @@ def _merge_segments(seg, superseg, poly, superpoly, segpolys, reading_order):
432474
LOG.warning('Merging "{}" with TextEquiv {} into "{}" with {}'.format(
433475
seg.id, seg.get_TextEquiv(), # FIXME needs repr...
434476
superseg.id, superseg.get_TextEquiv())) # ...to be informative
435-
477+
436478
def _plausibilize_segments(segpolys, rogroup, marked_for_deletion, marked_for_merging, marked_for_splitting):
437479
"""Remove redundancy among a set of segments by applying deletion/merging/splitting
438-
480+
439481
\b
440482
Given the segment-polygon tuples ``segpolys`` and analysis of actions to be taken:
441483
- ``marked_for_deletion``: list of segment identifiers that can be removed,
442484
- ``marked_for_merging``: dict of segment identifiers that can be dissolved into some other,
443485
- ``marked_for_splitting``: dict of segment identifiers that can be shrunk in favour of some other,
444486
apply these one by one (possibly recursing from regions to lines).
445-
487+
446488
Finally, update the reading order ``rogroup`` accordingly.
447489
"""
448490
LOG = getLogger('processor.RepairSegmentation')
@@ -498,7 +540,7 @@ def _plausibilize_segments(segpolys, rogroup, marked_for_deletion, marked_for_me
498540

499541
def page_get_reading_order(ro, rogroup):
500542
"""Add all elements from the given reading order group to the given dictionary.
501-
543+
502544
Given a dict ``ro`` from layout element IDs to ReadingOrder element objects,
503545
and an object ``rogroup`` with additional ReadingOrder element objects,
504546
add all references to the dict, traversing the group recursively.
@@ -568,6 +610,17 @@ def shrink_regions(page_image, page_coords, page, page_id, padding=0):
568610
LOG.debug('Using new coordinates for region "%s"', region.id)
569611
region.get_Coords().set_points(points_from_polygon(region_polygon.exterior.coords[:-1]))
570612

613+
def spread_segments(segments, distance=0):
    """Enlarge sibling segments into the background without creating new overlaps.

    For every element of ``segments``, buffer its polygon by ``distance``
    pixels, remove from that buffer everything which is already covered by
    any segment of the list, and merge the remainder with the original
    outline. Then write the new outline back into the segment's ``Coords``
    and make it consistent with its parent (with ``at_parent=True``, i.e.
    by enlarging the parent where necessary).

    Background that one segment has grown into counts as occupied for all
    segments processed after it. So the gap between two close neighbours is
    assigned to whichever comes first in ``segments``, instead of to both.

    Arguments:
        segments: list of sibling segments providing ``get_Coords()``;
            their coordinates are modified in place
        distance: number of pixels to grow by
    """
    if not segments:
        return # nothing to do (e.g. table without cells, line without words)
    polygons = [Polygon(polygon_from_points(segment.get_Coords().points))
                for segment in segments]
    # everything a segment must not grow into: initially all original outlines
    occupied = unary_union(polygons)
    for segment, polygon in zip(segments, polygons):
        # ring of still free background around the original outline
        grown = polygon.buffer(distance).difference(occupied)
        if not grown.is_empty:
            # claim the ring right away – otherwise a neighbour closer than
            # 2*distance would buffer into the very same background,
            # and both results would overlap in the gap between them
            occupied = occupied.union(grown)
        polygon = merge_poly(polygon, grown)
        polygon = polygon.exterior.coords[:-1]
        segment.get_Coords().set_points(points_from_polygon(polygon))
        ensure_consistent(segment, at_parent=True)
623+
571624
def simplify(segment, tolerance=0):
572625
if tolerance <= 0:
573626
return # nothing to do
@@ -634,22 +687,22 @@ def page_poly(page):
634687
# same as polygon_for_parent pattern in other processors
635688
def ensure_consistent(child, at_parent=False):
636689
"""Make segment coordinates fit into parent coordinates.
637-
690+
638691
Ensure that the coordinate polygon of ``child`` is fully
639692
contained in the coordinate polygon of its parent.
640-
693+
641694
\b
642695
To achieve that when necessary, either
643696
- enlarge the parent to the union of both,
644697
if ``at_parent``
645698
- shrink the child to the intersection of both,
646699
otherwise.
647-
700+
648701
In any case, ensure the resulting polygon is valid.
649-
702+
650703
If the parent is at page level, and there is no Border,
651704
then use the page frame (and assume `at_parent=False`).
652-
705+
653706
If ``child`` is at page level, and there is a Border,
654707
then use the page frame as parent (and assume `at_parent=False`).
655708
"""
@@ -660,6 +713,11 @@ def ensure_consistent(child, at_parent=False):
660713
parentp = page_poly(child)
661714
at_parent = False # clip to page frame
662715
parent = child
716+
elif isinstance(child, BorderType):
717+
childp = Polygon(polygon_from_points(child.get_Coords().points))
718+
parentp = page_poly(child.parent_object_)
719+
at_parent = False # clip to page frame
720+
parent = child.parent_object_
663721
else:
664722
points = child.get_Coords().points
665723
polygon = polygon_from_points(points)

0 commit comments

Comments
 (0)