Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions packages/recognition/src/RPA/recognition/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,29 @@
DEFAULT_CONFIDENCE = 80.0


def _parse_ocr_confidence(
confidence: Optional[Union[str, float, int]]
) -> Optional[float]:
try:
value = float(confidence)
except (TypeError, ValueError):
return None

return value if value >= 0 else None


def _average_ocr_confidence(words: List[Dict]) -> Optional[float]:
confidences = [
word["ocr_confidence"]
for word in words
if word.get("ocr_confidence") is not None
]
if not confidences:
return None

return sum(confidences) / len(confidences)


def read(
image: Union[Image.Image, Path],
language: Optional[str] = None,
Expand Down Expand Up @@ -57,6 +80,9 @@ def find(
"""Scan image for text and return a list of regions
that contain it (or something close to it).

Returned matches preserve the existing text similarity ``confidence`` value and
include ``ocr_confidence`` from the underlying Tesseract data when available.

:param image: Path to image or Image object
:param text: Text to find in image
:param confidence: Minimum confidence for text similarity
Expand Down Expand Up @@ -114,8 +140,13 @@ def _dict_lines(data: Dict) -> List:
word["left"], word["top"], word["width"], word["height"]
)

# NOTE: Currently ignoring confidence in tesseract results
lines[key].append({"text": word["text"], "region": region})
lines[key].append(
{
"text": word["text"],
"region": region,
"ocr_confidence": _parse_ocr_confidence(word.get("conf")),
}
)
assert len(lines[key]) == word["word_num"]

return list(lines.values())
Expand Down Expand Up @@ -155,6 +186,7 @@ def _match_lines(lines: List[Dict], text: str, confidence: float) -> List[Dict]:
"text": sentence,
"region": Region.merge(regions),
"confidence": ratio,
"ocr_confidence": _average_ocr_confidence(words),
}

if match:
Expand Down
68 changes: 68 additions & 0 deletions packages/recognition/tests/python/test_ocr_confidence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from RPA.core.geometry import Region
from RPA.recognition import ocr


def test_dict_lines_preserves_tesseract_confidence():
    """Words returned by ``_dict_lines`` carry their parsed ``conf`` value."""
    # Column-oriented input describing three words on the same line; the
    # last word has a "-1" confidence, expected to come back as ``None``.
    tesseract_data = {
        "level": [5, 5, 5],
        "text": ["Open", "New", "Ignored"],
        "conf": ["95.5", "73", "-1"],
        "block_num": [1, 1, 1],
        "par_num": [1, 1, 1],
        "line_num": [1, 1, 1],
        "left": [10, 30, 60],
        "top": [20, 20, 20],
        "width": [10, 20, 30],
        "height": [10, 10, 10],
        "word_num": [1, 2, 3],
    }

    expected_line = [
        {
            "text": "Open",
            "region": Region.from_size(10, 20, 10, 10),
            "ocr_confidence": 95.5,
        },
        {
            "text": "New",
            "region": Region.from_size(30, 20, 20, 10),
            "ocr_confidence": 73.0,
        },
        {
            "text": "Ignored",
            "region": Region.from_size(60, 20, 30, 10),
            "ocr_confidence": None,
        },
    ]

    assert ocr._dict_lines(tesseract_data) == [expected_line]


def test_match_lines_returns_average_ocr_confidence():
    """A match reports the mean ``ocr_confidence`` of the words it spans."""
    first_word = {
        "text": "Open",
        "region": Region.from_size(10, 20, 10, 10),
        "ocr_confidence": 95.5,
    }
    second_word = {
        "text": "New",
        "region": Region.from_size(30, 20, 20, 10),
        "ocr_confidence": 73.0,
    }

    matches = ocr._match_lines([[first_word, second_word]], "Open New", 100)

    # (95.5 + 73.0) / 2 == 84.25; the merged region covers both words.
    expected_match = {
        "text": "Open New",
        "region": Region.from_size(10, 20, 40, 10),
        "confidence": 100.0,
        "ocr_confidence": 84.25,
    }
    assert matches == [expected_match]
Loading