Expose Tesseract word confidence as ``ocr_confidence`` in OCR results.

* ocr.py: ``_dict_lines`` stores the parsed Tesseract ``conf`` value of each
  word as ``ocr_confidence`` (None when the value is missing, not numeric, or
  negative); ``_match_lines`` adds the mean of the words' non-None
  ``ocr_confidence`` values to every match, next to the existing text
  similarity ``confidence``.
* test_ocr_confidence.py: new tests covering both behaviours.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Indentation and blank context lines were
reconstructed from Python syntax and the hunk line counts; confirm it applies
cleanly to the target tree (for example with "git apply --check") before use.

diff --git a/packages/recognition/src/RPA/recognition/ocr.py b/packages/recognition/src/RPA/recognition/ocr.py
index 1f5130a91d..481cf49b43 100644
--- a/packages/recognition/src/RPA/recognition/ocr.py
+++ b/packages/recognition/src/RPA/recognition/ocr.py
@@ -23,6 +23,29 @@
 DEFAULT_CONFIDENCE = 80.0
 
 
+def _parse_ocr_confidence(
+    confidence: Optional[Union[str, float, int]]
+) -> Optional[float]:
+    try:
+        value = float(confidence)
+    except (TypeError, ValueError):
+        return None
+
+    return value if value >= 0 else None
+
+
+def _average_ocr_confidence(words: List[Dict]) -> Optional[float]:
+    confidences = [
+        word["ocr_confidence"]
+        for word in words
+        if word.get("ocr_confidence") is not None
+    ]
+    if not confidences:
+        return None
+
+    return sum(confidences) / len(confidences)
+
+
 def read(
     image: Union[Image.Image, Path],
     language: Optional[str] = None,
@@ -57,6 +80,9 @@ def find(
     """Scan image for text and return a list of regions
     that contain it (or something close to it).
 
+    Returned matches preserve the existing text similarity ``confidence`` value and
+    include ``ocr_confidence`` from the underlying Tesseract data when available.
+
     :param image: Path to image or Image object
     :param text: Text to find in image
     :param confidence: Minimum confidence for text similaritys
@@ -114,8 +140,13 @@ def _dict_lines(data: Dict) -> List:
             word["left"], word["top"], word["width"], word["height"]
         )
 
-        # NOTE: Currently ignoring confidence in tesseract results
-        lines[key].append({"text": word["text"], "region": region})
+        lines[key].append(
+            {
+                "text": word["text"],
+                "region": region,
+                "ocr_confidence": _parse_ocr_confidence(word.get("conf")),
+            }
+        )
         assert len(lines[key]) == word["word_num"]
 
     return list(lines.values())
@@ -155,6 +186,7 @@ def _match_lines(lines: List[Dict], text: str, confidence: float) -> List[Dict]:
                     "text": sentence,
                     "region": Region.merge(regions),
                     "confidence": ratio,
+                    "ocr_confidence": _average_ocr_confidence(words),
                 }
 
         if match:
diff --git a/packages/recognition/tests/python/test_ocr_confidence.py b/packages/recognition/tests/python/test_ocr_confidence.py
new file mode 100644
index 0000000000..5b84fe74d8
--- /dev/null
+++ b/packages/recognition/tests/python/test_ocr_confidence.py
@@ -0,0 +1,68 @@
+from RPA.core.geometry import Region
+from RPA.recognition import ocr
+
+
+def test_dict_lines_preserves_tesseract_confidence():
+    data = {
+        "level": [5, 5, 5],
+        "text": ["Open", "New", "Ignored"],
+        "conf": ["95.5", "73", "-1"],
+        "block_num": [1, 1, 1],
+        "par_num": [1, 1, 1],
+        "line_num": [1, 1, 1],
+        "left": [10, 30, 60],
+        "top": [20, 20, 20],
+        "width": [10, 20, 30],
+        "height": [10, 10, 10],
+        "word_num": [1, 2, 3],
+    }
+
+    result = ocr._dict_lines(data)
+
+    assert result == [
+        [
+            {
+                "text": "Open",
+                "region": Region.from_size(10, 20, 10, 10),
+                "ocr_confidence": 95.5,
+            },
+            {
+                "text": "New",
+                "region": Region.from_size(30, 20, 20, 10),
+                "ocr_confidence": 73.0,
+            },
+            {
+                "text": "Ignored",
+                "region": Region.from_size(60, 20, 30, 10),
+                "ocr_confidence": None,
+            },
+        ]
+    ]
+
+
+def test_match_lines_returns_average_ocr_confidence():
+    lines = [
+        [
+            {
+                "text": "Open",
+                "region": Region.from_size(10, 20, 10, 10),
+                "ocr_confidence": 95.5,
+            },
+            {
+                "text": "New",
+                "region": Region.from_size(30, 20, 20, 10),
+                "ocr_confidence": 73.0,
+            },
+        ]
+    ]
+
+    result = ocr._match_lines(lines, "Open New", 100)
+
+    assert result == [
+        {
+            "text": "Open New",
+            "region": Region.from_size(10, 20, 40, 10),
+            "confidence": 100.0,
+            "ocr_confidence": 84.25,
+        }
+    ]