robocorp · mikahanninen · May 3, 2026 · Apr 16, 2026 · May 3, 2026
diff --git a/packages/recognition/src/RPA/recognition/ocr.py b/packages/recognition/src/RPA/recognition/ocr.py
@@ -23,6 +23,29 @@
 DEFAULT_CONFIDENCE = 80.0
 
 
+def _parse_ocr_confidence(
+    confidence: Optional[Union[str, float, int]]
+) -> Optional[float]:
+    """Return ``confidence`` as a finite float >= 0, or None if it is not one."""
+    try:
+        value = float(confidence)
+    except (TypeError, ValueError, OverflowError):
+        return None
+    return value if 0 <= value < float("inf") else None  # drops NaN and inf too
+
+
+def _average_ocr_confidence(words: List[Dict]) -> Optional[float]:
+    """Return the mean of the non-None ``ocr_confidence`` values in ``words``.
+
+    Returns None when no word carries a usable confidence value.
+    """
+    scores = [word.get("ocr_confidence") for word in words]
+    scores = [score for score in scores if score is not None]
+    if scores:
+        return sum(scores) / len(scores)
+    return None
+
+
 def read(
     image: Union[Image.Image, Path],
     language: Optional[str] = None,
@@ -57,6 +80,9 @@ def find(
     """Scan image for text and return a list of regions
     that contain it (or something close to it).
 
+    Returned matches preserve the existing text similarity ``confidence`` value and
+    include ``ocr_confidence`` from the underlying Tesseract data when available.
+
     :param image: Path to image or Image object
     :param text: Text to find in image
     :param confidence: Minimum confidence for text similaritys
@@ -114,8 +140,13 @@ def _dict_lines(data: Dict) -> List:
             word["left"], word["top"], word["width"], word["height"]
         )
 
-        # NOTE: Currently ignoring confidence in tesseract results
-        lines[key].append({"text": word["text"], "region": region})
+        lines[key].append(
+            {
+                "text": word["text"],
+                "region": region,
+                "ocr_confidence": _parse_ocr_confidence(word.get("conf")),
+            }
+        )
         assert len(lines[key]) == word["word_num"]
 
     return list(lines.values())
@@ -155,6 +186,7 @@ def _match_lines(lines: List[Dict], text: str, confidence: float) -> List[Dict]:
                     "text": sentence,
                     "region": Region.merge(regions),
                     "confidence": ratio,
+                    "ocr_confidence": _average_ocr_confidence(words),
                 }
 
         if match:

diff --git a/packages/recognition/tests/python/test_ocr_confidence.py b/packages/recognition/tests/python/test_ocr_confidence.py
@@ -0,0 +1,68 @@
+from RPA.core.geometry import Region
+from RPA.recognition import ocr
+
+
+def test_dict_lines_preserves_tesseract_confidence():
+    """Check that ``_dict_lines`` attaches ``ocr_confidence`` to every word.
+
+    The ``conf`` strings are expected to come back as floats, and the
+    negative entry as None.
+    """
+    # Column-oriented input: index i of every list describes word i.
+    tesseract_output = {
+        "level": [5, 5, 5],
+        "text": ["Open", "New", "Ignored"],
+        "conf": ["95.5", "73", "-1"],
+        "block_num": [1, 1, 1],
+        "par_num": [1, 1, 1],
+        "line_num": [1, 1, 1],
+        "left": [10, 30, 60],
+        "top": [20, 20, 20],
+        "width": [10, 20, 30],
+        "height": [10, 10, 10],
+        "word_num": [1, 2, 3],
+    }
+
+    # (text, region, ocr_confidence) expected for each word, in input order.
+    expected_words = [
+        ("Open", Region.from_size(10, 20, 10, 10), 95.5),
+        ("New", Region.from_size(30, 20, 20, 10), 73.0),
+        ("Ignored", Region.from_size(60, 20, 30, 10), None),
+    ]
+    expected_line = [
+        {"text": text, "region": region, "ocr_confidence": conf}
+        for text, region, conf in expected_words
+    ]
+
+    lines = ocr._dict_lines(tesseract_output)
+
+    # All three words share block/par/line numbers, so a single line is expected.
+    assert lines == [expected_line]
+
+
+def test_match_lines_returns_average_ocr_confidence():
+    """Check that a match reports the mean ``ocr_confidence`` of its words.
+
+    The text-similarity ``confidence`` of the match is asserted as well.
+    """
+    first_word = {
+        "text": "Open",
+        "region": Region.from_size(10, 20, 10, 10),
+        "ocr_confidence": 95.5,
+    }
+    second_word = {
+        "text": "New",
+        "region": Region.from_size(30, 20, 20, 10),
+        "ocr_confidence": 73.0,
+    }
+
+    matches = ocr._match_lines([[first_word, second_word]], "Open New", 100)
+
+    # (95.5 + 73.0) / 2 == 84.25; the expected region covers both word boxes.
+    expected = {
+        "text": "Open New",
+        "region": Region.from_size(10, 20, 40, 10),
+        "confidence": 100.0,
+        "ocr_confidence": 84.25,
+    }
+    assert matches == [expected]