Skip to content

Commit aaa68a1

Browse files
committed
Added OCR for MRZ images
1 parent bc13d21 commit aaa68a1

File tree

3 files changed

+308
-1
lines changed

3 files changed

+308
-1
lines changed

examples/official/dcv/main.py

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
import sys
22
import os
3+
4+
# Import utils and RapidOCR before PySide6 to avoid DLL conflicts
5+
try:
6+
from utils import *
7+
except ImportError:
8+
pass
9+
310
import json
411
import csv
512
import threading
@@ -989,12 +996,14 @@ class ProcessingWorker(QThread):
989996
error = Signal(str) # Error message
990997
progress = Signal(str) # Progress message
991998
normalized_image_used = Signal(object) # Signal when normalized image should replace original
999+
ocr_results_ready = Signal(list) # Signal for OCR results from VIZ zone
9921000

9931001
def __init__(self, cvr_instance, file_path, detection_mode="Barcode"):
    """Set up the worker with its capture instance, input file and mode.

    Args:
        cvr_instance: Capture/recognition object stored for later use.
        file_path: Path of the file this worker is asked to process.
        detection_mode: Name of the detection mode; defaults to "Barcode".
    """
    super().__init__()

    # Inputs handed over by the caller.
    self.detection_mode = detection_mode
    self.file_path = file_path
    self.cvr_instance = cvr_instance

    # Placeholder for the image currently being processed (used for OCR).
    self.current_image = None
9981007

9991008
def run(self):
10001009
"""Run detection in background thread."""
@@ -1044,6 +1053,8 @@ def _process_mrz_with_fallback(self, mrz_template):
10441053

10451054
if has_mrz_results:
10461055
self.progress.emit("✅ MRZ detection successful on original image")
1056+
# Run OCR on the VIZ zone
1057+
self._run_ocr_on_image(self.file_path, result_list)
10471058
return mrz_results
10481059

10491060
# Stage 2: No MRZ found, try document normalization + MRZ
@@ -1128,6 +1139,8 @@ def _process_mrz_with_fallback(self, mrz_template):
11281139
self.progress.emit("✅ MRZ detection successful on normalized document!")
11291140
# Emit the normalized image to replace the original
11301141
self.normalized_image_used.emit(cv_image)
1142+
# Run OCR on the normalized image
1143+
self._run_ocr_on_image(cv_image, enhanced_result_list)
11311144
return enhanced_results
11321145
else:
11331146
self.progress.emit(f"⚠️ No MRZ found in normalized document {i+1}")
@@ -1144,6 +1157,56 @@ def _process_mrz_with_fallback(self, mrz_template):
11441157
self.progress.emit(f"❌ Error in MRZ fallback processing: {e}")
11451158
# Fallback to standard processing
11461159
return self.cvr_instance.capture_multi_pages(self.file_path, mrz_template)
1160+
1161+
def _run_ocr_on_image(self, image_source, mrz_result_list):
    """Recognize text in the passport's Visual Inspection Zone (VIZ).

    Status messages are emitted on ``self.progress``. Recognized regions are
    emitted on ``self.ocr_results_ready``; an empty list is emitted instead
    when nothing is recognized or when an exception is raised. When OCR is
    unavailable or the image cannot be loaded, only a progress message is
    emitted.

    Args:
        image_source: Either a file path (str) or an already-loaded
            OpenCV image (numpy array).
        mrz_result_list: MRZ detection results; their text-line items are
            passed to the OCR engine so it can locate the MRZ zone.
    """
    try:
        # Imported lazily so a missing OCR dependency is reported through
        # the error handler below instead of breaking module import.
        from utils import get_passport_ocr, RAPIDOCR_AVAILABLE

        if not RAPIDOCR_AVAILABLE:
            self.progress.emit("⚠️ OCR not available - install rapidocr_onnxruntime for VIZ text recognition")
            return

        self.progress.emit("📝 Running OCR on Visual Inspection Zone...")

        # A string is treated as a path on disk; anything else is used as-is.
        image = cv2.imread(image_source) if isinstance(image_source, str) else image_source

        if image is None:
            self.progress.emit("⚠️ Could not load image for OCR")
            return

        # Gather the MRZ text-line items from every successful result.
        mrz_items = []
        for mrz_result in mrz_result_list:
            if mrz_result.get_error_code() != EnumErrorCode.EC_OK:
                continue
            text_lines = mrz_result.get_recognized_text_lines_result()
            if text_lines:
                mrz_items.extend(text_lines.get_items())

        engine = get_passport_ocr()
        ocr_results = engine.recognize_viz_region(image, mrz_items)

        if not ocr_results:
            self.progress.emit("⚠️ No text found in VIZ region")
            self.ocr_results_ready.emit([])
            return

        self.progress.emit(f"✅ OCR found {len(ocr_results)} text region(s) in VIZ")
        self.ocr_results_ready.emit(ocr_results)

    except Exception as exc:
        # Best effort: OCR failures are reported but never propagate.
        self.progress.emit(f"⚠️ OCR error: {exc}")
        self.ocr_results_ready.emit([])
11471210

11481211
class ImageDisplayWidget(QLabel):
11491212
"""Custom widget for displaying and zooming images with barcode annotations."""
@@ -1649,6 +1712,9 @@ def __init__(self):
16491712
# Face detection variables
16501713
self.current_detected_faces = {} # Store detected faces {page_index: [face_data]}
16511714

1715+
# OCR results for passport VIZ (Visual Inspection Zone)
1716+
self.current_ocr_results = [] # Store OCR results from VIZ text recognition
1717+
16521718
# Camera mode variables
16531719
self.camera_results = [] # Store recent camera detection results
16541720
self.camera_history = [] # Store detection history
@@ -2948,6 +3014,7 @@ def process_current_file(self):
29483014
self.page_results = {}
29493015
self.page_hash_mapping = {}
29503016
self.current_detected_faces = {} # Clear face detection results
3017+
self.current_ocr_results = [] # Clear OCR results
29513018
if self.custom_receiver:
29523019
self.custom_receiver.images.clear()
29533020

@@ -2962,6 +3029,7 @@ def process_current_file(self):
29623029
self.worker.error.connect(self.on_processing_error)
29633030
self.worker.progress.connect(self.log_message)
29643031
self.worker.normalized_image_used.connect(self.on_normalized_image_used)
3032+
self.worker.ocr_results_ready.connect(self.on_ocr_results_ready)
29653033
self.worker.start()
29663034

29673035
def on_normalized_image_used(self, normalized_image):
@@ -2981,6 +3049,21 @@ def on_normalized_image_used(self, normalized_image):
29813049
except Exception as e:
29823050
self.log_message(f"⚠️ Error replacing image with normalized version: {e}")
29833051

3052+
def on_ocr_results_ready(self, ocr_results):
    """Store the VIZ OCR results and log how many regions were received.

    Args:
        ocr_results: Sequence of OCR results delivered by the worker; may be
            empty.
    """
    try:
        self.current_ocr_results = ocr_results

        # The stored results are rendered later, when the page results are
        # displayed; here we only report what arrived.
        if not ocr_results:
            message = "📝 No additional text found in passport VIZ zone"
        else:
            message = f"📝 Received {len(ocr_results)} OCR text region(s) from passport VIZ"
        self.log_message(message)

    except Exception as exc:
        self.log_message(f"⚠️ Error handling OCR results: {exc}")
        self.current_ocr_results = []
3066+
29843067
def on_processing_finished(self, results):
29853068
"""Handle completion of detection processing."""
29863069
try:
@@ -3207,7 +3290,13 @@ def display_current_page(self):
32073290
self.display_mrz_with_faces(cv_image, detection_items)
32083291
else:
32093292
# Standard display for barcodes and MRZ without face detection
3210-
self.image_widget.set_image(cv_image, detection_items)
3293+
if mode_name == "MRZ":
3294+
annotated_image = cv_image.copy()
3295+
# Draw OCR annotations
3296+
self.draw_ocr_annotations(annotated_image)
3297+
self.image_widget.set_image(annotated_image, detection_items)
3298+
else:
3299+
self.image_widget.set_image(cv_image, detection_items)
32113300

32123301
self.display_page_results()
32133302

@@ -3372,6 +3461,18 @@ def save_normalized_document(self):
33723461
self.log_message(f"❌ Save error: {e}")
33733462
QMessageBox.critical(self, "Save Error", f"Failed to save normalized document: {e}")
33743463

3464+
def draw_ocr_annotations(self, image):
    """Outline every stored OCR region on ``image`` and return it.

    Args:
        image: Image to draw on; it is modified in place.

    Returns:
        The same ``image`` object that was passed in.
    """
    regions = getattr(self, 'current_ocr_results', None)
    if not regions:
        return image

    for region in regions:
        if not hasattr(region, 'bbox'):
            continue
        # Polygon corners as integer pixel coordinates.
        corners = np.array([[int(pt[0]), int(pt[1])] for pt in region.bbox], np.int32)
        # Closed outline, 2 px thick, colour (255, 255, 0) - cyan in BGR order.
        cv2.polylines(image, [corners], True, (255, 255, 0), 2)

    return image
3475+
33753476
def display_mrz_with_faces(self, cv_image, detection_items):
33763477
"""Display MRZ results with face detection annotations."""
33773478
try:
@@ -3389,6 +3490,9 @@ def display_mrz_with_faces(self, cv_image, detection_items):
33893490
pts = np.array([[int(p.x), int(p.y)] for p in points], np.int32)
33903491
cv2.polylines(annotated_image, [pts], True, (0, 165, 255), 2) # Orange for MRZ
33913492

3493+
# Draw OCR annotations if available
3494+
self.draw_ocr_annotations(annotated_image)
3495+
33923496
# Perform face detection
33933497
if face_detector:
33943498
faces = face_detector.detect_and_crop_faces(cv_image, min_confidence=0.8)
@@ -3668,6 +3772,39 @@ def _format_mrz_results(self, items):
36683772

36693773
html_content += '</div>'
36703774

3775+
# Add OCR results from VIZ (Visual Inspection Zone) if available
3776+
if hasattr(self, 'current_ocr_results') and self.current_ocr_results:
3777+
html_content += self._format_ocr_results(self.current_ocr_results)
3778+
3779+
return html_content
3780+
3781+
def _format_ocr_results(self, ocr_results):
    """Format OCR results from the Visual Inspection Zone as an HTML fragment.

    Args:
        ocr_results: Sequence of OCR result objects. Each one is accessed
            through ``text``, ``confidence`` and ``get_top_y()``.

    Returns:
        An HTML string with one entry per result, ordered by ``get_top_y()``
        ascending, or an empty string when there are no results.
    """
    # Local import keeps this method self-contained (no module-level change).
    from html import escape

    if not ocr_results:
        return ""

    parts = [
        '<div style="margin: 15px 0; padding: 10px; background-color: #e8f4f8; border-left: 4px solid #17a2b8;">',
        '<h4 style="color: #0c5460; margin: 0 0 10px 0;">📝 Visual Inspection Zone (VIZ) - OCR Results</h4>',
        '<p style="margin: 0 0 10px 0; color: #666; font-size: 11px;">Text recognized above the MRZ zone using RapidOCR</p>',
    ]

    # Sort results by Y position (top to bottom)
    ordered = sorted(ocr_results, key=lambda item: item.get_top_y())

    for index, result in enumerate(ordered, 1):
        confidence = result.confidence
        if confidence > 0.8:
            confidence_color = "#28a745"
        elif confidence > 0.5:
            confidence_color = "#ffc107"
        else:
            confidence_color = "#dc3545"
        confidence_pct = int(confidence * 100)

        # OCR output is arbitrary text: escape '&' as well as '<' and '>' so
        # it cannot be interpreted as markup or as an HTML entity. Quotes are
        # left alone because the text is placed in element content only.
        escaped_text = escape(result.text, quote=False)

        parts.append('<div style="margin: 5px 0; padding: 8px; background-color: #fff; border: 1px solid #dee2e6; border-radius: 3px;">')
        parts.append(f'<span style="color: #495057; font-weight: bold;">#{index}</span> ')
        parts.append(f'<span style="font-family: monospace; color: #212529;">{escaped_text}</span> ')
        parts.append(f'<span style="color: {confidence_color}; font-size: 10px;">({confidence_pct}%)</span>')
        parts.append('</div>')

    parts.append('</div>')

    return "".join(parts)
36723809

36733810
def calculate_polygon_area(self, points):

examples/official/dcv/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ facenet-pytorch>=2.5.0
1515
torch>=1.11.0
1616
torchvision>=0.12.0
1717

18+
# OCR for passport VIZ (Visual Inspection Zone) text recognition
19+
rapidocr_onnxruntime>=1.3.0
20+
1821
# System monitoring (optional)
1922
psutil>=5.9.0
2023

0 commit comments

Comments
 (0)