Skip to content

Commit 049fec9

Browse files
committed
Added document detection
1 parent 2b5622b commit 049fec9

1 file changed

Lines changed: 134 additions & 83 deletions

File tree

examples/official/mrz_scanner_gui.py

Lines changed: 134 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,42 @@
3434
pass
3535

3636

37+
# ==============================================================================
38+
# Helpers
39+
# ==============================================================================
40+
41+
def get_document_quad(result: "CapturedResult") -> Optional["Quadrilateral"]:
    """Return the location of the first detected document quad, if any.

    Args:
        result: Captured result queried via ``get_processed_document_result()``.

    Returns:
        The value of ``get_location()`` on the first detected quad result
        item, or ``None`` when there is no processed document result or it
        holds no detected quad items.
    """
    processed_document_result = result.get_processed_document_result()
    if processed_document_result is None:
        return None

    # Query the item list once; an empty (or missing) list means no document
    # boundary was detected.
    items = processed_document_result.get_detected_quad_result_items()
    if not items:
        return None
    return items[0].get_location()
51+
52+
def save_processed_document_result(result: "CapturedResult", page_number: int, output_dir: str) -> bool:
    """Save the first enhanced document image of ``result`` as a PNG file.

    The file is written to ``<output_dir>/document_page_<page_number>.png``.

    Args:
        result: Captured result queried via ``get_processed_document_result()``.
        page_number: Number used in the output file name and in log messages.
        output_dir: Directory the PNG file is written into.

    Returns:
        ``True`` when ``ImageIO.save_to_file`` reports error code 0; ``False``
        when there is no processed document result, no enhanced image item,
        no image data, or the save reports a non-zero error code.
    """
    processed_document_result = result.get_processed_document_result()
    items = (
        processed_document_result.get_enhanced_image_result_items()
        if processed_document_result is not None
        else None
    )
    if not items:
        print("Page-" + str(page_number), "No processed document result found.")
        return False

    image = items[0].get_image_data()
    if image is None:
        # This path used to fall through and return None without any message;
        # report it and return an explicit False like every other failure.
        print("Page-" + str(page_number), "Processed document has no image data.")
        return False

    out_path = os.path.join(output_dir, f"document_page_{page_number}.png")
    error_code, error_msg = ImageIO().save_to_file(image, out_path)
    if error_code != 0:
        print("Save processed document failed, error:", error_code, error_msg)
        return False

    print("Document file: " + out_path)
    return True
71+
72+
3773
# ==============================================================================
3874
# Data Classes and Helper Classes
3975
# ==============================================================================
@@ -203,7 +239,7 @@ def to_mrz_result(self, portrait_zone=None, mrz_locations=None) -> MRZResult:
203239
class CameraCaptureThread(QThread):
204240
"""Thread for capturing camera frames and processing MRZ."""
205241

206-
frame_ready = Signal(np.ndarray, list) # frame, list of MRZResult
242+
frame_ready = Signal(np.ndarray, list, object, object) # frame, list of MRZResult, doc_quad, captured_result
207243
error_occurred = Signal(str)
208244

209245
def __init__(self, cvr: CaptureVisionRouter, irm, camera_index: int = 0):
@@ -241,10 +277,10 @@ def run(self):
241277
self.irr.clear()
242278

243279
# Process the frame
244-
mrz_results = self._process_frame(frame)
280+
mrz_results, doc_quad, captured_result = self._process_frame(frame)
245281

246282
# Emit the frame and results
247-
self.frame_ready.emit(frame.copy(), mrz_results)
283+
self.frame_ready.emit(frame.copy(), mrz_results, doc_quad, captured_result)
248284

249285
cap.release()
250286

@@ -253,23 +289,30 @@ def run(self):
253289
self.irm.remove_result_receiver(self.irr)
254290
self.irr = None
255291

256-
def _process_frame(self, frame: np.ndarray) -> List[MRZResult]:
292+
def _process_frame(self, frame: np.ndarray) -> Tuple[List[MRZResult], Optional['Quadrilateral'], Optional[CapturedResult]]:
257293
"""Process a single frame and return MRZ results."""
258294
results = []
295+
doc_quad = None
296+
captured_result = None
259297

260298
try:
261299
# Convert frame to ImageData format for the SDK
262300
height, width = frame.shape[:2]
263301

264302
# Capture using the SDK
265303
result = self.cvr.capture(frame, "ReadPassportAndId")
304+
captured_result = result
266305

267306
if result is None:
268-
return results
307+
return results, doc_quad, captured_result
269308

270309
parsed_result = result.get_parsed_result()
310+
311+
# Get document edge
312+
doc_quad = get_document_quad(result)
313+
271314
if parsed_result is None:
272-
return results
315+
return results, doc_quad, captured_result
273316

274317
# Get locations from recognized text lines
275318
mrz_locations = []
@@ -291,7 +334,7 @@ def _process_frame(self, frame: np.ndarray) -> List[MRZResult]:
291334
except Exception as e:
292335
print(f"Error processing frame: {e}")
293336

294-
return results
337+
return results, doc_quad, captured_result
295338

296339
def stop(self):
297340
self.running = False
@@ -307,24 +350,21 @@ class ImageDisplayWidget(QLabel):
307350

308351
def __init__(self, parent=None):
309352
super().__init__(parent)
310-
self.setMinimumSize(640, 480)
311-
self.setAlignment(Qt.AlignCenter)
312-
self.setStyleSheet("background-color: #2d2d2d; border: 2px dashed #555;")
313-
self.setText("Drop image here or use Load button")
314-
315-
self.current_image: Optional[np.ndarray] = None
316-
self.mrz_results: List[MRZResult] = []
353+
self.current_image = None
354+
self.mrz_results = []
355+
self.doc_quad: Optional['Quadrilateral'] = None
317356
self.scale_factor = 1.0
318357
self.offset_x = 0
319358
self.offset_y = 0
320359

321360
# Enable drag and drop
322361
self.setAcceptDrops(True)
323362

324-
def set_image(self, image: np.ndarray, mrz_results: List[MRZResult] = None):
363+
def set_image(self, image: np.ndarray, mrz_results: List[MRZResult] = None, doc_quad = None):
325364
"""Set the image and optionally MRZ results to display."""
326365
self.current_image = image
327366
self.mrz_results = mrz_results or []
367+
self.doc_quad = doc_quad
328368
self._update_display()
329369

330370
def _update_display(self):
@@ -359,10 +399,19 @@ def _update_display(self):
359399
self.offset_y = (widget_size.height() - scaled_pixmap.height()) // 2
360400

361401
# Draw overlays on the pixmap
362-
if self.mrz_results:
402+
if self.mrz_results or self.doc_quad:
363403
painter = QPainter(scaled_pixmap)
364404
painter.setRenderHint(QPainter.Antialiasing)
365405

406+
# Draw Document Quad
407+
if self.doc_quad:
408+
self._draw_quadrilateral(
409+
painter,
410+
self.doc_quad,
411+
QColor(0, 0, 255, 200), # Blue
412+
"Document"
413+
)
414+
366415
for result in self.mrz_results:
367416
# Draw MRZ locations
368417
if result.mrz_locations:
@@ -421,6 +470,7 @@ def clear_display(self):
421470
"""Clear the current display."""
422471
self.current_image = None
423472
self.mrz_results = []
473+
self.doc_quad = None
424474
self.setPixmap(QPixmap())
425475
self.setText("Drop image here or use Load button")
426476

@@ -482,6 +532,13 @@ def __init__(self):
482532
self.file_list: List[str] = []
483533
self.current_file_index = 0
484534

535+
# Store current CapturedResult for export
536+
self.current_captured_result: Optional[CapturedResult] = None
537+
538+
# Setup UI
539+
self._setup_ui()
540+
self.current_file_index = 0
541+
485542
# Setup UI
486543
self._setup_ui()
487544

@@ -554,6 +611,11 @@ def _setup_ui(self):
554611

555612
# Action buttons
556613
button_layout = QHBoxLayout()
614+
615+
self.export_btn = QPushButton("Export Passport")
616+
self.export_btn.clicked.connect(self._on_export_clicked)
617+
self.export_btn.setEnabled(False)
618+
button_layout.addWidget(self.export_btn)
557619

558620
self.load_btn = QPushButton("Load File/Folder")
559621
self.load_btn.clicked.connect(self._on_load_clicked)
@@ -668,8 +730,12 @@ def _on_source_changed(self, source: str):
668730
# Update load button text
669731
if is_folder:
670732
self.load_btn.setText("Load Folder")
671-
else:
733+
self.export_btn.setEnabled(self.current_captured_result is not None)
734+
elif not is_camera:
672735
self.load_btn.setText("Load File")
736+
self.export_btn.setEnabled(self.current_captured_result is not None)
737+
else:
738+
self._update_export_button_state()
673739

674740
def _on_load_clicked(self):
675741
"""Handle load button click."""
@@ -763,7 +829,36 @@ def handle_dropped_files(self, paths: List[str]):
763829
# Multiple files - treat as folder mode
764830
self.source_combo.setCurrentText("Image Folder")
765831
self._load_file_list(image_files)
832+
833+
def _update_export_button_state(self):
    """Enable the export button only when a captured result can be exported.

    For non-camera sources the button follows whether a captured result is
    stored. For the "Camera" source it additionally requires that no camera
    thread is currently running.
    """
    camera_selected = self.source_combo.currentText() == "Camera"
    result_available = self.current_captured_result is not None

    if not camera_selected:
        # File / folder sources: a stored result is all that is needed.
        self.export_btn.setEnabled(result_available)
        return

    # Camera source: exporting is only allowed once capture has stopped.
    thread = self.camera_thread
    camera_idle = thread is None or not thread.isRunning()
    self.export_btn.setEnabled(camera_idle and result_available)
845+
846+
def _on_export_clicked(self):
    """Ask for an output directory and save the current document image there.

    Does nothing when no captured result is stored or when the directory
    dialog returns an empty selection. Otherwise reports the outcome of
    ``save_processed_document_result`` in a message box.
    """
    if self.current_captured_result is None:
        return

    # An empty string from the dialog means no directory was chosen.
    chosen_dir = QFileDialog.getExistingDirectory(self, "Select Output Directory")
    if not chosen_dir:
        return

    saved = save_processed_document_result(self.current_captured_result, 0, chosen_dir)
    if not saved:
        QMessageBox.warning(self, "Export", "Failed to save passport image. No document result found.")
        return

    QMessageBox.information(self, "Export", "Passport image saved successfully.")
861+
767862
def _load_folder(self, folder_path: str):
768863
"""Load all images from a folder."""
769864
image_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif', '.pdf'}
@@ -837,6 +932,7 @@ def _start_camera(self):
837932

838933
self.start_stop_btn.setText("Stop Camera")
839934
self.statusBar().showMessage("Camera started")
935+
self._update_export_button_state()
840936

841937
def _stop_camera(self):
842938
"""Stop the camera capture."""
@@ -846,64 +942,47 @@ def _stop_camera(self):
846942

847943
self.start_stop_btn.setText("Start Camera")
848944
self.statusBar().showMessage("Camera stopped")
945+
self._update_export_button_state()
849946

850-
def _on_camera_frame(self, frame: np.ndarray, results: List[MRZResult], doc_quad, captured_result):
    """Show a camera frame with its MRZ results and document quad.

    Also stores ``captured_result`` as the current captured result, which
    the export action reads.
    """
    # Keep the most recent capture around for the export action.
    self.current_captured_result = captured_result

    self.image_display.set_image(frame, mrz_results=results, doc_quad=doc_quad)
    self._update_results_display(results)
854952

855953
def _on_camera_error(self, error: str):
    """Stop the camera and show the error text in the status bar.

    Args:
        error: Error description to display.
    """
    # Stop first: _stop_camera() writes "Camera stopped" to the status bar,
    # so the error text has to be shown afterwards or it is overwritten.
    self._stop_camera()
    self.statusBar().showMessage(f"Camera error: {error}")
859-
957+
860958
def _process_image_file(self, file_path: str):
861-
"""Process an image file."""
862-
if not os.path.exists(file_path):
863-
self.statusBar().showMessage(f"File not found: {file_path}")
864-
return
865-
866-
# Read image
867-
image = cv2.imread(file_path)
868-
if image is None:
869-
# Try using the SDK for PDF or other formats
870-
try:
871-
result_array = self.cvr.capture_multi_pages(file_path, "ReadPassportAndId")
872-
results = result_array.get_results()
873-
if results:
874-
# Process first page
875-
result = results[0]
876-
self._process_captured_result(result, file_path)
877-
return
878-
except Exception as e:
879-
self.statusBar().showMessage(f"Failed to load image: {file_path}")
959+
"""Process a single image file."""
960+
try:
961+
# Read the image first to display it quickly
962+
image = cv2.imread(file_path)
963+
if image is None:
964+
self.statusBar().showMessage("Failed to load image")
880965
return
881-
else:
882-
self._process_image(image, file_path)
883966

884-
def _process_image(self, image: np.ndarray, source_name: str):
885-
"""Process an image and display results."""
886-
self.statusBar().showMessage(f"Processing: {source_name}")
887-
888-
# Clear previous intermediate results
889-
self.irr.clear()
890-
891-
try:
892-
# Capture using the SDK
893-
result = self.cvr.capture(image, "ReadPassportAndId")
967+
# Capture and process the image
968+
result = self.cvr.capture(file_path, "ReadPassportAndId")
969+
self.current_captured_result = result
894970

895971
if result is None:
896972
self.image_display.set_image(image, [])
897973
self._update_results_display([])
898974
self.statusBar().showMessage("No MRZ detected")
975+
self._update_export_button_state()
899976
return
900977

901978
# Process the result
902979
mrz_results = self._extract_mrz_results(result)
980+
doc_quad = get_document_quad(result)
903981

904982
# Update display
905-
self.image_display.set_image(image, mrz_results)
983+
self.image_display.set_image(image, mrz_results, doc_quad)
906984
self._update_results_display(mrz_results)
985+
self._update_export_button_state()
907986

908987
if mrz_results:
909988
self.statusBar().showMessage(f"Found {len(mrz_results)} MRZ zone(s)")
@@ -913,36 +992,8 @@ def _process_image(self, image: np.ndarray, source_name: str):
913992
except Exception as e:
914993
self.statusBar().showMessage(f"Error processing image: {str(e)}")
915994
self.image_display.set_image(image, [])
916-
917-
def _process_captured_result(self, result: CapturedResult, file_path: str):
918-
"""Process a CapturedResult from multi-page capture."""
919-
mrz_results = self._extract_mrz_results(result)
920-
921-
# Try to get original image
922-
original_image = None
923-
for item in result.get_items():
924-
if isinstance(item, OriginalImageResultItem):
925-
img_data = item.get_image_data()
926-
if img_data:
927-
# Convert ImageData to numpy array
928-
# This is a simplified conversion - actual implementation may vary
929-
original_image = cv2.imread(file_path)
930-
break
931-
932-
if original_image is not None:
933-
self.image_display.set_image(original_image, mrz_results)
934-
else:
935-
# If we can't get the original image, just load it directly
936-
image = cv2.imread(file_path)
937-
if image is not None:
938-
self.image_display.set_image(image, mrz_results)
939-
940-
self._update_results_display(mrz_results)
941-
942-
if mrz_results:
943-
self.statusBar().showMessage(f"Found {len(mrz_results)} MRZ zone(s)")
944-
else:
945-
self.statusBar().showMessage("No MRZ detected")
995+
self.current_captured_result = None
996+
self._update_export_button_state()
946997

947998
def _extract_mrz_results(self, result: CapturedResult) -> List[MRZResult]:
948999
"""Extract MRZ results from a CapturedResult."""

0 commit comments

Comments
 (0)