Skip to content

Commit c209a35

Browse files
committed
Improve image ocr
1 parent af95e4a commit c209a35

2 files changed

Lines changed: 45 additions & 25 deletions

File tree

src/bigocrpdf/services/screen_capture.py

Lines changed: 33 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -262,13 +262,19 @@ def _run_flameshot(self, cmd: list[str], temp_path: str) -> bool:
262262
Returns:
263263
True (flameshot was executed, regardless of success/cancel).
264264
"""
265-
result = subprocess.run(cmd, capture_output=True, timeout=60)
266-
if result.returncode == 0 and result.stdout:
265+
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
266+
try:
267+
stdout, stderr = proc.communicate(timeout=60)
268+
except subprocess.TimeoutExpired:
269+
proc.kill()
270+
proc.communicate()
271+
return True
272+
if proc.returncode == 0 and stdout:
267273
with open(temp_path, "wb") as f:
268-
f.write(result.stdout)
274+
f.write(stdout)
269275
else:
270276
logger.debug(
271-
f"Flameshot exited with code {result.returncode}: {result.stderr.decode().strip()}"
277+
f"Flameshot exited with code {proc.returncode}: {stderr.decode().strip()}"
272278
)
273279
return True
274280

@@ -285,11 +291,17 @@ def _run_standard_tool(self, cmd: list[str]) -> bool | None:
285291
if subprocess.call(["which", tool_name], stdout=subprocess.DEVNULL) != 0:
286292
return None
287293

288-
result = subprocess.run(cmd, capture_output=True, timeout=60)
289-
if result.returncode != 0:
294+
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
295+
try:
296+
_stdout, stderr = proc.communicate(timeout=60)
297+
except subprocess.TimeoutExpired:
298+
proc.kill()
299+
proc.communicate()
300+
return True
301+
if proc.returncode != 0:
290302
logger.debug(
291-
f"{tool_name} exited with code {result.returncode}: "
292-
f"{result.stderr.decode().strip()}"
303+
f"{tool_name} exited with code {proc.returncode}: "
304+
f"{stderr.decode().strip()}"
293305
)
294306
return True
295307

@@ -341,15 +353,25 @@ def extract_text_from_image(
341353
cmd = self._build_ocr_command(temp_img_path, config)
342354
logger.info(f"Running image OCR: language={language}")
343355

344-
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
356+
proc = subprocess.Popen(
357+
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
358+
)
359+
try:
360+
stdout, stderr = proc.communicate(timeout=120)
361+
except subprocess.TimeoutExpired:
362+
proc.kill()
363+
proc.communicate()
364+
logger.error("OCR processing timed out")
365+
self._invoke_callback(None, _("OCR processing timed out."))
366+
return None
345367

346368
if proc.returncode != 0:
347-
logger.error(f"OCR subprocess failed: {proc.stderr}")
369+
logger.error(f"OCR subprocess failed: {stderr}")
348370
self._invoke_callback(None, _("OCR processing failed."))
349371
return None
350372

351373
# Parse OCR results
352-
results = self._parse_ocr_results(proc.stdout)
374+
results = self._parse_ocr_results(stdout)
353375
if not results:
354376
return _("No text found in the image")
355377

@@ -366,10 +388,6 @@ def extract_text_from_image(
366388
None, _("OCR engine not available. Please check your installation.")
367389
)
368390
return None
369-
except subprocess.TimeoutExpired:
370-
logger.error("OCR processing timed out")
371-
self._invoke_callback(None, _("OCR processing timed out."))
372-
return None
373391
except (OSError, subprocess.SubprocessError, ValueError) as e:
374392
logger.error(f"OCR processing error: {e}")
375393
return None

src/bigocrpdf/utils/temp_manager.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import shutil
1212
import signal
1313
import tempfile
14+
import threading
1415
from pathlib import Path
1516

1617
from bigocrpdf.utils.logger import logger
@@ -39,18 +40,19 @@ def _register_cleanup() -> None:
3940

4041
atexit.register(cleanup_all)
4142

42-
# Also handle SIGTERM (e.g. system shutdown, kill <pid>)
43-
prev_handler = signal.getsignal(signal.SIGTERM)
43+
# SIGTERM handler can only be set from the main thread
44+
if threading.current_thread() is threading.main_thread():
45+
prev_handler = signal.getsignal(signal.SIGTERM)
4446

45-
def _on_sigterm(signum, frame):
46-
cleanup_all()
47-
# Chain to previous handler
48-
if callable(prev_handler):
49-
prev_handler(signum, frame)
50-
else:
51-
raise SystemExit(1)
47+
def _on_sigterm(signum, frame):
48+
cleanup_all()
49+
# Chain to previous handler
50+
if callable(prev_handler):
51+
prev_handler(signum, frame)
52+
else:
53+
raise SystemExit(1)
5254

53-
signal.signal(signal.SIGTERM, _on_sigterm)
55+
signal.signal(signal.SIGTERM, _on_sigterm)
5456

5557

5658
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)