diff --git a/deploy/config.py b/deploy/config.py
index 817f2abfc..b3649689b 100644
--- a/deploy/config.py
+++ b/deploy/config.py
@@ -38,6 +38,7 @@ class ConfigModel:
     StartOcrServer: bool = False
     OcrServerPort: int = 22268
     OcrClientAddress: str = "127.0.0.1:22268"
+    OcrEngine: str = "ppocr"
 
     # Update
     EnableReload: bool = True
diff --git a/deploy/template b/deploy/template
index 54f49c98c..a801ee6f0 100644
--- a/deploy/template
+++ b/deploy/template
@@ -80,6 +80,10 @@ Deploy:
     # Address of ocr server for alas instance to connect
     # [Default] 127.0.0.1:22268
     OcrClientAddress: 127.0.0.1:22268
+    # Specify the OCR engine/backend to be used for text recognition
+    # Supported options: ppocr, onnxocr
+    # [Default] ppocr
+    OcrEngine: ppocr
 
   Update:
     # Use auto update and builtin updater feature
diff --git a/module/ocr/models.py b/module/ocr/models.py
index 5005fe781..5a1e0be82 100644
--- a/module/ocr/models.py
+++ b/module/ocr/models.py
@@ -1,11 +1,24 @@
 from module.base.decorator import cached_property
-from module.ocr.ppocr import TextSystem
+from module.server.setting import State
 
 
 class OcrModel:
     @cached_property
     def ch(self):
-        return TextSystem()
+        """Build the OCR backend selected by ``Deploy.OcrEngine``."""
+        # Backends are imported lazily so only the selected one is loaded.
+        # An unsupported value fails loudly instead of returning None.
+        engine = str(State.deploy_config.OcrEngine).strip().lower()
+        if "ppocr" in engine:
+            from module.ocr.ppocr import TextSystem
+            return TextSystem()
+        if "onnxocr" in engine:
+            from module.ocr.oxocr import ONNXPaddleOcr
+            return ONNXPaddleOcr()
+        raise ValueError(
+            f"Unsupported OcrEngine {State.deploy_config.OcrEngine!r}, "
+            "expected 'ppocr' or 'onnxocr'"
+        )
 
 
 OCR_MODEL = OcrModel()
@@ -17,7 +30,7 @@ def ch(self):
     import cv2
     import time
     from memory_profiler import profile
-    image = cv2.imread(r"E:\Project\OnmyojiAutoScript-assets\jade.png")
+    image = cv2.imread(r"D:\2025-10-08_01-32-05-371817.png")
 
     # 引入ocr 会导致非常巨大的内存开销
     @profile
diff --git a/module/ocr/oxocr.py b/module/ocr/oxocr.py
new file mode 100644
index 000000000..ad90bc478
--- /dev/null
+++ b/module/ocr/oxocr.py
@@ -0,0 +1,146 @@
+"""OCR backend built on the ``onnxocr`` package."""
+import base64
+import pickle
+from typing import List
+
+import cv2
+import numpy as np
+from onnxocr import onnx_paddleocr
+
+
+class BoxedResult(object):
+    """One recognized text region: box, source image, text and score."""
+
+    box: np.ndarray
+    text_img: np.ndarray
+    ocr_text: str
+    score: float
+
+    def __init__(self, box, text_img, ocr_text, score):
+        self.box = box
+        self.text_img = text_img
+        self.ocr_text = ocr_text
+        self.score = score
+
+    def __str__(self):
+        return 'BoxedResult[%s, %s]' % (self.ocr_text, self.score)
+
+    def __repr__(self):
+        return self.__str__()
+
+    def to_dict(self):
+        """Convert BoxedResult to a serializable dictionary"""
+        return {
+            'box': self.box.tolist(),
+            'text_img': base64.b64encode(pickle.dumps(self.text_img)).decode(
+                'utf-8') if self.text_img is not None else None,
+            'ocr_text': self.ocr_text,
+            'score': self.score
+        }
+
+    @classmethod
+    def from_dict(cls, data):
+        """Create BoxedResult from a dictionary produced by :meth:`to_dict`."""
+        text_img = None
+        if data['text_img'] is not None:
+            # SECURITY: pickle.loads executes arbitrary code on crafted input;
+            # only pass dictionaries that come from a trusted source.
+            text_img = pickle.loads(base64.b64decode(data['text_img'].encode('utf-8')))
+
+        return cls(
+            box=np.array(data['box']),
+            text_img=text_img,
+            ocr_text=data['ocr_text'],
+            score=data['score']
+        )
+
+
+class ONNXPaddleOcr(onnx_paddleocr.ONNXPaddleOcr):
+    """``onnxocr`` engine with detection and line-recognition helpers."""
+
+    def __init__(self,
+                 use_gpu=False,
+                 gpu_mem=500,
+                 gpu_id=0,
+                 use_tensorrt=False,
+                 precision="fp32",
+                 drop_score=0.5,
+                 use_angle_cls=True,
+                 ):
+        super().__init__(
+            use_gpu=use_gpu,
+            gpu_mem=gpu_mem,
+            gpu_id=gpu_id,
+            use_tensorrt=use_tensorrt,
+            precision=precision,
+            drop_score=drop_score,
+            use_angle_cls=use_angle_cls,
+        )
+
+    @staticmethod
+    def _prepare_ocr_image(img: np.ndarray, use_grayscale: bool=True) -> np.ndarray:
+        """
+        Convert the image to grayscale replicated over three channels.
+
+        :param img: 2-D image, 3-D single-channel image, or an image converted with COLOR_RGB2GRAY.
+        :param use_grayscale: If False, return ``img`` unchanged.
+        :return: The converted image.
+        """
+        if not use_grayscale:
+            print("Using original image for single line OCR")
+            return img
+        if img.ndim == 2:
+            return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+        if img.ndim == 3 and img.shape[2] == 1:
+            return cv2.cvtColor(img[:, :, 0], cv2.COLOR_GRAY2RGB)
+
+        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+        return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
+
+    def detect_and_ocr(self, img: np.ndarray, drop_score=None):
+        """
+        Detect text boxes and recognize text from the image.
+
+        :param img: Input image in RGB format.
+        :param drop_score: Minimum score to keep the recognized text. If None, uses the object's drop_score.
+        :return: List of BoxedResult with the detected box, recognized text and score. ``text_img`` is the
+            whole prepared (grayscale) image, not a crop of the box.
+        """
+        img = self._prepare_ocr_image(img)
+        rec_res = self.ocr(img, det=True, rec=True, cls=True)
+        # Nothing recognized: no result list, or an empty/None entry for this image.
+        if not rec_res or not rec_res[0]:
+            return []
+        rec_res = rec_res[0]
+        res = []
+
+        if drop_score is None:
+            drop_score = self.drop_score
+
+        for box, rec_result in rec_res:
+            text, score = rec_result
+            if score >= drop_score:
+                if not isinstance(box, np.ndarray):
+                    box = np.array(box)
+                res.append(BoxedResult(box, img, text, score))
+        return res
+
+    def ocr_lines(self, img_list: List[np.ndarray]):
+        """Run the text recognizer on each image; images with height / width >= 1.5 are rotated first."""
+        tmp_img_list = []
+        for img in img_list:
+            img = self._prepare_ocr_image(img)
+            img_height, img_width = img.shape[0:2]
+            if img_height * 1.0 / img_width >= 1.5:
+                img = np.rot90(img)
+            tmp_img_list.append(img)
+
+        rec_res = self.text_recognizer(tmp_img_list)
+        return rec_res
+
+    def ocr_single_line(self, img):
+        """Recognize one image with :meth:`ocr_lines`; return its first result, or None if there is none."""
+        res = self.ocr_lines([img])
+        if res:
+            return res[0]
+        return None
diff --git a/requirements-in.txt b/requirements-in.txt
index 13bae5347..ce817087e 100644
--- a/requirements-in.txt
+++ b/requirements-in.txt
@@ -13,6 +13,7 @@ pywin32==306
 # OCR
 ppocr-onnx==0.0.3.9
 cn2an==0.5.23
+onnxocr==2025.5
 
 # Web
 paho-mqtt==1.6.1
diff --git a/requirements.txt b/requirements.txt
index 9882d0017..cf7a991cc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,8 @@
 #
 #    pip-compile --annotation-style=line --output-file=requirements.txt requirements-in.txt
 #
+--index-url https://pypi.tuna.tsinghua.edu.cn/simple
+
 adbutils==0.11.0          # via -r requirements-in.txt, uiautomator2
 annotated-types==0.7.0    # via pydantic
 anyio==3.7.1              # via fastapi, starlette
@@ -41,15 +43,19 @@ markdown-it-py==2.2.0     # via rich
 mdurl==0.1.2              # via markdown-it-py
 mpmath==1.3.0             # via sympy
 msgpack==1.0.7            # via zerorpc
-numpy==1.24.3             # via -r requirements-in.txt, onnxruntime, opencv-python, ppocr-onnx, shapely
+numpy==1.24.3             # via -r requirements-in.txt, onnxocr, onnxruntime, opencv-contrib-python, opencv-python, opencv-python-headless, ppocr-onnx, shapely
 oas-checkin-biggod==0.0.1  # via -r requirements-in.txt
 oashya==0.0.7             # via -r requirements-in.txt
 onepush==1.3.0            # via -r requirements-in.txt
-onnxruntime==1.16.3       # via ppocr-onnx
+onnxocr==2025.5           # via -r requirements-in.txt
+onnxruntime==1.16.3       # via onnxocr, ppocr-onnx
+opencv-contrib-python==4.11.0.86  # via onnxocr
 opencv-python==4.7.0.72   # via -r requirements-in.txt, ppocr-onnx
+opencv-python-headless==4.11.0.86  # via onnxocr
 packaging==20.9           # via deprecation, onnxruntime, uiautomator2
 paho-mqtt==1.6.1          # via -r requirements-in.txt
-pillow==10.2.0            # via ppocr-onnx, uiautomator2
+pdf2image==1.17.0         # via onnxocr
+pillow==10.2.0            # via pdf2image, ppocr-onnx, uiautomator2
 ppocr-onnx==0.0.3.9       # via -r requirements-in.txt
 proces==0.1.7             # via cn2an
 progress==1.6             # via uiautomator2
@@ -57,13 +63,14 @@ prompt-toolkit==3.0.52    # via frida-tools
 protobuf==4.25.1          # via onnxruntime
 psutil==6.1.1             # via -r requirements-in.txt
 py==1.11.0                # via retry
-pyclipper==1.3.0.post5    # via ppocr-onnx
+pyclipper==1.3.0.post5    # via onnxocr, ppocr-onnx
 pycparser==2.21           # via cffi
 pycryptodome==3.21.0      # via onepush
 pydantic==2.10.0          # via -r requirements-in.txt, fastapi
 pydantic-core==2.27.0     # via pydantic
 pyelftools==0.30          # via apkutils2
 pygments==2.17.2          # via frida-tools, rich
+pymupdf==1.27.2           # via onnxocr
 pyparsing==3.1.1          # via packaging
 pyreadline3==3.4.1        # via humanfriendly
 pywin32==306              # via -r requirements-in.txt
@@ -72,7 +79,7 @@ pyzmq==25.1.2             # via zerorpc
 requests==2.31.0          # via adbutils, onepush, ppocr-onnx, uiautomator2
 retry==0.9.2              # via adbutils, uiautomator2
 rich==13.3.5              # via -r requirements-in.txt
-shapely==2.0.2            # via ppocr-onnx
+shapely==2.0.2            # via onnxocr, ppocr-onnx
 six==1.16.0               # via adbutils, anytree, uiautomator2
 sniffio==1.3.0            # via anyio
 starlette==0.27.0         # via fastapi
diff --git a/tasks/SixRealms/oas_ocr.py b/tasks/SixRealms/oas_ocr.py
index 3a58c81f5..f31b9abf9 100644
--- a/tasks/SixRealms/oas_ocr.py
+++ b/tasks/SixRealms/oas_ocr.py
@@ -7,6 +7,7 @@
 from module.base.utils import color_similarity_2d, load_image
 # from module.ocr.ocr import Ocr
 from module.ocr.base_ocr import BaseCor
+from module.ocr.ppocr import TextSystem
 
 
 
@@ -33,6 +34,10 @@ def rotate_image(image):
         return image
 
     def detect_and_ocr(self, *args, **kwargs):
+        # The ONNX backend does not need this patch
+        if not isinstance(self.model, TextSystem):
+            return super().detect_and_ocr(*args, **kwargs)
+
         # Try hard to lower TextSystem.box_thresh
         backup = self.model.text_detector.box_thresh
         # Patch text_recognizer