RapidAI
diff --git a/‎ocrweb_multi/README.md‎
Lines changed: 1 addition & 0 deletions b/‎ocrweb_multi/README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎ocrweb_multi/assets/ocr_web_multi.jpg‎
37.9 KB b/‎ocrweb_multi/assets/ocr_web_multi.jpg‎
37.9 KB
diff --git a/‎ocrweb_multi/build.py‎
Lines changed: 21 additions & 0 deletions b/‎ocrweb_multi/build.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎ocrweb_multi/config.yaml‎
Lines changed: 104 additions & 0 deletions b/‎ocrweb_multi/config.yaml‎
Lines changed: 104 additions & 0 deletions
diff --git a/‎ocrweb_multi/main.py‎
Lines changed: 79 additions & 0 deletions b/‎ocrweb_multi/main.py‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎ocrweb_multi/main.spec‎
Lines changed: 52 additions & 0 deletions b/‎ocrweb_multi/main.spec‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎ocrweb_multi/models/.gitkeep‎ b/‎ocrweb_multi/models/.gitkeep‎
diff --git a/‎ocrweb_multi/rapidocr/__init__.py‎ b/‎ocrweb_multi/rapidocr/__init__.py‎
diff --git a/‎ocrweb_multi/rapidocr/classify.py‎
Lines changed: 122 additions & 0 deletions b/‎ocrweb_multi/rapidocr/classify.py‎
Lines changed: 122 additions & 0 deletions
@@ -0,0 +1 @@
+### See [Documentation](https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr_web/ocrweb_multi/)
@@ -0,0 +1,21 @@
+import os
+import shutil
+
+print("Compile ocrweb")
+os.system("pyinstaller -y main.spec")
+
+print("Compile wrapper")
+os.system("windres .\wrapper.rc -O coff -o wrapper.res")
+os.system("gcc .\wrapper.c wrapper.res -o dist/ocrweb.exe")
+
+print("Copy config.yaml")
+shutil.copy2("config.yaml", "dist/config.yaml")
+
+print("Copy models")
+shutil.copytree("models", "dist/models", dirs_exist_ok=True)
+os.remove("dist/models/.gitkeep")
+
+print("Pack to ocrweb.zip")
+shutil.make_archive("ocrweb", "zip", "dist")
+
+print("Done")
@@ -0,0 +1,104 @@
+server:
+  host: 127.0.0.1
+  port: 8001
+  # Token for the OCR endpoint; when null, token validation is skipped
+  token: null
+
+global:
+  use_cuda: false
+  verbose: false
+  cuda_provider:
+    device_id: 0
+    arena_extend_strategy: kNextPowerOfTwo
+    cudnn_conv_algo_search: EXHAUSTIVE
+    do_copy_in_default_stream: true
+
+# Model configuration
+models:
+  # Text detection (localization) models
+  detect:
+    det_en:
+      path: models/en_PP-OCRv3_det_infer.onnx
+      config: &detectConfig
+        pre_process:
+          - class: DetResizeForTest
+            limit_side_len: 736
+            limit_type: min
+          - class: NormalizeImage
+            std: [0.229, 0.224, 0.225]
+            mean: [0.485, 0.456, 0.406]
+            # 1 / 255
+            scale: 0.00392156862745098
+            order: hwc
+          - class: ToCHWImage
+          - class: KeepKeys
+            keep_keys: ["image", "shape"]
+        post_process:
+          thresh: 0.3
+          box_thresh: 0.5
+          max_candidates: 1000
+          unclip_ratio: 1.6
+          use_dilation: true
+    det_ch:
+      path: models/ch_PP-OCRv3_det_infer.onnx
+      config: *detectConfig
+    det_ml:
+      path: models/ch_PP-OCRv3_det_infer.onnx
+      config: *detectConfig
+  # Text direction classification models
+  classify:
+    cls_ml:
+      path: models/ch_ppocr_mobile_v2.0_cls_infer.meta.onnx
+      config:
+        batch_size: 8
+        score_thresh: 0.9
+  # Text recognition models
+  recognize:
+    rec_ch:
+      path: models/ch_PP-OCRv3_rec_infer.meta.onnx
+      config: &recognizeConfig
+        batch_size: 8
+    rec_cht:
+      path: models/chinese_cht_PP-OCRv3_rec_infer.meta.onnx
+      config: *recognizeConfig
+    rec_en:
+      path: models/en_PP-OCRv3_rec_infer.meta.onnx
+      config: *recognizeConfig
+    rec_ja:
+      path: models/japan_PP-OCRv3_rec_infer.meta.onnx
+      config: *recognizeConfig
+
+# Per-language configuration
+languages:
+  ch:
+    name: 中文
+    models:
+      detect: det_ch
+      classify: cls_ml
+      recognize: rec_ch
+    config: &languageConfig
+      text_score: 0.5
+      use_angle_cls: true
+      verbose: false
+      min_height: 30
+  cht:
+    name: 繁体中文
+    models:
+      detect: det_ch
+      classify: cls_ml
+      recognize: rec_cht
+    config: *languageConfig
+  ja:
+    name: 日文
+    models:
+      detect: det_ch
+      classify: cls_ml
+      recognize: rec_ja
+    config: *languageConfig
+  en:
+    name: 英文
+    models:
+      detect: det_en
+      classify: cls_ml
+      recognize: rec_en
+    config: *languageConfig
@@ -0,0 +1,79 @@
+# -*- encoding: utf-8 -*-
+# @Author: SWHL
+# @Contact: liekkaskono@163.com
+import logging
+import cv2
+import numpy as np
+from flask import Flask, send_file, request, make_response
+from waitress import serve
+
+
+from rapidocr.main import detect_recognize
+from utils.config import conf
+from utils.utils import tojson, parse_bool
+
+app = Flask(__name__)
+log = logging.getLogger("app")
+# Limit the upload size to 3 MiB (3 * 1024 * 1024 bytes)
+app.config["MAX_CONTENT_LENGTH"] = 3 * 1024 * 1024
+
+
+@app.route("/")
+def index():
+    return send_file("static/index.html")
+
+
+def json_response(data, status=200):
+    return make_response(tojson(data), status, {"content-type": "application/json"})
+
+
+@app.route("/lang")
+def get_languages():
+    """返回可用语言列表"""
+    data = [
+        {"code": key, "name": val["name"]} for key, val in conf["languages"].items()
+    ]
+    result = {"msg": "OK", "data": data}
+    log.info("Send langs: %s", data)
+    return json_response(result)
+
+
+@app.route("/ocr", methods=["POST", "GET"])
+def ocr():
+    """执行文字识别"""
+    if conf["server"].get("token"):
+        if request.values.get("token") != conf["server"]["token"]:
+            return json_response({"msg": "invalid token"}, status=403)
+
+    lang = request.values.get("lang") or "ch"
+    detect = parse_bool(request.values.get("detect") or "true")
+    classify = parse_bool(request.values.get("classify") or "true")
+
+    image_file = request.files.get("image")
+    if not image_file:
+        return json_response({"msg": "no image"}, 400)
+    nparr = np.frombuffer(image_file.stream.read(), np.uint8)
+    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    log.info(
+        "Input: image %s, lang=%s, detect=%s, classify=%s",
+        image.shape,
+        lang,
+        detect,
+        classify,
+    )
+    if image.ndim == 2:
+        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+    result = detect_recognize(image, lang=lang, detect=detect, classify=classify)
+    log.info("OCR Done %s %s", result["ts"], len(result["results"]))
+    return json_response({"msg": "OK", "data": result})
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level="INFO")
+    logging.getLogger("waitress").setLevel(logging.INFO)
+    if parse_bool(conf.get("debug", "0")):
+        # Debug
+        app.run(host=conf["server"]["host"], port=conf["server"]["port"], debug=True)
+    else:
+        # Deploy with waitress
+        serve(app, host=conf["server"]["host"], port=conf["server"]["port"])
@@ -0,0 +1,52 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+
+# No cipher object is configured; the value is passed to Analysis and PYZ.
+block_cipher = None
+
+
+# Analyze the entry script main.py and collect what it needs.
+a = Analysis(
+    ['main.py'],
+    pathex=[],
+    binaries=[],
+    # Extra data entries as (source, destination) pairs: the 'static'
+    # directory is shipped under the same name.
+    datas=[
+        ('static', 'static'),
+    ],
+    hiddenimports=[],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+# Archive built from the pure-Python modules found by the analysis.
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+# Executable named 'main'. Binaries are excluded here (exclude_binaries=True)
+# and handed to COLLECT below instead.
+exe = EXE(
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,
+    name='main',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
+# Gather the executable, binaries, zip files and data files together under
+# the name 'ocrweb'.
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    name='ocrweb',
+)
@@ -0,0 +1,122 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import math
+import json
+from typing import List
+
+import cv2
+import numpy as np
+from utils.utils import OrtInferSession
+
+
+class ClsPostProcess:
+    """Convert between text-label and text-index"""
+
+    def __init__(self, label_list):
+        super(ClsPostProcess, self).__init__()
+        self.label_list = label_list
+
+    def __call__(self, preds, label=None):
+        pred_idxs = preds.argmax(axis=1)
+        decode_out = [
+            (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs)
+        ]
+        if label is None:
+            return decode_out
+
+        label = [(self.label_list[idx], 1.0) for idx in label]
+        return decode_out, label
+
+
+class TextClassifier:
+    def __init__(self, path, config):
+        self.cls_batch_num = config["batch_size"]
+        self.cls_thresh = config["score_thresh"]
+
+        session_instance = OrtInferSession(path)
+        self.session = session_instance.session
+        metamap = self.session.get_modelmeta().custom_metadata_map
+
+        self.cls_image_shape = json.loads(metamap["shape"])
+
+        labels = json.loads(metamap["labels"])
+        self.postprocess_op = ClsPostProcess(labels)
+        self.input_name = session_instance.get_input_name()
+
+    def resize_norm_img(self, img):
+        img_c, img_h, img_w = self.cls_image_shape
+        h, w = img.shape[:2]
+        ratio = w / float(h)
+        if math.ceil(img_h * ratio) > img_w:
+            resized_w = img_w
+        else:
+            resized_w = int(math.ceil(img_h * ratio))
+
+        resized_image = cv2.resize(img, (resized_w, img_h))
+        resized_image = resized_image.astype("float32")
+        if img_c == 1:
+            resized_image = resized_image / 255
+            resized_image = resized_image[np.newaxis, :]
+        else:
+            resized_image = resized_image.transpose((2, 0, 1)) / 255
+
+        resized_image -= 0.5
+        resized_image /= 0.5
+        padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
+        padding_im[:, :, :resized_w] = resized_image
+        return padding_im
+
+    def __call__(self, img_list: List[np.ndarray]):
+        if isinstance(img_list, np.ndarray):
+            img_list = [img_list]
+
+        img_list = copy.deepcopy(img_list)
+
+        # Calculate the aspect ratio of all text bars
+        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
+
+        # Sorting can speed up the cls process
+        indices = np.argsort(np.array(width_list))
+
+        img_num = len(img_list)
+        cls_res = [["", 0.0]] * img_num
+        batch_num = self.cls_batch_num
+        for beg_img_no in range(0, img_num, batch_num):
+            end_img_no = min(img_num, beg_img_no + batch_num)
+            max_wh_ratio = 0
+            for ino in range(beg_img_no, end_img_no):
+                h, w = img_list[indices[ino]].shape[0:2]
+                wh_ratio = w * 1.0 / h
+                max_wh_ratio = max(max_wh_ratio, wh_ratio)
+
+            norm_img_batch = []
+            for ino in range(beg_img_no, end_img_no):
+                norm_img = self.resize_norm_img(img_list[indices[ino]])
+                norm_img = norm_img[np.newaxis, :]
+                norm_img_batch.append(norm_img)
+            norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)
+
+            onnx_inputs = {self.input_name: norm_img_batch}
+            prob_out = self.session.run(None, onnx_inputs)[0]
+            cls_result = self.postprocess_op(prob_out)
+
+            for rno in range(len(cls_result)):
+                label, score = cls_result[rno]
+                cls_res[indices[beg_img_no + rno]] = [label, score]
+                if label == "180" and score > self.cls_thresh:
+                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
+                        img_list[indices[beg_img_no + rno]], 1
+                    )
+        return img_list, cls_res
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+### See [Documentation](https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr_web/ocrweb_multi/)`