Skip to content

Commit 668c422

Browse files
committed
feat: add ocrweb_multi
1 parent e81b4e2 commit 668c422

24 files changed

Lines changed: 1794 additions & 0 deletions

ocrweb_multi/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
### See [Documentation](https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr_web/ocrweb_multi/)
37.9 KB
Loading

ocrweb_multi/build.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import os
2+
import shutil
3+
4+
def _run(command):
    """Run *command* through the shell and abort the build if it fails.

    Stopping early prevents packaging a ``dist`` directory that is missing
    the executable because an earlier compile step failed.
    """
    status = os.system(command)
    if status != 0:
        raise SystemExit(f"Command failed with status {status}: {command}")


print("Compile ocrweb")
_run("pyinstaller -y main.spec")

print("Compile wrapper")
# Raw strings keep the Windows-style ".\" paths byte-for-byte without relying
# on Python passing the invalid "\w" escape sequence through unchanged.
_run(r"windres .\wrapper.rc -O coff -o wrapper.res")
_run(r"gcc .\wrapper.c wrapper.res -o dist/ocrweb.exe")

print("Copy config.yaml")
shutil.copy2("config.yaml", "dist/config.yaml")

print("Copy models")
shutil.copytree("models", "dist/models", dirs_exist_ok=True)
# Drop the git placeholder from the packaged models; it may already be gone.
try:
    os.remove("dist/models/.gitkeep")
except FileNotFoundError:
    pass

print("Pack to ocrweb.zip")
shutil.make_archive("ocrweb", "zip", "dist")

print("Done")

ocrweb_multi/config.yaml

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
server:
2+
host: 127.0.0.1
3+
port: 8001
4+
# OCR接口Token, 为null时将跳过Token验证
5+
token: null
6+
7+
global:
8+
use_cuda: false
9+
verbose: false
10+
cuda_provider:
11+
device_id: 0
12+
arena_extend_strategy: kNextPowerOfTwo
13+
cudnn_conv_algo_search: EXHAUSTIVE
14+
do_copy_in_default_stream: true
15+
16+
# 模型配置
17+
models:
18+
# 位置检测模型
19+
detect:
20+
det_en:
21+
path: models/en_PP-OCRv3_det_infer.onnx
22+
config: &detectConfig
23+
pre_process:
24+
- class: DetResizeForTest
25+
limit_side_len: 736
26+
limit_type: min
27+
- class: NormalizeImage
28+
std: [0.229, 0.224, 0.225]
29+
mean: [0.485, 0.456, 0.406]
30+
# 1 / 255
31+
scale: 0.00392156862745098
32+
order: hwc
33+
- class: ToCHWImage
34+
- class: KeepKeys
35+
keep_keys: ["image", "shape"]
36+
post_process:
37+
thresh: 0.3
38+
box_thresh: 0.5
39+
max_candidates: 1000
40+
unclip_ratio: 1.6
41+
use_dilation: true
42+
det_ch:
43+
path: models/ch_PP-OCRv3_det_infer.onnx
44+
config: *detectConfig
45+
det_ml:
46+
path: models/ch_PP-OCRv3_det_infer.onnx
47+
config: *detectConfig
48+
# 方向检测模型
49+
classify:
50+
cls_ml:
51+
path: models/ch_ppocr_mobile_v2.0_cls_infer.meta.onnx
52+
config:
53+
batch_size: 8
54+
score_thresh: 0.9
55+
# 文字识别模型
56+
recognize:
57+
rec_ch:
58+
path: models/ch_PP-OCRv3_rec_infer.meta.onnx
59+
config: &recognizeConfig
60+
batch_size: 8
61+
rec_cht:
62+
path: models/chinese_cht_PP-OCRv3_rec_infer.meta.onnx
63+
config: *recognizeConfig
64+
rec_en:
65+
path: models/en_PP-OCRv3_rec_infer.meta.onnx
66+
config: *recognizeConfig
67+
rec_ja:
68+
path: models/japan_PP-OCRv3_rec_infer.meta.onnx
69+
config: *recognizeConfig
70+
71+
# 多语言配置
72+
languages:
73+
ch:
74+
name: 中文
75+
models:
76+
detect: det_ch
77+
classify: cls_ml
78+
recognize: rec_ch
79+
config: &languageConfig
80+
text_score: 0.5
81+
use_angle_cls: true
82+
verbose: false
83+
min_height: 30
84+
cht:
85+
name: 繁体中文
86+
models:
87+
detect: det_ch
88+
classify: cls_ml
89+
recognize: rec_cht
90+
config: *languageConfig
91+
ja:
92+
name: 日文
93+
models:
94+
detect: det_ch
95+
classify: cls_ml
96+
recognize: rec_ja
97+
config: *languageConfig
98+
en:
99+
name: 英文
100+
models:
101+
detect: det_en
102+
classify: cls_ml
103+
recognize: rec_en
104+
config: *languageConfig

ocrweb_multi/main.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# -*- encoding: utf-8 -*-
2+
# @Author: SWHL
3+
# @Contact: liekkaskono@163.com
4+
import logging
5+
import cv2
6+
import numpy as np
7+
from flask import Flask, send_file, request, make_response
8+
from waitress import serve
9+
10+
11+
from rapidocr.main import detect_recognize
12+
from utils.config import conf
13+
from utils.utils import tojson, parse_bool
14+
15+
log = logging.getLogger("app")
app = Flask(__name__)
# Limit the size of uploaded request bodies to 3 MiB
app.config.update(MAX_CONTENT_LENGTH=3 * 1024 * 1024)
19+
20+
21+
@app.route("/")
def index():
    """Serve the static front-end page."""
    page_path = "static/index.html"
    return send_file(page_path)
24+
25+
26+
def json_response(data, status=200):
    """Build a response carrying *data* serialized by ``tojson``.

    Args:
        data: Payload handed to ``tojson``.
        status: HTTP status code of the response (200 by default).

    Returns:
        The object produced by ``make_response`` with a JSON content type.
    """
    body = tojson(data)
    headers = {"content-type": "application/json"}
    return make_response(body, status, headers)
28+
29+
30+
@app.route("/lang")
def get_languages():
    """Return the list of available languages."""
    data = []
    for code, entry in conf["languages"].items():
        data.append({"code": code, "name": entry["name"]})
    log.info("Send langs: %s", data)
    return json_response({"msg": "OK", "data": data})
39+
40+
41+
@app.route("/ocr", methods=["POST", "GET"])
def ocr():
    """Run text recognition on an uploaded image.

    Request values:
        token: Must equal the configured server token when one is set.
        lang: Language code; falls back to ``"ch"`` when missing or empty.
        detect: Boolean flag parsed by ``parse_bool``; defaults to true.
        classify: Boolean flag parsed by ``parse_bool``; defaults to true.

    Request files:
        image: The image to recognize.

    Returns:
        A JSON response: 403 for a wrong token, 400 when the image is
        missing or cannot be decoded, otherwise 200 with the OCR result.
    """
    expected_token = conf["server"].get("token")
    if expected_token and request.values.get("token") != expected_token:
        return json_response({"msg": "invalid token"}, status=403)

    lang = request.values.get("lang") or "ch"
    detect = parse_bool(request.values.get("detect") or "true")
    classify = parse_bool(request.values.get("classify") or "true")

    image_file = request.files.get("image")
    if not image_file:
        return json_response({"msg": "no image"}, 400)

    nparr = np.frombuffer(image_file.stream.read(), np.uint8)
    # cv2.imdecode raises on an empty buffer and returns None for data it
    # cannot decode; treat both as a client error instead of crashing.
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) if nparr.size else None
    if image is None:
        return json_response({"msg": "invalid image"}, 400)

    log.info(
        "Input: image %s, lang=%s, detect=%s, classify=%s",
        image.shape,
        lang,
        detect,
        classify,
    )
    # Defensive: promote a single-channel image to BGR before recognition.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    result = detect_recognize(image, lang=lang, detect=detect, classify=classify)
    log.info("OCR Done %s %s", result["ts"], len(result["results"]))
    return json_response({"msg": "OK", "data": result})
69+
70+
71+
if __name__ == "__main__":
    logging.basicConfig(level="INFO")
    waitress_log = logging.getLogger("waitress")
    waitress_log.setLevel(logging.INFO)

    debug_enabled = parse_bool(conf.get("debug", "0"))
    server_conf = conf["server"]
    host = server_conf["host"]
    port = server_conf["port"]
    if not debug_enabled:
        # Deploy with waitress
        serve(app, host=host, port=port)
    else:
        # Debug: use Flask's built-in server
        app.run(host=host, port=port, debug=True)

ocrweb_multi/main.spec

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# -*- mode: python ; coding: utf-8 -*-
2+
3+
4+
# No cipher is configured; this value is passed as ``cipher=`` to both
# Analysis and PYZ below.
block_cipher = None


# Analyse the entry script main.py. The only extra data entry maps the
# "static" directory to a "static" directory in the bundle.
a = Analysis(
    ['main.py'],
    pathex=[],
    binaries=[],
    datas=[
        ('static', 'static'),
    ],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
# Archive built from the pure-Python modules found by the analysis.
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# Executable named "main". Binaries are excluded here (exclude_binaries=True)
# and are handed to COLLECT below instead. console=True keeps a console window.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='main',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
# Gather the executable, binaries, zip files and data files into one
# collection named "ocrweb" (presumably the output folder under dist/ --
# TODO confirm against the PyInstaller version in use).
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='ocrweb',
)

ocrweb_multi/models/.gitkeep

Whitespace-only changes.

ocrweb_multi/rapidocr/__init__.py

Whitespace-only changes.

ocrweb_multi/rapidocr/classify.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import copy
15+
import math
16+
import json
17+
from typing import List
18+
19+
import cv2
20+
import numpy as np
21+
from utils.utils import OrtInferSession
22+
23+
24+
class ClsPostProcess:
    """Decode classifier scores into ``(label, score)`` pairs."""

    def __init__(self, label_list):
        """Store *label_list*, which maps a class index to its label."""
        super().__init__()
        self.label_list = label_list

    def __call__(self, preds, label=None):
        """Pick the best-scoring label for every row of *preds*.

        Args:
            preds: Score array indexed as ``preds[row, class_index]``.
            label: Optional iterable of ground-truth class indices.

        Returns:
            A list of ``(label, score)`` tuples, one per row. When *label* is
            given, a 2-tuple of that list and a list of ``(label, 1.0)``
            tuples decoded from *label*.
        """
        best = preds.argmax(axis=1)
        decoded = []
        for row, cls_idx in enumerate(best):
            decoded.append((self.label_list[cls_idx], preds[row, cls_idx]))

        if label is not None:
            truth = [(self.label_list[cls_idx], 1.0) for cls_idx in label]
            return decoded, truth
        return decoded
41+
42+
43+
class TextClassifier:
    """Text-direction classifier running on an ONNX inference session.

    The expected input shape and the label list are read from the model's
    custom metadata (keys ``shape`` and ``labels``, both JSON encoded).
    """

    def __init__(self, path, config):
        """Load the model and read the classifier settings.

        Args:
            path: Model location handed to ``OrtInferSession``.
            config: Mapping providing ``batch_size`` and ``score_thresh``.
        """
        self.cls_batch_num = config["batch_size"]
        self.cls_thresh = config["score_thresh"]

        session_instance = OrtInferSession(path)
        self.session = session_instance.session
        metamap = self.session.get_modelmeta().custom_metadata_map

        self.cls_image_shape = json.loads(metamap["shape"])

        labels = json.loads(metamap["labels"])
        self.postprocess_op = ClsPostProcess(labels)
        self.input_name = session_instance.get_input_name()

    def resize_norm_img(self, img):
        """Resize *img* to the model height, normalize it and pad the width.

        The aspect ratio is preserved unless the resized width would exceed
        the model width, in which case the width is clamped. Pixel values are
        scaled to ``[-1, 1]`` and the result is right-padded with zeros.

        Returns:
            A float32 array of shape ``(img_c, img_h, img_w)``.
        """
        img_c, img_h, img_w = self.cls_image_shape
        h, w = img.shape[:2]
        ratio = w / float(h)
        # Keep the aspect ratio, but never exceed the model's input width.
        resized_w = min(img_w, int(math.ceil(img_h * ratio)))

        resized_image = cv2.resize(img, (resized_w, img_h))
        resized_image = resized_image.astype("float32")
        if img_c == 1:
            resized_image = resized_image / 255
            resized_image = resized_image[np.newaxis, :]
        else:
            # HWC -> CHW
            resized_image = resized_image.transpose((2, 0, 1)) / 255

        # Map [0, 1] to [-1, 1].
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
        padding_im[:, :, :resized_w] = resized_image
        return padding_im

    def __call__(self, img_list: List[np.ndarray]):
        """Classify every image and rotate those detected as upside down.

        Args:
            img_list: A list of images, or a single image array.

        Returns:
            A tuple ``(images, results)``. ``images`` is a deep copy of the
            input in which images labelled ``"180"`` with a score above the
            threshold are rotated by 180 degrees. ``results`` holds one
            ``[label, score]`` pair per image, in input order.
        """
        if isinstance(img_list, np.ndarray):
            img_list = [img_list]

        # Work on a copy so the caller's images are never modified.
        img_list = copy.deepcopy(img_list)

        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]

        # Sorting can speed up the cls process
        indices = np.argsort(np.array(width_list))

        img_num = len(img_list)
        # One independent placeholder per image (no shared inner list).
        cls_res = [["", 0.0] for _ in range(img_num)]
        batch_num = self.cls_batch_num
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)

            norm_img_batch = np.stack(
                [
                    self.resize_norm_img(img_list[indices[ino]])
                    for ino in range(beg_img_no, end_img_no)
                ]
            ).astype(np.float32)

            onnx_inputs = {self.input_name: norm_img_batch}
            prob_out = self.session.run(None, onnx_inputs)[0]
            cls_result = self.postprocess_op(prob_out)

            for rno, (label, score) in enumerate(cls_result):
                # Map the position in the sorted batch back to the input order.
                img_idx = indices[beg_img_no + rno]
                cls_res[img_idx] = [label, score]
                if label == "180" and score > self.cls_thresh:
                    img_list[img_idx] = cv2.rotate(
                        img_list[img_idx], cv2.ROTATE_180
                    )
        return img_list, cls_res

0 commit comments

Comments
 (0)