Skip to content

Commit 86f25b3

Browse files
committed
add PP-OCRv6_rec
1 parent 740a04d commit 86f25b3

7 files changed

Lines changed: 1048 additions & 8 deletions

File tree

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
Global:
2+
model_name: PP-OCRv6_small_rec
3+
debug: false
4+
use_gpu: true
5+
epoch_num: 100
6+
log_smooth_window: 20
7+
print_batch_step: 10
8+
save_model_dir: ./output/PP-OCRv6_small_rec
9+
save_epoch_step: 10
10+
eval_batch_step: [0, 2000]
11+
cal_metric_during_train: true
12+
pretrained_model:
13+
checkpoints:
14+
save_inference_dir:
15+
use_visualdl: false
16+
infer_img: doc/imgs_words/ch/word_1.jpg
17+
character_dict_path: ppocr/utils/dict/ppocrv5_dict.txt
18+
max_text_length: &max_text_length 25
19+
infer_mode: false
20+
use_space_char: true
21+
distributed: true
22+
save_res_path: ./output/rec/predicts_ppocrv6_small.txt
23+
d2s_train_image_shape: [3, 48, 320]
24+
25+
26+
Optimizer:
27+
name: Adam
28+
beta1: 0.9
29+
beta2: 0.999
30+
lr:
31+
name: Cosine
32+
learning_rate: 0.0005
33+
warmup_epoch: 5
34+
regularizer:
35+
name: L2
36+
factor: 3.0e-05
37+
38+
39+
Architecture:
40+
model_type: rec
41+
algorithm: SVTR_LCNet
42+
Transform:
43+
Backbone:
44+
name: PPLCNetV4
45+
use_rep: true
46+
stem_type: branch
47+
Head:
48+
name: MultiHead
49+
head_list:
50+
- CTCHead:
51+
Neck:
52+
name: lightsvtr
53+
dims: 120
54+
depth: 2
55+
mlp_ratio: 2.0
56+
local_kernel: 7
57+
Head:
58+
fc_decay: 0.00001
59+
- NRTRHead:
60+
nrtr_dim: 384
61+
max_text_length: *max_text_length
62+
63+
Loss:
64+
name: MultiLoss
65+
loss_config_list:
66+
- CTCLoss:
67+
- NRTRLoss:
68+
69+
PostProcess:
70+
name: CTCLabelDecode
71+
72+
Metric:
73+
name: RecMetric
74+
main_indicator: acc
75+
76+
Train:
77+
dataset:
78+
name: MultiScaleDataSet
79+
ds_width: false
80+
data_dir: ./train_data/
81+
ext_op_transform_idx: 1
82+
label_file_list:
83+
- ./train_data/train_list.txt
84+
transforms:
85+
- DecodeImage:
86+
img_mode: BGR
87+
channel_first: false
88+
- RecConAug:
89+
prob: 0.5
90+
ext_data_num: 2
91+
image_shape: [48, 320, 3]
92+
max_text_length: *max_text_length
93+
- RecAug:
94+
- MultiLabelEncode:
95+
gtc_encode: NRTRLabelEncode
96+
- KeepKeys:
97+
keep_keys:
98+
- image
99+
- label_ctc
100+
- label_gtc
101+
- length
102+
- valid_ratio
103+
sampler:
104+
name: MultiScaleSampler
105+
scales: [[320, 32], [320, 48], [320, 64]]
106+
first_bs: &bs 128
107+
fix_bs: false
108+
divided_factor: [8, 16]
109+
is_training: True
110+
loader:
111+
shuffle: true
112+
batch_size_per_card: *bs
113+
drop_last: true
114+
num_workers: 8
115+
116+
Eval:
117+
dataset:
118+
name: SimpleDataSet
119+
data_dir: ./train_data
120+
label_file_list:
121+
- ./train_data/val_list.txt
122+
transforms:
123+
- DecodeImage:
124+
img_mode: BGR
125+
channel_first: false
126+
- MultiLabelEncode:
127+
gtc_encode: NRTRLabelEncode
128+
- RecResizeImg:
129+
image_shape: [3, 48, 320]
130+
- KeepKeys:
131+
keep_keys:
132+
- image
133+
- label_ctc
134+
- label_gtc
135+
- length
136+
- valid_ratio
137+
loader:
138+
shuffle: false
139+
drop_last: false
140+
batch_size_per_card: 128
141+
num_workers: 4
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
Global:
2+
model_name: PP-OCRv6_tiny_rec
3+
debug: false
4+
use_gpu: true
5+
epoch_num: 100
6+
log_smooth_window: 20
7+
print_batch_step: 10
8+
save_model_dir: ./output/PP-OCRv6_tiny_rec
9+
save_epoch_step: 10
10+
eval_batch_step: [0, 2000]
11+
cal_metric_during_train: true
12+
pretrained_model:
13+
checkpoints:
14+
save_inference_dir:
15+
use_visualdl: false
16+
infer_img: doc/imgs_words/ch/word_1.jpg
17+
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
18+
max_text_length: &max_text_length 25
19+
infer_mode: false
20+
use_space_char: true
21+
distributed: true
22+
save_res_path: ./output/rec/predicts_ppocrv6_tiny.txt
23+
d2s_train_image_shape: [3, 48, 320]
24+
25+
26+
Optimizer:
27+
name: Adam
28+
beta1: 0.9
29+
beta2: 0.999
30+
lr:
31+
name: Cosine
32+
learning_rate: 0.001
33+
warmup_epoch: 5
34+
regularizer:
35+
name: L2
36+
factor: 3.0e-05
37+
38+
39+
Architecture:
40+
model_type: rec
41+
algorithm: SVTR_LCNet
42+
Transform:
43+
Backbone:
44+
name: PPLCNetV4
45+
use_rep: true
46+
stem_type: simple
47+
config_name: tiny
48+
stem_channels: 48
49+
Head:
50+
name: MultiHead
51+
head_list:
52+
- CTCHead:
53+
Neck:
54+
name: reshape
55+
Head:
56+
mid_channels: 80
57+
use_guide: true
58+
fc_decay: 0.00001
59+
- NRTRHead:
60+
nrtr_dim: 384
61+
max_text_length: *max_text_length
62+
63+
Loss:
64+
name: MultiLoss
65+
loss_config_list:
66+
- CTCLoss:
67+
- NRTRLoss:
68+
69+
PostProcess:
70+
name: CTCLabelDecode
71+
72+
Metric:
73+
name: RecMetric
74+
main_indicator: acc
75+
76+
Train:
77+
dataset:
78+
name: MultiScaleDataSet
79+
ds_width: false
80+
data_dir: ./train_data/
81+
ext_op_transform_idx: 1
82+
label_file_list:
83+
- ./train_data/train_list.txt
84+
transforms:
85+
- DecodeImage:
86+
img_mode: BGR
87+
channel_first: false
88+
- RecConAug:
89+
prob: 0.5
90+
ext_data_num: 2
91+
image_shape: [48, 320, 3]
92+
max_text_length: *max_text_length
93+
- RecAug:
94+
- MultiLabelEncode:
95+
gtc_encode: NRTRLabelEncode
96+
- KeepKeys:
97+
keep_keys:
98+
- image
99+
- label_ctc
100+
- label_gtc
101+
- length
102+
- valid_ratio
103+
sampler:
104+
name: MultiScaleSampler
105+
scales: [[320, 32], [320, 48], [320, 64]]
106+
first_bs: &bs 128
107+
fix_bs: false
108+
divided_factor: [8, 16]
109+
is_training: True
110+
loader:
111+
shuffle: true
112+
batch_size_per_card: *bs
113+
drop_last: true
114+
num_workers: 8
115+
116+
Eval:
117+
dataset:
118+
name: SimpleDataSet
119+
data_dir: ./train_data
120+
label_file_list:
121+
- ./train_data/val_list.txt
122+
transforms:
123+
- DecodeImage:
124+
img_mode: BGR
125+
channel_first: false
126+
- MultiLabelEncode:
127+
gtc_encode: NRTRLabelEncode
128+
- RecResizeImg:
129+
image_shape: [3, 48, 320]
130+
- KeepKeys:
131+
keep_keys:
132+
- image
133+
- label_ctc
134+
- label_gtc
135+
- length
136+
- valid_ratio
137+
loader:
138+
shuffle: false
139+
drop_last: false
140+
batch_size_per_card: 128
141+
num_workers: 8

ppocr/modeling/backbones/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def build_backbone(config, model_type):
23 23
from .det_resnet_vd_sast import ResNet_SAST
24 24
from .det_pp_lcnet import PPLCNet
25 25
from .rec_lcnetv3 import PPLCNetV3
26+
from .rec_lcnetv4 import PPLCNetV4
26 27
from .rec_hgnet import PPHGNet_small
27 28
from .rec_vit import ViT
28 29
from .det_pp_lcnet_v2 import PPLCNetV2_base
@@ -37,6 +38,7 @@ def build_backbone(config, model_type):
37 38
"ResNet_SAST",
38 39
"PPLCNet",
39 40
"PPLCNetV3",
41+
"PPLCNetV4",
40 42
"PPHGNet_small",
41 43
"PPLCNetV2_base",
42 44
"RepSVTR_det",
@@ -68,6 +70,7 @@ def build_backbone(config, model_type):
68 70
from .rec_donut_swin import DonutSwinModel
69 71
from .rec_shallow_cnn import ShallowCNN
70 72
from .rec_lcnetv3 import PPLCNetV3
73+
from .rec_lcnetv4 import PPLCNetV4
71 74
from .rec_hgnet import PPHGNet_small
72 75
from .rec_vit_parseq import ViTParseQ
73 76
from .rec_repvit import RepSVTR
@@ -97,6 +100,7 @@ def build_backbone(config, model_type):
97 100
"DenseNet",
98 101
"ShallowCNN",
99 102
"PPLCNetV3",
103+
"PPLCNetV4",
100 104
"PPHGNet_small",
101 105
"ViTParseQ",
102 106
"ViT",

0 commit comments

Comments
 (0)