Skip to content

Commit a582b47

Browse files
authored
Add config.yaml and bash for PaddleOCR-VL (#3600)
1 parent 06526b6 commit a582b47

21 files changed

Lines changed: 662 additions & 5 deletions
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
### data
# Dataset inputs: the train and eval splits are local JSONL files, and both
# are declared with the messages dataset type.
2+
train_dataset_type: messages
3+
eval_dataset_type: messages
4+
# Relative path; the file name matches the train file fetched by wget in the
# launcher scripts, so the run presumably starts from the download directory.
train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl
5+
train_dataset_prob: "1.0"
6+
eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
7+
eval_dataset_prob: "1.0"
8+
# 16384 = 16 * 1024; presumably counted in tokens -- confirm.
max_seq_len: 16384
9+
padding_free: True
10+
truncate_packing: False
11+
dataloader_num_workers: 8
12+
mix_strategy: concat
13+
# Chat template selection: custom backend with the paddleocr_vl template.
template_backend: custom
14+
template: paddleocr_vl
15+
16+
### model
# Base model identifier and the attention implementation requested for it.
17+
model_name_or_path: PaddlePaddle/PaddleOCR-VL
18+
attn_impl: flashmask
19+
20+
### finetuning
# VL-SFT stage with fine_tuning set to full; training and evaluation are both
# enabled.
21+
# base
22+
stage: VL-SFT
23+
fine_tuning: full
24+
seed: 23
25+
do_train: true
26+
do_eval: true
27+
per_device_eval_batch_size: 8
28+
# 8 per device here times gradient_accumulation_steps (8, below) = 64;
# presumably the number of samples per device per optimizer update -- confirm.
per_device_train_batch_size: 8
29+
num_train_epochs: 2
30+
# NOTE(review): presumably -1 leaves the run length to num_train_epochs -- confirm.
max_steps: -1
31+
max_estimate_samples: 500
32+
# Evaluation and checkpoint saving share the same 400-step interval.
eval_steps: 400
33+
evaluation_strategy: steps
34+
save_steps: 400
35+
save_strategy: steps
36+
logging_steps: 1
37+
gradient_accumulation_steps: 8
38+
# The log directory is the visualdl_logs/ subdirectory of output_dir.
logging_dir: ./PaddleOCR-VL-SFT-Bengali/visualdl_logs/
39+
output_dir: ./PaddleOCR-VL-SFT-Bengali
40+
disable_tqdm: true
41+
eval_accumulation_steps: 16
42+
43+
# train
44+
lr_scheduler_type: cosine
45+
warmup_ratio: 0.01
46+
# min_lr below is one tenth of learning_rate; presumably the floor of the
# cosine schedule -- confirm.
learning_rate: 5.0e-6
47+
min_lr: 5.0e-7
48+
49+
# optimizer
50+
weight_decay: 0.1
51+
adam_epsilon: 1.0e-8
52+
adam_beta1: 0.9
53+
adam_beta2: 0.95
54+
55+
# performance
# Tensor and pipeline model-parallel sizes are both 1; sharding is stage1.
56+
tensor_model_parallel_size: 1
57+
pipeline_model_parallel_size: 1
58+
sharding: stage1
59+
# Recompute settings: full granularity, uniform method, 1 layer.
recompute_granularity: full
60+
recompute_method: uniform
61+
recompute_num_layers: 1
62+
# NOTE(review): bf16 is enabled together with fp16_opt_level O2; confirm how
# the trainer combines the two settings.
bf16: true
63+
fp16_opt_level: O2
64+
# NOTE(review): the unit is not shown here (presumably GB) -- confirm.
pre_alloc_memory: 24
65+
66+
# save
# unified_checkpoint is off; save and load use the same flex_checkpoint format.
67+
unified_checkpoint: False
68+
save_checkpoint_format: "flex_checkpoint"
69+
load_checkpoint_format: "flex_checkpoint"
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
### data
# Dataset inputs: the train and eval splits are local JSONL files, and both
# are declared with the messages dataset type.
2+
train_dataset_type: messages
3+
eval_dataset_type: messages
4+
# Relative path; the file name matches the train file fetched by wget in the
# launcher scripts, so the run presumably starts from the download directory.
train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl
5+
train_dataset_prob: "1.0"
6+
eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
7+
eval_dataset_prob: "1.0"
8+
# 16384 = 16 * 1024; presumably counted in tokens -- confirm.
max_seq_len: 16384
9+
padding_free: True
10+
truncate_packing: False
11+
dataloader_num_workers: 8
12+
mix_strategy: concat
13+
# Chat template selection: custom backend with the paddleocr_vl template.
template_backend: custom
14+
template: paddleocr_vl
15+
16+
### model
# Base model identifier, attention implementation, and LoRA switches
# (LoRA enabled with rank 8).
17+
model_name_or_path: PaddlePaddle/PaddleOCR-VL
18+
attn_impl: flashmask
19+
lora: true
20+
lora_rank: 8
21+
22+
### finetuning
# VL-SFT stage with fine_tuning set to lora; training and evaluation are both
# enabled.
23+
# base
24+
stage: VL-SFT
25+
fine_tuning: lora
26+
seed: 23
27+
do_train: true
28+
do_eval: true
29+
per_device_eval_batch_size: 8
30+
# 8 per device here times gradient_accumulation_steps (8, below) = 64;
# presumably the number of samples per device per optimizer update -- confirm.
per_device_train_batch_size: 8
31+
num_train_epochs: 2
32+
# NOTE(review): presumably -1 leaves the run length to num_train_epochs -- confirm.
max_steps: -1
33+
max_estimate_samples: 500
34+
# Evaluation and checkpoint saving share the same 400-step interval.
eval_steps: 400
35+
evaluation_strategy: steps
36+
save_steps: 400
37+
save_strategy: steps
38+
logging_steps: 1
39+
gradient_accumulation_steps: 8
40+
# The log directory is the visualdl_logs/ subdirectory of output_dir.
logging_dir: ./PaddleOCR-VL-SFT-Bengali-lora/visualdl_logs/
41+
output_dir: ./PaddleOCR-VL-SFT-Bengali-lora
42+
disable_tqdm: true
43+
eval_accumulation_steps: 16
44+
45+
# train
46+
lr_scheduler_type: cosine
47+
warmup_ratio: 0.01
48+
# min_lr below is one tenth of learning_rate; presumably the floor of the
# cosine schedule -- confirm.
learning_rate: 5.0e-4
49+
min_lr: 5.0e-5
50+
51+
# optimizer
52+
weight_decay: 0.1
53+
adam_epsilon: 1.0e-8
54+
adam_beta1: 0.9
55+
adam_beta2: 0.95
56+
57+
# performance
# Tensor and pipeline model-parallel sizes are both 1; sharding is stage1.
58+
tensor_model_parallel_size: 1
59+
pipeline_model_parallel_size: 1
60+
sharding: stage1
61+
# Recompute settings: full granularity, uniform method, 1 layer.
recompute_granularity: full
62+
recompute_method: uniform
63+
recompute_num_layers: 1
64+
# NOTE(review): bf16 is enabled together with fp16_opt_level O2; confirm how
# the trainer combines the two settings.
bf16: true
65+
fp16_opt_level: O2
66+
# NOTE(review): the unit is not shown here (presumably GB) -- confirm.
pre_alloc_memory: 16
67+
68+
# save
# unified_checkpoint is off; save and load use the same flex_checkpoint format.
69+
unified_checkpoint: false
70+
save_checkpoint_format: "flex_checkpoint"
71+
load_checkpoint_format: "flex_checkpoint"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
### model
# Export settings: the base model identifier plus the output_dir value that the
# LoRA training config in this commit also uses.
2+
# NOTE(review): spelled LoRA here but lora in the LoRA training config;
# confirm the loader treats both spellings the same.
fine_tuning: LoRA
3+
model_name_or_path: PaddlePaddle/PaddleOCR-VL
4+
output_dir: ./PaddleOCR-VL-SFT-Bengali-lora
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Fetch the Bengali train and test JSONL files; with no output option wget
# saves them into the current working directory.
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
16+
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
17+
18+
# Launch training with only GPU 0 visible. The config path is relative, so
# this is presumably meant to be run from the repository root -- confirm.
CUDA_VISIBLE_DEVICES=0 \
19+
paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Fetch the Bengali train and test JSONL files; with no output option wget
# saves them into the current working directory.
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
16+
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
17+
18+
# Launch training with only GPU 0 visible. The key=value arguments after the
# config path set batch size 4 and accumulation 16 (4 * 16 = 64) plus
# pre_alloc_memory=18; presumably they override the same keys in the YAML --
# confirm. The config path is relative (presumably run from the repository root).
CUDA_VISIBLE_DEVICES=0 \
19+
paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml \
20+
per_device_train_batch_size=4 \
21+
per_device_eval_batch_size=4 \
22+
gradient_accumulation_steps=16 \
23+
pre_alloc_memory=18
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Fetch the Bengali train and test JSONL files; with no output option wget
# saves them into the current working directory.
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
16+
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
17+
18+
# Launch LoRA training with only GPU 0 visible. The config path is relative,
# so this is presumably meant to be run from the repository root -- confirm.
CUDA_VISIBLE_DEVICES=0 \
19+
paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Fetch the Bengali train and test JSONL files; with no output option wget
# saves them into the current working directory.
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
16+
wget https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
17+
18+
# Launch LoRA training with only GPU 0 visible. The key=value arguments after
# the config path set batch size 4 and accumulation 16 (4 * 16 = 64) plus
# pre_alloc_memory=12; presumably they override the same keys in the YAML --
# confirm. The config path is relative (presumably run from the repository root).
CUDA_VISIBLE_DEVICES=0 \
19+
paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml \
20+
per_device_train_batch_size=4 \
21+
per_device_eval_batch_size=4 \
22+
gradient_accumulation_steps=16 \
23+
pre_alloc_memory=12
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Run the export subcommand with the LoRA export config. The config path is
# relative, so this is presumably meant to be run from the repository root.
paddleformers-cli export examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
### data
# Dataset inputs: the train and eval splits are local JSONL files, and both
# are declared with the messages dataset type.
2+
train_dataset_type: messages
3+
eval_dataset_type: messages
4+
# Relative path; the file name matches the train file fetched by wget in the
# launcher scripts, so the run presumably starts from the download directory.
train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl
5+
train_dataset_prob: "1.0"
6+
eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
7+
eval_dataset_prob: "1.0"
8+
# 16384 = 16 * 1024; presumably counted in tokens -- confirm.
max_seq_len: 16384
9+
padding_free: True
10+
truncate_packing: False
11+
dataloader_num_workers: 8
12+
mix_strategy: concat
13+
# Chat template selection: custom backend with the paddleocr_vl template.
template_backend: custom
14+
template: paddleocr_vl
15+
16+
### model
# Base model identifier; this config requests the sdpa attention
# implementation.
17+
model_name_or_path: PaddlePaddle/PaddleOCR-VL
18+
attn_impl: sdpa
19+
20+
### finetuning
# VL-SFT stage with fine_tuning set to full; training and evaluation are both
# enabled.
21+
# base
22+
stage: VL-SFT
23+
fine_tuning: full
24+
seed: 23
25+
do_train: true
26+
do_eval: true
27+
per_device_eval_batch_size: 2
28+
# 2 per device here times gradient_accumulation_steps (32, below) = 64;
# presumably the number of samples per device per optimizer update -- confirm.
per_device_train_batch_size: 2
29+
num_train_epochs: 2
30+
# NOTE(review): presumably -1 leaves the run length to num_train_epochs -- confirm.
max_steps: -1
31+
max_estimate_samples: 500
32+
# Evaluation and checkpoint saving share the same 400-step interval.
eval_steps: 400
33+
evaluation_strategy: steps
34+
save_steps: 400
35+
save_strategy: steps
36+
logging_steps: 1
37+
gradient_accumulation_steps: 32
38+
# The log directory is the visualdl_logs/ subdirectory of output_dir.
logging_dir: ./PaddleOCR-VL-SFT-Bengali/visualdl_logs/
39+
output_dir: ./PaddleOCR-VL-SFT-Bengali
40+
disable_tqdm: true
41+
eval_accumulation_steps: 16
42+
43+
# train
44+
lr_scheduler_type: cosine
45+
warmup_ratio: 0.01
46+
# min_lr below is one tenth of learning_rate; presumably the floor of the
# cosine schedule -- confirm.
learning_rate: 5.0e-6
47+
min_lr: 5.0e-7
48+
49+
# optimizer
50+
weight_decay: 0.1
51+
adam_epsilon: 1.0e-8
52+
adam_beta1: 0.9
53+
adam_beta2: 0.95
54+
55+
# performance
# Tensor and pipeline model-parallel sizes are both 1; sharding is stage2.
56+
tensor_model_parallel_size: 1
57+
pipeline_model_parallel_size: 1
58+
sharding: stage2
59+
# Recompute settings: full granularity, uniform method, 1 layer.
recompute_granularity: full
60+
recompute_method: uniform
61+
recompute_num_layers: 1
62+
# NOTE(review): bf16 is enabled together with fp16_opt_level O2; confirm how
# the trainer combines the two settings.
bf16: true
63+
fp16_opt_level: O2
64+
# NOTE(review): the unit is not shown here (presumably GB) -- confirm.
pre_alloc_memory: 18
65+
66+
# save
# unified_checkpoint is off; save and load use the same flex_checkpoint format.
67+
unified_checkpoint: False
68+
save_checkpoint_format: "flex_checkpoint"
69+
load_checkpoint_format: "flex_checkpoint"
70+
71+
# device
# Selects the iluvatar_gpu device backend for this run.
72+
device: iluvatar_gpu

0 commit comments

Comments
 (0)