PaddlePaddle · lugimzzz · Jan 17, 2026 · Jan 16, 2026 · Jan 16, 2026 · Jan 16, 2026
diff --git a/examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml
@@ -0,0 +1,69 @@
+### data: Bengali OCR SFT splits; these two *.jsonl files are the ones the run_paddleocr-vl_full_16k*.sh scripts wget
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # relative path; the run scripts download this file into the directory they are started from
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl  # relative path; downloaded by the same run scripts
+eval_dataset_prob: "1.0"
+max_seq_len: 16384  # 16 * 1024, the "16k" in this file's name
+padding_free: True
+truncate_packing: False
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: flashmask  # the iluvatar copy of this config (examples/config/iluvatar/...) uses sdpa here
+
+### finetuning
+# base: run mode, batch sizes, eval/save/logging cadence, output locations
+stage: VL-SFT
+fine_tuning: full  # the sibling paddleocr-vl_lora_16k_config.yaml sets `lora` here
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 8
+per_device_train_batch_size: 8  # 8 x gradient_accumulation_steps (8) = 64; the 4090D script overrides to 4 x 16, same product
+num_train_epochs: 2
+max_steps: -1  # NOTE(review): presumably "no step cap, train for num_train_epochs" - confirm against the trainer
+max_estimate_samples: 500
+eval_steps: 400  # same interval as save_steps below
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 8
+logging_dir: ./PaddleOCR-VL-SFT-Bengali/visualdl_logs/  # sub-directory of output_dir
+output_dir: ./PaddleOCR-VL-SFT-Bengali
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: cosine schedule; min_lr is one tenth of learning_rate
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-6  # the LoRA config uses 5.0e-4, i.e. 100x this value
+min_lr: 5.0e-7
+
+# optimizer
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: no tensor/pipeline model parallelism (both sizes are 1)
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2
+pre_alloc_memory: 24  # NOTE(review): unit is not stated in this file (presumably GB) - confirm; the 4090D script lowers it to 18
+
+# save: the same checkpoint format is used for saving and loading
+unified_checkpoint: False
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
diff --git a/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml
@@ -0,0 +1,71 @@
+### data: Bengali OCR SFT splits; these two *.jsonl files are the ones the run_paddleocr-vl_lora_16k*.sh scripts wget
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # relative path; the run scripts download this file into the directory they are started from
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl  # relative path; downloaded by the same run scripts
+eval_dataset_prob: "1.0"
+max_seq_len: 16384  # 16 * 1024, the "16k" in this file's name
+padding_free: True
+truncate_packing: False
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model: same base model as the full fine-tuning config, plus the LoRA switches
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: flashmask
+lora: true
+lora_rank: 8
+
+### finetuning
+# base: run mode, batch sizes, eval/save/logging cadence, output locations
+stage: VL-SFT
+fine_tuning: lora  # paddleocr-vl_lora_export.yaml spells this value `LoRA`
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 8
+per_device_train_batch_size: 8  # 8 x gradient_accumulation_steps (8) = 64; the 4090D script overrides to 4 x 16, same product
+num_train_epochs: 2
+max_steps: -1  # NOTE(review): presumably "no step cap, train for num_train_epochs" - confirm against the trainer
+max_estimate_samples: 500
+eval_steps: 400  # same interval as save_steps below
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 8
+logging_dir: ./PaddleOCR-VL-SFT-Bengali-lora/visualdl_logs/  # sub-directory of output_dir
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # paddleocr-vl_lora_export.yaml points its output_dir at this same directory
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: cosine schedule; min_lr is one tenth of learning_rate
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-4  # 100x the 5.0e-6 used by the full fine-tuning config
+min_lr: 5.0e-5
+
+# optimizer
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: no tensor/pipeline model parallelism (both sizes are 1)
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2
+pre_alloc_memory: 16  # NOTE(review): unit is not stated in this file (presumably GB) - confirm; the 4090D script lowers it to 12
+
+# save: the same checkpoint format is used for saving and loading
+unified_checkpoint: false
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
diff --git a/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml
@@ -0,0 +1,4 @@
+### model: settings consumed by `paddleformers-cli export` (see run_paddleocr-vl_lora_export.sh)
+fine_tuning: LoRA  # NOTE(review): the training config spells this `lora`; confirm the export path treats the value case-insensitively
+model_name_or_path: PaddlePaddle/PaddleOCR-VL  # same base model as paddleocr-vl_lora_16k_config.yaml
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # same directory the LoRA training config writes to
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k.sh
@@ -0,0 +1,19 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl &&
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl &&
+# Full fine-tuning on GPU 0, started only if both downloads succeed; -c resumes a partial file instead of saving a *.jsonl.1 copy the config never reads.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k_4090D.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k_4090D.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl &&
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl &&
+# Full fine-tuning on GPU 0, started only if both downloads succeed (-c resumes partial files). Overrides: batch 4 x accumulation 16 keeps the config's 8 x 8 = 64; pre_alloc_memory drops from 24 to 18.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml \
+                        per_device_train_batch_size=4 \
+                        per_device_eval_batch_size=4 \
+                        gradient_accumulation_steps=16 \
+                        pre_alloc_memory=18
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k.sh
@@ -0,0 +1,19 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl &&
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl &&
+# LoRA fine-tuning on GPU 0, started only if both downloads succeed; -c resumes a partial file instead of saving a *.jsonl.1 copy the config never reads.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k_4090D.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k_4090D.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl &&
+wget -c https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl &&
+# LoRA fine-tuning on GPU 0, started only if both downloads succeed (-c resumes partial files). Overrides: batch 4 x accumulation 16 keeps the config's 8 x 8 = 64; pre_alloc_memory drops from 16 to 12.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml \
+                        per_device_train_batch_size=4 \
+                        per_device_eval_batch_size=4 \
+                        gradient_accumulation_steps=16 \
+                        pre_alloc_memory=12
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_export.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_export.sh
@@ -0,0 +1,15 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+paddleformers-cli export examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml  # that YAML's output_dir is the LoRA training run's output_dir
diff --git a/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml b/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml
@@ -0,0 +1,72 @@
+### data: same Bengali OCR SFT file names as the best_practices/PaddleOCR-VL configs
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # relative path; the file must already be present where training is launched
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl  # relative path, same requirement as the training split
+eval_dataset_prob: "1.0"
+max_seq_len: 16384  # 16 * 1024, the "16k" in this file's name
+padding_free: True
+truncate_packing: False
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: sdpa  # the best_practices full config uses flashmask here
+
+### finetuning
+# base: run mode, batch sizes, eval/save/logging cadence, output locations
+stage: VL-SFT
+fine_tuning: full
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2  # 2 x gradient_accumulation_steps (32) = 64, the same product as the best_practices config's 8 x 8
+num_train_epochs: 2
+max_steps: -1  # NOTE(review): presumably "no step cap, train for num_train_epochs" - confirm against the trainer
+max_estimate_samples: 500
+eval_steps: 400  # same interval as save_steps below
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 32
+logging_dir: ./PaddleOCR-VL-SFT-Bengali/visualdl_logs/  # sub-directory of output_dir
+output_dir: ./PaddleOCR-VL-SFT-Bengali
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: cosine schedule; min_lr is one tenth of learning_rate
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-6
+min_lr: 5.0e-7
+
+# optimizer
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: no tensor/pipeline model parallelism (both sizes are 1)
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage2  # the best_practices full config uses stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2
+pre_alloc_memory: 18  # NOTE(review): unit is not stated in this file (presumably GB) - confirm; the best_practices full config uses 24
+
+# save: the same checkpoint format is used for saving and loading
+unified_checkpoint: False
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
+
+# device: the only key in this file that has no counterpart in the best_practices full config
+device: iluvatar_gpu