diff --git a/examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml
new file mode 100644
index 00000000000..2bb001d40d2
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml
@@ -0,0 +1,69 @@
+### data: Bengali OCR SFT datasets, sequence length/packing options and chat template
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # fetched into the working directory by the run_*.sh scripts
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 16384
+padding_free: true
+truncate_packing: false
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model: base checkpoint and attention implementation
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: flashmask
+
+### finetuning
+# base: full-parameter VL-SFT run control (batching, epochs, eval/save/logging cadence, output paths)
+stage: VL-SFT
+fine_tuning: full
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 8
+per_device_train_batch_size: 8
+num_train_epochs: 2
+max_steps: -1
+max_estimate_samples: 500
+eval_steps: 400
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 8  # 8 x 8 (batch x accumulation); run_paddleocr-vl_full_16k_4090D.sh keeps the product with 4 x 16
+logging_dir: ./PaddleOCR-VL-SFT-Bengali/visualdl_logs/
+output_dir: ./PaddleOCR-VL-SFT-Bengali
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: learning-rate schedule
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-6
+min_lr: 5.0e-7
+
+# optimizer: Adam betas/epsilon and weight decay
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: parallelism, sharding, recompute, mixed precision, memory pre-allocation
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2  # NOTE(review): key is named fp16 while bf16 is enabled above - confirm O2 also applies to bf16
+pre_alloc_memory: 24  # units not stated here (presumably GB - confirm); the 4090D script overrides this to 18
+
+# save: checkpoint formats
+unified_checkpoint: false
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
diff --git a/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml
new file mode 100644
index 00000000000..6f4cbf00a0c
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml
@@ -0,0 +1,71 @@
+### data: Bengali OCR SFT datasets, sequence length/packing options and chat template
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # fetched into the working directory by the run_*.sh scripts
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 16384
+padding_free: true
+truncate_packing: false
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model: base checkpoint, attention implementation and LoRA adapter rank
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: flashmask
+lora: true
+lora_rank: 8
+
+### finetuning
+# base: LoRA VL-SFT run control (batching, epochs, eval/save/logging cadence, output paths)
+stage: VL-SFT
+fine_tuning: lora
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 8
+per_device_train_batch_size: 8
+num_train_epochs: 2
+max_steps: -1
+max_estimate_samples: 500
+eval_steps: 400
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 8  # 8 x 8 (batch x accumulation); run_paddleocr-vl_lora_16k_4090D.sh keeps the product with 4 x 16
+logging_dir: ./PaddleOCR-VL-SFT-Bengali-lora/visualdl_logs/
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # paddleocr-vl_lora_export.yaml points at this same directory
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: learning-rate schedule
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-4  # 100x the full fine-tuning config's 5.0e-6
+min_lr: 5.0e-5
+
+# optimizer: Adam betas/epsilon and weight decay
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: parallelism, sharding, recompute, mixed precision, memory pre-allocation
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2  # NOTE(review): key is named fp16 while bf16 is enabled above - confirm O2 also applies to bf16
+pre_alloc_memory: 16  # units not stated here (presumably GB - confirm); the 4090D script overrides this to 12
+
+# save: checkpoint formats
+unified_checkpoint: false
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
diff --git a/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml
new file mode 100644
index 00000000000..7ec0a8473de
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml
@@ -0,0 +1,4 @@
+### model: settings for `paddleformers-cli export` of the LoRA run
+fine_tuning: LoRA  # NOTE(review): the training config spells this "lora" - confirm the export CLI accepts this casing
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # same directory as output_dir in paddleocr-vl_lora_16k_config.yaml
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k.sh
new file mode 100644
index 00000000000..311259a723d
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k.sh
@@ -0,0 +1,19 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files that are already present. Run from the repository root: the config path below is relative.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k_4090D.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k_4090D.sh
new file mode 100644
index 00000000000..e0df24fa7e4
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_full_16k_4090D.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files already present. The overrides below replace the config's 8/8/8 batch settings and pre_alloc_memory 24.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_full_16k_config.yaml \
+                        per_device_train_batch_size=4 \
+                        per_device_eval_batch_size=4 \
+                        gradient_accumulation_steps=16 \
+                        pre_alloc_memory=18
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k.sh
new file mode 100644
index 00000000000..29fa009d053
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k.sh
@@ -0,0 +1,19 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files that are already present. Run from the repository root: the config path below is relative.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k_4090D.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k_4090D.sh
new file mode 100644
index 00000000000..b07e31eec28
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_16k_4090D.sh
@@ -0,0 +1,23 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files already present. The overrides below replace the config's 8/8/8 batch settings and pre_alloc_memory 16.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_16k_config.yaml \
+                        per_device_train_batch_size=4 \
+                        per_device_eval_batch_size=4 \
+                        gradient_accumulation_steps=16 \
+                        pre_alloc_memory=12
diff --git a/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_export.sh b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_export.sh
new file mode 100644
index 00000000000..6b0ea44bfde
--- /dev/null
+++ b/examples/best_practices/PaddleOCR-VL/run_paddleocr-vl_lora_export.sh
@@ -0,0 +1,15 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+paddleformers-cli export examples/best_practices/PaddleOCR-VL/paddleocr-vl_lora_export.yaml  # relative path: run from the repository root
diff --git a/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml b/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml
new file mode 100644
index 00000000000..4fa8d6e0dfb
--- /dev/null
+++ b/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml
@@ -0,0 +1,72 @@
+### data: Bengali OCR SFT datasets, sequence length/packing options and chat template
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # fetched into the working directory by run_paddleocr-vl_full_16k.sh
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 16384
+padding_free: true
+truncate_packing: false
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model: base checkpoint and attention implementation
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: sdpa  # the best_practices and xpu configs use flashmask here
+
+### finetuning
+# base: full-parameter VL-SFT run control (batching, epochs, eval/save/logging cadence, output paths)
+stage: VL-SFT
+fine_tuning: full
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+num_train_epochs: 2
+max_steps: -1
+max_estimate_samples: 500
+eval_steps: 400
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 32  # 2 x 32 (batch x accumulation): same product, 64, as the 8 x 8 best_practices config
+logging_dir: ./PaddleOCR-VL-SFT-Bengali/visualdl_logs/
+output_dir: ./PaddleOCR-VL-SFT-Bengali
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: learning-rate schedule
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-6
+min_lr: 5.0e-7
+
+# optimizer: Adam betas/epsilon and weight decay
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: parallelism, sharding, recompute, mixed precision, memory pre-allocation
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage2  # the best_practices and xpu configs use stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2  # NOTE(review): key is named fp16 while bf16 is enabled above - confirm O2 also applies to bf16
+pre_alloc_memory: 18  # units not stated here (presumably GB - confirm)
+
+# save: checkpoint formats
+unified_checkpoint: false
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
+
+# device: target accelerator backend
+device: iluvatar_gpu
diff --git a/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml b/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml
new file mode 100644
index 00000000000..de7d9417d37
--- /dev/null
+++ b/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml
@@ -0,0 +1,74 @@
+### data: Bengali OCR SFT datasets, sequence length/packing options and chat template
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # fetched into the working directory by run_paddleocr-vl_lora_16k.sh
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 16384
+padding_free: true
+truncate_packing: false
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model: base checkpoint, attention implementation and LoRA adapter rank
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: sdpa  # the best_practices and xpu configs use flashmask here
+lora: true
+lora_rank: 8
+
+### finetuning
+# base: LoRA VL-SFT run control (batching, epochs, eval/save/logging cadence, output paths)
+stage: VL-SFT
+fine_tuning: lora
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+num_train_epochs: 2
+max_steps: -1
+max_estimate_samples: 500
+eval_steps: 400
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 32  # 2 x 32 (batch x accumulation): same product, 64, as the 8 x 8 best_practices config
+logging_dir: ./PaddleOCR-VL-SFT-Bengali-lora/visualdl_logs/
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # paddleocr-vl_lora_export.yaml points at this same directory
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: learning-rate schedule
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-4  # 100x the full fine-tuning config's 5.0e-6
+min_lr: 5.0e-5
+
+# optimizer: Adam betas/epsilon and weight decay
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: parallelism, sharding, recompute, mixed precision, memory pre-allocation
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage2  # the best_practices and xpu configs use stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2  # NOTE(review): key is named fp16 while bf16 is enabled above - confirm O2 also applies to bf16
+pre_alloc_memory: 12  # units not stated here (presumably GB - confirm)
+
+# save: checkpoint formats
+unified_checkpoint: false
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
+
+# device: target accelerator backend
+device: iluvatar_gpu
diff --git a/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml b/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml
new file mode 100644
index 00000000000..9fa099df864
--- /dev/null
+++ b/examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml
@@ -0,0 +1,5 @@
+### model: settings for `paddleformers-cli export` of the LoRA run
+fine_tuning: LoRA  # NOTE(review): the training config spells this "lora" - confirm the export CLI accepts this casing
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # same directory as output_dir in paddleocr-vl_lora_16k_config.yaml
+device: iluvatar_gpu
diff --git a/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_full_16k.sh b/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_full_16k.sh
new file mode 100644
index 00000000000..5703b3efc27
--- /dev/null
+++ b/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_full_16k.sh
@@ -0,0 +1,20 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files that are already present. Run from the repository root: the config path below is relative.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train \
+    examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml
diff --git a/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_lora_16k.sh b/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_lora_16k.sh
new file mode 100644
index 00000000000..7c5b48661b8
--- /dev/null
+++ b/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_lora_16k.sh
@@ -0,0 +1,19 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files that are already present. Run from the repository root: the config path below is relative.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml
diff --git a/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_lora_export.sh b/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_lora_export.sh
new file mode 100644
index 00000000000..bbe7fd1d00e
--- /dev/null
+++ b/examples/config/iluvatar/PaddleOCR-VL/sft/run_paddleocr-vl_lora_export.sh
@@ -0,0 +1,15 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+paddleformers-cli export examples/config/iluvatar/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml  # relative path: run from the repository root
diff --git a/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml b/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml
new file mode 100644
index 00000000000..9aa887f5b50
--- /dev/null
+++ b/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml
@@ -0,0 +1,72 @@
+### data: Bengali OCR SFT datasets, sequence length/packing options and chat template
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # fetched into the working directory by run_paddleocr-vl_full_16k.sh
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 16384
+padding_free: true
+truncate_packing: false
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model: base checkpoint and attention implementation
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: flashmask
+
+### finetuning
+# base: full-parameter VL-SFT run control (batching, epochs, eval/save/logging cadence, output paths)
+stage: VL-SFT
+fine_tuning: full
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 8
+per_device_train_batch_size: 8
+num_train_epochs: 2
+max_steps: -1
+max_estimate_samples: 500
+eval_steps: 400
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 8  # 8 x 8 (batch x accumulation), matching the best_practices config
+logging_dir: ./PaddleOCR-VL-SFT-Bengali/visualdl_logs/
+output_dir: ./PaddleOCR-VL-SFT-Bengali
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: learning-rate schedule
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-6
+min_lr: 5.0e-7
+
+# optimizer: Adam betas/epsilon and weight decay
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: parallelism, sharding, recompute, mixed precision, memory pre-allocation
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2  # NOTE(review): key is named fp16 while bf16 is enabled above - confirm O2 also applies to bf16
+pre_alloc_memory: 24  # units not stated here (presumably GB - confirm)
+
+# save: checkpoint formats
+unified_checkpoint: false
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
+
+# device: target accelerator backend
+device: xpu
diff --git a/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml b/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml
new file mode 100644
index 00000000000..093722c055f
--- /dev/null
+++ b/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml
@@ -0,0 +1,74 @@
+### data: Bengali OCR SFT datasets, sequence length/packing options and chat template
+train_dataset_type: messages
+eval_dataset_type: messages
+train_dataset_path: ./ocr_vl_sft-train_Bengali.jsonl  # fetched into the working directory by run_paddleocr-vl_lora_16k.sh
+train_dataset_prob: "1.0"
+eval_dataset_path: ./ocr_vl_sft-test_Bengali.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 16384
+padding_free: true
+truncate_packing: false
+dataloader_num_workers: 8
+mix_strategy: concat
+template_backend: custom
+template: paddleocr_vl
+
+### model: base checkpoint, attention implementation and LoRA adapter rank
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+attn_impl: flashmask
+lora: true
+lora_rank: 8
+
+### finetuning
+# base: LoRA VL-SFT run control (batching, epochs, eval/save/logging cadence, output paths)
+stage: VL-SFT
+fine_tuning: lora
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 8
+per_device_train_batch_size: 8
+num_train_epochs: 2
+max_steps: -1
+max_estimate_samples: 500
+eval_steps: 400
+evaluation_strategy: steps
+save_steps: 400
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 8  # 8 x 8 (batch x accumulation), matching the best_practices config
+logging_dir: ./PaddleOCR-VL-SFT-Bengali-lora/visualdl_logs/
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # paddleocr-vl_lora_export.yaml points at this same directory
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train: learning-rate schedule
+lr_scheduler_type: cosine
+warmup_ratio: 0.01
+learning_rate: 5.0e-4  # 100x the full fine-tuning config's 5.0e-6
+min_lr: 5.0e-5
+
+# optimizer: Adam betas/epsilon and weight decay
+weight_decay: 0.1
+adam_epsilon: 1.0e-8
+adam_beta1: 0.9
+adam_beta2: 0.95
+
+# performance: parallelism, sharding, recompute, mixed precision, memory pre-allocation
+tensor_model_parallel_size: 1
+pipeline_model_parallel_size: 1
+sharding: stage1
+recompute_granularity: full
+recompute_method: uniform
+recompute_num_layers: 1
+bf16: true
+fp16_opt_level: O2  # NOTE(review): key is named fp16 while bf16 is enabled above - confirm O2 also applies to bf16
+pre_alloc_memory: 16  # units not stated here (presumably GB - confirm)
+
+# save: checkpoint formats
+unified_checkpoint: false
+save_checkpoint_format: "flex_checkpoint"
+load_checkpoint_format: "flex_checkpoint"
+
+# device: target accelerator backend
+device: xpu
diff --git a/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml b/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml
new file mode 100644
index 00000000000..612e72bc19b
--- /dev/null
+++ b/examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml
@@ -0,0 +1,5 @@
+### model: settings for `paddleformers-cli export` of the LoRA run
+fine_tuning: LoRA  # NOTE(review): the training config spells this "lora" - confirm the export CLI accepts this casing
+model_name_or_path: PaddlePaddle/PaddleOCR-VL
+output_dir: ./PaddleOCR-VL-SFT-Bengali-lora  # same directory as output_dir in paddleocr-vl_lora_16k_config.yaml
+device: xpu
diff --git a/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_full_16k.sh b/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_full_16k.sh
new file mode 100644
index 00000000000..1eb9ce3c6be
--- /dev/null
+++ b/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_full_16k.sh
@@ -0,0 +1,22 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files that are already present. Run from the repository root: the config path below is relative.
+export FLAGS_use_stride_kernel=True
+# NOTE(review): CUDA_VISIBLE_DEVICES is set although the config uses device: xpu - confirm the XPU runtime honors it.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train \
+    examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_full_16k_config.yaml
diff --git a/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_lora_16k.sh b/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_lora_16k.sh
new file mode 100644
index 00000000000..e937b575bf5
--- /dev/null
+++ b/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_lora_16k.sh
@@ -0,0 +1,21 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-train_Bengali.jsonl
+wget -nc https://paddleformers.bj.bcebos.com/datasets/ocr-vl/ocr_vl_sft-test_Bengali.jsonl
+# -nc skips files that are already present. Run from the repository root: the config path below is relative.
+export FLAGS_use_stride_kernel=True
+# NOTE(review): CUDA_VISIBLE_DEVICES is set although the config uses device: xpu - confirm the XPU runtime honors it.
+CUDA_VISIBLE_DEVICES=0 \
+paddleformers-cli train examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_16k_config.yaml
diff --git a/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_lora_export.sh b/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_lora_export.sh
new file mode 100644
index 00000000000..58f38f1c9b3
--- /dev/null
+++ b/examples/config/xpu/PaddleOCR-VL/sft/run_paddleocr-vl_lora_export.sh
@@ -0,0 +1,15 @@
+# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+paddleformers-cli export examples/config/xpu/PaddleOCR-VL/sft/paddleocr-vl_lora_export.yaml  # relative path: run from the repository root
diff --git a/paddleformers/utils/masking_utils.py b/paddleformers/utils/masking_utils.py
index f0d41397eae..d5853604077 100644
--- a/paddleformers/utils/masking_utils.py
+++ b/paddleformers/utils/masking_utils.py
@@ -79,11 +79,11 @@ def _gen_from_sparse_attn_mask_indices(
     # [batch_size, k_num_heads, k_seq_len, {1, 2, 4}] -> [batch_size, k_num_heads, {1, 2, 4}, k_seq_len]
     mask_indices = attn_mask_startend_row_indices.transpose([0, 1, 3, 2])
 
-    downstart_mask_indices = mask_indices[:, :, 0, :]
+    downstart_mask_indices = mask_indices[:, :, 0:1, :]
     downstart_mask_indices = downstart_mask_indices.expand([batch_size, num_head, seq_len, -1])
     lower_tri = base < downstart_mask_indices
     if has_end:
-        downend_mask_indices = mask_indices[:, :, 1, :]
+        downend_mask_indices = mask_indices[:, :, 1:2, :]
         downend_mask_indices = downend_mask_indices.expand([batch_size, num_head, seq_len, -1])
         lower_tri = paddle.logical_or(lower_tri, base >= downend_mask_indices)
 
@@ -91,14 +91,14 @@ def _gen_from_sparse_attn_mask_indices(
 
     if not is_causal:
         if has_end:
-            upstart_mask_indices = mask_indices[:, :, 2, :]
+            upstart_mask_indices = mask_indices[:, :, 2:3, :]
             upstart_mask_indices = upstart_mask_indices.expand([batch_size, num_head, seq_len, -1])
-            upend_mask_indices = mask_indices[:, :, 3, :]
+            upend_mask_indices = mask_indices[:, :, 3:4, :]
             upend_mask_indices = upend_mask_indices.expand([batch_size, num_head, seq_len, -1])
             upper_tri = base >= upend_mask_indices
             upper_tri = paddle.logical_or(upper_tri, base < upstart_mask_indices)
         else:
-            upend_mask_indices = mask_indices[:, :, 1, :]
+            upend_mask_indices = mask_indices[:, :, 1:2, :]
             upend_mask_indices = upend_mask_indices.expand([batch_size, num_head, seq_len, -1])
             upper_tri = base >= upend_mask_indices