From 1942b2d1935672fda40b422cda2d73217d361699 Mon Sep 17 00:00:00 2001 From: baiyue Date: Fri, 16 Jan 2026 17:57:03 +0800 Subject: [PATCH] [xpu] add ernie4.5 yaml (#3588) --- .../xpu/ERNIE-4.5-0.3B/sft/full_8k.yaml | 56 +++++++++++++++++ .../xpu/ERNIE-4.5-0.3B/sft/lora_8k.yaml | 58 ++++++++++++++++++ .../ERNIE-4.5-0.3B/sft/lora_8k_export.yaml | 6 ++ .../sft/{32k.yaml => full_32k.yaml} | 9 ++- .../xpu/ERNIE-4.5-21B-A3B/sft/lora_32k.yaml | 61 +++++++++++++++++++ .../sft/lora_32k_export.yaml | 6 ++ .../xpu/ERNIE-4.5-21B-A3B/sft/run_lora_32k.sh | 16 +++++ 7 files changed, 209 insertions(+), 3 deletions(-) create mode 100644 examples/config/xpu/ERNIE-4.5-0.3B/sft/full_8k.yaml create mode 100644 examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k.yaml create mode 100644 examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k_export.yaml rename examples/config/xpu/ERNIE-4.5-21B-A3B/sft/{32k.yaml => full_32k.yaml} (83%) create mode 100644 examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k.yaml create mode 100644 examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k_export.yaml create mode 100644 examples/config/xpu/ERNIE-4.5-21B-A3B/sft/run_lora_32k.sh diff --git a/examples/config/xpu/ERNIE-4.5-0.3B/sft/full_8k.yaml b/examples/config/xpu/ERNIE-4.5-0.3B/sft/full_8k.yaml new file mode 100644 index 00000000000..0a293e883a3 --- /dev/null +++ b/examples/config/xpu/ERNIE-4.5-0.3B/sft/full_8k.yaml @@ -0,0 +1,56 @@ +### data +train_dataset_type: erniekit +eval_dataset_type: erniekit +train_dataset_path: ./tests/fixtures/dummy/sft/train.jsonl +train_dataset_prob: "1.0" +eval_dataset_path: ./tests/fixtures/dummy/sft/eval.jsonl +eval_dataset_prob: "1.0" +max_seq_len: 8192 +packing: false +mix_strategy: concat +template_backend: custom +template: ernie_nothink + +### model +model_name_or_path: baidu/ERNIE-4.5-0.3B-PT +attn_impl: flashmask + +### finetuning +# base +stage: SFT +fine_tuning: full +seed: 23 +do_train: true +do_eval: true +per_device_eval_batch_size: 1 +per_device_train_batch_size: 1 +num_train_epochs: 1 +max_steps: -1 +eval_steps: 100 +evaluation_strategy: steps +save_steps: 100 +save_strategy: steps +logging_steps: 1 +gradient_accumulation_steps: 4 +logging_dir: ./vdl_log +output_dir: ./checkpoints/ernie-0.3b-sft-8k +disable_tqdm: true +eval_accumulation_steps: 16 + +# train +warmup_steps: 20 +learning_rate: 1.0e-5 + +# performance +tensor_model_parallel_size: 1 +pipeline_model_parallel_size: 1 +sharding: stage1 +recompute_granularity: full +recompute_method: uniform +recompute_num_layers: 1 +bf16: true +fp16_opt_level: O2 +load_checkpoint_format: flex_checkpoint +save_checkpoint_format: flex_checkpoint + +device: xpu diff --git a/examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k.yaml b/examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k.yaml new file mode 100644 index 00000000000..9cbf220164c --- /dev/null +++ b/examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k.yaml @@ -0,0 +1,58 @@ +### data +train_dataset_type: erniekit +eval_dataset_type: erniekit +train_dataset_path: ./tests/fixtures/dummy/sft/train.jsonl +train_dataset_prob: "1.0" +eval_dataset_path: ./tests/fixtures/dummy/sft/eval.jsonl +eval_dataset_prob: "1.0" +max_seq_len: 8192 +packing: false +mix_strategy: concat +template_backend: custom +template: ernie_nothink + +### model +model_name_or_path: baidu/ERNIE-4.5-0.3B-PT +attn_impl: flashmask +lora: true +lora_rank: 8 + +### finetuning +# base +stage: SFT +fine_tuning: lora +seed: 23 +do_train: true +do_eval: true +per_device_eval_batch_size: 1 +per_device_train_batch_size: 1 +num_train_epochs: 1 +max_steps: -1 +eval_steps: 100 +evaluation_strategy: steps +save_steps: 100 +save_strategy: steps +logging_steps: 1 +gradient_accumulation_steps: 4 +logging_dir: ./vdl_log +output_dir: ./checkpoints/ernie-0.3b-sft-lora-8k +disable_tqdm: true +eval_accumulation_steps: 16 + +# train +warmup_steps: 20 +learning_rate: 1.0e-4 + +# performance +tensor_model_parallel_size: 1 +pipeline_model_parallel_size: 1 +sharding: stage1 +recompute_granularity: full +recompute_method: uniform +recompute_num_layers: 1 +bf16: true +fp16_opt_level: O2 +load_checkpoint_format: flex_checkpoint +save_checkpoint_format: flex_checkpoint + +device: xpu diff --git a/examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k_export.yaml b/examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k_export.yaml new file mode 100644 index 00000000000..257329e379b --- /dev/null +++ b/examples/config/xpu/ERNIE-4.5-0.3B/sft/lora_8k_export.yaml @@ -0,0 +1,6 @@ +### model +fine_tuning: LoRA +model_name_or_path: baidu/ERNIE-4.5-0.3B-PT +output_dir: checkpoints/ernie-0.3b-sft-lora-8k + +device: xpu \ No newline at end of file diff --git a/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/32k.yaml b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/full_32k.yaml similarity index 83% rename from examples/config/xpu/ERNIE-4.5-21B-A3B/sft/32k.yaml rename to examples/config/xpu/ERNIE-4.5-21B-A3B/sft/full_32k.yaml index 8f1f9685861..26211d2f88a 100644 --- a/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/32k.yaml +++ b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/full_32k.yaml @@ -8,6 +8,8 @@ eval_dataset_prob: "1.0" max_seq_len: 32768 packing: true mix_strategy: concat +template_backend: custom +template: ernie_nothink ### model model_name_or_path: baidu/ERNIE-4.5-21B-A3B-PT @@ -31,7 +33,7 @@ save_strategy: steps logging_steps: 1 gradient_accumulation_steps: 4 logging_dir: ./vdl_log -output_dir: ./checkpoints/ernie-sft-full-tp-pp +output_dir: ./checkpoints/ernie-21b-sft-32k disable_tqdm: true eval_accumulation_steps: 16 @@ -45,12 +47,13 @@ pipeline_model_parallel_size: 2 sequence_parallel: true sharding: stage1 offload_optim: false -tensorwise_offload_optimizer: false +tensorwise_offload_optimizer: true recompute_granularity: full recompute_method: uniform recompute_num_layers: 1 bf16: true fp16_opt_level: O2 -unified_checkpoint: true +load_checkpoint_format: flex_checkpoint +save_checkpoint_format: flex_checkpoint device: xpu \ No newline at end of file diff --git a/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k.yaml b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k.yaml new file mode 100644 index 00000000000..2f29a7ee833 --- /dev/null +++ b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k.yaml @@ -0,0 +1,61 @@ +### data +train_dataset_type: erniekit +eval_dataset_type: erniekit +train_dataset_path: ./tests/fixtures/dummy/sft/train.jsonl +train_dataset_prob: "1.0" +eval_dataset_path: ./tests/fixtures/dummy/sft/eval.jsonl +eval_dataset_prob: "1.0" +max_seq_len: 32768 +packing: true +mix_strategy: concat +template_backend: custom +template: ernie_nothink + +### model +model_name_or_path: baidu/ERNIE-4.5-21B-A3B-PT +attn_impl: flashmask +lora: true +lora_rank: 8 + +### finetuning +# base +stage: SFT +fine_tuning: lora +seed: 23 +do_train: true +do_eval: true +per_device_eval_batch_size: 1 +per_device_train_batch_size: 1 +num_train_epochs: 1 +max_steps: -1 +eval_steps: 100 +evaluation_strategy: steps +save_steps: 100 +save_strategy: steps +logging_steps: 1 +gradient_accumulation_steps: 4 +logging_dir: ./vdl_log +output_dir: ./checkpoints/ernie-21b-sft-lora-32k +disable_tqdm: true +eval_accumulation_steps: 16 + +# train +warmup_steps: 20 +learning_rate: 1.0e-4 + +# performance +tensor_model_parallel_size: 4 +pipeline_model_parallel_size: 1 +sequence_parallel: true +sharding: stage1 +offload_optim: false +tensorwise_offload_optimizer: false +recompute_granularity: full +recompute_method: uniform +recompute_num_layers: 1 +bf16: true +fp16_opt_level: O2 +load_checkpoint_format: flex_checkpoint +save_checkpoint_format: flex_checkpoint + +device: xpu \ No newline at end of file diff --git a/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k_export.yaml b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k_export.yaml new file mode 100644 index 00000000000..eac43d65038 --- /dev/null +++ b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k_export.yaml @@ -0,0 +1,6 @@ +### model +fine_tuning: LoRA +model_name_or_path: baidu/ERNIE-4.5-21B-A3B-PT +output_dir: checkpoints/ernie-21b-sft-lora-32k + +device: xpu \ No newline at end of file diff --git a/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/run_lora_32k.sh b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/run_lora_32k.sh new file mode 100644 index 00000000000..b41e4dd92c7 --- /dev/null +++ b/examples/config/xpu/ERNIE-4.5-21B-A3B/sft/run_lora_32k.sh @@ -0,0 +1,16 @@ +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +export XPU_VISIBLE_DEVICES="0,1,2,3" +paddleformers-cli train examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k.yaml \ No newline at end of file