Skip to content

Commit a4e6668

Browse files
authored
[cherry-pick][xpu] add ernie4.5 yaml (#3588) (#3593)
1 parent 30157ec commit a4e6668

7 files changed

Lines changed: 209 additions & 3 deletions

File tree

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
### data
2+
train_dataset_type: erniekit
3+
eval_dataset_type: erniekit
4+
train_dataset_path: ./tests/fixtures/dummy/sft/train.jsonl
5+
train_dataset_prob: "1.0"
6+
eval_dataset_path: ./tests/fixtures/dummy/sft/eval.jsonl
7+
eval_dataset_prob: "1.0"
8+
max_seq_len: 8192
9+
packing: false
10+
mix_strategy: concat
11+
template_backend: custom
12+
template: ernie_nothink
13+
14+
### model
15+
model_name_or_path: baidu/ERNIE-4.5-0.3B-PT
16+
attn_impl: flashmask
17+
18+
### finetuning
19+
# base
20+
stage: SFT
21+
fine_tuning: full
22+
seed: 23
23+
do_train: true
24+
do_eval: true
25+
per_device_eval_batch_size: 1
26+
per_device_train_batch_size: 1
27+
num_train_epochs: 1
28+
max_steps: -1
29+
eval_steps: 100
30+
evaluation_strategy: steps
31+
save_steps: 100
32+
save_strategy: steps
33+
logging_steps: 1
34+
gradient_accumulation_steps: 4
35+
logging_dir: ./vdl_log
36+
output_dir: ./checkpoints/ernie-0.3b-sft-8k
37+
disable_tqdm: true
38+
eval_accumulation_steps: 16
39+
40+
# train
41+
warmup_steps: 20
42+
learning_rate: 1.0e-5
43+
44+
# performance
45+
tensor_model_parallel_size: 1
46+
pipeline_model_parallel_size: 1
47+
sharding: stage1
48+
recompute_granularity: full
49+
recompute_method: uniform
50+
recompute_num_layers: 1
51+
bf16: true
52+
fp16_opt_level: O2
53+
load_checkpoint_format: flex_checkpoint
54+
save_checkpoint_format: flex_checkpoint
55+
56+
device: xpu
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
### data
2+
train_dataset_type: erniekit
3+
eval_dataset_type: erniekit
4+
train_dataset_path: ./tests/fixtures/dummy/sft/train.jsonl
5+
train_dataset_prob: "1.0"
6+
eval_dataset_path: ./tests/fixtures/dummy/sft/eval.jsonl
7+
eval_dataset_prob: "1.0"
8+
max_seq_len: 8192
9+
packing: false
10+
mix_strategy: concat
11+
template_backend: custom
12+
template: ernie_nothink
13+
14+
### model
15+
model_name_or_path: baidu/ERNIE-4.5-0.3B-PT
16+
attn_impl: flashmask
17+
lora: true
18+
lora_rank: 8
19+
20+
### finetuning
21+
# base
22+
stage: SFT
23+
fine_tuning: lora
24+
seed: 23
25+
do_train: true
26+
do_eval: true
27+
per_device_eval_batch_size: 1
28+
per_device_train_batch_size: 1
29+
num_train_epochs: 1
30+
max_steps: -1
31+
eval_steps: 100
32+
evaluation_strategy: steps
33+
save_steps: 100
34+
save_strategy: steps
35+
logging_steps: 1
36+
gradient_accumulation_steps: 4
37+
logging_dir: ./vdl_log
38+
output_dir: ./checkpoints/ernie-0.3b-sft-lora-8k
39+
disable_tqdm: true
40+
eval_accumulation_steps: 16
41+
42+
# train
43+
warmup_steps: 20
44+
learning_rate: 1.0e-4
45+
46+
# performance
47+
tensor_model_parallel_size: 1
48+
pipeline_model_parallel_size: 1
49+
sharding: stage1
50+
recompute_granularity: full
51+
recompute_method: uniform
52+
recompute_num_layers: 1
53+
bf16: true
54+
fp16_opt_level: O2
55+
load_checkpoint_format: flex_checkpoint
56+
save_checkpoint_format: flex_checkpoint
57+
58+
device: xpu
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
### model
2+
fine_tuning: LoRA
3+
model_name_or_path: baidu/ERNIE-4.5-0.3B-PT
4+
output_dir: checkpoints/ernie-0.3b-sft-lora-8k
5+
6+
device: xpu

examples/config/xpu/ERNIE-4.5-21B-A3B/sft/32k.yaml renamed to examples/config/xpu/ERNIE-4.5-21B-A3B/sft/full_32k.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ eval_dataset_prob: "1.0"
8 8
max_seq_len: 32768
9 9
packing: true
10 10
mix_strategy: concat
11+
template_backend: custom
12+
template: ernie_nothink
11 13

12 14
### model
13 15
model_name_or_path: baidu/ERNIE-4.5-21B-A3B-PT
@@ -31,7 +33,7 @@ save_strategy: steps
31 33
logging_steps: 1
32 34
gradient_accumulation_steps: 4
33 35
logging_dir: ./vdl_log
34-
output_dir: ./checkpoints/ernie-sft-full-tp-pp
36+
output_dir: ./checkpoints/ernie-21b-sft-32k
35 37
disable_tqdm: true
36 38
eval_accumulation_steps: 16
37 39

@@ -45,12 +47,13 @@ pipeline_model_parallel_size: 2
45 47
sequence_parallel: true
46 48
sharding: stage1
47 49
offload_optim: false
48-
tensorwise_offload_optimizer: false
50+
tensorwise_offload_optimizer: true
49 51
recompute_granularity: full
50 52
recompute_method: uniform
51 53
recompute_num_layers: 1
52 54
bf16: true
53 55
fp16_opt_level: O2
54-
unified_checkpoint: true
56+
load_checkpoint_format: flex_checkpoint
57+
save_checkpoint_format: flex_checkpoint
55 58

56 59
device: xpu
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
### data
2+
train_dataset_type: erniekit
3+
eval_dataset_type: erniekit
4+
train_dataset_path: ./tests/fixtures/dummy/sft/train.jsonl
5+
train_dataset_prob: "1.0"
6+
eval_dataset_path: ./tests/fixtures/dummy/sft/eval.jsonl
7+
eval_dataset_prob: "1.0"
8+
max_seq_len: 32768
9+
packing: true
10+
mix_strategy: concat
11+
template_backend: custom
12+
template: ernie_nothink
13+
14+
### model
15+
model_name_or_path: baidu/ERNIE-4.5-21B-A3B-PT
16+
attn_impl: flashmask
17+
lora: true
18+
lora_rank: 8
19+
20+
### finetuning
21+
# base
22+
stage: SFT
23+
fine_tuning: lora
24+
seed: 23
25+
do_train: true
26+
do_eval: true
27+
per_device_eval_batch_size: 1
28+
per_device_train_batch_size: 1
29+
num_train_epochs: 1
30+
max_steps: -1
31+
eval_steps: 100
32+
evaluation_strategy: steps
33+
save_steps: 100
34+
save_strategy: steps
35+
logging_steps: 1
36+
gradient_accumulation_steps: 4
37+
logging_dir: ./vdl_log
38+
output_dir: ./checkpoints/ernie-21b-sft-lora-32k
39+
disable_tqdm: true
40+
eval_accumulation_steps: 16
41+
42+
# train
43+
warmup_steps: 20
44+
learning_rate: 1.0e-4
45+
46+
# performance
47+
tensor_model_parallel_size: 4
48+
pipeline_model_parallel_size: 1
49+
sequence_parallel: true
50+
sharding: stage1
51+
offload_optim: false
52+
tensorwise_offload_optimizer: false
53+
recompute_granularity: full
54+
recompute_method: uniform
55+
recompute_num_layers: 1
56+
bf16: true
57+
fp16_opt_level: O2
58+
load_checkpoint_format: flex_checkpoint
59+
save_checkpoint_format: flex_checkpoint
60+
61+
device: xpu
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
### model
2+
fine_tuning: LoRA
3+
model_name_or_path: baidu/ERNIE-4.5-21B-A3B-PT
4+
output_dir: checkpoints/ernie-21b-sft-lora-32k
5+
6+
device: xpu
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Export XPU_VISIBLE_DEVICES so the training process started below inherits it.
# The value lists four device indices (0-3) — presumably this restricts the run
# to those four XPU cards; confirm against the XPU runtime documentation.
# NOTE(review): the 21B LoRA 32k config in this change sets
# tensor_model_parallel_size: 4, which appears to match the four devices
# listed here — keep the two in sync if either changes.
export XPU_VISIBLE_DEVICES="0,1,2,3"
16+
# Launch training with the ERNIE-4.5-21B-A3B LoRA 32k SFT config.
# NOTE(review): the config path is relative, so this presumably has to be run
# from the repository root — confirm.
paddleformers-cli train examples/config/xpu/ERNIE-4.5-21B-A3B/sft/lora_32k.yaml

0 commit comments

Comments
 (0)