Skip to content

Commit a710904

Browse files
author
Gang Li
committed
fix issue: strip stray carriage-return (\r) line endings from recipe scripts
1 parent 4a91cc2 commit a710904

4 files changed

Lines changed: 254 additions & 254 deletions

File tree

recipe/disco/prepare_data.sh

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
1-
#!/usr/bin/env bash
2-
set -uxo pipefail
3-
4-
cur_path=$PWD
5-
6-
export TRAIN_FILE=${TRAIN_FILE:-"${cur_path}/data/deepscaler_preview.parquet"}
7-
export TEST_FILE=${TEST_FILE:-"${cur_path}/data/aime24.parquet"}
8-
export OVERWRITE=${OVERWRITE:-0}
9-
10-
mkdir -p "${cur_path}/data"
11-
12-
if [ ! -f "${TRAIN_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then
13-
wget -O "${TRAIN_FILE}" "https://huggingface.co/datasets/ganglii/DeepScaleR-Preview-Dataset/resolve/main/deepscaler_preview.parquet?download=true"
14-
fi
15-
16-
if [ ! -f "${TEST_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then
17-
wget -O "${TEST_FILE}" "https://huggingface.co/datasets/ganglii/AIME24/resolve/main/aime24.parquet?download=true"
18-
fi
1+
#!/usr/bin/env bash
2+
set -uxo pipefail
3+
4+
cur_path=$PWD
5+
6+
export TRAIN_FILE=${TRAIN_FILE:-"${cur_path}/data/deepscaler_preview.parquet"}
7+
export TEST_FILE=${TEST_FILE:-"${cur_path}/data/aime24.parquet"}
8+
export OVERWRITE=${OVERWRITE:-0}
9+
10+
mkdir -p "${cur_path}/data"
11+
12+
if [ ! -f "${TRAIN_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then
13+
wget -O "${TRAIN_FILE}" "https://huggingface.co/datasets/ganglii/DeepScaleR-Preview-Dataset/resolve/main/deepscaler_preview.parquet?download=true"
14+
fi
15+
16+
if [ ! -f "${TEST_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then
17+
wget -O "${TEST_FILE}" "https://huggingface.co/datasets/ganglii/AIME24/resolve/main/aime24.parquet?download=true"
18+
fi

recipe/disco/run_disco_1.5b.sh

Lines changed: 77 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,78 @@
1-
#!/bin/bash
2-
set -x
3-
4-
5-
###**** For better performance, it's recommended to have:
6-
### (ppo_micro_batch_size_per_gpu * nnodes * n_gpus_per_node) % rollout.n = 0
7-
# Below setting for training on 4*A100-80GB GPUs
8-
nnodes=1
9-
n_gpus_per_node=4
10-
ppo_micro_batch_size_per_gpu=4
11-
rollout_n=8
12-
13-
loss_mode='disco'
14-
### score function selection for disco
15-
score_func='logL' # Options: 'logL', 'Lratio'
16-
tau=10 ### tau=10 is recommended for 'logL', tau=1 is recommended for 'Lratio'
17-
18-
MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
19-
# Train over a single node, 4 A100-80GB GPUs.
20-
python3 -m recipe.disco.main_disco \
21-
algorithm.adv_estimator=disco \
22-
algorithm.filter_groups.enable=False \
23-
data.train_files=./recipe/disco/data/deepscaler_preview.parquet \
24-
data.val_files=./recipe/disco/data/aime24.parquet \
25-
data.train_batch_size=128 \
26-
data.val_batch_size=512 \
27-
data.max_prompt_length=1024 \
28-
data.max_response_length=8192 \
29-
data.filter_overlong_prompts=True \
30-
actor_rollout_ref.model.path=$MODEL_PATH \
31-
actor_rollout_ref.actor.optim.lr=2e-6 \
32-
actor_rollout_ref.model.use_remove_padding=True \
33-
actor_rollout_ref.actor.ppo_mini_batch_size=32 \
34-
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
35-
actor_rollout_ref.actor.use_dynamic_bsz=False \
36-
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=36864 \
37-
actor_rollout_ref.actor.ppo_epochs=1 \
38-
+actor_rollout_ref.ref.enable=False \
39-
actor_rollout_ref.actor.use_kl_loss=False \
40-
actor_rollout_ref.actor.kl_loss_coef=0.001 \
41-
actor_rollout_ref.actor.kl_loss_type=low_var_kl \
42-
actor_rollout_ref.actor.policy_loss.loss_mode=$loss_mode \
43-
actor_rollout_ref.actor.policy_loss.score_func=$score_func \
44-
actor_rollout_ref.actor.policy_loss.delta=1e-4 \
45-
actor_rollout_ref.actor.policy_loss.beta=1e3 \
46-
actor_rollout_ref.actor.policy_loss.tau=$tau \
47-
actor_rollout_ref.actor.entropy_coeff=0.0 \
48-
actor_rollout_ref.actor.ulysses_sequence_parallel_size=1 \
49-
actor_rollout_ref.model.enable_gradient_checkpointing=True \
50-
actor_rollout_ref.actor.fsdp_config.param_offload=False \
51-
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
52-
actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
53-
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
54-
actor_rollout_ref.rollout.name=vllm \
55-
actor_rollout_ref.rollout.temperature=0.6 \
56-
actor_rollout_ref.rollout.gpu_memory_utilization=0.85 \
57-
actor_rollout_ref.rollout.n=8 \
58-
actor_rollout_ref.rollout.val_kwargs.temperature=0.6 \
59-
actor_rollout_ref.rollout.val_kwargs.top_p=0.95 \
60-
actor_rollout_ref.rollout.val_kwargs.top_k=-1 \
61-
actor_rollout_ref.rollout.val_kwargs.do_sample=True \
62-
actor_rollout_ref.rollout.val_kwargs.n=16 \
63-
actor_rollout_ref.rollout.max_num_batched_tokens=10240 \
64-
actor_rollout_ref.rollout.max_num_seqs=1024 \
65-
actor_rollout_ref.ref.fsdp_config.param_offload=True \
66-
trainer.critic_warmup=0 \
67-
trainer.logger=['console','wandb'] \
68-
trainer.project_name='verl-disco' \
69-
trainer.experiment_name='1.5B-disco-logL' \
70-
trainer.balance_batch=False \
71-
trainer.val_before_train=True \
72-
trainer.n_gpus_per_node=$n_gpus_per_node \
73-
trainer.nnodes=$nnodes \
74-
trainer.save_freq=20 \
75-
trainer.test_freq=20 \
76-
trainer.default_hdfs_dir=null \
77-
trainer.total_epochs=30 "${@:1}" \
#!/bin/bash
# Launch DisCO training for DeepSeek-R1-Distill-Qwen-1.5B on 1 node x 4 A100-80GB.
# Any extra Hydra overrides may be passed as script arguments; they are
# appended LAST so they take precedence over every default below
# (Hydra resolves duplicate overrides last-wins).
set -x

###**** For better performance, it's recommended to have:
### (ppo_micro_batch_size_per_gpu * nnodes * n_gpus_per_node) % rollout.n = 0
# Below setting for training on 4*A100-80GB GPUs
nnodes=1
n_gpus_per_node=4
ppo_micro_batch_size_per_gpu=4
rollout_n=8

loss_mode='disco'
### score function selection for disco
score_func='logL' # Options: 'logL', 'Lratio'
tau=10 ### tau=10 is recommended for 'logL', tau=1 is recommended for 'Lratio'

MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Train over a single node, 4 A100-80GB GPUs.
# Note: the leading '+' on ref.enable is Hydra's append syntax (key not in the
# base config). rollout.n now uses $rollout_n instead of a hard-coded 8, so
# editing the variable above actually takes effect.
python3 -m recipe.disco.main_disco \
    algorithm.adv_estimator=disco \
    algorithm.filter_groups.enable=False \
    data.train_files=./recipe/disco/data/deepscaler_preview.parquet \
    data.val_files=./recipe/disco/data/aime24.parquet \
    data.train_batch_size=128 \
    data.val_batch_size=512 \
    data.max_prompt_length=1024 \
    data.max_response_length=8192 \
    data.filter_overlong_prompts=True \
    actor_rollout_ref.model.path=$MODEL_PATH \
    actor_rollout_ref.actor.optim.lr=2e-6 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=32 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
    actor_rollout_ref.actor.use_dynamic_bsz=False \
    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=36864 \
    actor_rollout_ref.actor.ppo_epochs=1 \
    +actor_rollout_ref.ref.enable=False \
    actor_rollout_ref.actor.use_kl_loss=False \
    actor_rollout_ref.actor.kl_loss_coef=0.001 \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.policy_loss.loss_mode=$loss_mode \
    actor_rollout_ref.actor.policy_loss.score_func=$score_func \
    actor_rollout_ref.actor.policy_loss.delta=1e-4 \
    actor_rollout_ref.actor.policy_loss.beta=1e3 \
    actor_rollout_ref.actor.policy_loss.tau=$tau \
    actor_rollout_ref.actor.entropy_coeff=0.0 \
    actor_rollout_ref.actor.ulysses_sequence_parallel_size=1 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.temperature=0.6 \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.85 \
    actor_rollout_ref.rollout.n=$rollout_n \
    actor_rollout_ref.rollout.val_kwargs.temperature=0.6 \
    actor_rollout_ref.rollout.val_kwargs.top_p=0.95 \
    actor_rollout_ref.rollout.val_kwargs.top_k=-1 \
    actor_rollout_ref.rollout.val_kwargs.do_sample=True \
    actor_rollout_ref.rollout.val_kwargs.n=16 \
    actor_rollout_ref.rollout.max_num_batched_tokens=10240 \
    actor_rollout_ref.rollout.max_num_seqs=1024 \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name='verl-disco' \
    trainer.experiment_name='1.5B-disco-logL' \
    trainer.balance_batch=False \
    trainer.val_before_train=True \
    trainer.n_gpus_per_node=$n_gpus_per_node \
    trainer.nnodes=$nnodes \
    trainer.save_freq=20 \
    trainer.test_freq=20 \
    trainer.default_hdfs_dir=null \
    trainer.total_epochs=30 \
    trainer.resume_mode=auto \
    "$@"

recipe/disco/run_disco_7b.sh

Lines changed: 77 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,78 @@
1-
#!/bin/bash
2-
set -x
3-
4-
5-
###**** For better performance, it's recommended to have:
6-
### (ppo_micro_batch_size_per_gpu * nnodes * n_gpus_per_node) % rollout.n = 0
7-
# Below setting for training on 8*A100-80GB GPUs
8-
nnodes=1
9-
n_gpus_per_node=8
10-
ppo_micro_batch_size_per_gpu=4
11-
rollout_n=8
12-
13-
loss_mode='disco'
14-
### score function selection for disco
15-
score_func='logL' # Options: 'logL', 'Lratio'
16-
tau=10 ### tau=10 is recommended for 'logL', tau=1 is recommended for 'Lratio'
17-
18-
MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
19-
# Train over a single node, 8 A100-80GB GPUs.
20-
python3 -m recipe.disco.main_disco \
21-
algorithm.adv_estimator=disco \
22-
algorithm.filter_groups.enable=False \
23-
data.train_files=./recipe/disco/data/deepscaler_preview.parquet \
24-
data.val_files=./recipe/disco/data/aime24.parquet \
25-
data.train_batch_size=128 \
26-
data.val_batch_size=512 \
27-
data.max_prompt_length=1024 \
28-
data.max_response_length=8192 \
29-
data.filter_overlong_prompts=True \
30-
actor_rollout_ref.model.path=$MODEL_PATH \
31-
actor_rollout_ref.actor.optim.lr=1e-6 \
32-
actor_rollout_ref.model.use_remove_padding=True \
33-
actor_rollout_ref.actor.ppo_mini_batch_size=32 \
34-
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
35-
actor_rollout_ref.actor.use_dynamic_bsz=False \
36-
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=36864 \
37-
actor_rollout_ref.actor.ppo_epochs=1 \
38-
+actor_rollout_ref.ref.enable=False \
39-
actor_rollout_ref.actor.use_kl_loss=False \
40-
actor_rollout_ref.actor.kl_loss_coef=0.001 \
41-
actor_rollout_ref.actor.kl_loss_type=low_var_kl \
42-
actor_rollout_ref.actor.policy_loss.loss_mode=$loss_mode \
43-
actor_rollout_ref.actor.policy_loss.score_func=$score_func \
44-
actor_rollout_ref.actor.policy_loss.delta=1e-4 \
45-
actor_rollout_ref.actor.policy_loss.beta=1e3 \
46-
actor_rollout_ref.actor.policy_loss.tau=$tau \
47-
actor_rollout_ref.actor.entropy_coeff=0.0 \
48-
actor_rollout_ref.actor.ulysses_sequence_parallel_size=1 \
49-
actor_rollout_ref.model.enable_gradient_checkpointing=True \
50-
actor_rollout_ref.actor.fsdp_config.param_offload=False \
51-
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
52-
actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
53-
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
54-
actor_rollout_ref.rollout.name=vllm \
55-
actor_rollout_ref.rollout.temperature=0.6 \
56-
actor_rollout_ref.rollout.gpu_memory_utilization=0.85 \
57-
actor_rollout_ref.rollout.n=8 \
58-
actor_rollout_ref.rollout.val_kwargs.temperature=0.6 \
59-
actor_rollout_ref.rollout.val_kwargs.top_p=0.95 \
60-
actor_rollout_ref.rollout.val_kwargs.top_k=-1 \
61-
actor_rollout_ref.rollout.val_kwargs.do_sample=True \
62-
actor_rollout_ref.rollout.val_kwargs.n=16 \
63-
actor_rollout_ref.rollout.max_num_batched_tokens=10240 \
64-
actor_rollout_ref.rollout.max_num_seqs=1024 \
65-
actor_rollout_ref.ref.fsdp_config.param_offload=True \
66-
trainer.critic_warmup=0 \
67-
trainer.logger=['console','wandb'] \
68-
trainer.project_name='verl-disco' \
69-
trainer.experiment_name='7B-disco-logL' \
70-
trainer.balance_batch=False \
71-
trainer.val_before_train=True \
72-
trainer.n_gpus_per_node=$n_gpus_per_node \
73-
trainer.nnodes=$nnodes \
74-
trainer.save_freq=20 \
75-
trainer.test_freq=20 \
76-
trainer.default_hdfs_dir=null \
77-
trainer.total_epochs=30 "${@:1}" \
#!/bin/bash
# Launch DisCO training for DeepSeek-R1-Distill-Qwen-7B on 1 node x 8 A100-80GB.
# Any extra Hydra overrides may be passed as script arguments; they are
# appended LAST so they take precedence over every default below
# (Hydra resolves duplicate overrides last-wins).
set -x

###**** For better performance, it's recommended to have:
### (ppo_micro_batch_size_per_gpu * nnodes * n_gpus_per_node) % rollout.n = 0
# Below setting for training on 8*A100-80GB GPUs
nnodes=1
n_gpus_per_node=8
ppo_micro_batch_size_per_gpu=4
rollout_n=8

loss_mode='disco'
### score function selection for disco
score_func='logL' # Options: 'logL', 'Lratio'
tau=10 ### tau=10 is recommended for 'logL', tau=1 is recommended for 'Lratio'

MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
# Train over a single node, 8 A100-80GB GPUs.
# Note: the leading '+' on ref.enable is Hydra's append syntax (key not in the
# base config). rollout.n now uses $rollout_n instead of a hard-coded 8, so
# editing the variable above actually takes effect.
python3 -m recipe.disco.main_disco \
    algorithm.adv_estimator=disco \
    algorithm.filter_groups.enable=False \
    data.train_files=./recipe/disco/data/deepscaler_preview.parquet \
    data.val_files=./recipe/disco/data/aime24.parquet \
    data.train_batch_size=128 \
    data.val_batch_size=512 \
    data.max_prompt_length=1024 \
    data.max_response_length=8192 \
    data.filter_overlong_prompts=True \
    actor_rollout_ref.model.path=$MODEL_PATH \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=32 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
    actor_rollout_ref.actor.use_dynamic_bsz=False \
    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=36864 \
    actor_rollout_ref.actor.ppo_epochs=1 \
    +actor_rollout_ref.ref.enable=False \
    actor_rollout_ref.actor.use_kl_loss=False \
    actor_rollout_ref.actor.kl_loss_coef=0.001 \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.policy_loss.loss_mode=$loss_mode \
    actor_rollout_ref.actor.policy_loss.score_func=$score_func \
    actor_rollout_ref.actor.policy_loss.delta=1e-4 \
    actor_rollout_ref.actor.policy_loss.beta=1e3 \
    actor_rollout_ref.actor.policy_loss.tau=$tau \
    actor_rollout_ref.actor.entropy_coeff=0.0 \
    actor_rollout_ref.actor.ulysses_sequence_parallel_size=1 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=$ppo_micro_batch_size_per_gpu \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.temperature=0.6 \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.85 \
    actor_rollout_ref.rollout.n=$rollout_n \
    actor_rollout_ref.rollout.val_kwargs.temperature=0.6 \
    actor_rollout_ref.rollout.val_kwargs.top_p=0.95 \
    actor_rollout_ref.rollout.val_kwargs.top_k=-1 \
    actor_rollout_ref.rollout.val_kwargs.do_sample=True \
    actor_rollout_ref.rollout.val_kwargs.n=16 \
    actor_rollout_ref.rollout.max_num_batched_tokens=10240 \
    actor_rollout_ref.rollout.max_num_seqs=1024 \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name='verl-disco' \
    trainer.experiment_name='7B-disco-logL' \
    trainer.balance_batch=False \
    trainer.val_before_train=True \
    trainer.n_gpus_per_node=$n_gpus_per_node \
    trainer.nnodes=$nnodes \
    trainer.save_freq=20 \
    trainer.test_freq=20 \
    trainer.default_hdfs_dir=null \
    trainer.total_epochs=30 \
    trainer.resume_mode=auto \
    "$@"

0 commit comments

Comments
 (0)