From 8073a0ab453811d92d5aa0a442701c746740f83e Mon Sep 17 00:00:00 2001 From: hjh Date: Fri, 15 May 2026 16:04:33 +0800 Subject: [PATCH 1/2] fix example --- examples/train/grpo/internal/chord.sh | 1 - examples/train/grpo/internal/gspo.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/train/grpo/internal/chord.sh b/examples/train/grpo/internal/chord.sh index cad12e0947..aac7fb69f8 100644 --- a/examples/train/grpo/internal/chord.sh +++ b/examples/train/grpo/internal/chord.sh @@ -19,7 +19,6 @@ swift rlhf \ --load_from_cache_file true \ --torch_dtype bfloat16 \ --beta 0.0 \ - --steps_per_generation 4 \ --num_train_epochs 1 \ --per_device_train_batch_size 4 \ --gradient_accumulation_steps 8 \ diff --git a/examples/train/grpo/internal/gspo.sh b/examples/train/grpo/internal/gspo.sh index e8a51bd955..1a6ba776e5 100644 --- a/examples/train/grpo/internal/gspo.sh +++ b/examples/train/grpo/internal/gspo.sh @@ -17,7 +17,7 @@ swift rlhf \ --beta 0.0 \ --epsilon 3e-4 \ --epsilon_high 4e-4 \ - --steps_per_generation 4 \ + --steps_per_generation 32 \ --importance_sampling_level sequence \ --num_train_epochs 1 \ --per_device_train_batch_size 2 \ From 5c2374e7a58caafc8059d84ac2eca43e49ca3aa2 Mon Sep 17 00:00:00 2001 From: hjh Date: Fri, 15 May 2026 16:12:45 +0800 Subject: [PATCH 2/2] update doc --- docs/source/Instruction/GRPO/AdvancedResearch/GSPO.md | 3 ++- docs/source_en/Instruction/GRPO/AdvancedResearch/GSPO.md | 3 ++- examples/train/grpo/internal/gspo.sh | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/Instruction/GRPO/AdvancedResearch/GSPO.md b/docs/source/Instruction/GRPO/AdvancedResearch/GSPO.md index 17c26b9af9..3ea5b99cfe 100644 --- a/docs/source/Instruction/GRPO/AdvancedResearch/GSPO.md +++ b/docs/source/Instruction/GRPO/AdvancedResearch/GSPO.md @@ -56,7 +56,8 @@ importance_weights = torch.exp(log_importance_weights) ```bash --epsilon 3e-4 # from paper section 5.1 --epsilon_high 4e-4 # from paper section 5.1 - 
--steps_per_generation 4 # from paper section 5.1 (each batch of rollout data is partitioned into four minibatches for gradient updates) + --gradient_accumulation_steps 8 + --steps_per_generation 32 # = 4 * gradient_accumulation_steps: paper section 5.1 partitions each batch of rollout data into four minibatches for gradient updates, and steps_per_generation counts micro-batches --beta 0 # zero kl regularization https://github.com/volcengine/verl/pull/2775#issuecomment-3131807306 ``` diff --git a/docs/source_en/Instruction/GRPO/AdvancedResearch/GSPO.md b/docs/source_en/Instruction/GRPO/AdvancedResearch/GSPO.md index e79ca791a5..f2e6e03fc5 100644 --- a/docs/source_en/Instruction/GRPO/AdvancedResearch/GSPO.md +++ b/docs/source_en/Instruction/GRPO/AdvancedResearch/GSPO.md @@ -58,7 +58,8 @@ Other hyperparameters in the paper ```bash --epsilon 3e-4 # from paper section 5.1 --epsilon_high 4e-4 # from paper section 5.1 - --steps_per_generation 4 # from paper section 5.1 (each batch of rollout data is partitioned into four minibatches for gradient updates) + --gradient_accumulation_steps 8 + --steps_per_generation 32 # = 4 * gradient_accumulation_steps: paper section 5.1 partitions each batch of rollout data into four minibatches for gradient updates, and steps_per_generation counts micro-batches --beta 0 # zero kl regularization https://github.com/volcengine/verl/pull/2775#issuecomment-3131807306 ``` diff --git a/examples/train/grpo/internal/gspo.sh b/examples/train/grpo/internal/gspo.sh index 1a6ba776e5..e5b9ed4ff3 100644 --- a/examples/train/grpo/internal/gspo.sh +++ b/examples/train/grpo/internal/gspo.sh @@ -3,7 +3,7 @@ # hyperparameter # - epsilon = 3e-4 from paper serction 5.1 # - epsilon_high = 4e-4 from paper serction 5.1 -# - steps_per_generation = 4 from paper serction 5.1 (each batch of rollout data is partitioned into four minibatches for gradient updates) +# - steps_per_generation = 32 (= 4 * gradient_accumulation_steps): paper section 5.1 partitions each batch of rollout data into four minibatches for gradient updates; in swift HF GRPO, steps_per_generation counts micro-batches, so it must be
multiplied by gradient_accumulation_steps # - beta = 0: zero kl regularization https://github.com/volcengine/verl/pull/2775#issuecomment-3131807306 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \