Skip to content

Commit fa3b68a

Browse files
committed
only include used tests and will do more clean up later
Signed-off-by: pengdurice <pengduhit@gmail.com>
1 parent bfa408f commit fa3b68a

12 files changed

Lines changed: 16 additions & 486 deletions

examples/configs/recipes/llm/cispo-ab-qwen2.5-math-1.5b-instruct-1n8g-cispo.yaml

Lines changed: 0 additions & 87 deletions
This file was deleted.

examples/configs/recipes/llm/cispo-ab-qwen2.5-math-1.5b-instruct-1n8g-grpo.yaml

Lines changed: 0 additions & 77 deletions
This file was deleted.

examples/configs/recipes/llm/cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-cispo.yaml renamed to examples/configs/recipes/llm/cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-cispo.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# MiniMax-M1 replication study (https://arxiv.org/abs/2506.13585), CISPO arm.
1+
# MiniMax-M1 high-off-policy study (https://arxiv.org/abs/2506.13585), CISPO arm.
22
# Minimal-diff copy of workspace-4's proven 2n8g SAPO recipe. Only the
33
# loss_fn block differs across arms.
44
#
@@ -25,7 +25,7 @@ grpo:
2525

2626
policy:
2727
model_name: Qwen/Qwen3-30B-A3B
28-
train_global_batch_size: 128
28+
train_global_batch_size: 32
2929
train_micro_batch_size: 1
3030
logprob_batch_size: 1
3131
max_total_sequence_length: 4096
@@ -82,13 +82,13 @@ checkpointing:
8282
enabled: false
8383

8484
logger:
85-
log_dir: logs/cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-cispo
85+
log_dir: logs/cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-cispo
8686
wandb_enabled: true
8787
tensorboard_enabled: true
8888
monitor_gpus: false
8989
wandb:
9090
project: nemo-rl
91-
name: cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-cispo
91+
name: cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-cispo
9292

9393
cluster:
9494
gpus_per_node: 8

examples/configs/recipes/llm/cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-grpo.yaml renamed to examples/configs/recipes/llm/cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-grpo.yaml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
1-
# MiniMax-M1 replication study (https://arxiv.org/abs/2506.13585), GRPO arm.
1+
# MiniMax-M1 high-off-policy study (https://arxiv.org/abs/2506.13585), GRPO arm.
22
# Minimal-diff copy of workspace-4's proven 2n8g SAPO recipe:
33
# grpo-qwen3-30ba3b-2n8g-megatron-sapo-asym.yaml
44
# Only the loss_fn block (and logger names) differs.
55
#
66
# Three-way A/B/C: this is the GRPO baseline; DAPO and CISPO are at
7-
# cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-dapo.yaml
8-
# cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-cispo.yaml
7+
# cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-dapo.yaml
8+
# cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-cispo.yaml
99
# Submit via cispo_mm1_replica.slurm with ARM=grpo|dapo|cispo.
1010
#
11-
# Off-policy regime: 32 x 16 = 512 trajectories, train_global_batch_size=128
12-
# -> 4 gradient updates per rollout (SAPO/GSPO Sec 5.1 setting). KL beta=0,
11+
# Off-policy regime: 32 x 16 = 512 trajectories, train_global_batch_size=32
12+
# -> 16 gradient updates per rollout (SAPO/GSPO Sec 5.1 setting). KL beta=0,
1313
# token-level loss, sampling temperature 1.0.
1414
# NOT in the PR; local research artifact.
1515
defaults: ../../grpo_math_qwen30ba3b_megatron.yaml
@@ -26,7 +26,7 @@ grpo:
2626

2727
policy:
2828
model_name: Qwen/Qwen3-30B-A3B
29-
train_global_batch_size: 128
29+
train_global_batch_size: 32
3030
train_micro_batch_size: 1
3131
logprob_batch_size: 1
3232
max_total_sequence_length: 4096
@@ -85,13 +85,13 @@ checkpointing:
8585
enabled: false
8686

8787
logger:
88-
log_dir: logs/cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-grpo
88+
log_dir: logs/cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-grpo
8989
wandb_enabled: true
9090
tensorboard_enabled: true
9191
monitor_gpus: false
9292
wandb:
9393
project: nemo-rl
94-
name: cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-grpo
94+
name: cispo-mm1-highoffpolicy-qwen3-30ba3b-2n8g-megatron-grpo
9595

9696
cluster:
9797
gpus_per_node: 8

examples/configs/recipes/llm/cispo-mm1-replica-qwen3-30ba3b-2n8g-megatron-dapo.yaml

Lines changed: 0 additions & 88 deletions
This file was deleted.

examples/configs/recipes/llm/cispo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.yaml

Lines changed: 0 additions & 59 deletions
This file was deleted.

0 commit comments

Comments
 (0)