Skip to content

Commit 9e4e96b

Browse files
committed
fix(infra): update GCP runner to v2.332.0 and resolve rebase import conflicts
Runner 2.317.0 does not support node24, which is required by actions/checkout@v6, causing 'Set up job' failures on dynamically provisioned GCP instances.

Key changes:
- Bump RUNNER_VERSION from 2.317.0 to 2.332.0 in test-areal.yml and build-docker-image.yml
- Remove duplicate imports in test_rollout_controller.py from rebase with PR #996
1 parent 6845f62 commit 9e4e96b

11 files changed

Lines changed: 20 additions & 21 deletions

.github/workflows/build-docker-image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ permissions:
1414
env:
1515
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
1616
VALIDATOR_LABELS: gcp-docker-validator
17-
RUNNER_VERSION: '2.317.0'
17+
RUNNER_VERSION: '2.332.0'
1818
IMAGE_NAME: ghcr.io/inclusionai/areal-runtime
1919
IMAGE_TAG: test
2020

.github/workflows/test-areal.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ concurrency:
3939

4040
env:
4141
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
42-
RUNNER_VERSION: '2.317.0'
42+
RUNNER_VERSION: '2.332.0'
4343
GCP_OS_IMAGE: areal-cicd-test-202602030
4444

4545
jobs:

tests/grpo/config_archon.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
experiment_name: tests-grpo
22
trial_name: trial
3+
scheduler:
4+
type: local
35
cluster:
46
n_nodes: 1
57
n_gpus_per_node: 2

tests/grpo/config_archon_vllm.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
experiment_name: tests-grpo
22
trial_name: trial
3+
scheduler:
4+
type: local
35
cluster:
46
n_nodes: 1
57
n_gpus_per_node: 2
@@ -34,8 +36,6 @@ vllm:
3436
model: ${actor.path}
3537
seed: ${seed}
3638
skip_tokenizer_init: false
37-
max_model_len: 128
38-
gpu_memory_utilization: 0.2
3939
gconfig:
4040
n_samples: 4
4141
max_new_tokens: 1024

tests/grpo/config_fsdp.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
experiment_name: tests-grpo
22
trial_name: trial
3+
scheduler:
4+
type: local
35
cluster:
46
n_nodes: 1
57
n_gpus_per_node: 2

tests/grpo/config_fsdp_vllm.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
experiment_name: tests-grpo
22
trial_name: trial
3+
scheduler:
4+
type: local
35
cluster:
46
n_nodes: 1
57
n_gpus_per_node: 2
@@ -34,8 +36,6 @@ vllm:
3436
model: ${actor.path}
3537
seed: ${seed}
3638
skip_tokenizer_init: false
37-
max_model_len: 128
38-
gpu_memory_utilization: 0.2
3939
gconfig:
4040
n_samples: 4
4141
max_new_tokens: 1024

tests/grpo/config_megatron.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
experiment_name: tests-grpo
22
trial_name: trial
3+
scheduler:
4+
type: local
35
cluster:
46
n_nodes: 1
57
n_gpus_per_node: 2

tests/grpo/config_megatron_vllm.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
experiment_name: tests-grpo
22
trial_name: trial
3+
scheduler:
4+
type: local
35
cluster:
46
n_nodes: 1
57
n_gpus_per_node: 2
@@ -34,8 +36,6 @@ vllm:
3436
model: ${actor.path}
3537
seed: ${seed}
3638
skip_tokenizer_init: false
37-
max_model_len: 128
38-
gpu_memory_utilization: 0.2
3939
gconfig:
4040
n_samples: 4
4141
max_new_tokens: 1024

tests/grpo/entrypoint.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from areal import PPOTrainer
88
from areal.api.cli_args import GRPOConfig, load_expr_config
99
from areal.dataset import get_custom_dataset
10-
from areal.reward import gsm8k_reward_fn
11-
from areal.utils import stats_tracker
1210
from areal.utils.hf_utils import load_hf_tokenizer
1311
from areal.workflow import RLVRWorkflow
1412

@@ -22,7 +20,7 @@ def __init__(self, *args, **kwargs):
2220

2321
def _export_and_commit_stats(self, epoch, epoch_step, global_step):
2422
# Collect stats before committing
25-
stats = stats_tracker.export_all(reduce_group=self.actor.data_parallel_group)
23+
stats = self.actor.export_stats()
2624
self.rewards_history.append(stats["ppo_actor/task_reward/avg"])
2725

2826

@@ -41,14 +39,15 @@ def main() -> None:
4139
train_dataset=train_dataset,
4240
valid_dataset=None,
4341
) as trainer:
44-
workflow = RLVRWorkflow(
45-
reward_fn=gsm8k_reward_fn,
42+
workflow = RLVRWorkflow
43+
workflow_kwargs = dict(
44+
reward_fn="areal.reward.gsm8k_reward_fn",
4645
gconfig=config.gconfig,
4746
tokenizer=trainer.tokenizer,
4847
enable_thinking=False,
4948
)
5049

51-
trainer.train(workflow)
50+
trainer.train(workflow, workflow_kwargs=workflow_kwargs)
5251

5352
# Save rewards to JSON for test assertions
5453
if dist.get_rank() == 0:

tests/grpo/test_grpo.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,7 @@ def test_grpo(tmp_path: Path, backend: str, inference: str) -> None:
7272
sort_keys=False,
7373
)
7474

75-
cmd = (
76-
Command("python")
77-
.bake(m="areal.infra.launcher.local")
78-
.bake(os.path.join(base_dir, "entrypoint.py"))
79-
)
75+
cmd = Command("python").bake(os.path.join(base_dir, "entrypoint.py"))
8076

8177
cmd(
8278
f"cluster.fileroot={tmp_path}",

0 commit comments

Comments
 (0)