Skip to content

Commit 7eb9a70

Browse files
committed
feat: enhance configuration for LoRA support and validation checks
1 parent 09f00fb commit 7eb9a70

File tree

6 files changed

+73
-20
lines changed

6 files changed

+73
-20
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,3 +177,4 @@ tutorial/**/*.json
177177
node_modules
178178
.agents
179179
skills-lock.json
180+
blueprint*

ajet/default_config/ajet_default.yaml

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -276,13 +276,11 @@ ajet:
276276
betas:
277277
- 0.9
278278
- 0.999
279-
clip_grad: 1.0
280279
min_lr_ratio: 0.0
281280
num_cycles: 0.5
282281
lr_scheduler_type: constant
283282
zero_indexed_step: true
284-
warmup_style: null
285-
override_optimizer_config: null
283+
grad_clip: 20.0
286284

287285
# enable KL loss regularization
288286
use_kl_loss: True
@@ -303,6 +301,13 @@ ajet:
303301
# whether to save train/eval trajectories to JSON files
304302
save_trajectory_as_json_file: False
305303

304+
lora:
305+
# LoRA configuration (disabled by default, set lora_rank > 0 to enable)
306+
lora_rank: 0
307+
lora_alpha: 16
308+
target_modules: all-linear
309+
load_format: auto
310+
306311

307312
# the experimental ZeroMQ interchange server feature that allows `tuner.as_oai_baseurl_apikey` feature
308313
enable_swarm_mode: False

ajet/default_config/verl/config_auto_convertion_verl.jsonc

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,30 @@
1111
"ajet.trainer_common.algorithm.use_kl_in_reward": "algorithm.use_kl_in_reward",
1212
"ajet.trainer_common.mini_batch_num": "actor_rollout_ref.actor.override_ppo_mini_batch_num",
1313
"ajet.trainer_common.fsdp_config": "actor_rollout_ref.actor.fsdp_config",
14-
"ajet.trainer_common.optim": "actor_rollout_ref.actor.optim",
14+
"ajet.trainer_common.optim.optimizer": "actor_rollout_ref.actor.optim.optimizer",
15+
"ajet.trainer_common.optim.optimizer_impl": "actor_rollout_ref.actor.optim.optimizer_impl",
16+
"ajet.trainer_common.optim.lr": "actor_rollout_ref.actor.optim.lr",
17+
"ajet.trainer_common.optim.lr_warmup_steps_ratio": "actor_rollout_ref.actor.optim.lr_warmup_steps_ratio",
18+
"ajet.trainer_common.optim.total_training_steps": "actor_rollout_ref.actor.optim.total_training_steps",
19+
"ajet.trainer_common.optim.weight_decay": "actor_rollout_ref.actor.optim.weight_decay",
20+
"ajet.trainer_common.optim.lr_warmup_steps": "actor_rollout_ref.actor.optim.lr_warmup_steps",
21+
"ajet.trainer_common.optim.betas": "actor_rollout_ref.actor.optim.betas",
22+
"ajet.trainer_common.optim.min_lr_ratio": "actor_rollout_ref.actor.optim.min_lr_ratio",
23+
"ajet.trainer_common.optim.num_cycles": "actor_rollout_ref.actor.optim.num_cycles",
24+
"ajet.trainer_common.optim.lr_scheduler_type": "actor_rollout_ref.actor.optim.lr_scheduler_type",
25+
"ajet.trainer_common.optim.zero_indexed_step": "actor_rollout_ref.actor.optim.zero_indexed_step",
26+
"ajet.trainer_common.optim.grad_clip": "actor_rollout_ref.actor.optim.grad_clip",
1527
"ajet.trainer_common.use_kl_loss": "actor_rollout_ref.actor.use_kl_loss",
1628
"ajet.trainer_common.kl_loss_coef": "actor_rollout_ref.actor.kl_loss_coef",
1729
"ajet.trainer_common.kl_loss_type": "actor_rollout_ref.actor.kl_loss_type",
1830
"ajet.trainer_common.ulysses_sequence_parallel_size": "actor_rollout_ref.actor.ulysses_sequence_parallel_size",
1931
"ajet.trainer_common.loss_extra_scale_ratio": "actor_rollout_ref.actor.loss_extra_scale_ratio",
2032

33+
"ajet.lora.lora_rank": "actor_rollout_ref.model.lora_rank",
34+
"ajet.lora.lora_alpha": "actor_rollout_ref.model.lora_alpha",
35+
"ajet.lora.target_modules": "actor_rollout_ref.model.target_modules",
36+
"ajet.lora.load_format": "actor_rollout_ref.rollout.load_format",
37+
2138
"ajet.trainer_common.save_freq": "trainer.save_freq",
2239
"ajet.trainer_common.test_freq": "trainer.test_freq",
2340

ajet/default_config/verl/verl_default.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -488,7 +488,6 @@ critic:
488488
betas:
489489
- 0.9
490490
- 0.999
491-
clip_grad: 1.0
492491
min_lr_ratio: 0.0
493492
num_cycles: 0.5
494493
lr_scheduler_type: constant

ajet/utils/config_utils.py

Lines changed: 38 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,6 +278,41 @@ def expand_ajet_hierarchical_config(config, write_to=None):
278278
return config_final
279279

280280

281+
def _validate_input_yaml_no_overlap_with_auto_convertion_config(input_yaml_config, config_final):
282+
"""Validate that input yaml doesn't contain keys that will be auto-converted with different values."""
283+
import json
284+
import re
285+
286+
jsonc_path = os.path.join(os.path.dirname(__file__), "..", "default_config", "verl", "config_auto_convertion_verl.jsonc")
287+
with open(jsonc_path, "r", encoding="utf-8") as f:
288+
content = f.read()
289+
content = re.sub(r'//.*', '', content)
290+
convertion_json = json.loads(content)
291+
292+
errors = []
293+
for from_key, to_keys in convertion_json.items():
294+
to_keys = to_keys if isinstance(to_keys, list) else [to_keys]
295+
for to_key in to_keys:
296+
try:
297+
input_value = _dive_to_fetch_value(input_yaml_config, to_key)
298+
except ValueError:
299+
continue
300+
final_value = _dive_to_fetch_value(config_final, to_key)
301+
if str(input_value) != str(final_value):
302+
errors.append(
303+
f" - Key '{to_key}': input_yaml value = {input_value}, "
304+
f"but ajet config sets it to = {final_value}"
305+
)
306+
307+
if errors:
308+
error_msg = (
309+
"We found a configuration conflict between AgentJet and Verl! Input yaml contains keys that conflict with ajet default config values:\n"
310+
+ "\n".join(errors)
311+
+ "\nPlease use ajet.xxx to assign training parameters instead."
312+
)
313+
raise ValueError(error_msg)
314+
315+
281316
def prepare_experiment_config(yaml_path, exp_base_dir, backbone, override_param_callback=None, storage=True):
282317
"""
283318
Prepare experiment configuration by reading YAML, setting up backup directories,
@@ -299,7 +334,7 @@ def prepare_experiment_config(yaml_path, exp_base_dir, backbone, override_param_
299334

300335
## 0. read yaml & get experiment_name
301336
with open(yaml_path, "r", encoding="utf-8") as file:
302-
config = yaml.safe_load(file)
337+
config = input_yaml_config = yaml.safe_load(file)
303338
try:
304339
exp_name = config.get("ajet").get("experiment_name")
305340
except Exception:
@@ -367,6 +402,8 @@ def prepare_experiment_config(yaml_path, exp_base_dir, backbone, override_param_
367402
)
368403
config_final = expand_ajet_hierarchical_config(config, write_to=yaml_backup_dst)
369404

405+
_validate_input_yaml_no_overlap_with_auto_convertion_config(input_yaml_config, config_final)
406+
370407
if not storage:
371408
shutil.rmtree(os.path.join(exp_base_dir, exp_name))
372409

tutorial/example_math_lora/math_agent.yaml

Lines changed: 8 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# ------------------ main configuration ------------------
22
ajet:
3-
project_name: example_math_agent
3+
project_name: test_lora
44
task_reader:
55
type: huggingface_dat_repo # ✨✨✨✨ `env_service` or `dataset_file` or `huggingface_dat_repo`
66
# effective when `type: huggingface_dat_repo`
@@ -44,35 +44,29 @@ ajet:
4444
max_prompt_length: 3000
4545
max_response_length: 7000
4646

47+
execute_test: false
48+
4749
debug:
4850
debug_max_parallel: 1
4951
debug_first_n_tasks: 1
5052

5153
trainer_common:
54+
val_print_to_markdown_file_path: /mnt/data_cpfs/qingxu.fu/autoresearch-rl/exp_result/hello-agentjet-math-lora/val_result.md
55+
train_print_to_markdown_file_path: /mnt/data_cpfs/qingxu.fu/autoresearch-rl/exp_result/hello-agentjet-math-lora/train_result.md
5256
save_freq: 100
5357
test_freq: 100
5458
total_epochs: 100
5559
logger: swanlab
5660
val_before_train: true
61+
optim:
62+
lr: 3e-05
5763

58-
actor_rollout_ref:
59-
model:
64+
lora:
6065
lora_rank: 32
6166
lora_alpha: 32
6267
target_modules: all-linear
63-
actor:
64-
optim:
65-
lr: 3e-5
66-
fsdp_config:
67-
param_offload: true
68-
optimizer_offload: true
69-
rollout:
7068
load_format: safetensors
7169

72-
trinity:
73-
synchronizer:
74-
sync_offset: 1
75-
sync_method: nccl
7670

7771

7872
# ------------------ do not modify ------------------

0 commit comments

Comments (0)