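Set project_name to 'benchmarking' in the benchmark YAML configs; in fast_kill_by_keyword_bash, broaden the 'alpha_auto_research' exclusion to 'alpha' and add 'pai-nohang' to the excluded substrings; switch the scripts/deploy_model.py default target and alias to Qwen3-235B-A22B-Instruct-2507; add a Benchmark API key reminder to tests/bench/README.md.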

binary-husky · binary-husky · commit 1ed958f85fe6 · 2026-04-23T11:11:41.000+08:00
diff --git a/ajet/utils/cleaner.py b/ajet/utils/cleaner.py
@@ -20,7 +20,7 @@ def kill_ray_processes():
 
 def fast_kill_by_keyword_bash(
     keyword: str,
-    exclude_substrings=["alpha_auto_research", "vscode", "benchmark", "jupyter", "supervisord", "download_model", "opencode", "opencodex"],
+    exclude_substrings=["alpha", "vscode", "benchmark", "jupyter", "supervisord", "download_model", "opencode", "opencodex", "pai-nohang"],
     grace_seconds: float = 1.0,
 ):
     """Use bash pipelines to kill processes matching keyword quickly.
diff --git a/scripts/deploy_model.py b/scripts/deploy_model.py
@@ -15,14 +15,14 @@
 parser = argparse.ArgumentParser(description="deploy Hugging Face model")
 parser.add_argument(
     "--target",
-    # default="/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen3-235B-A22B-Instruct-2507/",
-    default="/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2___5-14B-Instruct",
+    default="/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen3-235B-A22B-Instruct-2507/",
+    # default="/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2___5-14B-Instruct",
     type=str,
     help="Model path",
 )
 parser.add_argument(
     "--alias",
-    default="Qwen/Qwen2.5-14B-Instruct",
+    default="Qwen/Qwen3-235B-A22B-Instruct-2507",
     type=str,
     help="Model alias",
 )
diff --git a/tests/bench/README.md b/tests/bench/README.md
@@ -19,6 +19,8 @@ python -m pytest -s tests/bench/benchmark_countdown/execute_benchmark_countdown.
 python -m pytest -s tests/bench/benchmark_learn2ask/execute_benchmark_learn2ask.py
 python -m pytest -s tests/bench/benchmark_frozenlake/execute_benchmark_frozenlake.py
 
+
+别忘了Benchmark Api Key
 VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_math/execute_benchmark_math.py::TestBenchmarkMath::test_01_begin_verl
 VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_appworld/execute_benchmark_appworld.py::TestBenchmarkAppworld::test_01_begin_verl
 VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_countdown/execute_benchmark_countdown.py::TestBenchmarkCountdown::test_01_begin_verl
diff --git a/tests/bench/benchmark_appworld/benchmark_appworld.yaml b/tests/bench/benchmark_appworld/benchmark_appworld.yaml
@@ -1,6 +1,6 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: example_appworld
+  project_name: benchmarking
   experiment_name: "read_yaml_name"
   task_judge:
     # ✨✨✨✨ 编写并选择评价函数
diff --git a/tests/bench/benchmark_appworld/benchmark_appworld_2nodes.yaml b/tests/bench/benchmark_appworld/benchmark_appworld_2nodes.yaml
@@ -1,6 +1,6 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: example_appworld
+  project_name: benchmarking
   experiment_name: "read_yaml_name"
   task_judge:
     # ✨✨✨✨ 编写并选择评价函数
diff --git a/tests/bench/benchmark_appworld/benchmark_appworld_oai_sdk.yaml b/tests/bench/benchmark_appworld/benchmark_appworld_oai_sdk.yaml
@@ -1,6 +1,6 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: example_appworld
+  project_name: benchmarking
   task_judge:
     # ✨✨✨✨ 编写并选择评价函数
     judge_protocol: ajet.task_judge.env_service_as_judge->EnvServiceJudge
diff --git a/tests/bench/benchmark_appworldlora/benchmark_appworldlora.yaml b/tests/bench/benchmark_appworldlora/benchmark_appworldlora.yaml
@@ -1,6 +1,6 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: example_appworld_lora
+  project_name: benchmarking
   experiment_name: "read_yaml_name"
   task_judge:
     # ✨✨✨✨ 编写并选择评价函数
diff --git a/tests/bench/benchmark_countdownlora/benchmark_countdownlora.yaml b/tests/bench/benchmark_countdownlora/benchmark_countdownlora.yaml
@@ -1,6 +1,6 @@
 # ------------------ main configuration ------------------
 ajet:
-  project_name: benchmarking_lora
+  project_name: benchmarking
 
   model:
     # ✨✨✨✨ which model should be trained
diff --git a/tests/bench/benchmark_frozenlakelora/benchmark_frozenlakelora.yaml b/tests/bench/benchmark_frozenlakelora/benchmark_frozenlakelora.yaml
@@ -1,6 +1,6 @@
 # ------------------ main config ------------------
 ajet:
-  project_name: benchmarking_lora
+  project_name: benchmarking
 
   task_reader:
     type: random_dummy # ✨✨✨✨ `env_service` or `dataset_file` or `huggingface_dat_repo` or `random_dummy`
diff --git a/tests/bench/benchmark_learn2ask/benchmark_learn2ask.yaml b/tests/bench/benchmark_learn2ask/benchmark_learn2ask.yaml
@@ -1,6 +1,6 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: example_learn2ask_enhancedreward
+  project_name: benchmarking
   task_reader:
     type: jsonl_dataset_file
     jsonl_dataset_file:
diff --git a/tests/bench/benchmark_learn2asklora/benchmark_learn2asklora.yaml b/tests/bench/benchmark_learn2asklora/benchmark_learn2asklora.yaml
@@ -1,6 +1,6 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: example_learn2ask_enhancedreward_lora
+  project_name: benchmarking
   task_reader:
     type: jsonl_dataset_file
     jsonl_dataset_file:
diff --git a/tests/bench/benchmark_mathlora/benchmark_mathlora.yaml b/tests/bench/benchmark_mathlora/benchmark_mathlora.yaml
@@ -1,6 +1,6 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: benchmarking_lora
+  project_name: benchmarking
   task_reader:
     type: huggingface_dat_repo # ✨✨✨✨ `env_service` or `dataset_file` or `huggingface_dat_repo`
     huggingface_dat_repo: