From 3f9aeb33c10fa6f87308a4982ddeedfdd87136b5 Mon Sep 17 00:00:00 2001
From: Christian Munley <cmunley@nvidia.com>
Date: Fri, 17 Apr 2026 00:32:39 -0700
Subject: [PATCH 1/7] Include agent environments in README environment list

Signed-off-by: Christian Munley <cmunley@nvidia.com>
---
 .pre-commit-config.yaml                       |   6 +-
 README.md                                     |   9 +-
 nemo_gym/server_metadata.py                   |  13 +-
 ...resource_servers.py => update_env_list.py} | 150 +++++++++++-------
 4 files changed, 113 insertions(+), 65 deletions(-)
 rename scripts/{update_resource_servers.py => update_env_list.py} (75%)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e520b3c78..a97686ae3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -58,8 +58,8 @@ repos:
         files: ^resources_servers/.*/configs/.*\.yaml$
         pass_filenames: true
       - id: update-readme-table
-        name: Update resources server list in README
+        name: Update environment list in README
         language: python
-        entry: python scripts/update_resource_servers.py
+        entry: python scripts/update_env_list.py
         additional_dependencies: [pyyaml]
-        files: ^README\.md$|^resources_servers/.*/configs/.*\.yaml$
+        files: ^README\.md$|^resources_servers/.*/configs/.*\.yaml$|^responses_api_agents/.*/configs/.*\.yaml$
diff --git a/README.md b/README.md
index 5fca1bf18..2d945506f 100644
--- a/README.md
+++ b/README.md
@@ -151,7 +151,7 @@ Each resources server includes example data, configuration files, and tests. See
 The Dataset column links to publicly available datasets (e.g., on HuggingFace). A `-` means the train/validation data has not been publicly released yet, or that it is procedurally generated using a provided script. If no data is released yet, new data can be generated, or the environment can be used as a reference. Each server includes 5 example tasks in `data/example.jsonl`.
 
 <!-- START_TRAINING_SERVERS_TABLE -->
-| Resources Server                              | Domain                | Description                                                                                                                                                                                                                  | Value                                                                                                                        | Train | Validation | License                                                   | Config                                                                                                                                                                                                                      | Dataset                                                                                                                                                        |
+| Environment                                   | Domain                | Description                                                                                                                                                                                                                  | Value                                                                                                                        | Train | Validation | License                                                   | Config                                                                                                                                                                                                                      | Dataset                                                                                                                                                        |
 | --------------------------------------------- | --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ----- | ---------- | --------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | Aalcr                                         | other                 | -                                                                                                                                                                                                                            | -                                                                                                                            | -     | -          | -                                                         | <a href='resources_servers/aalcr/configs/aalcr.yaml'>aalcr.yaml</a>                                                                                                                                                         | -                                                                                                                                                              |
 | Abstention                                    | rlhf                  | Train models to abstain when unsure using three-tier reward on HotPotQA with LLM judge                                                                                                                                       | Improve calibration by rewarding abstention over incorrect answers                                                           | ✓     | ✓          | Creative Commons Attribution-ShareAlike 4.0 International | <a href='resources_servers/abstention/configs/abstention.yaml'>abstention.yaml</a>                                                                                                                                          | -                                                                                                                                                              |
@@ -174,6 +174,7 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Genrm Compare                                 | rlhf                  | GenRM pairwise comparison for RLHF training                                                                                                                                                                                  | Compare multiple candidate responses using GenRM model                                                                       | -     | -          | -                                                         | <a href='resources_servers/genrm_compare/configs/genrm_compare.yaml'>genrm_compare.yaml</a>                                                                                                                                 | -                                                                                                                                                              |
 | Google Search                                 | agent                 | Multi-choice question answering problems with search tools integrated                                                                                                                                                        | Improve knowledge-related benchmarks with search tools                                                                       | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/google_search/configs/google_search.yaml'>google_search.yaml</a>                                                                                                                                 | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-knowledge-web_search-mcqa'>Nemotron-RL-knowledge-web_search-mcqa</a>                               |
 | Gpqa Diamond                                  | knowledge             | GPQA Diamond multiple-choice question answering problems                                                                                                                                                                     | Evaluate graduate-level scientific reasoning via MCQ verification                                                            | ✓     | -          | MIT                                                       | <a href='resources_servers/gpqa_diamond/configs/gpqa_diamond.yaml'>gpqa_diamond.yaml</a>                                                                                                                                    | -                                                                                                                                                              |
+| Harbor Agent                                  |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | -          | -                                                         | <a href='responses_api_agents/harbor_agent/configs/harbor_agent.yaml'>harbor_agent.yaml</a>                                                                                                                                 | -                                                                                                                                                              |
 | Ifbench                                       | instruction_following | IFBench instruction following evaluation using AllenAI's IFBench library (57 instruction types)                                                                                                                              | Improve IFBench instruction following                                                                                        | -     | -          | -                                                         | <a href='resources_servers/ifbench/configs/ifbench.yaml'>ifbench.yaml</a>                                                                                                                                                   | -                                                                                                                                                              |
 | Indirect Prompt Injection                     | safety                | Indirect prompt injection resistance for multi-domain tool-use agents                                                                                                                                                        | Improve agentic security by teaching robustness against tool outputs containing malicious instructions                       | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/indirect_prompt_injection/configs/indirect_prompt_injection.yaml'>indirect_prompt_injection.yaml</a>                                                                                             | -                                                                                                                                                              |
 | Instruction Following                         | instruction_following | Instruction following datasets targeting IFEval and IFBench style instruction following capabilities                                                                                                                         | Improve IFEval and IFBench                                                                                                   | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/instruction_following/configs/instruction_following.yaml'>instruction_following.yaml</a>                                                                                                         | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-instruction_following'>Nemotron-RL-instruction_following</a>                                       |
@@ -219,8 +220,13 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Structured Outputs                            | instruction_following | Check if responses are following structured output requirements in prompts                                                                                                                                                   | Improve instruction following capabilities                                                                                   | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/structured_outputs/configs/structured_outputs_json.yaml'>structured_outputs_json.yaml</a>                                                                                                        | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-instruction_following-structured_outputs'>Nemotron-RL-instruction_following-structured_outputs</a> |
 | Structured Outputs                            | instruction_following | Check if responses are following structured output requirements in prompts                                                                                                                                                   | Improve instruction following capabilities                                                                                   | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/structured_outputs/configs/structured_outputs_json_yaml_xml_v1.yaml'>structured_outputs_json_yaml_xml_v1.yaml</a>                                                                                | -                                                                                                                                                              |
 | Structured Outputs                            | instruction_following | Check if responses follow structured output requirements (JSON, YAML, XML, TOML, CSV). Created 20260409.                                                                                                                     | Improve schema adherence across all structured output formats                                                                | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/structured_outputs/configs/structured_outputs_v3.yaml'>structured_outputs_v3.yaml</a>                                                                                                            | -                                                                                                                                                              |
+| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_multi_tools.yaml'>swebench_multi_tools.yaml</a>                                                                                                                   | -                                                                                                                                                              |
+| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_openhands.yaml'>swebench_openhands.yaml</a>                                                                                                                       | -                                                                                                                                                              |
+| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml'>swebench_openhands_training.yaml</a>                                                                                                     | -                                                                                                                                                              |
+| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | MIT                                                       | <a href='responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml'>swebench_swe_agent.yaml</a>                                                                                                                       | -                                                                                                                                                              |
 | Swerl Gen                                     | coding                | Running sandboxed evaluation for SWE-style tasks (either patch generation or reproduction test generation)                                                                                                                   | Improve SWE capabilities useful for benchmarks like SWE-bench                                                                | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/swerl_gen/configs/swerl_gen.yaml'>swerl_gen.yaml</a>                                                                                                                                             | -                                                                                                                                                              |
 | Swerl Llm Judge                               | coding                | SWE-style multiple-choice LLM-judge tasks scored via <solution>...</solution> choice.                                                                                                                                        | Improve SWE capabilities useful for benchmarks like SWE-bench                                                                | ✓     | ✓          | MIT                                                       | <a href='resources_servers/swerl_llm_judge/configs/swerl_llm_judge.yaml'>swerl_llm_judge.yaml</a>                                                                                                                           | -                                                                                                                                                              |
+| Tau2                                          |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | -     | -          | -                                                         | <a href='responses_api_agents/tau2/configs/tau2_agent.yaml'>tau2_agent.yaml</a>                                                                                                                                             | -                                                                                                                                                              |
 | Tavily Search                                 | agent                 | Model uses search tools to satisfy a user query.                                                                                                                                                                             | Measure agentic search capability                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/tavily_search/configs/tavily_search_judge_vllm_model.yaml'>tavily_search_judge_vllm_model.yaml</a>                                                                                               | -                                                                                                                                                              |
 | Terminal Multi Harness                        | agent                 | Agent006 harness structured-action verifier for next-step pivot RL.                                                                                                                                                          | -                                                                                                                            | -     | -          | -                                                         | <a href='resources_servers/terminal_multi_harness/configs/terminal_multi_harness_agent006.yaml'>terminal_multi_harness_agent006.yaml</a>                                                                                    | -                                                                                                                                                              |
 | Terminal Multi Harness                        | agent                 | Codex harness structured-action verifier for next-step pivot RL.                                                                                                                                                             | -                                                                                                                            | -     | -          | -                                                         | <a href='resources_servers/terminal_multi_harness/configs/terminal_multi_harness_codex.yaml'>terminal_multi_harness_codex.yaml</a>                                                                                          | -                                                                                                                                                              |
@@ -229,6 +235,7 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Terminus Judge                                | agent                 | single-step terminal based task (simple judge prompt)                                                                                                                                                                        | Improve on terminal-style tasks                                                                                              | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/terminus_judge/configs/terminus_judge_simple.yaml'>terminus_judge_simple.yaml</a>                                                                                                                | -                                                                                                                                                              |
 | Terminus Judge                                | agent                 | single-step terminal based task (string similarity only)                                                                                                                                                                     | Improve on terminal-style tasks                                                                                              | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/terminus_judge/configs/terminus_judge_string_only.yaml'>terminus_judge_string_only.yaml</a>                                                                                                      | -                                                                                                                                                              |
 | Text To Sql                                   | coding                | Text-to-SQL generation with LLM-as-a-judge equivalence checking                                                                                                                                                              | Improve text-to-SQL capabilities across multiple dialects                                                                    | -     | -          | -                                                         | <a href='resources_servers/text_to_sql/configs/text_to_sql.yaml'>text_to_sql.yaml</a>                                                                                                                                       | -                                                                                                                                                              |
+| Verifiers Agent                               |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | -          | -                                                         | <a href='responses_api_agents/verifiers_agent/configs/acereason-math.yaml'>acereason-math.yaml</a>                                                                                                                          | -                                                                                                                                                              |
 | Vlm Eval Kit                                  | other                 | -                                                                                                                                                                                                                            | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/MMBench_DEV_EN_V11.yaml'>MMBench_DEV_EN_V11.yaml</a>                                                                                                                        | -                                                                                                                                                              |
 | Vlm Eval Kit                                  | other                 | -                                                                                                                                                                                                                            | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/OCRBench.yaml'>OCRBench.yaml</a>                                                                                                                                            | -                                                                                                                                                              |
 | Vlm Eval Kit                                  | other                 | Run all supported VLMEvalKit benchmarks.                                                                                                                                                                                     | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/vlm_eval_kit.yaml'>vlm_eval_kit.yaml</a>                                                                                                                                    | -                                                                                                                                                              |
diff --git a/nemo_gym/server_metadata.py b/nemo_gym/server_metadata.py
index ce084c36a..f560b484d 100644
--- a/nemo_gym/server_metadata.py
+++ b/nemo_gym/server_metadata.py
@@ -39,6 +39,15 @@ def to_dict(self) -> dict[str, str | bool | None]:  # pragma: no cover
 
 def visit_resources_server(data: dict, level: int = 1) -> ResourcesServerMetadata:  # pragma: no cover
     """Extract resources server metadata from YAML data."""
+    return _visit_server(data, "resources_servers", level)
+
+
+def visit_agent_server(data: dict, level: int = 1) -> ResourcesServerMetadata:  # pragma: no cover
+    """Extract agent server metadata from YAML data."""
+    return _visit_server(data, "responses_api_agents", level)
+
+
+def _visit_server(data: dict, server_type_key: str, level: int = 1) -> ResourcesServerMetadata:  # pragma: no cover
     resource = ResourcesServerMetadata()
     if level == 4:
         resource.domain = data.get("domain")
@@ -49,7 +58,7 @@ def visit_resources_server(data: dict, level: int = 1) -> ResourcesServerMetadat
         return resource
     elif isinstance(data, dict):
         for k, v in data.items():
-            if level == 2 and k != "resources_servers":
+            if level == 2 and k != server_type_key:
                 continue
-            return visit_resources_server(v, level + 1)
+            return _visit_server(v, server_type_key, level + 1)
     return resource
diff --git a/scripts/update_resource_servers.py b/scripts/update_env_list.py
similarity index 75%
rename from scripts/update_resource_servers.py
rename to scripts/update_env_list.py
index 261fbee3b..12f2be966 100644
--- a/scripts/update_resource_servers.py
+++ b/scripts/update_env_list.py
@@ -25,12 +25,13 @@
 
 import yaml
 
-from nemo_gym.server_metadata import ResourcesServerMetadata, visit_resources_server
+from nemo_gym.server_metadata import ResourcesServerMetadata, visit_agent_server, visit_resources_server
 
 
 README_PATH = Path("README.md")
 
-TARGET_FOLDER = Path("resources_servers")
+RESOURCES_SERVERS_FOLDER = Path("resources_servers")
+RESPONSES_API_AGENTS_FOLDER = Path("responses_api_agents")
 
 
 @dataclass
@@ -91,6 +92,7 @@ class ServerInfo:
     config_filename: str
     readme_path: str
     yaml_file: Path
+    base_folder: str = "resources_servers"
 
     @property
     def huggingface_repo_id(self) -> str | None:  # pragma: no cover
@@ -155,27 +157,48 @@ def get_readme_link(self) -> str:  # pragma: no cover
 
 def visit_agent_datasets(data: dict) -> AgentDatasetsMetadata:  # pragma: no cover
     agent = AgentDatasetsMetadata()
-    for k1, v1 in data.items():
-        if k1.endswith("_agent") and isinstance(v1, dict):
-            v2 = v1.get("responses_api_agents")
-            if isinstance(v2, dict):
-                # Look for any agent key
-                for agent_key, v3 in v2.items():
-                    if isinstance(v3, dict):
-                        datasets = v3.get("datasets")
-                        if isinstance(datasets, list):
-                            for entry in datasets:
-                                if isinstance(entry, dict):
-                                    agent.types.append(entry.get("type"))
-                                    if entry.get("type") == "train":
-                                        agent.license = entry.get("license")
-                                        hf_id = entry.get("huggingface_identifier")
-                                        if hf_id and isinstance(hf_id, dict):
-                                            agent.huggingface_repo_id = hf_id.get("repo_id")
+    if not isinstance(data, dict):
+        return agent
+    for v1 in data.values():
+        if not isinstance(v1, dict):
+            continue
+        v2 = v1.get("responses_api_agents")
+        if not isinstance(v2, dict):
+            continue
+        for v3 in v2.values():
+            if not isinstance(v3, dict):
+                continue
+            datasets = v3.get("datasets")
+            if isinstance(datasets, list):
+                for entry in datasets:
+                    if isinstance(entry, dict):
+                        agent.types.append(entry.get("type"))
+                        if entry.get("type") == "train":
+                            agent.license = entry.get("license")
+                            hf_id = entry.get("huggingface_identifier")
+                            if hf_id and isinstance(hf_id, dict):
+                                agent.huggingface_repo_id = hf_id.get("repo_id")
+            elif v3.get("harbor_datasets") or v3.get("vf_env_id"):
+                agent.types.append("train")
     return agent
 
 
-def extract_config_metadata(yaml_path: Path) -> ConfigMetadata:  # pragma: no cover
+def agent_has_resources_server_ref(data: dict) -> bool:  # pragma: no cover
+    if not isinstance(data, dict):
+        return False
+    for v1 in data.values():
+        if not isinstance(v1, dict):
+            continue
+        v2 = v1.get("responses_api_agents")
+        if not isinstance(v2, dict):
+            continue
+        for v3 in v2.values():
+            if isinstance(v3, dict) and v3.get("resources_server"):
+                return True
+    return False
+
+
+def extract_config_metadata(yaml_path: Path, from_agent: bool = False) -> ConfigMetadata:  # pragma: no cover
     """
     Domain:
         {name}_resources_server:
@@ -203,7 +226,7 @@ def extract_config_metadata(yaml_path: Path) -> ConfigMetadata:  # pragma: no co
     with yaml_path.open() as f:
         data = yaml.safe_load(f)
 
-    resource_data = visit_resources_server(data)
+    resource_data = visit_agent_server(data) if from_agent else visit_resources_server(data)
     agent_data = visit_agent_datasets(data)
 
     return ConfigMetadata.from_yaml_data(resource_data, agent_data)
@@ -214,47 +237,56 @@ def get_example_and_training_server_info() -> tuple[list[ServerInfo], list[Serve
     example_only_servers = []
     training_servers = []
 
-    for subdir in TARGET_FOLDER.iterdir():
-        if not subdir.is_dir():
-            continue
-
-        configs_folder = subdir / "configs"
-        if not (configs_folder.exists() and configs_folder.is_dir()):
-            continue
-
-        yaml_files = list(configs_folder.glob("*.yaml"))
-        if not yaml_files:
-            continue
-
-        for yaml_file in yaml_files:
-            yaml_data = extract_config_metadata(yaml_file)
-            if not yaml_data.types:
+    for base_folder in (RESOURCES_SERVERS_FOLDER, RESPONSES_API_AGENTS_FOLDER):
+        from_agent = base_folder == RESPONSES_API_AGENTS_FOLDER
+        for subdir in base_folder.iterdir():
+            if not subdir.is_dir():
                 continue
 
-            server_name = subdir.name
-            is_example_only = server_name.startswith("example_")
-
-            display_name = (
-                (server_name[len("example_") :] if is_example_only else server_name).replace("_", " ").title()
-            )
-
-            config_path = f"{TARGET_FOLDER.name}/{server_name}/configs/{yaml_file.name}"
-            readme_path = f"{TARGET_FOLDER.name}/{server_name}/README.md"
+            configs_folder = subdir / "configs"
+            if not (configs_folder.exists() and configs_folder.is_dir()):
+                continue
 
-            server_info = ServerInfo(
-                name=server_name,
-                display_name=display_name,
-                config_metadata=yaml_data,
-                config_path=config_path,
-                config_filename=yaml_file.name,
-                readme_path=readme_path,
-                yaml_file=yaml_file,
-            )
+            yaml_files = list(configs_folder.glob("*.yaml"))
+            if not yaml_files:
+                continue
 
-            if is_example_only:
-                example_only_servers.append(server_info)
-            else:
-                training_servers.append(server_info)
+            for yaml_file in yaml_files:
+                if from_agent:
+                    with yaml_file.open() as f:
+                        raw = yaml.safe_load(f) or {}
+                    if agent_has_resources_server_ref(raw):
+                        continue
+
+                yaml_data = extract_config_metadata(yaml_file, from_agent=from_agent)
+                if not yaml_data.types:
+                    continue
+
+                server_name = subdir.name
+                is_example_only = server_name.startswith("example_")
+
+                display_name = (
+                    (server_name[len("example_") :] if is_example_only else server_name).replace("_", " ").title()
+                )
+
+                config_path = f"{base_folder.name}/{server_name}/configs/{yaml_file.name}"
+                readme_path = f"{base_folder.name}/{server_name}/README.md"
+
+                server_info = ServerInfo(
+                    name=server_name,
+                    display_name=display_name,
+                    config_metadata=yaml_data,
+                    config_path=config_path,
+                    config_filename=yaml_file.name,
+                    readme_path=readme_path,
+                    yaml_file=yaml_file,
+                    base_folder=base_folder.name,
+                )
+
+                if is_example_only:
+                    example_only_servers.append(server_info)
+                else:
+                    training_servers.append(server_info)
 
     return example_only_servers, training_servers
 
@@ -287,7 +319,7 @@ def generate_example_only_table(servers: list[ServerInfo]) -> str:  # pragma: no
 def generate_training_table(servers: list[ServerInfo]) -> str:  # pragma: no cover
     """Generate table for training resources servers."""
     col_names = [
-        "Resources Server",
+        "Environment",
         "Domain",
         "Description",
         "Value",

From 0454bd525d039238a670cf74bb9526af4406c83e Mon Sep 17 00:00:00 2001
From: cmunley1 <cmunley@nvidia.com>
Date: Thu, 23 Apr 2026 12:24:16 -0700
Subject: [PATCH 2/7] rename servrmetadata

Signed-off-by: cmunley1 <cmunley@nvidia.com>
---
 nemo_gym/server_metadata.py | 12 ++++++------
 scripts/update_env_list.py  |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/nemo_gym/server_metadata.py b/nemo_gym/server_metadata.py
index f560b484d..ee41c5410 100644
--- a/nemo_gym/server_metadata.py
+++ b/nemo_gym/server_metadata.py
@@ -17,8 +17,8 @@
 
 
 @dataclass
-class ResourcesServerMetadata:
-    """Metadata extracted from resources server YAML config."""
+class ServerMetadata:
+    """Metadata extracted from a resources-server or agent-server YAML config."""
 
     domain: Optional[str] = None
     description: Optional[str] = None
@@ -37,18 +37,18 @@ def to_dict(self) -> dict[str, str | bool | None]:  # pragma: no cover
         }
 
 
-def visit_resources_server(data: dict, level: int = 1) -> ResourcesServerMetadata:  # pragma: no cover
+def visit_resources_server(data: dict, level: int = 1) -> ServerMetadata:  # pragma: no cover
     """Extract resources server metadata from YAML data."""
     return _visit_server(data, "resources_servers", level)
 
 
-def visit_agent_server(data: dict, level: int = 1) -> ResourcesServerMetadata:  # pragma: no cover
+def visit_agent_server(data: dict, level: int = 1) -> ServerMetadata:  # pragma: no cover
     """Extract agent server metadata from YAML data."""
     return _visit_server(data, "responses_api_agents", level)
 
 
-def _visit_server(data: dict, server_type_key: str, level: int = 1) -> ResourcesServerMetadata:  # pragma: no cover
-    resource = ResourcesServerMetadata()
+def _visit_server(data: dict, server_type_key: str, level: int = 1) -> ServerMetadata:  # pragma: no cover
+    resource = ServerMetadata()
     if level == 4:
         resource.domain = data.get("domain")
         resource.description = data.get("description")
diff --git a/scripts/update_env_list.py b/scripts/update_env_list.py
index 12f2be966..1a4ce2be3 100644
--- a/scripts/update_env_list.py
+++ b/scripts/update_env_list.py
@@ -25,7 +25,7 @@
 
 import yaml
 
-from nemo_gym.server_metadata import ResourcesServerMetadata, visit_agent_server, visit_resources_server
+from nemo_gym.server_metadata import ServerMetadata, visit_agent_server, visit_resources_server
 
 
 README_PATH = Path("README.md")
@@ -66,7 +66,7 @@ class ConfigMetadata:
 
     @classmethod
     def from_yaml_data(
-        cls, resource: ResourcesServerMetadata, agent: AgentDatasetsMetadata
+        cls, resource: ServerMetadata, agent: AgentDatasetsMetadata
     ) -> "ConfigMetadata":  # pragma: no cover
         """Combine resources server and agent datasets metadata."""
         return cls(

From 3f38fd9fa4213df035d88570515e5449b6e03f83 Mon Sep 17 00:00:00 2001
From: cmunley1 <cmunley@nvidia.com>
Date: Thu, 23 Apr 2026 12:34:36 -0700
Subject: [PATCH 3/7] readme conflict

Signed-off-by: cmunley1 <cmunley@nvidia.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 2d945506f..54bc06c01 100644
--- a/README.md
+++ b/README.md
@@ -192,6 +192,7 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Math With Judge                               | math                  | OpenMathReasoning math dataset with math-verify and LLM-as-a-judge                                                                                                                                                           | Improve math capabilities including AIME 24 / 25                                                                             | ✓     | ✓          | Creative Commons Attribution 4.0 International            | <a href='resources_servers/math_with_judge/configs/math_with_judge.yaml'>math_with_judge.yaml</a>                                                                                                                           | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-math-OpenMathReasoning'>Nemotron-RL-math-OpenMathReasoning</a>                                     |
 | Mcqa                                          | knowledge             | Multi-choice question answering problems                                                                                                                                                                                     | Improve benchmarks like MMLU / GPQA / HLE                                                                                    | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/mcqa/configs/mcqa.yaml'>mcqa.yaml</a>                                                                                                                                                            | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-knowledge-mcqa'>Nemotron-RL-knowledge-mcqa</a>                                                     |
 | Mini Swe Agent                                | coding                | A software development with mini-swe-agent orchestration                                                                                                                                                                     | Improve software development capabilities, like SWE-bench                                                                    | ✓     | ✓          | MIT                                                       | <a href='resources_servers/mini_swe_agent/configs/mini_swe_agent.yaml'>mini_swe_agent.yaml</a>                                                                                                                              | <a href='https://huggingface.co/datasets/SWE-Gym/SWE-Gym'>SWE-Gym</a>                                                                                          |
+| Mini Swe Agent                                |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | MIT                                                       | <a href='responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml'>mini_swe_agent.yaml</a>                                                                                                                           | <a href='https://huggingface.co/datasets/SWE-Gym/SWE-Gym'>SWE-Gym</a>                                                                                          |
 | Multichallenge                                | knowledge             | Targets inference memory, instruction retention, version editing, and self-coherence.                                                                                                                                        | Improve complex multi-turn conversational capability                                                                         | ✓     | -          | Creative Commons Attribution 4.0 International            | <a href='resources_servers/multichallenge/configs/multichallenge_nrl.yaml'>multichallenge_nrl.yaml</a>                                                                                                                      | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-Instruction-Following-MultiTurnChat-v1'>Nemotron-RL-Instruction-Following-MultiTurnChat-v1</a>     |
 | Newton Bench                                  | math                  | Scientific law discovery tasks through agentic experimentation across 12 physics domains                                                                                                                                     | Improve science, reasoning, and tool use capabilities                                                                        | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/newton_bench/configs/newton_bench.yaml'>newton_bench.yaml</a>                                                                                                                                    | -                                                                                                                                                              |
 | Ns Tools                                      | agent                 | NeMo Skills tool execution with math verification                                                                                                                                                                            | -                                                                                                                            | -     | -          | -                                                         | <a href='resources_servers/ns_tools/configs/ns_tools.yaml'>ns_tools.yaml</a>                                                                                                                                                | -                                                                                                                                                              |

From 70e3763258de33a0056ebb6d80d09e682a682e32 Mon Sep 17 00:00:00 2001
From: cmunley1 <cmunley@nvidia.com>
Date: Thu, 23 Apr 2026 13:08:28 -0700
Subject: [PATCH 4/7] readme

Signed-off-by: cmunley1 <cmunley@nvidia.com>
---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index c3e946954..437bcacd8 100644
--- a/README.md
+++ b/README.md
@@ -197,7 +197,6 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Math With Judge                               | math                  | MathStackOverflow math dataset with math-verify                                                                                                                                                                              | Improve math capabilities including AIME 24 / 25                                                                             | ✓     | ✓          | Creative Commons Attribution-ShareAlike 4.0 International | <a href='resources_servers/math_with_judge/configs/math_stack_overflow.yaml'>math_stack_overflow.yaml</a>                                                                                                                   | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-math-stack_overflow'>Nemotron-RL-math-stack_overflow</a>                                           |
 | Math With Judge                               | math                  | OpenMathReasoning math dataset with math-verify and LLM-as-a-judge                                                                                                                                                           | Improve math capabilities including AIME 24 / 25                                                                             | ✓     | ✓          | Creative Commons Attribution 4.0 International            | <a href='resources_servers/math_with_judge/configs/math_with_judge.yaml'>math_with_judge.yaml</a>                                                                                                                           | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-math-OpenMathReasoning'>Nemotron-RL-math-OpenMathReasoning</a>                                     |
 | Mcqa                                          | knowledge             | Multi-choice question answering problems                                                                                                                                                                                     | Improve benchmarks like MMLU / GPQA / HLE                                                                                    | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/mcqa/configs/mcqa.yaml'>mcqa.yaml</a>                                                                                                                                                            | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-knowledge-mcqa'>Nemotron-RL-knowledge-mcqa</a>                                                     |
-| Mini Swe Agent                                | coding                | A software development with mini-swe-agent orchestration                                                                                                                                                                     | Improve software development capabilities, like SWE-bench                                                                    | ✓     | ✓          | MIT                                                       | <a href='resources_servers/mini_swe_agent/configs/mini_swe_agent.yaml'>mini_swe_agent.yaml</a>                                                                                                                              | <a href='https://huggingface.co/datasets/SWE-Gym/SWE-Gym'>SWE-Gym</a>                                                                                          |
 | Mini Swe Agent                                |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | MIT                                                       | <a href='responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml'>mini_swe_agent.yaml</a>                                                                                                                           | <a href='https://huggingface.co/datasets/SWE-Gym/SWE-Gym'>SWE-Gym</a>                                                                                          |
 | Mrcr                                          | other                 | Multi-round coreference resolution over multi-turn conversations with prefix-gated SequenceMatcher grading                                                                                                                   | Improve long-context in-context retrieval and needle-count-aware reasoning                                                   | -     | -          | -                                                         | <a href='resources_servers/mrcr/configs/mrcr.yaml'>mrcr.yaml</a>                                                                                                                                                            | -                                                                                                                                                              |
 | Multichallenge                                | knowledge             | Targets inference memory, instruction retention, version editing, and self-coherence.                                                                                                                                        | Improve complex multi-turn conversational capability                                                                         | ✓     | -          | Creative Commons Attribution 4.0 International            | <a href='resources_servers/multichallenge/configs/multichallenge_nrl.yaml'>multichallenge_nrl.yaml</a>                                                                                                                      | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-Instruction-Following-MultiTurnChat-v1'>Nemotron-RL-Instruction-Following-MultiTurnChat-v1</a>     |

From d5f5896ad43371edf09eea29edb1c24d61830437 Mon Sep 17 00:00:00 2001
From: cmunley1 <cmunley@nvidia.com>
Date: Thu, 23 Apr 2026 14:21:53 -0700
Subject: [PATCH 5/7] agent desc and val

Signed-off-by: cmunley1 <cmunley@nvidia.com>
---
 responses_api_agents/harbor_agent/configs/harbor_agent.yaml  | 3 +++
 .../mini_swe_agent/configs/mini_swe_agent.yaml               | 3 +++
 .../swe_agents/configs/swebench_multi_tools.yaml             | 5 ++++-
 .../swe_agents/configs/swebench_openhands.yaml               | 5 ++++-
 .../swe_agents/configs/swebench_openhands_training.yaml      | 3 +++
 .../swe_agents/configs/swebench_swe_agent.yaml               | 5 ++++-
 responses_api_agents/tau2/configs/tau2_agent.yaml            | 3 +++
 .../verifiers_agent/configs/acereason-math.yaml              | 3 +++
 8 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/responses_api_agents/harbor_agent/configs/harbor_agent.yaml b/responses_api_agents/harbor_agent/configs/harbor_agent.yaml
index 12cfb6bf1..74548a8e3 100644
--- a/responses_api_agents/harbor_agent/configs/harbor_agent.yaml
+++ b/responses_api_agents/harbor_agent/configs/harbor_agent.yaml
@@ -3,6 +3,9 @@ harbor_agent:
     harbor_agent:
       # Python module entrypoint loaded by NeMo Gym.
       entrypoint: app.py
+      domain: agent
+      description: Harbor integration for agent harnesses and environments.
+      value: Improve models in popular agentic environments supported by Harbor such as Terminus2.
       # Max concurrent requests handled by this agent server process.
       concurrency: 50
 
diff --git a/responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml b/responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml
index 7945408e4..ce48dba01 100644
--- a/responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml
+++ b/responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml
@@ -2,6 +2,9 @@ mini_swe_simple_agent:
   responses_api_agents:
     mini_swe_agent:
       entrypoint: app.py
+      domain: coding
+      description: Software engineering tasks driven by the mini-swe-agent harness.
+      value: Improve agentic software engineering capabilities.
       model_server:
         type: responses_api_models
         name: policy_model
diff --git a/responses_api_agents/swe_agents/configs/swebench_multi_tools.yaml b/responses_api_agents/swe_agents/configs/swebench_multi_tools.yaml
index dd8335940..3c5fb8314 100644
--- a/responses_api_agents/swe_agents/configs/swebench_multi_tools.yaml
+++ b/responses_api_agents/swe_agents/configs/swebench_multi_tools.yaml
@@ -6,7 +6,10 @@ swe_agents:
   responses_api_agents:
     swe_agents: &swe_agents_config
       entrypoint: app.py
-      
+      domain: coding
+      description: Software engineering tasks with OpenHands agent harness.
+      value: Improve agentic software engineering capabilities.
+
       # Agent framework configuration
       agent_framework: openhands
       agent_config: responses_api_agents/swe_agents/configs/oh_config.toml
diff --git a/responses_api_agents/swe_agents/configs/swebench_openhands.yaml b/responses_api_agents/swe_agents/configs/swebench_openhands.yaml
index 0320d1d06..ca0f4fda1 100644
--- a/responses_api_agents/swe_agents/configs/swebench_openhands.yaml
+++ b/responses_api_agents/swe_agents/configs/swebench_openhands.yaml
@@ -3,7 +3,10 @@ swe_agents:
   responses_api_agents:
     swe_agents:
       entrypoint: app.py
-      
+      domain: coding
+      description: SWE-bench driven by the OpenHands agent framework.
+      value: Eval software engineering capabilities on SWE-bench.
+
       # Agent framework configuration
       agent_framework: openhands
       agent_config: responses_api_agents/swe_agents/configs/oh_config.toml
diff --git a/responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml b/responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml
index 466bdfc91..90527703c 100644
--- a/responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml
+++ b/responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml
@@ -3,6 +3,9 @@ swe_agents_train:
   responses_api_agents:
     swe_agents:
       entrypoint: app.py
+      domain: coding
+      description: Software engineering tasks with OpenHands agent harness.
+      value: Improve agentic software engineering capabilities.
       # Agent framework configuration
       agent_framework: openhands
       agent_config: responses_api_agents/swe_agents/configs/oh_config.toml
diff --git a/responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml b/responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml
index ac592d62c..22d053549 100644
--- a/responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml
+++ b/responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml
@@ -3,7 +3,10 @@ swe_agents:
   responses_api_agents:
     swe_agents:
       entrypoint: app.py
-      
+      domain: coding
+      description: Software engineering tasks with SWE-agent harness.
+      value: Improve agentic software engineering capabilities.
+
       # Agent framework configuration
       agent_framework: swe_agent
       agent_config: configs/swe_agent_config.yaml
diff --git a/responses_api_agents/tau2/configs/tau2_agent.yaml b/responses_api_agents/tau2/configs/tau2_agent.yaml
index 131492697..c894c4046 100644
--- a/responses_api_agents/tau2/configs/tau2_agent.yaml
+++ b/responses_api_agents/tau2/configs/tau2_agent.yaml
@@ -2,6 +2,9 @@ tau2_agent:
   responses_api_agents:
     tau2:
       entrypoint: app.py
+      domain: agent
+      description: Tau2 benchmark integration
+      value: Evaluate multi-turn agentic capability with user simulation.
       model_server:
         type: responses_api_models
         name: policy_model
diff --git a/responses_api_agents/verifiers_agent/configs/acereason-math.yaml b/responses_api_agents/verifiers_agent/configs/acereason-math.yaml
index 7e447cbb8..d0da44dd0 100644
--- a/responses_api_agents/verifiers_agent/configs/acereason-math.yaml
+++ b/responses_api_agents/verifiers_agent/configs/acereason-math.yaml
@@ -2,6 +2,9 @@ verifiers_agent:
   responses_api_agents:
     verifiers_agent:
       entrypoint: app.py
+      domain: math
+      description: Prime Intellect verifiers and environments hub integration, ace-reason math environment example.
+      value: Improve math reasoning capabilities.
       model_server:
         type: responses_api_models
         name: policy_model

From c5b310da4c6cb6f5cdedab6864e939808ca9caa4 Mon Sep 17 00:00:00 2001
From: cmunley1 <cmunley@nvidia.com>
Date: Thu, 23 Apr 2026 14:30:01 -0700
Subject: [PATCH 6/7] readme

Signed-off-by: cmunley1 <cmunley@nvidia.com>
---
 README.md | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 437bcacd8..c7a7ab639 100644
--- a/README.md
+++ b/README.md
@@ -179,7 +179,7 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Genrm Compare                                 | rlhf                  | GenRM pairwise comparison for RLHF training                                                                                                                                                                                  | Compare multiple candidate responses using GenRM model                                                                       | -     | -          | -                                                         | <a href='resources_servers/genrm_compare/configs/genrm_compare.yaml'>genrm_compare.yaml</a>                                                                                                                                 | -                                                                                                                                                              |
 | Google Search                                 | agent                 | Multi-choice question answering problems with search tools integrated                                                                                                                                                        | Improve knowledge-related benchmarks with search tools                                                                       | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/google_search/configs/google_search.yaml'>google_search.yaml</a>                                                                                                                                 | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-knowledge-web_search-mcqa'>Nemotron-RL-knowledge-web_search-mcqa</a>                               |
 | Gpqa Diamond                                  | knowledge             | GPQA Diamond multiple-choice question answering problems                                                                                                                                                                     | Evaluate graduate-level scientific reasoning via MCQ verification                                                            | ✓     | -          | MIT                                                       | <a href='resources_servers/gpqa_diamond/configs/gpqa_diamond.yaml'>gpqa_diamond.yaml</a>                                                                                                                                    | -                                                                                                                                                              |
-| Harbor Agent                                  |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | -          | -                                                         | <a href='responses_api_agents/harbor_agent/configs/harbor_agent.yaml'>harbor_agent.yaml</a>                                                                                                                                 | -                                                                                                                                                              |
+| Harbor Agent                                  | agent                 | Harbor integration for agent harnesses and environments.                                                                                                                                                                     | Improve models in popular agentic environments supported by Harbor such as Terminus2.                                        | ✓     | -          | -                                                         | <a href='responses_api_agents/harbor_agent/configs/harbor_agent.yaml'>harbor_agent.yaml</a>                                                                                                                                 | -                                                                                                                                                              |
 | Ifbench                                       | instruction_following | IFBench instruction following evaluation using AllenAI's IFBench library (57 instruction types)                                                                                                                              | Improve IFBench instruction following                                                                                        | -     | -          | -                                                         | <a href='resources_servers/ifbench/configs/ifbench.yaml'>ifbench.yaml</a>                                                                                                                                                   | -                                                                                                                                                              |
 | Indirect Prompt Injection                     | safety                | Indirect prompt injection resistance for multi-domain tool-use agents                                                                                                                                                        | Improve agentic security by teaching robustness against tool outputs containing malicious instructions                       | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/indirect_prompt_injection/configs/indirect_prompt_injection.yaml'>indirect_prompt_injection.yaml</a>                                                                                             | -                                                                                                                                                              |
 | Instruction Following                         | instruction_following | Instruction following datasets targeting IFEval and IFBench style instruction following capabilities                                                                                                                         | Improve IFEval and IFBench                                                                                                   | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/instruction_following/configs/instruction_following.yaml'>instruction_following.yaml</a>                                                                                                         | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-instruction_following'>Nemotron-RL-instruction_following</a>                                       |
@@ -197,7 +197,7 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Math With Judge                               | math                  | MathStackOverflow math dataset with math-verify                                                                                                                                                                              | Improve math capabilities including AIME 24 / 25                                                                             | ✓     | ✓          | Creative Commons Attribution-ShareAlike 4.0 International | <a href='resources_servers/math_with_judge/configs/math_stack_overflow.yaml'>math_stack_overflow.yaml</a>                                                                                                                   | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-math-stack_overflow'>Nemotron-RL-math-stack_overflow</a>                                           |
 | Math With Judge                               | math                  | OpenMathReasoning math dataset with math-verify and LLM-as-a-judge                                                                                                                                                           | Improve math capabilities including AIME 24 / 25                                                                             | ✓     | ✓          | Creative Commons Attribution 4.0 International            | <a href='resources_servers/math_with_judge/configs/math_with_judge.yaml'>math_with_judge.yaml</a>                                                                                                                           | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-math-OpenMathReasoning'>Nemotron-RL-math-OpenMathReasoning</a>                                     |
 | Mcqa                                          | knowledge             | Multi-choice question answering problems                                                                                                                                                                                     | Improve benchmarks like MMLU / GPQA / HLE                                                                                    | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/mcqa/configs/mcqa.yaml'>mcqa.yaml</a>                                                                                                                                                            | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-knowledge-mcqa'>Nemotron-RL-knowledge-mcqa</a>                                                     |
-| Mini Swe Agent                                |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | MIT                                                       | <a href='responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml'>mini_swe_agent.yaml</a>                                                                                                                           | <a href='https://huggingface.co/datasets/SWE-Gym/SWE-Gym'>SWE-Gym</a>                                                                                          |
+| Mini Swe Agent                                | coding                | Software engineering tasks driven by mini-swe agent harness.                                                                                                                                                                 | Improve agentic software engineering capabilities.                                                                           | ✓     | ✓          | MIT                                                       | <a href='responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml'>mini_swe_agent.yaml</a>                                                                                                                           | <a href='https://huggingface.co/datasets/SWE-Gym/SWE-Gym'>SWE-Gym</a>                                                                                          |
 | Mrcr                                          | other                 | Multi-round coreference resolution over multi-turn conversations with prefix-gated SequenceMatcher grading                                                                                                                   | Improve long-context in-context retrieval and needle-count-aware reasoning                                                   | -     | -          | -                                                         | <a href='resources_servers/mrcr/configs/mrcr.yaml'>mrcr.yaml</a>                                                                                                                                                            | -                                                                                                                                                              |
 | Multichallenge                                | knowledge             | Targets inference memory, instruction retention, version editing, and self-coherence.                                                                                                                                        | Improve complex multi-turn conversational capability                                                                         | ✓     | -          | Creative Commons Attribution 4.0 International            | <a href='resources_servers/multichallenge/configs/multichallenge_nrl.yaml'>multichallenge_nrl.yaml</a>                                                                                                                      | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-Instruction-Following-MultiTurnChat-v1'>Nemotron-RL-Instruction-Following-MultiTurnChat-v1</a>     |
 | Newton Bench                                  | math                  | Scientific law discovery tasks through agentic experimentation across 12 physics domains                                                                                                                                     | Improve science, reasoning, and tool use capabilities                                                                        | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/newton_bench/configs/newton_bench.yaml'>newton_bench.yaml</a>                                                                                                                                    | -                                                                                                                                                              |
@@ -228,13 +228,13 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Structured Outputs                            | instruction_following | Check if responses are following structured output requirements in prompts                                                                                                                                                   | Improve instruction following capabilities                                                                                   | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/structured_outputs/configs/structured_outputs_json.yaml'>structured_outputs_json.yaml</a>                                                                                                        | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-instruction_following-structured_outputs'>Nemotron-RL-instruction_following-structured_outputs</a> |
 | Structured Outputs                            | instruction_following | Check if responses are following structured output requirements in prompts                                                                                                                                                   | Improve instruction following capabilities                                                                                   | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/structured_outputs/configs/structured_outputs_json_yaml_xml_v1.yaml'>structured_outputs_json_yaml_xml_v1.yaml</a>                                                                                | -                                                                                                                                                              |
 | Structured Outputs                            | instruction_following | Check if responses follow structured output requirements (JSON, YAML, XML, TOML, CSV). Created 20260409.                                                                                                                     | Improve schema adherence across all structured output formats                                                                | ✓     | -          | Apache 2.0                                                | <a href='resources_servers/structured_outputs/configs/structured_outputs_v3.yaml'>structured_outputs_v3.yaml</a>                                                                                                            | -                                                                                                                                                              |
-| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_multi_tools.yaml'>swebench_multi_tools.yaml</a>                                                                                                                   | -                                                                                                                                                              |
-| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_openhands.yaml'>swebench_openhands.yaml</a>                                                                                                                       | -                                                                                                                                                              |
-| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml'>swebench_openhands_training.yaml</a>                                                                                                     | -                                                                                                                                                              |
-| Swe Agents                                    |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | ✓          | MIT                                                       | <a href='responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml'>swebench_swe_agent.yaml</a>                                                                                                                       | -                                                                                                                                                              |
+| Swe Agents                                    | coding                | Software engineering tasks with OpenHands agent harness.                                                                                                                                                                     | Improve agentic software engineering capabilities.                                                                           | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_multi_tools.yaml'>swebench_multi_tools.yaml</a>                                                                                                                   | -                                                                                                                                                              |
+| Swe Agents                                    | coding                | Software engineering tasks with OpenHands agent harness.                                                                                                                                                                     | Improve agentic software engineering capabilities.                                                                           | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml'>swebench_openhands_training.yaml</a>                                                                                                     | -                                                                                                                                                              |
+| Swe Agents                                    | coding                | Software engineering tasks with OpenHands agent harness.                                                                                                                                                                     | Improve agentic software engineering capabilities.                                                                           | ✓     | ✓          | MIT                                                       | <a href='responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml'>swebench_swe_agent.yaml</a>                                                                                                                       | -                                                                                                                                                              |
+| Swe Agents                                    | coding                | SWE-bench driven by the OpenHands agent framework.                                                                                                                                                                           | Eval software engineering capabilities on SWE-bench.                                                                         | ✓     | ✓          | Apache 2.0                                                | <a href='responses_api_agents/swe_agents/configs/swebench_openhands.yaml'>swebench_openhands.yaml</a>                                                                                                                       | -                                                                                                                                                              |
 | Swerl Gen                                     | coding                | Running sandboxed evaluation for SWE-style tasks (either patch generation or reproduction test generation)                                                                                                                   | Improve SWE capabilities useful for benchmarks like SWE-bench                                                                | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/swerl_gen/configs/swerl_gen.yaml'>swerl_gen.yaml</a>                                                                                                                                             | -                                                                                                                                                              |
 | Swerl Llm Judge                               | coding                | SWE-style multiple-choice LLM-judge tasks scored via <solution>...</solution> choice.                                                                                                                                        | Improve SWE capabilities useful for benchmarks like SWE-bench                                                                | ✓     | ✓          | MIT                                                       | <a href='resources_servers/swerl_llm_judge/configs/swerl_llm_judge.yaml'>swerl_llm_judge.yaml</a>                                                                                                                           | -                                                                                                                                                              |
-| Tau2                                          |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | -     | -          | -                                                         | <a href='responses_api_agents/tau2/configs/tau2_agent.yaml'>tau2_agent.yaml</a>                                                                                                                                             | -                                                                                                                                                              |
+| Tau2                                          | agent                 | Tau2 benchmark integration                                                                                                                                                                                                   | Evaluate multi-turn agentic capability with user simulation.                                                                 | -     | -          | -                                                         | <a href='responses_api_agents/tau2/configs/tau2_agent.yaml'>tau2_agent.yaml</a>                                                                                                                                             | -                                                                                                                                                              |
 | Tavily Search                                 | agent                 | Model uses search tools to satisfy a user query.                                                                                                                                                                             | Measure agentic search capability                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/tavily_search/configs/tavily_search_judge_vllm_model.yaml'>tavily_search_judge_vllm_model.yaml</a>                                                                                               | -                                                                                                                                                              |
 | Terminal Multi Harness                        | agent                 | Agent006 harness structured-action verifier for next-step pivot RL.                                                                                                                                                          | -                                                                                                                            | -     | -          | -                                                         | <a href='resources_servers/terminal_multi_harness/configs/terminal_multi_harness_agent006.yaml'>terminal_multi_harness_agent006.yaml</a>                                                                                    | -                                                                                                                                                              |
 | Terminal Multi Harness                        | agent                 | Codex harness structured-action verifier for next-step pivot RL.                                                                                                                                                             | -                                                                                                                            | -     | -          | -                                                         | <a href='resources_servers/terminal_multi_harness/configs/terminal_multi_harness_codex.yaml'>terminal_multi_harness_codex.yaml</a>                                                                                          | -                                                                                                                                                              |
@@ -243,11 +243,12 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Terminus Judge                                | agent                 | single-step terminal based task (simple judge prompt)                                                                                                                                                                        | Improve on terminal-style tasks                                                                                              | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/terminus_judge/configs/terminus_judge_simple.yaml'>terminus_judge_simple.yaml</a>                                                                                                                | -                                                                                                                                                              |
 | Terminus Judge                                | agent                 | single-step terminal based task (string similarity only)                                                                                                                                                                     | Improve on terminal-style tasks                                                                                              | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/terminus_judge/configs/terminus_judge_string_only.yaml'>terminus_judge_string_only.yaml</a>                                                                                                      | -                                                                                                                                                              |
 | Text To Sql                                   | coding                | Text-to-SQL generation with LLM-as-a-judge equivalence checking                                                                                                                                                              | Improve text-to-SQL capabilities across multiple dialects                                                                    | -     | -          | -                                                         | <a href='resources_servers/text_to_sql/configs/text_to_sql.yaml'>text_to_sql.yaml</a>                                                                                                                                       | -                                                                                                                                                              |
-| Verifiers Agent                               |                       | -                                                                                                                                                                                                                            | -                                                                                                                            | ✓     | -          | -                                                         | <a href='responses_api_agents/verifiers_agent/configs/acereason-math.yaml'>acereason-math.yaml</a>                                                                                                                          | -                                                                                                                                                              |
+| Verifiers Agent                               | math                  | Prime intellect verifiers and environments hub integration, ace-reason math environment example.                                                                                                                             | Improve math reasoning capabilities.                                                                                         | ✓     | -          | -                                                         | <a href='responses_api_agents/verifiers_agent/configs/acereason-math.yaml'>acereason-math.yaml</a>                                                                                                                          | -                                                                                                                                                              |
 | Vlm Eval Kit                                  | other                 | -                                                                                                                                                                                                                            | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/MMBench_DEV_EN_V11.yaml'>MMBench_DEV_EN_V11.yaml</a>                                                                                                                        | -                                                                                                                                                              |
 | Vlm Eval Kit                                  | other                 | -                                                                                                                                                                                                                            | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/OCRBench.yaml'>OCRBench.yaml</a>                                                                                                                                            | -                                                                                                                                                              |
 | Vlm Eval Kit                                  | other                 | Run all supported VLMEvalKit benchmarks.                                                                                                                                                                                     | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/vlm_eval_kit.yaml'>vlm_eval_kit.yaml</a>                                                                                                                                    | -                                                                                                                                                              |
 | Workplace Assistant                           | agent                 | Workplace assistant multi-step tool-using environment                                                                                                                                                                        | Improve multi-step tool use capability                                                                                       | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/workplace_assistant/configs/workplace_assistant.yaml'>workplace_assistant.yaml</a>                                                                                                               | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-agent-workplace_assistant'>Nemotron-RL-agent-workplace_assistant</a>                               |
+| Workplace Assistant Gym                       | agent                 | Workplace assistant ported to GymnasiumServer (step/reset). Tool dispatch happens inside step().                                                                                                                             | Mirror of workplace_assistant exercised through the gymnasium agent loop.                                                    | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/workplace_assistant_gym/configs/workplace_assistant_gym.yaml'>workplace_assistant_gym.yaml</a>                                                                                                   | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-agent-workplace_assistant'>Nemotron-RL-agent-workplace_assistant</a>                               |
 | Xlam Fc                                       | agent                 | Salesforce xlam-function-calling-60k tool calling tasks                                                                                                                                                                      | Improve tool-calling capabilities                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/xlam_fc/configs/xlam_fc.yaml'>xlam_fc.yaml</a>                                                                                                                                                   | -                                                                                                                                                              |
 | Xstest                                        | safety                | XSTest safety benchmark - exaggerated safety (over-refusal) evaluation                                                                                                                                                       | Evaluate model safety calibration between helpfulness and harmlessness                                                       | -     | -          | -                                                         | <a href='resources_servers/xstest/configs/xstest.yaml'>xstest.yaml</a>                                                                                                                                                      | -                                                                                                                                                              |
 <!-- END_TRAINING_SERVERS_TABLE -->

From 507c4a3bda17b8f320c4127528618837db6a7dfb Mon Sep 17 00:00:00 2001
From: cmunley1 <cmunley@nvidia.com>
Date: Thu, 23 Apr 2026 15:01:00 -0700
Subject: [PATCH 7/7] readme

Signed-off-by: cmunley1 <cmunley@nvidia.com>
---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index c7a7ab639..a05a6699d 100644
--- a/README.md
+++ b/README.md
@@ -248,7 +248,6 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Vlm Eval Kit                                  | other                 | -                                                                                                                                                                                                                            | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/OCRBench.yaml'>OCRBench.yaml</a>                                                                                                                                            | -                                                                                                                                                              |
 | Vlm Eval Kit                                  | other                 | Run all supported VLMEvalKit benchmarks.                                                                                                                                                                                     | Measure VLM capabilities                                                                                                     | -     | ✓          | -                                                         | <a href='resources_servers/vlm_eval_kit/configs/vlm_eval_kit.yaml'>vlm_eval_kit.yaml</a>                                                                                                                                    | -                                                                                                                                                              |
 | Workplace Assistant                           | agent                 | Workplace assistant multi-step tool-using environment                                                                                                                                                                        | Improve multi-step tool use capability                                                                                       | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/workplace_assistant/configs/workplace_assistant.yaml'>workplace_assistant.yaml</a>                                                                                                               | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-agent-workplace_assistant'>Nemotron-RL-agent-workplace_assistant</a>                               |
-| Workplace Assistant Gym                       | agent                 | Workplace assistant ported to GymnasiumServer (step/reset). Tool dispatch happens inside step().                                                                                                                             | Mirror of workplace_assistant exercised through the gymnasium agent loop.                                                    | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/workplace_assistant_gym/configs/workplace_assistant_gym.yaml'>workplace_assistant_gym.yaml</a>                                                                                                   | <a href='https://huggingface.co/datasets/nvidia/Nemotron-RL-agent-workplace_assistant'>Nemotron-RL-agent-workplace_assistant</a>                               |
 | Xlam Fc                                       | agent                 | Salesforce xlam-function-calling-60k tool calling tasks                                                                                                                                                                      | Improve tool-calling capabilities                                                                                            | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/xlam_fc/configs/xlam_fc.yaml'>xlam_fc.yaml</a>                                                                                                                                                   | -                                                                                                                                                              |
 | Xstest                                        | safety                | XSTest safety benchmark - exaggerated safety (over-refusal) evaluation                                                                                                                                                       | Evaluate model safety calibration between helpfulness and harmlessness                                                       | -     | -          | -                                                         | <a href='resources_servers/xstest/configs/xstest.yaml'>xstest.yaml</a>                                                                                                                                                      | -                                                                                                                                                              |
 <!-- END_TRAINING_SERVERS_TABLE -->