Skip to content

Commit 59a4fde

Browse files
authored
Fix and improve integration tests (#1996)
* Fix docker builds for the local sandbox * Disable Raga AI e2e test as https://catalyst.raga.ai is currently down * Rename `test_tool_wrapper.py` to `test_lc_tool_wrapper.py` avoiding a pytest name clash * Update agent tests to accept a list of acceptable answers (e.g., allowing both "LLM" and "large language model") * Workflow improvements: - Replacing older models with a nemotron model - Increasing max tokens and disabling thinking to prevent truncation ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/NVIDIA/NeMo-Agent-Toolkit/blob/develop/docs/source/resources/contributing/index.md). - We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license. - Any contribution which contains commits that are not Signed-Off will not be accepted. - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
## Summary by CodeRabbit * **New Features** * N/A * **Bug Fixes** * Skipped flaky external-service test to avoid CI failures * **Chores** * Updated example configs and notebook to use a different recommended model, increase max tokens to 1024, and disable “thinking” * Adjusted CI image tag and JSON model counts * Made Docker upgrades non-interactive * **Tests** * Enhanced test utilities and fixtures to accept and validate multiple acceptable answers [![Review Change Stack](https://storage.googleapis.com/coderabbit_public_assets/review-stack-in-coderabbit-ui.svg)](https://app.coderabbit.ai/change-stack/NVIDIA/NeMo-Agent-Toolkit/pull/1996?utm_source=github_walkthrough&utm_medium=github&utm_campaign=change_stack) Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Yuchen Zhang (https://github.com/yczhang-nv) - Will Killian (https://github.com/willkill07) URL: #1996
1 parent ff245d6 commit 59a4fde

13 files changed

Lines changed: 55 additions & 30 deletions

File tree

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ test:python_tests:
134134
alias: phoenix
135135
pull_policy: if-not-present
136136

137-
- name: $CI_REGISTRY_IMAGE/nginx-rewrite-models:20260303
137+
- name: $CI_REGISTRY_IMAGE/nginx-rewrite-models:20260529
138138
alias: nginx-rewrite-models
139139
pull_policy: if-not-present
140140

ci/.nim_models_used.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
"llms": [
44
{
55
"model": "meta/llama-3.1-70b-instruct",
6-
"num_configs": 53
6+
"num_configs": 51
77
},
88
{
99
"model": "nvidia/nemotron-3-nano-30b-a3b",
10-
"num_configs": 40
10+
"num_configs": 42
1111
},
1212
{
1313
"model": "meta/llama-3.3-70b-instruct",

examples/agents/react/configs/config-reasoning.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@
1717
llms:
1818
nim_llm:
1919
_type: nim
20-
model_name: meta/llama-3.1-70b-instruct
20+
model_name: nvidia/nemotron-3-nano-30b-a3b
2121
temperature: 0.0
22-
max_tokens: 250
22+
max_tokens: 1024
23+
chat_template_kwargs:
24+
enable_thinking: false
25+
2326
nemotron_model:
2427
_type: nim
2528
model_name: nvidia/llama-3.3-nemotron-super-49b-v1

examples/agents/react/configs/config.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ llms:
1919
_type: nim
2020
model_name: nvidia/nemotron-3-nano-30b-a3b
2121
temperature: 0.0
22-
max_tokens: 250
22+
max_tokens: 1024
23+
chat_template_kwargs:
24+
enable_thinking: false
2325

2426
functions:
2527
wikipedia_search:

examples/agents/tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def fixture_question() -> str:
4343
return "What are LLMs"
4444

4545

46-
@pytest.fixture(name="answer", scope="session")
47-
def fixture_answer() -> str:
48-
return "large language model"
46+
@pytest.fixture(name="expected_answers", scope="session")
47+
def fixture_expected_answers() -> list[str]:
48+
return ["llm", "large language model"]
4949

5050

5151
@pytest.fixture(name="rewoo_data", scope="session")

examples/agents/tests/test_agents.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,14 @@ def _extract_serve_response_text(response_json: dict) -> str:
3535
return "\n".join(combined)
3636

3737

38-
def _assert_expected_answer(result: str, expected_answer: str) -> None:
38+
def _assert_expected_answer(result: str, expected_answers: str | list[str]) -> None:
3939
"""Assert that the expected answer appears in the result, normalizing whitespace and case."""
40-
normalized = ' '.join(result.split())
41-
assert expected_answer.lower() in normalized.lower(), f"Expected '{expected_answer}' in '{result}'"
40+
normalized = ' '.join(result.split()).lower()
41+
if isinstance(expected_answers, str):
42+
expected_answers = [expected_answers]
43+
44+
assert any(expected.lower() in normalized for expected in expected_answers), \
45+
f"Expected one of '{expected_answers}' in '{result}'"
4246

4347

4448
# ---------------------------------------------------------------------------
@@ -82,19 +86,21 @@ async def test_question(self, rewoo_nat_client, rewoo_data: list[dict], qa_idx:
8286

8387
@pytest.mark.integration
8488
@pytest.mark.usefixtures("openai_api_key")
85-
async def test_tool_calling_responses_api(agents_dir: Path, question: str, answer: str):
89+
async def test_tool_calling_responses_api(agents_dir: Path, question: str, expected_answers: list[str]):
8690
await run_workflow(config_file=agents_dir / "tool_calling/configs/config-responses-api.yml",
8791
question=question,
88-
expected_answer=answer)
92+
expected_answer=expected_answers)
8993

9094

9195
@pytest.mark.integration
9296
@pytest.mark.usefixtures("openai_api_key")
93-
async def test_nat_run_tool_calling_responses_api(tool_calling_responses_api_nat_client, question: str, answer: str):
97+
async def test_nat_run_tool_calling_responses_api(tool_calling_responses_api_nat_client,
98+
question: str,
99+
expected_answers: list[str]):
94100
resp = await tool_calling_responses_api_nat_client.post("/generate", json={"input_message": question})
95101
resp.raise_for_status()
96102
response_text = _extract_serve_response_text(resp.json())
97-
_assert_expected_answer(response_text, answer)
103+
_assert_expected_answer(response_text, expected_answers)
98104

99105

100106
# ---------------------------------------------------------------------------
@@ -107,23 +113,23 @@ async def test_nat_run_tool_calling_responses_api(tool_calling_responses_api_nat
107113
@pytest.mark.usefixtures("nvidia_api_key")
108114
class TestAgentNatRun:
109115

110-
async def test_question(self, agent_session_manager, question: str, answer: str):
116+
async def test_question(self, agent_session_manager, question: str, expected_answers: list[str]):
111117
async with agent_session_manager.session() as session:
112118
async with session.run(question) as runner:
113119
result = await runner.result(to_type=str)
114-
_assert_expected_answer(result, answer)
120+
_assert_expected_answer(result, expected_answers)
115121

116122

117123
@pytest.mark.slow
118124
@pytest.mark.integration
119125
@pytest.mark.usefixtures("nvidia_api_key")
120126
class TestAgentNatServe:
121127

122-
async def test_question(self, agent_nat_client, question: str, answer: str):
128+
async def test_question(self, agent_nat_client, question: str, expected_answers: list[str]):
123129
resp = await agent_nat_client.post("/generate", json={"messages": [{"role": "user", "content": question}]})
124130
resp.raise_for_status()
125131
response_text = _extract_serve_response_text(resp.json())
126-
_assert_expected_answer(response_text, answer)
132+
_assert_expected_answer(response_text, expected_answers)
127133

128134

129135
# Code examples from `docs/source/resources/running-tests.md`

examples/agents/tool_calling/configs/config-reasoning.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ llms:
1919
_type: nim
2020
model_name: nvidia/nemotron-3-nano-30b-a3b
2121
temperature: 0.0
22-
max_tokens: 250
22+
max_tokens: 1024
23+
chat_template_kwargs:
24+
enable_thinking: false
25+
2326
reasoning_model:
2427
_type: nim
2528
model_name: nvidia/llama-3.3-nemotron-super-49b-v1

examples/agents/tool_calling/configs/config.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
llms:
1818
nim_llm:
1919
_type: nim
20-
model_name: meta/llama-3.1-70b-instruct
20+
model_name: nvidia/nemotron-3-nano-30b-a3b
2121
temperature: 0.0
22-
max_tokens: 250
22+
max_tokens: 1024
23+
chat_template_kwargs:
24+
enable_thinking: false
2325

2426
functions:
2527
wikipedia_search:

examples/notebooks/hello_world.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@
179179
" # Tell NeMo Agent Toolkit which LLM to use for the agent\n",
180180
" nim_llm:\n",
181181
" _type: nim\n",
182-
" model_name: meta/llama-3.1-70b-instruct\n",
182+
" model_name: nvidia/nemotron-3-nano-30b-a3b\n",
183183
" temperature: 0.0\n",
184184
"\n",
185185
"workflow:\n",

examples/observability/simple_calculator_observability/tests/test_simple_calc_observability.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ async def test_galileo_full_workflow(config_dir: Path,
242242
assert len(spans.records) > 1
243243

244244

245+
@pytest.mark.skip(reason="https://catalyst.raga.ai appears to be having issues")
245246
@pytest.mark.integration
246247
@pytest.mark.usefixtures("catalyst_keys", "aiq_compatibility_span_prefix")
247248
async def test_catalyst_full_workflow(config_dir: Path,

0 commit comments

Comments
 (0)