Skip to content

Commit a283d26

Browse files
authored
TEST/change gpt4, gpt4o series to gpt4.1nano (#6375)
## Why are these changes needed?

| Package | Test time-Origin (Sec) | Test time-Edited (Sec) |
|-------------------------|------------------|-----------------------------------------------|
| autogen-studio | 1.64 | 1.64 |
| autogen-core | 6.03 | 6.17 |
| autogen-ext | 387.15 | 373.40 |
| autogen-agentchat | 54.20 | 20.67 |

## Related issue number

Related #6361

## Checks

- [ ] I've included any doc changes needed for <https://microsoft.github.io/autogen/>. See <https://github.com/microsoft/autogen/blob/main/CONTRIBUTING.md> to build and test documentation locally.
- [ ] I've added tests (if relevant) corresponding to the changes introduced in this PR.
- [ ] I've made sure all auto checks have passed.
1 parent b6935f9 commit a283d26

9 files changed

Lines changed: 44 additions & 42 deletions

File tree

python/packages/autogen-agentchat/tests/test_group_chat.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ async def test_round_robin_group_chat_with_tools(runtime: AgentRuntime | None) -
455455
"TERMINATE",
456456
],
457457
model_info={
458-
"family": "gpt-4o",
458+
"family": "gpt-4.1-nano",
459459
"function_calling": True,
460460
"json_output": True,
461461
"vision": True,
@@ -1272,7 +1272,7 @@ async def test_swarm_handoff_using_tool_calls(runtime: AgentRuntime | None) -> N
12721272
"TERMINATE",
12731273
],
12741274
model_info={
1275-
"family": "gpt-4o",
1275+
"family": "gpt-4.1-nano",
12761276
"function_calling": True,
12771277
"json_output": True,
12781278
"vision": True,
@@ -1372,7 +1372,7 @@ async def test_swarm_with_parallel_tool_calls(runtime: AgentRuntime | None) -> N
13721372
"TERMINATE",
13731373
],
13741374
model_info={
1375-
"family": "gpt-4o",
1375+
"family": "gpt-4.1-nano",
13761376
"function_calling": True,
13771377
"json_output": True,
13781378
"vision": True,
@@ -1562,12 +1562,14 @@ async def test_declarative_groupchats_with_config(runtime: AgentRuntime | None)
15621562
# Create basic agents and components for testing
15631563
agent1 = AssistantAgent(
15641564
"agent_1",
1565-
model_client=OpenAIChatCompletionClient(model="gpt-4o-2024-05-13", api_key=""),
1565+
model_client=OpenAIChatCompletionClient(model="gpt-4.1-nano-2025-04-14", api_key=""),
15661566
handoffs=["agent_2"],
15671567
)
1568-
agent2 = AssistantAgent("agent_2", model_client=OpenAIChatCompletionClient(model="gpt-4o-2024-05-13", api_key=""))
1568+
agent2 = AssistantAgent(
1569+
"agent_2", model_client=OpenAIChatCompletionClient(model="gpt-4.1-nano-2025-04-14", api_key="")
1570+
)
15691571
termination = MaxMessageTermination(4)
1570-
model_client = OpenAIChatCompletionClient(model="gpt-4o-2024-05-13", api_key="")
1572+
model_client = OpenAIChatCompletionClient(model="gpt-4.1-nano-2025-04-14", api_key="")
15711573

15721574
# Test round robin - verify config is preserved
15731575
round_robin = RoundRobinGroupChat(participants=[agent1, agent2], termination_condition=termination, max_turns=5)

python/packages/autogen-agentchat/tests/test_group_chat_endpoint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ async def test_selector_group_chat_openai() -> None:
9898
pytest.skip("OPENAI_API_KEY not set in environment variables.")
9999

100100
model_client = OpenAIChatCompletionClient(
101-
model="gpt-4o-mini",
101+
model="gpt-4.1-nano",
102102
api_key=api_key,
103103
)
104104
await _test_selector_group_chat(model_client)

python/packages/autogen-core/tests/test_model_context.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ async def test_unbounded_model_context() -> None:
119119
@pytest.mark.parametrize(
120120
"model_client,token_limit",
121121
[
122-
(OpenAIChatCompletionClient(model="gpt-4o", temperature=0.0, api_key="test"), 30),
122+
(OpenAIChatCompletionClient(model="gpt-4.1-nano", temperature=0.0, api_key="test"), 30),
123123
(OllamaChatCompletionClient(model="llama3.3"), 20),
124124
],
125125
ids=["openai", "ollama"],
@@ -159,7 +159,7 @@ async def test_token_limited_model_context_with_token_limit(
159159
@pytest.mark.parametrize(
160160
"model_client",
161161
[
162-
OpenAIChatCompletionClient(model="gpt-4o", temperature=0.0, api_key="test_key"),
162+
OpenAIChatCompletionClient(model="gpt-4.1-nano", temperature=0.0, api_key="test_key"),
163163
OllamaChatCompletionClient(model="llama3.3"),
164164
],
165165
ids=["openai", "ollama"],
@@ -182,7 +182,7 @@ async def test_token_limited_model_context_without_token_limit(model_client: Cha
182182
@pytest.mark.parametrize(
183183
"model_client,token_limit",
184184
[
185-
(OpenAIChatCompletionClient(model="gpt-4o", temperature=0.0, api_key="test"), 60),
185+
(OpenAIChatCompletionClient(model="gpt-4.1-nano", temperature=0.0, api_key="test"), 60),
186186
(OllamaChatCompletionClient(model="llama3.3"), 50),
187187
],
188188
ids=["openai", "ollama"],

python/packages/autogen-ext/tests/models/test_openai_model_client.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class MockChunkEvent(BaseModel):
9595

9696

9797
async def _mock_create_stream(*args: Any, **kwargs: Any) -> AsyncGenerator[ChatCompletionChunk, None]:
98-
model = resolve_model(kwargs.get("model", "gpt-4o"))
98+
model = resolve_model(kwargs.get("model", "gpt-4.1-nano"))
9999
mock_chunks_content = ["Hello", " Another Hello", " Yet Another Hello"]
100100

101101
# The openai api implementations (OpenAI and Litellm) stream chunks of tokens
@@ -167,7 +167,7 @@ async def _mock_create_stream(*args: Any, **kwargs: Any) -> AsyncGenerator[ChatC
167167

168168
async def _mock_create(*args: Any, **kwargs: Any) -> ChatCompletion | AsyncGenerator[ChatCompletionChunk, None]:
169169
stream = kwargs.get("stream", False)
170-
model = resolve_model(kwargs.get("model", "gpt-4o"))
170+
model = resolve_model(kwargs.get("model", "gpt-4.1-nano"))
171171
if not stream:
172172
await asyncio.sleep(0.1)
173173
return ChatCompletion(
@@ -186,7 +186,7 @@ async def _mock_create(*args: Any, **kwargs: Any) -> ChatCompletion | AsyncGener
186186

187187
@pytest.mark.asyncio
188188
async def test_openai_chat_completion_client() -> None:
189-
client = OpenAIChatCompletionClient(model="gpt-4o", api_key="api_key")
189+
client = OpenAIChatCompletionClient(model="gpt-4.1-nano", api_key="api_key")
190190
assert client
191191

192192

@@ -198,7 +198,7 @@ async def test_openai_chat_completion_client_with_gemini_model() -> None:
198198

199199
@pytest.mark.asyncio
200200
async def test_openai_chat_completion_client_serialization() -> None:
201-
client = OpenAIChatCompletionClient(model="gpt-4o", api_key="sk-password")
201+
client = OpenAIChatCompletionClient(model="gpt-4.1-nano", api_key="sk-password")
202202
assert client
203203
config = client.dump_component()
204204
assert config
@@ -467,7 +467,7 @@ async def run(self, args: MyArgs, cancellation_token: CancellationToken) -> MyRe
467467

468468
@pytest.mark.asyncio
469469
async def test_json_mode(monkeypatch: pytest.MonkeyPatch) -> None:
470-
model = "gpt-4o-2024-11-20"
470+
model = "gpt-4.1-nano-2025-04-14"
471471

472472
called_args = {}
473473

@@ -562,7 +562,7 @@ class AgentResponse(BaseModel):
562562
thoughts: str
563563
response: Literal["happy", "sad", "neutral"]
564564

565-
model = "gpt-4o-2024-11-20"
565+
model = "gpt-4.1-nano-2025-04-14"
566566

567567
called_args = {}
568568

@@ -654,7 +654,7 @@ class AgentResponse(BaseModel):
654654
thoughts: str
655655
response: Literal["happy", "sad", "neutral"]
656656

657-
model = "gpt-4o-2024-11-20"
657+
model = "gpt-4.1-nano-2025-04-14"
658658

659659
async def _mock_parse(*args: Any, **kwargs: Any) -> ParsedChatCompletion[AgentResponse]:
660660
return ParsedChatCompletion(
@@ -737,7 +737,7 @@ class AgentResponse(BaseModel):
737737
thoughts: str
738738
response: Literal["happy", "sad", "neutral"]
739739

740-
model = "gpt-4o-2024-11-20"
740+
model = "gpt-4.1-nano-2025-04-14"
741741

742742
async def _mock_parse(*args: Any, **kwargs: Any) -> ParsedChatCompletion[AgentResponse]:
743743
return ParsedChatCompletion(
@@ -813,7 +813,7 @@ class AgentResponse(BaseModel):
813813
chunked_content = [raw_content[i : i + 5] for i in range(0, len(raw_content), 5)]
814814
assert "".join(chunked_content) == raw_content
815815

816-
model = "gpt-4o-2024-11-20"
816+
model = "gpt-4.1-nano-2025-04-14"
817817
mock_chunk_events = [
818818
MockChunkEvent(
819819
type="chunk",
@@ -886,7 +886,7 @@ class AgentResponse(BaseModel):
886886
chunked_content = [raw_content[i : i + 5] for i in range(0, len(raw_content), 5)]
887887
assert "".join(chunked_content) == raw_content
888888

889-
model = "gpt-4o-2024-11-20"
889+
model = "gpt-4.1-nano-2025-04-14"
890890

891891
# generate the list of mock chunk content
892892
mock_chunk_events = [
@@ -1265,7 +1265,7 @@ async def _mock_create(*args: Any, **kwargs: Any) -> ChatCompletion | AsyncGener
12651265

12661266
@pytest.mark.asyncio
12671267
async def test_tool_calling(monkeypatch: pytest.MonkeyPatch) -> None:
1268-
model = "gpt-4o-2024-05-13"
1268+
model = "gpt-4.1-nano-2025-04-14"
12691269
chat_completions = [
12701270
# Successful completion, single tool call
12711271
ChatCompletion(
@@ -1622,7 +1622,7 @@ def openai_client(request: pytest.FixtureRequest) -> OpenAIChatCompletionClient:
16221622
@pytest.mark.asyncio
16231623
@pytest.mark.parametrize(
16241624
"model",
1625-
["gpt-4o-mini", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
1625+
["gpt-4.1-nano", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
16261626
)
16271627
async def test_model_client_basic_completion(model: str, openai_client: OpenAIChatCompletionClient) -> None:
16281628
# Test basic completion
@@ -1639,7 +1639,7 @@ async def test_model_client_basic_completion(model: str, openai_client: OpenAICh
16391639
@pytest.mark.asyncio
16401640
@pytest.mark.parametrize(
16411641
"model",
1642-
["gpt-4o-mini", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
1642+
["gpt-4.1-nano", "gemini-1.5-flash", "claude-3-5-haiku-20241022"],
16431643
)
16441644
async def test_model_client_with_function_calling(model: str, openai_client: OpenAIChatCompletionClient) -> None:
16451645
# Test tool calling
@@ -1716,7 +1716,7 @@ async def test_model_client_with_function_calling(model: str, openai_client: Ope
17161716
@pytest.mark.asyncio
17171717
@pytest.mark.parametrize(
17181718
"model",
1719-
["gpt-4o-mini", "gemini-1.5-flash"],
1719+
["gpt-4.1-nano", "gemini-1.5-flash"],
17201720
)
17211721
async def test_openai_structured_output_using_response_format(
17221722
model: str, openai_client: OpenAIChatCompletionClient
@@ -1749,7 +1749,7 @@ class AgentResponse(BaseModel):
17491749
@pytest.mark.asyncio
17501750
@pytest.mark.parametrize(
17511751
"model",
1752-
["gpt-4o-mini", "gemini-1.5-flash"],
1752+
["gpt-4.1-nano", "gemini-1.5-flash"],
17531753
)
17541754
async def test_openai_structured_output(model: str, openai_client: OpenAIChatCompletionClient) -> None:
17551755
class AgentResponse(BaseModel):
@@ -1769,7 +1769,7 @@ class AgentResponse(BaseModel):
17691769
@pytest.mark.asyncio
17701770
@pytest.mark.parametrize(
17711771
"model",
1772-
["gpt-4o-mini", "gemini-1.5-flash"],
1772+
["gpt-4.1-nano", "gemini-1.5-flash"],
17731773
)
17741774
async def test_openai_structured_output_with_streaming(model: str, openai_client: OpenAIChatCompletionClient) -> None:
17751775
class AgentResponse(BaseModel):
@@ -1795,7 +1795,7 @@ class AgentResponse(BaseModel):
17951795
@pytest.mark.parametrize(
17961796
"model",
17971797
[
1798-
"gpt-4o-mini",
1798+
"gpt-4.1-nano",
17991799
# "gemini-1.5-flash", # Gemini models do not support structured output with tool calls from model client.
18001800
],
18011801
)
@@ -1853,7 +1853,7 @@ def sentiment_analysis(text: str) -> str:
18531853
@pytest.mark.parametrize(
18541854
"model",
18551855
[
1856-
"gpt-4o-mini",
1856+
"gpt-4.1-nano",
18571857
# "gemini-1.5-flash", # Gemini models do not support structured output with tool calls from model client.
18581858
],
18591859
)
@@ -2072,7 +2072,7 @@ async def test_add_name_prefixes(monkeypatch: pytest.MonkeyPatch) -> None:
20722072
@pytest.mark.parametrize(
20732073
"model",
20742074
[
2075-
"gpt-4o-mini",
2075+
"gpt-4.1-nano",
20762076
"gemini-1.5-flash",
20772077
"claude-3-5-haiku-20241022",
20782078
],
@@ -2188,7 +2188,7 @@ async def test_system_message_not_merged_for_multiple_system_messages_true() ->
21882188
mock_client = MagicMock()
21892189
client = BaseOpenAIChatCompletionClient(
21902190
client=mock_client,
2191-
create_args={"model": "gpt-4o"},
2191+
create_args={"model": "gpt-4.1-nano"},
21922192
model_info={
21932193
"vision": False,
21942194
"function_calling": False,
@@ -2355,7 +2355,7 @@ async def test_empty_assistant_content_with_gemini(model: str, openai_client: Op
23552355
@pytest.mark.parametrize(
23562356
"model",
23572357
[
2358-
"gpt-4o-mini",
2358+
"gpt-4.1-nano",
23592359
"gemini-1.5-flash",
23602360
"claude-3-5-haiku-20241022",
23612361
],
@@ -2402,7 +2402,7 @@ def get_regitered_transformer(client: OpenAIChatCompletionClient) -> Transformer
24022402
@pytest.mark.parametrize(
24032403
"model",
24042404
[
2405-
"gpt-4o-mini",
2405+
"gpt-4.1-nano",
24062406
],
24072407
)
24082408
async def test_openai_model_unknown_message_type(model: str, openai_client: OpenAIChatCompletionClient) -> None:

python/packages/autogen-ext/tests/test_filesurfer_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ async def test_run_filesurfer(monkeypatch: pytest.MonkeyPatch) -> None:
7474
</html>""")
7575

7676
# Mock the API calls
77-
model = "gpt-4o-2024-05-13"
77+
model = "gpt-4.1-nano-2025-04-14"
7878
chat_completions = [
7979
ChatCompletion(
8080
id="id1",
@@ -153,7 +153,7 @@ async def test_run_filesurfer(monkeypatch: pytest.MonkeyPatch) -> None:
153153
@pytest.mark.asyncio
154154
async def test_file_surfer_serialization() -> None:
155155
"""Test that FileSurfer can be serialized and deserialized properly."""
156-
model = "gpt-4o-2024-05-13"
156+
model = "gpt-4.1-nano-2025-04-14"
157157
agent = FileSurfer(
158158
"FileSurfer",
159159
model_client=OpenAIChatCompletionClient(model=model, api_key=""),

python/packages/autogen-ext/tests/test_openai_assistant_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def agent(client: AsyncOpenAI) -> OpenAIAssistantAgent:
202202
return OpenAIAssistantAgent(
203203
name="assistant",
204204
instructions="Help the user with their task.",
205-
model="gpt-4o-mini",
205+
model="gpt-4.1-nano",
206206
description="OpenAI Assistant Agent",
207207
client=client,
208208
tools=tools,
@@ -346,7 +346,7 @@ async def test_on_reset_behavior(client: AsyncOpenAI, cancellation_token: Cancel
346346
agent = OpenAIAssistantAgent(
347347
name="assistant",
348348
instructions="Help the user with their task.",
349-
model="gpt-4o-mini",
349+
model="gpt-4.1-nano",
350350
description="OpenAI Assistant Agent",
351351
client=client,
352352
thread_id=thread.id,

python/packages/autogen-ext/tests/test_websurfer_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ async def mock_create(
6161

6262
@pytest.mark.asyncio
6363
async def test_run_websurfer(monkeypatch: pytest.MonkeyPatch) -> None:
64-
model = "gpt-4o-2024-05-13"
64+
model = "gpt-4.1-nano-2025-04-14"
6565
chat_completions = [
6666
ChatCompletion(
6767
id="id2",
@@ -149,7 +149,7 @@ async def test_run_websurfer(monkeypatch: pytest.MonkeyPatch) -> None:
149149

150150
@pytest.mark.asyncio
151151
async def test_run_websurfer_declarative(monkeypatch: pytest.MonkeyPatch) -> None:
152-
model = "gpt-4o-2024-05-13"
152+
model = "gpt-4.1-nano-2025-04-14"
153153
chat_completions = [
154154
ChatCompletion(
155155
id="id1",

python/packages/autogen-studio/tests/test_db_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def sample_team(test_user: str) -> Team:
3737
agent = AssistantAgent(
3838
name="weather_agent",
3939
model_client=OpenAIChatCompletionClient(
40-
model="gpt-4",
40+
model="gpt-4.1-nano",
4141
),
4242
)
4343

@@ -187,4 +187,4 @@ def test_initialize_database_scenarios(self, tmp_path, monkeypatch):
187187

188188
finally:
189189
asyncio.run(db.close())
190-
db.reset_db()
190+
db.reset_db()

python/packages/autogen-studio/tests/test_team_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def sample_config():
2121
agent = AssistantAgent(
2222
name="weather_agent",
2323
model_client=OpenAIChatCompletionClient(
24-
model="gpt-4o-mini",
24+
model="gpt-4.1-nano",
2525
),
2626
)
2727

@@ -146,4 +146,4 @@ async def mock_run_stream(*args, **kwargs):
146146

147147
# Verify the last message is a TeamResult
148148
assert isinstance(streamed_messages[-1], type(mock_messages[-1]))
149-
149+

0 commit comments

Comments
 (0)