Skip to content

Commit b3b02c8

Browse files
fix: replace decommissioned kimi models with kimi-k2p5 (#447)
* use kimi k2.5
* turn reasoning off
* turn on reasoning
* delete them
* update model
1 parent 0655f89 commit b3b02c8

23 files changed

Lines changed: 32 additions & 250 deletions

eval_protocol/benchmarks/test_frozen_lake.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def frozen_lake_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Evaluation
4242
{
4343
"temperature": 0.0,
4444
"max_tokens": 4096,
45-
"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905",
45+
"model": "fireworks_ai/accounts/fireworks/models/kimi-k2p5",
4646
}
4747
],
4848
rollout_processor=MCPGymRolloutProcessor(),

eval_protocol/quickstart/aha_judge/llm_judge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818

1919
async def aha_judge(
20-
row: EvaluationRow, judge_name: str = "kimi-k2-instruct-0905", adapter: Optional[BaseAdapter] = None
20+
row: EvaluationRow, judge_name: str = "kimi-k2p5", adapter: Optional[BaseAdapter] = None
2121
) -> EvaluationRow:
2222
"""
2323
LLM Judge evaluation using Arena-Hard-Auto style pairwise comparisons for a single row.

eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def openai_responses_data_generator():
5050
"model": "fireworks_ai/accounts/fireworks/models/deepseek-v3p1",
5151
},
5252
{
53-
"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905",
53+
"model": "fireworks_ai/accounts/fireworks/models/kimi-k2p5",
5454
},
5555
],
5656
)

eval_protocol/quickstart/aha_judge/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
"api_key": os.getenv("GEMINI_API_KEY"),
5050
"base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
5151
},
52-
"kimi-k2-instruct-0905": {
53-
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
52+
"kimi-k2p5": {
53+
"model": "accounts/fireworks/models/kimi-k2p5",
5454
"temperature": 0.6, # Kimi recommended temperature
5555
"max_tokens": 131000,
5656
"api_key": os.getenv("FIREWORKS_API_KEY"),

eval_protocol/quickstart/llm_judge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818

1919
async def aha_judge(
20-
row: EvaluationRow, judge_name: str = "kimi-k2-instruct-0905", adapter: Optional[BaseAdapter] = None
20+
row: EvaluationRow, judge_name: str = "kimi-k2p5", adapter: Optional[BaseAdapter] = None
2121
) -> EvaluationRow:
2222
"""
2323
LLM Judge evaluation using Arena-Hard-Auto style pairwise comparisons for a single row.

eval_protocol/quickstart/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@
5151
"api_key": os.getenv("GEMINI_API_KEY"),
5252
"base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
5353
},
54-
"kimi-k2-instruct-0905": {
55-
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
54+
"kimi-k2p5": {
55+
"model": "accounts/fireworks/models/kimi-k2p5",
5656
"temperature": 0.6, # Kimi recommended temperature
5757
"max_tokens": 131000,
5858
"api_key": os.getenv("FIREWORKS_API_KEY"),

eval_protocol/training/gepa_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
},
4141
# Fireworks models
4242
"kimi-k2": {
43-
"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905",
43+
"model": "fireworks_ai/accounts/fireworks/models/kimi-k2p5",
4444
"temperature": 0.6,
4545
"max_tokens": 131000,
4646
},
@@ -68,7 +68,7 @@ def build_reflection_lm(reflection_lm_name: str) -> LM:
6868
6969
Args:
7070
reflection_lm_name: One of the predefined configs ("gpt-5", "gpt-4o",
71-
"claude-sonnet", "kimi-k2-instruct-0905")
71+
"claude-sonnet", "kimi-k2p5")
7272
OR a raw LiteLLM model string (e.g., "openai/gpt-4o")
7373
7474
Returns:

tests/chinook/pydantic/test_pydantic_chinook.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ def agent_factory(config: RolloutProcessorConfig) -> Agent:
3838
"completion_params",
3939
[
4040
{
41-
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
41+
"model": "accounts/fireworks/models/kimi-k2p5",
4242
"provider": "fireworks",
43+
"reasoning_effort": "none",
4344
},
4445
{
4546
"model": "gpt-5",
@@ -88,7 +89,7 @@ async def test_simple_query(row: EvaluationRow) -> EvaluationRow:
8889
)
8990
else:
9091
model = OpenAIChatModel(
91-
"accounts/fireworks/models/kimi-k2-instruct-0905",
92+
"accounts/fireworks/models/kimi-k2p5",
9293
provider="fireworks",
9394
)
9495

tests/chinook/pydantic/test_pydantic_complex_queries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def agent_factory(config: RolloutProcessorConfig) -> Agent:
4848
# "provider": "fireworks",
4949
# },
5050
# {
51-
# "model": "accounts/fireworks/models/kimi-k2-instruct-0905",
51+
# "model": "accounts/fireworks/models/kimi-k2p5",
5252
# "provider": "fireworks",
5353
# },
5454
{"model": "gpt-5"},

tests/pytest/data/basic_coding_dataset.jsonl

Lines changed: 0 additions & 10 deletions
This file was deleted.

0 commit comments

Comments
 (0)