Skip to content
This repository was archived by the owner on Dec 11, 2025. It is now read-only.

Commit df290dc

Browse files
committed
feat: add support for Moonshot AI's Kimi K2 Thinking model
- Introduced MOONSHOT_API_KEY in settings.py for API integration.
- Updated providers.py to include the KIMI_K2_THINKING model under Moonshot AI.
- Enhanced client.py to handle requests for the Moonshot AI provider.
- Added comprehensive tests for the Kimi K2 Thinking model, covering streaming, response models, and tool calling.
1 parent 21a9905 commit df290dc

4 files changed

Lines changed: 107 additions & 0 deletions

File tree

notdiamond/llms/client.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,6 +1653,19 @@ def _llm_from_config(
16531653
replicate_api_key=provider.api_key,
16541654
**passed_kwargs,
16551655
)
1656+
if provider.provider == "moonshotai":
1657+
ChatOpenAI = _module_check(
1658+
"langchain_openai.chat_models",
1659+
"ChatOpenAI",
1660+
provider.provider,
1661+
)
1662+
return ChatOpenAI(
1663+
openai_api_key=provider.api_key,
1664+
model_name=provider.model,
1665+
openai_api_base="https://api.moonshot.cn/v1",
1666+
callbacks=callbacks,
1667+
**passed_kwargs,
1668+
)
16561669
raise ValueError(f"Unsupported provider: {provider.provider}")
16571670

16581671
def verify_against_response_model(self) -> bool:

notdiamond/llms/providers.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ class NDLLMProviders(Enum):
9090
model served via Replicate
9191
9292
SONAR (NDLLMProvider): refers to "sonar" model by Perplexity
93+
94+
KIMI_K2_THINKING (NDLLMProvider): refers to "kimi-k2-thinking" model by Moonshot AI
9395
"""
9496

9597
GPT_3_5_TURBO = ("openai", "gpt-3.5-turbo")
@@ -207,6 +209,8 @@ class NDLLMProviders(Enum):
207209
"meta-llama-3.1-405b-instruct",
208210
)
209211

212+
KIMI_K2_THINKING = ("moonshotai", "kimi-k2-thinking")
213+
210214
def __new__(cls, provider, model):
211215
return LLMConfig(provider=provider, model=model)
212216

notdiamond/settings.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", default="")
1717
PPLX_API_KEY = os.getenv("PPLX_API_KEY", default="")
1818
REPLICATE_API_KEY = os.getenv("REPLICATE_API_KEY", default="")
19+
MOONSHOT_API_KEY = os.getenv("MOONSHOT_API_KEY", default="")
1920

2021

2122
NOTDIAMOND_API_URL = os.getenv(
@@ -434,6 +435,24 @@
434435
"meta-llama-3.1-405b-instruct": {"input": 9.5, "output": 9.5},
435436
},
436437
},
438+
"moonshotai": {
439+
"models": [
440+
"kimi-k2-thinking",
441+
],
442+
"api_key": MOONSHOT_API_KEY,
443+
"support_tools": [
444+
"kimi-k2-thinking",
445+
],
446+
"support_response_model": [
447+
"kimi-k2-thinking",
448+
],
449+
"openrouter_identifier": {
450+
"kimi-k2-thinking": "moonshotai/kimi-k2-thinking",
451+
},
452+
"price": {
453+
"kimi-k2-thinking": {"input": 0.6, "output": 2.5},
454+
},
455+
},
437456
}
438457

439458

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pytest
2+
from helpers import astream_chunks, stream_chunks
3+
4+
from notdiamond.llms.client import NotDiamond
5+
from notdiamond.llms.providers import NDLLMProviders
6+
7+
8+
@pytest.mark.longrun
@pytest.mark.vcr
class Test_Moonshotai_LLMs:
    """Integration tests for Moonshot AI's Kimi K2 Thinking model.

    Covers streaming (sync and async), structured output through a response
    model, and tool calling with both tool fixture flavours.
    """

    @staticmethod
    def _client(**provider_kwargs):
        """Build a NotDiamond client that only knows Kimi K2 Thinking.

        ``NDLLMProviders.KIMI_K2_THINKING`` is a single object shared by every
        test that references it. Assigning ``kwargs`` on it directly would
        leak the setting into later tests, so per-test kwargs are applied to
        a shallow copy and the shared member is left untouched.

        Args:
            **provider_kwargs: Optional model kwargs (e.g. ``max_tokens``) to
                attach to the provider config for this client only.

        Returns:
            A ``NotDiamond`` client configured with the single provider.
        """
        import copy  # local import: only this helper needs it

        provider = NDLLMProviders.KIMI_K2_THINKING
        if provider_kwargs:
            provider = copy.copy(provider)
            provider.kwargs = provider_kwargs
        return NotDiamond(
            llm_configs=[provider], latency_tracking=False, hash_content=True
        )

    def test_kimi_k2_thinking_with_streaming(self, prompt):
        """Synchronous streaming yields chunks accepted by the helper."""
        nd_llm = self._client()
        stream_chunks(nd_llm.stream(prompt))

    @pytest.mark.asyncio
    async def test_kimi_k2_thinking_with_async_streaming(self, prompt):
        """Asynchronous streaming yields chunks accepted by the helper."""
        nd_llm = self._client()

        await astream_chunks(nd_llm.astream(prompt))

    def test_kimi_k2_thinking_response_model(self, response_model):
        """The reply is parsed into the requested response model."""
        nd_llm = self._client(max_tokens=200)
        result, _, _ = nd_llm.invoke(
            [{"role": "user", "content": "Tell me a joke"}],
            response_model=response_model,
        )

        assert isinstance(result, response_model)
        assert result.setup
        assert result.punchline

    def test_kimi_k2_thinking_with_tool_calling(self, tools_fixture):
        """Tools from ``tools_fixture`` produce exactly one ``add_fct`` call."""
        nd_llm = self._client(max_tokens=200)
        nd_llm = nd_llm.bind_tools(tools_fixture)
        result, _, _ = nd_llm.invoke(
            [{"role": "user", "content": "How much is 3 + 5?"}]
        )

        assert len(result.tool_calls) == 1
        assert result.tool_calls[0]["name"] == "add_fct"

    def test_kimi_k2_thinking_with_openai_tool_calling(
        self, openai_tools_fixture
    ):
        """Tools from ``openai_tools_fixture`` produce one ``add_fct`` call."""
        nd_llm = self._client(max_tokens=200)
        nd_llm = nd_llm.bind_tools(openai_tools_fixture)
        result, _, _ = nd_llm.invoke(
            [{"role": "user", "content": "How much is 3 + 5?"}]
        )

        assert len(result.tool_calls) == 1
        assert result.tool_calls[0]["name"] == "add_fct"
71+

0 commit comments

Comments
 (0)