-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Expand file tree
/
Copy pathmodel_service.py
More file actions
59 lines (47 loc) · 2.08 KB
/
model_service.py
File metadata and controls
59 lines (47 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import asyncio
from litellm import acompletion
from typing import Dict, Any
async def get_model_response_async(model_name: str, prompt: str, context: Dict[str, Any]):
user_prompt = f"""
You are an expert code generator. Your task is to generate code based on the following repository context:
Repository Context:
{context['content']}
Instructions:
1. Generate code that strictly follows the repository's existing patterns and conventions
2. Use the same coding style, naming conventions, and structure as the codebase
3. Include clear, concise docstrings and comments explaining key functionality
4. Ensure the code integrates seamlessly with existing components
5. Focus on maintainability and readability
User query:
{prompt}
Output only the code implementation without explanations or additional text.
"""
messages = [
{"role": "user", "content": user_prompt}
]
model_mapping = {
"qwen3-coder": "openrouter/qwen/qwen3-coder",
"claude-4": "claude-sonnet-4-20250514"
}
try:
# Get streaming response from the model using LiteLLM asynchronously.
response = await acompletion(
model=model_mapping[model_name],
messages=messages,
api_key=os.getenv("OPENROUTER_API_KEY") if "coder" in model_name else os.getenv("ANTHROPIC_API_KEY"),
max_tokens=2000,
stream=True
)
async for chunk in response:
if chunk.choices[0].delta.content:
yield chunk.choices[0].delta.content
except Exception as e:
yield f"Error generating response: {str(e)}"
async def get_parallel_responses(prompt: str, context: Dict[str, Any]):
claude_gen = get_model_response_async("claude-4", prompt, context)
openai_gen = get_model_response_async("qwen3-coder", prompt, context)
return claude_gen, openai_gen
def get_model_responses(prompt: str, context: Dict[str, Any]):
loop = asyncio.get_event_loop()
return loop.run_until_complete(get_parallel_responses(prompt, context))