Skip to content

Commit 181f215

Browse files
Rename the Bedrock Claude path to infer_with_claude_aws_bedrock and add infer_with_claude_key, which calls Claude directly via the Anthropic API key; remove the commented-out legacy Bedrock implementation.
1 parent d715f4d commit 181f215

1 file changed

Lines changed: 78 additions & 61 deletions

File tree

src/llmtool/LLM_utils.py

Lines changed: 78 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# Imports
21
from openai import *
32
from pathlib import Path
43
from typing import Tuple
54
import google.generativeai as genai
5+
import anthropic
66
import signal
77
import sys
88
import tiktoken
@@ -18,8 +18,6 @@
1818
import boto3
1919
from utility.errors import RALLMAPIError, RAValueError
2020
from utility.logger import Logger
21-
22-
2321
class LLM:
2422
"""
2523
An online inference model using different LLMs:
@@ -59,7 +57,7 @@ def infer(
5957
elif "o3-mini" in self.online_model_name or "o4-mini" in self.online_model_name:
6058
output = self.infer_with_On_mini_model(message)
6159
elif "claude" in self.online_model_name:
62-
output = self.infer_with_claude(message)
60+
output = self.infer_with_claude_key(message)
6361
elif "deepseek" in self.online_model_name:
6462
output = self.infer_with_deepseek_model(message)
6563
else:
@@ -223,63 +221,7 @@ def call_api():
223221

224222
return ""
225223

226-
# def infer_with_claude(self, message):
227-
# """Infer using the Claude model via AWS Bedrock"""
228-
# if "3.5" in self.online_model_name:
229-
# model_id = "anthropic.claude-3-5-sonnet-20241022-v2:0"
230-
# if "3.7" in self.online_model_name:
231-
# model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
232-
233-
# model_input = [
234-
# {
235-
# "role": "assistant",
236-
# "content": self.systemRole,
237-
# },
238-
# {"role": "user", "content": message},
239-
# ]
240-
241-
# body = json.dumps(
242-
# {
243-
# "messages": model_input,
244-
# "max_tokens": 4000,
245-
# "anthropic_version": "bedrock-2023-05-31",
246-
# "temperature": self.temperature,
247-
# "top_k": 50,
248-
# }
249-
# )
250-
251-
# def call_api():
252-
# client = boto3.client(
253-
# "bedrock-runtime",
254-
# region_name="us-west-2",
255-
# config=Config(read_timeout=100),
256-
# )
257-
258-
# response = (
259-
# client.invoke_model(
260-
# modelId=model_id, contentType="application/json", body=body
261-
# )["body"]
262-
# .read()
263-
# .decode("utf-8")
264-
# )
265-
266-
# response = json.loads(response)
267-
# return response["content"][0]["text"]
268-
269-
# tryCnt = 0
270-
# while tryCnt < 5:
271-
# tryCnt += 1
272-
# try:
273-
# output = self.run_with_timeout(call_api, timeout=100)
274-
# if output:
275-
# return output
276-
# except Exception as e:
277-
# self.logger.print_log(f"API error: {str(e)}")
278-
# time.sleep(2)
279-
280-
# return ""
281-
282-
def infer_with_claude(self, message):
224+
def infer_with_claude_aws_bedrock(self, message):
283225
"""Infer using the Claude model via AWS Bedrock"""
284226
timeout = 500
285227
model_input = [
@@ -355,3 +297,78 @@ def call_api():
355297
time.sleep(2)
356298

357299
return ""
300+
301+
def infer_with_claude_key(self, message):
    """Infer using a Claude model via the Anthropic API key.

    Prepends ``self.systemRole`` to *message* as a single user turn,
    selects the concrete model id from ``self.online_model_name``
    ("3.7" -> claude-3-7-sonnet, otherwise claude-3-5-sonnet), and
    retries the API call up to five times with a timeout per attempt.

    :param message: the user prompt to send to the model.
    :return: the model's text response, or ``""`` if all attempts fail.
    :raises RALLMAPIError: if ANTHROPIC_API_KEY is not set in the environment.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RALLMAPIError(
            "Please set the ANTHROPIC_API_KEY environment variable to use Claude models."
        )

    # Prepare messages - Claude prefers user messages over assistant system messages
    model_input = [{"role": "user", "content": f"{self.systemRole}\n\n{message}"}]

    # Resolve the concrete model id once; the two branches previously
    # duplicated the whole parameter dict when only the model differed.
    if "3.7" in self.online_model_name:
        model_name = "claude-3-7-sonnet-20250219"
    else:
        model_name = "claude-3-5-sonnet-20241022"

    api_params = {
        "model": model_name,
        "messages": model_input,
        "max_tokens": self.max_output_length,
        "temperature": self.temperature,
        # Thinking mode for 3.7 is intentionally disabled for now:
        # "thinking": {"type": "enabled", "budget_tokens": 10000},
    }

    def call_api():
        # One-shot call to the Anthropic API; text extraction included so the
        # whole round trip runs under self.run_with_timeout.
        client = anthropic.Anthropic(api_key=api_key)
        response = client.messages.create(**api_params)

        # With thinking enabled, 3.7 returns multiple content blocks and the
        # final answer is the last one; otherwise the single text block is first.
        if (
            "3.7" in self.online_model_name
            and hasattr(response, "content")
            and len(response.content) > 1
        ):
            return response.content[-1].text
        return response.content[0].text

    max_retries = 5
    for tryCnt in range(1, max_retries + 1):
        try:
            output = self.run_with_timeout(call_api, timeout=100)
            if output:
                self.logger.print_log(
                    f"Claude API call successful with {self.online_model_name}"
                )
                return output
        except Exception as e:
            self.logger.print_log(
                f"Claude API error (attempt {tryCnt}/{max_retries}): {e}"
            )
            if tryCnt == max_retries:
                self.logger.print_log("Max retries reached for Claude API")
            else:
                # Back off before retrying; the original also slept after the
                # final failed attempt, delaying the empty return for no reason.
                time.sleep(2)

    return ""

0 commit comments

Comments
 (0)