|
1 | | -# Imports |
2 | 1 | from openai import * |
3 | 2 | from pathlib import Path |
4 | 3 | from typing import Tuple |
5 | 4 | import google.generativeai as genai |
| 5 | +import anthropic |
6 | 6 | import signal |
7 | 7 | import sys |
8 | 8 | import tiktoken |
|
18 | 18 | import boto3 |
19 | 19 | from utility.errors import RALLMAPIError, RAValueError |
20 | 20 | from utility.logger import Logger |
21 | | - |
22 | | - |
23 | 21 | class LLM: |
24 | 22 | """ |
25 | 23 | An online inference model using different LLMs: |
@@ -59,7 +57,7 @@ def infer( |
59 | 57 | elif "o3-mini" in self.online_model_name or "o4-mini" in self.online_model_name: |
60 | 58 | output = self.infer_with_On_mini_model(message) |
61 | 59 | elif "claude" in self.online_model_name: |
62 | | - output = self.infer_with_claude(message) |
| 60 | + output = self.infer_with_claude_key(message) |
63 | 61 | elif "deepseek" in self.online_model_name: |
64 | 62 | output = self.infer_with_deepseek_model(message) |
65 | 63 | else: |
@@ -223,63 +221,7 @@ def call_api(): |
223 | 221 |
|
224 | 222 | return "" |
225 | 223 |
|
226 | | - # def infer_with_claude(self, message): |
227 | | - # """Infer using the Claude model via AWS Bedrock""" |
228 | | - # if "3.5" in self.online_model_name: |
229 | | - # model_id = "anthropic.claude-3-5-sonnet-20241022-v2:0" |
230 | | - # if "3.7" in self.online_model_name: |
231 | | - # model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0" |
232 | | - |
233 | | - # model_input = [ |
234 | | - # { |
235 | | - # "role": "assistant", |
236 | | - # "content": self.systemRole, |
237 | | - # }, |
238 | | - # {"role": "user", "content": message}, |
239 | | - # ] |
240 | | - |
241 | | - # body = json.dumps( |
242 | | - # { |
243 | | - # "messages": model_input, |
244 | | - # "max_tokens": 4000, |
245 | | - # "anthropic_version": "bedrock-2023-05-31", |
246 | | - # "temperature": self.temperature, |
247 | | - # "top_k": 50, |
248 | | - # } |
249 | | - # ) |
250 | | - |
251 | | - # def call_api(): |
252 | | - # client = boto3.client( |
253 | | - # "bedrock-runtime", |
254 | | - # region_name="us-west-2", |
255 | | - # config=Config(read_timeout=100), |
256 | | - # ) |
257 | | - |
258 | | - # response = ( |
259 | | - # client.invoke_model( |
260 | | - # modelId=model_id, contentType="application/json", body=body |
261 | | - # )["body"] |
262 | | - # .read() |
263 | | - # .decode("utf-8") |
264 | | - # ) |
265 | | - |
266 | | - # response = json.loads(response) |
267 | | - # return response["content"][0]["text"] |
268 | | - |
269 | | - # tryCnt = 0 |
270 | | - # while tryCnt < 5: |
271 | | - # tryCnt += 1 |
272 | | - # try: |
273 | | - # output = self.run_with_timeout(call_api, timeout=100) |
274 | | - # if output: |
275 | | - # return output |
276 | | - # except Exception as e: |
277 | | - # self.logger.print_log(f"API error: {str(e)}") |
278 | | - # time.sleep(2) |
279 | | - |
280 | | - # return "" |
281 | | - |
282 | | - def infer_with_claude(self, message): |
| 224 | + def infer_with_claude_aws_bedrock(self, message): |
283 | 225 | """Infer using the Claude model via AWS Bedrock""" |
284 | 226 | timeout = 500 |
285 | 227 | model_input = [ |
@@ -355,3 +297,78 @@ def call_api(): |
355 | 297 | time.sleep(2) |
356 | 298 |
|
357 | 299 | return "" |
| 300 | + |
| 301 | + def infer_with_claude_key(self, message): |
| 302 | + """Infer using the Claude model via API key, with thinking mode for 3.7""" |
| 303 | + api_key = os.environ.get("ANTHROPIC_API_KEY") |
| 304 | + if not api_key: |
| 305 | + raise RALLMAPIError( |
| 306 | + "Please set the ANTHROPIC_API_KEY environment variable to use Claude models." |
| 307 | + ) |
| 308 | + |
| 309 | + # Fold the system role into the user turn; the Messages API also accepts a dedicated top-level "system" parameter
| 310 | + model_input = [{"role": "user", "content": f"{self.systemRole}\n\n{message}"}] |
| 311 | + |
| 312 | + def call_api(): |
| 313 | + client = anthropic.Anthropic(api_key=api_key) |
| 314 | + |
| 315 | + # Determine model and settings based on version |
| 316 | + if "3.7" in self.online_model_name: |
| 317 | + # Claude 3.7 (thinking parameters are sketched below but commented out; enabling them requires temperature=1)
| 318 | + model_name = "claude-3-7-sonnet-20250219" |
| 319 | + api_params = { |
| 320 | + "model": model_name, |
| 321 | + "messages": model_input, |
| 322 | + "max_tokens": self.max_output_length, |
| 323 | + "temperature": self.temperature, |
| 324 | + # "thinking": { |
| 325 | + # "type": "enabled", |
| 326 | + # "budget_tokens": 10000 |
| 327 | + # }, |
| 328 | + } |
| 329 | + else: |
| 330 | + # Claude 3.5 standard mode |
| 331 | + model_name = "claude-3-5-sonnet-20241022" |
| 332 | + api_params = { |
| 333 | + "model": model_name, |
| 334 | + "messages": model_input, |
| 335 | + "max_tokens": self.max_output_length, |
| 336 | + "temperature": self.temperature, |
| 337 | + # No thinking parameter for 3.5 |
| 338 | + } |
| 339 | + |
| 340 | + # Make the API call |
| 341 | + response = client.messages.create(**api_params) |
| 342 | + |
| 343 | + # Extract response text based on model type |
| 344 | + if ( |
| 345 | + "3.7" in self.online_model_name |
| 346 | + and hasattr(response, "content") |
| 347 | + and len(response.content) > 1 |
| 348 | + ): |
| 349 | + # For Claude 3.7 with thinking mode, get the final response (skip thinking content) |
| 350 | + return response.content[-1].text |
| 351 | + else: |
| 352 | + # For Claude 3.5 or any standard response |
| 353 | + return response.content[0].text |
| 354 | + |
| 355 | + tryCnt = 0 |
| 356 | + max_retries = 5 |
| 357 | + while tryCnt < max_retries: |
| 358 | + tryCnt += 1 |
| 359 | + try: |
| 360 | + output = self.run_with_timeout(call_api, timeout=100) |
| 361 | + if output: |
| 362 | + self.logger.print_log( |
| 363 | + f"Claude API call successful with {self.online_model_name}" |
| 364 | + ) |
| 365 | + return output |
| 366 | + except Exception as e: |
| 367 | + self.logger.print_log( |
| 368 | + f"Claude API error (attempt {tryCnt}/{max_retries}): {e}" |
| 369 | + ) |
| 370 | + if tryCnt == max_retries: |
| 371 | + self.logger.print_log("Max retries reached for Claude API") |
| 372 | + time.sleep(2) |
| 373 | + |
| 374 | + return "" |
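
Two small sketches for reviewers, not part of this commit. First, how the new key-based path is reached; only `infer()`'s dispatch on `online_model_name` and `infer_with_claude_key` itself appear in the diff above, so the constructor call here is hypothetical:

    import os
    os.environ.setdefault("ANTHROPIC_API_KEY", "sk-ant-...")  # placeholder key; required by infer_with_claude_key
    llm = LLM(online_model_name="claude-3.7")  # hypothetical: constructor arguments are not shown in this diff
    print(llm.infer("Summarize the change in one sentence."))

Second, the top-level `system` parameter mentioned in the comment inside `infer_with_claude_key`, for anyone who prefers it over folding the system role into the user message:

    import anthropic

    client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment by default
    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        system="You are a helpful assistant.",  # dedicated system prompt slot
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=1024,
        temperature=0.0,
    )
    print(response.content[0].text)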