Skip to content

Commit 691b51f

Browse files
authored
ROB-1032 llm selection (#1786)
support llm model selection in holmes
1 parent 93919eb commit 691b51f

4 files changed

Lines changed: 51 additions & 5 deletions

File tree

docs/configuration/holmesgpt/index.rst

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,46 @@ Choose an AI provider below and follow the instructions:
223223
224224
Run a :ref:`Helm Upgrade <Simple Upgrade>` to apply the configuration.
225225

226+
.. tab-item:: Multiple providers
227+
:name: multiple-providers
228+
229+
Starting from version *0.22.1*, Robusta supports an alternative way to configure AI models: using a YAML dictionary in your Helm values file.
230+
231+
This method allows you to configure multiple models at once, each with its own parameters.
232+
233+
Update your Helm values (``generated_values.yaml`` file) with the following configuration.
234+
235+
When multiple models are defined, the Robusta UI will allow users to choose a specific model when initiating an AI-based investigation.
236+
237+
.. admonition:: Model info
238+
:class: warning
239+
240+
When using multiple providers, the keys differ slightly from the single-provider case.
241+
242+
.. code-block:: yaml
243+
244+
enableHolmesGPT: true
245+
246+
holmes:
247+
modelList: # sample configuration.
248+
openai:
249+
model: openai/gpt-4o
250+
api_key: "{{ env.API_KEY }}"
251+
azure-low-budget:
252+
model: azure/team-low-budget
253+
api_base: <your-api-base> # fill in the base endpoint url of your azure deployment - e.g. https://my-org.openai.azure.com/
254+
api_version: "2024-06-01"
255+
api_key: "{{ env.AZURE_API_KEY }}" # you can load the values from an environment variable as well.
256+
temperature: 0
257+
bedrock-devops:
258+
model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 # your bedrock model.
259+
aws_region_name: us-east-1
260+
aws_access_key_id: "{{ env.AWS_ACCESS_KEY_ID }}" # you can load the values from an environment variable as well.
261+
aws_secret_access_key: <your-aws-secret-access-key>
262+
thinking: {"type": "enabled", "budget_tokens": 1024}
263+
264+
Run a :ref:`Helm Upgrade <Simple Upgrade>` to apply the configuration.
265+
226266
Configuring HolmesGPT Access to SaaS Data
227267
----------------------------------------------------
228268

src/robusta/core/model/base_params.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class ResourceInfo(BaseModel):
8383
class HolmesParams(ActionParams):
8484

8585
holmes_url: Optional[str]
86-
86+
model: Optional[str]
8787
@validator("holmes_url", allow_reuse=True)
8888
def validate_protocol(cls, v):
8989
if v and not v.startswith("http"): # if the user configured url without http(s)
@@ -251,6 +251,7 @@ class HolmesWorkloadHealthChatParams(HolmesParams):
251251
conversation_history: Optional[list[dict]] = None
252252

253253

254+
254255
class NamespacedResourcesParams(ActionParams):
255256
"""
256257
:var name: Resource name

src/robusta/core/playbooks/internal/ai_integration.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ def ask_holmes(event: ExecutionBaseEvent, params: AIInvestigateParams):
6060
context=params.context if params.context else {},
6161
include_tool_calls=True,
6262
include_tool_call_results=True,
63-
sections=params.sections
63+
sections=params.sections,
64+
model=params.model
6465
)
6566

6667
if params.stream:
@@ -287,6 +288,7 @@ def holmes_issue_chat(event: ExecutionBaseEvent, params: HolmesIssueChatParams):
287288
conversation_history=params.conversation_history,
288289
investigation_result=params.context.investigation_result,
289290
issue_type=params.context.issue_type,
291+
model=params.model
290292
)
291293
result = requests.post(f"{holmes_url}/api/issue_chat", data=holmes_req.json())
292294
result.raise_for_status()
@@ -336,7 +338,7 @@ def holmes_chat(event: ExecutionBaseEvent, params: HolmesChatParams):
336338
cluster_name = event.get_context().cluster_name
337339

338340
try:
339-
holmes_req = HolmesChatRequest(ask=params.ask, conversation_history=params.conversation_history)
341+
holmes_req = HolmesChatRequest(ask=params.ask, conversation_history=params.conversation_history, model=params.model)
340342
result = requests.post(f"{holmes_url}/api/chat", data=holmes_req.json())
341343
result.raise_for_status()
342344
holmes_result = HolmesChatResult(**json.loads(result.text))
@@ -380,11 +382,12 @@ def holmes_workload_chat(event: ExecutionBaseEvent, params: HolmesWorkloadHealth
380382
ask=params.ask,
381383
conversation_history=params.conversation_history,
382384
workload_health_result=params.workload_health_result,
383-
resource=params.resource
385+
resource=params.resource,
386+
model=params.model
384387
)
385388
result = requests.post(f"{holmes_url}/api/workload_health_chat", data=holmes_req.json())
386389
result.raise_for_status()
387-
390+
388391
holmes_result = HolmesChatResult(**json.loads(result.text))
389392

390393
finding = Finding(

src/robusta/core/reporting/holmes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class HolmesRequest(BaseModel):
2020
include_tool_calls: bool = False
2121
include_tool_call_results: bool = False
2222
sections: Optional[Dict[str, str]] = None
23+
model: Optional[str] = None
2324

2425

2526
class HolmesConversationRequest(BaseModel):
@@ -35,6 +36,7 @@ class HolmesConversationRequest(BaseModel):
3536
class HolmesChatRequest(BaseModel):
3637
ask: str
3738
conversation_history: Optional[List[dict]] = None
39+
model: Optional[str] = None
3840

3941

4042
class HolmesIssueChatRequest(HolmesChatRequest):

0 commit comments

Comments
 (0)