feat: add dataSource PVC mount and IntelliAide proposal templates

sakshiep1 · sakshiep1 · commit b69c6585922c · 2026-06-13T21:21:46.000+05:30
diff --git a/api/v1alpha1/proposal_types.go b/api/v1alpha1/proposal_types.go
@@ -270,6 +270,22 @@ func (s ProposalStep) IsZero() bool {
 	return s.Agent == "" && s.Tools.IsZero()
 }
 
+// DataSource references a pre-existing PersistentVolumeClaim containing
+// input data for this proposal (e.g., must-gather bundles, diagnostic data).
+// The PVC must already exist in the same namespace as the Proposal and be
+// pre-populated with data before the Proposal is created. The operator
+// mounts it read-only at a well-known path (/data/input) accessible to
+// all skills in the sandbox pod.
+type DataSource struct {
+	// claimName is the name of the PersistentVolumeClaim to mount.
+	// The PVC must exist in the same namespace as the Proposal.
+	// The name must be a valid DNS subdomain of 1 to 253 characters.
+	// +required
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:MaxLength=253
+	// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="must be a valid DNS subdomain"
+	ClaimName string `json:"claimName"`
+}
+
 // ProposalSpec defines the desired state of Proposal.
 //
 // A Proposal defines the workflow shape inline, specifying which steps
@@ -284,6 +300,7 @@ func (s ProposalStep) IsZero() bool {
 // +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis == oldSelf.analysis)",message="analysis is immutable once set"
 // +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution == oldSelf.execution)",message="execution is immutable once set"
 // +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification == oldSelf.verification)",message="verification is immutable once set"
+// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataSource) || (has(self.dataSource) && self.dataSource == oldSelf.dataSource)",message="dataSource is immutable once set"
 type ProposalSpec struct {
 	// request is the user's original request, alert description, or a
 	// description of what triggered this proposal. This text is passed to
@@ -341,6 +358,15 @@ type ProposalSpec struct {
 	// +optional
 	Tools ToolsSpec `json:"tools,omitzero"`
 
+	// dataSource references a PVC containing pre-populated input data
+	// (e.g., must-gather bundles, diagnostic data). The operator mounts
+	// it read-only at /data/input in the sandbox pod. Skills discover
+	// input data at this standard location.
+	//
+	// Immutable: input data source is fixed at creation.
+	// +optional
+	DataSource *DataSource `json:"dataSource,omitzero"`
+
 	// analysis defines per-step configuration for the analysis step,
 	// including which agent handles it and any per-step tools.
 	//
@@ -362,6 +388,18 @@ type ProposalSpec struct {
 	// +optional
 	Verification ProposalStep `json:"verification,omitzero"`
 
+	// timeoutMinutes sets the per-step timeout for sandbox agent calls.
+	// This controls how long the operator waits for the sandbox pod to
+	// become ready and for the agent to complete its work. Increase this
+	// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
+	// Defaults to 5 minutes when omitted.
+	//
+	// Mutable: can be adjusted before approving a step.
+	// +optional
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:validation:Maximum=60
+	TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
+
 	// revisionFeedback is the user's free-text feedback requesting changes
 	// to the analysis. Patching this field bumps metadata.generation, which
 	// the operator detects (generation > observedGeneration) and triggers
@@ -374,15 +412,6 @@ type ProposalSpec struct {
 	// +kubebuilder:validation:MaxLength=32768
 	RevisionFeedback string `json:"revisionFeedback,omitempty"`
 
-	// timeoutMinutes overrides the default sandbox operation timeout for
-	// this proposal. When set, all sandbox wait and HTTP client timeouts
-	// use this value instead of the operator default (5 minutes).
-	//
-	// Immutable: timeout policy is fixed at creation.
-	// +optional
-	// +kubebuilder:validation:Minimum=1
-	// +kubebuilder:validation:Maximum=120
-	TimeoutMinutes *int32 `json:"timeoutMinutes,omitzero"`
 }
 
 // ProposalStatus defines the observed state of Proposal. All fields are
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/controller/proposal/sandbox.go b/controller/proposal/sandbox.go
@@ -99,7 +99,7 @@ func (m *SandboxManager) buildClaim(claimName, proposalName, step, templateName
 func (m *SandboxManager) Claim(ctx context.Context, proposalName, step, _ string) (string, error) {
 	log := logf.FromContext(ctx)
 
-	templateName, err := EnsureAgentTemplate(ctx, m.Client, m.BaseTemplateName, m.Namespace, step, m.agent, m.llm, m.tools, m.serviceAccount)
+	templateName, err := EnsureAgentTemplate(ctx, m.Client, m.BaseTemplateName, m.Namespace, step, m.agent, m.llm, m.tools, nil, m.serviceAccount)
 	if err != nil {
 		return "", fmt.Errorf("%s: %w", ErrEnsureAgentTemplate, err)
 	}
diff --git a/controller/proposal/sandbox_templates.go b/controller/proposal/sandbox_templates.go
@@ -73,10 +73,14 @@ var sandboxTemplateGVK = schema.GroupVersionKind{
 }
 
 const (
-	llmCredsMountPath   = "/var/run/secrets/llm-credentials"
-	llmCredsVolumeName  = "llm-credentials"
-	mcpHeadersMountRoot = "/var/secrets/mcp"
-	mcpServersEnvVar    = "LIGHTSPEED_MCP_SERVERS"
+	agentModeEnvVar = "LIGHTSPEED_MODE"
+	llmCredsMountPath = "/var/run/secrets/llm-credentials"
+	vertexCredsMountPath = "/var/secrets/google"
+	vertexCredsFileName  = "credentials.json"
+	llmCredsVolumeName   = "llm-credentials"
+	mcpHeadersMountRoot  = "/var/secrets/mcp"
+	mcpServersEnvVar     = "LIGHTSPEED_MCP_SERVERS"
+	dataSourceMountPath  = "/data/input"
 
 	LabelManaged      = "agentic.openshift.io/managed"
 	LabelBaseTemplate = "agentic.openshift.io/base-template"
@@ -92,6 +96,7 @@ type templateHashInput struct {
 	Skills              []agenticv1alpha1.SkillsSource      `json:"skills"`
 	MCPServers          []agenticv1alpha1.MCPServerConfig   `json:"mcpServers,omitempty"`
 	RequiredSecrets     []agenticv1alpha1.SecretRequirement `json:"requiredSecrets,omitempty"`
+	DataSource          *agenticv1alpha1.DataSource         `json:"dataSource,omitempty"`
 	Step                string                              `json:"step"`
 	BaseResourceVersion string                              `json:"baseRV"`
 	ServiceAccount      string                              `json:"serviceAccount"`
@@ -103,6 +108,7 @@ func computeTemplateHash(
 	skills []agenticv1alpha1.SkillsSource,
 	mcpServers []agenticv1alpha1.MCPServerConfig,
 	requiredSecrets []agenticv1alpha1.SecretRequirement,
+	dataSource *agenticv1alpha1.DataSource,
 	step string,
 	baseResourceVersion string,
 	serviceAccount string,
@@ -113,6 +119,7 @@ func computeTemplateHash(
 		Skills:              skills,
 		MCPServers:          mcpServers,
 		RequiredSecrets:     requiredSecrets,
+		DataSource:          dataSource,
 		Step:                step,
 		BaseResourceVersion: baseResourceVersion,
 		ServiceAccount:      serviceAccount,
@@ -142,6 +149,7 @@ func EnsureAgentTemplate(
 	agent *agenticv1alpha1.Agent,
 	llm *agenticv1alpha1.LLMProvider,
 	tools *agenticv1alpha1.ToolsSpec,
+	dataSource *agenticv1alpha1.DataSource,
 	serviceAccount string,
 ) (string, error) {
 	log := logf.FromContext(ctx).WithName("sandbox-templates")
@@ -168,7 +176,7 @@ func EnsureAgentTemplate(
 		requiredSecrets = tools.RequiredSecrets
 	}
 
-	hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, step, base.GetResourceVersion(), serviceAccount)
+	hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, dataSource, step, base.GetResourceVersion(), serviceAccount)
 	if err != nil {
 		return "", fmt.Errorf("%s: %w", ErrComputeTemplateHash, err)
 	}
@@ -234,6 +242,12 @@ func EnsureAgentTemplate(
 		}
 	}
 
+	if dataSource != nil {
+		if err := patchDataSource(derived, dataSource); err != nil {
+			return "", fmt.Errorf("patch data source: %w", err)
+		}
+	}
+
 	if err := patchProbes(derived); err != nil {
 		return "", fmt.Errorf("%s: %w", ErrPatchProbes, err)
 	}
@@ -616,6 +630,36 @@ func addSecretVolume(tmpl *unstructured.Unstructured, volumeName, secretName str
 	return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
 }
 
+// addPVCVolume adds a volume named volumeName, backed by the
+// PersistentVolumeClaim claimName, to the template's pod spec at
+// spec.podTemplate.spec.volumes. If a volume with the same name is already
+// present it is replaced in place, so repeated calls do not create duplicate
+// entries.
+//
+// It returns an error when the existing volumes field cannot be read as a
+// list or when the updated list cannot be written back to the template.
+func addPVCVolume(tmpl *unstructured.Unstructured, volumeName, claimName string) error {
+	// A missing volumes field is fine (volumes stays nil); a malformed one is
+	// reported instead of being silently overwritten.
+	volumes, _, err := unstructured.NestedSlice(tmpl.Object, "spec", "podTemplate", "spec", "volumes")
+	if err != nil {
+		return fmt.Errorf("read pod template volumes: %w", err)
+	}
+	vol := map[string]any{
+		"name": volumeName,
+		"persistentVolumeClaim": map[string]any{
+			"claimName": claimName,
+		},
+	}
+	replaced := false
+	for i, v := range volumes {
+		existing, ok := v.(map[string]any)
+		if !ok {
+			continue
+		}
+		if existing["name"] == volumeName {
+			volumes[i] = vol
+			replaced = true
+			break
+		}
+	}
+	if !replaced {
+		volumes = append(volumes, vol)
+	}
+	return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
+}
+
+func patchDataSource(tmpl *unstructured.Unstructured, ds *agenticv1alpha1.DataSource) error {
+	volName := "data-source"
+	if err := addPVCVolume(tmpl, volName, ds.ClaimName); err != nil {
+		return fmt.Errorf("add data source PVC volume: %w", err)
+	}
+	return addVolumeMount(tmpl, volName, dataSourceMountPath, true)
+}
+
 func addVolumeMount(tmpl *unstructured.Unstructured, name, mountPath string, readOnly bool) error {
 	container, containers, err := firstContainer(tmpl)
 	if err != nil {
diff --git a/controller/proposal/sandbox_templates_test.go b/controller/proposal/sandbox_templates_test.go
@@ -77,7 +77,7 @@ func emptyTemplate() *unstructured.Unstructured {
 
 func mustHash(t *testing.T, llm *agenticv1alpha1.LLMProvider, model string, skills []agenticv1alpha1.SkillsSource, requiredSecrets []agenticv1alpha1.SecretRequirement, phase string) string {
 	t.Helper()
-	h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, phase, "", "")
+	h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, nil, phase, "", "")
 	if err != nil {
 		t.Fatalf("computeTemplateHash: %v", err)
 	}
@@ -517,14 +517,14 @@ func TestSetEnvVar_FailsOnNoContainers(t *testing.T) {
 }
 
 func TestEnsureAgentTemplate_NilAgent(t *testing.T) {
+	// The agent argument is nil; EnsureAgentTemplate is expected to reject it
+	// with an error. The client argument is nil as well, so this presumably
+	// relies on the agent check running before any API call — TODO confirm.
-	_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, "lightspeed-agent")
+	_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, nil, "lightspeed-agent")
 	if err == nil {
 		t.Error("expected error for nil agent")
 	}
 }
 
 func TestEnsureAgentTemplate_NilLLM(t *testing.T) {
-	_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, "lightspeed-agent")
+	_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, nil,"lightspeed-agent")
 	if err == nil {
 		t.Error("expected error for nil LLM")
 	}
@@ -627,11 +627,11 @@ func TestComputeTemplateHash_DifferentBaseResourceVersion(t *testing.T) {
 	llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
 	skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
 
-	h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")
+	h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")
 	if err != nil {
 		t.Fatal(err)
 	}
-	h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "2000", "")
+	h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "2000", "")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -645,11 +645,11 @@ func TestComputeTemplateHash_SameBaseResourceVersion(t *testing.T) {
 	llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
 	skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
 
-	h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")
+	h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")
 	if err != nil {
 		t.Fatal(err)
 	}
-	h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")
+	h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/controller/proposal/templates/analysis_query.tmpl b/controller/proposal/templates/analysis_query.tmpl
@@ -1,5 +1,31 @@
 You are an analysis agent. Your job is to diagnose the problem, determine the root cause, and propose one or more remediation options. Do NOT attempt to fix, patch, or execute any changes — only analyze and propose.
 
+## Skills
+
+A specialist deep-RCA pipeline is available at `/app/skills/intelliaide/SKILL.md`.
+
+Use it ONLY when the request calls for:
+- Root cause analysis (RCA) or deeper / ML-assisted troubleshooting of a cluster issue
+- Must-gather collection or analysis
+- Investigating pod failures, etcd degradation, networking problems, storage issues, etc.
+- Any request that explicitly mentions "deeper analysis", "deeper troubleshooting", "root cause", "RCA", "must-gather", or "IntelliAide"
+
+For routine inspection (checking pod/node status, listing events, summarising resource state,
+describing objects), use `kubectl`/`oc` commands directly — do NOT invoke the IntelliAide pipeline.
+
+**Decision rule — apply before doing anything else:**
+1. Read the `## Request` section below.
+2. If it is a routine inspection query → proceed with `kubectl`/`oc` directly.
+3. If it is a deep-RCA or troubleshooting request → read the skill file with ONE atomic command:
+   ```
+   cat /app/skills/intelliaide/SKILL.md
+   ```
+   If the command prints the skill file, follow its workflow **exactly** instead of the
+   instructions below.
+   If the file is missing or cannot be read, stop immediately and return a JSON error response — the skill is required for these requests and its absence is a fatal misconfiguration.
+
+## Analysis requirements
+
 For each option you propose, include:
 - A diagnosis with root cause and confidence level
 - A detailed remediation plan with specific actions
diff --git a/examples/setup/09-intelliaide-proposals.yaml b/examples/setup/09-intelliaide-proposals.yaml

Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,7 @@ func (m *SandboxManager) buildClaim(claimName, proposalName, step, templateName`
`99`	`99`	`func (m *SandboxManager) Claim(ctx context.Context, proposalName, step, _ string) (string, error) {`
`100`	`100`	`log := logf.FromContext(ctx)`
`101`	`101`
`102`		`- templateName, err := EnsureAgentTemplate(ctx, m.Client, m.BaseTemplateName, m.Namespace, step, m.agent, m.llm, m.tools, m.serviceAccount)`
	`102`	`+ templateName, err := EnsureAgentTemplate(ctx, m.Client, m.BaseTemplateName, m.Namespace, step, m.agent, m.llm, m.tools, nil, m.serviceAccount)`
`103`	`103`	`if err != nil {`
`104`	`104`	`return "", fmt.Errorf("%s: %w", ErrEnsureAgentTemplate, err)`
`105`	`105`	`}`
Original file line number	Diff line number	Diff line change
`@@ -77,7 +77,7 @@ func emptyTemplate() *unstructured.Unstructured {`
`77`	`77`
`78`	`78`	`func mustHash(t *testing.T, llm *agenticv1alpha1.LLMProvider, model string, skills []agenticv1alpha1.SkillsSource, requiredSecrets []agenticv1alpha1.SecretRequirement, phase string) string {`
`79`	`79`	`t.Helper()`
`80`		`- h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, phase, "", "")`
	`80`	`+ h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, nil, phase, "", "")`
`81`	`81`	`if err != nil {`
`82`	`82`	`t.Fatalf("computeTemplateHash: %v", err)`
`83`	`83`	`}`
`@@ -517,14 +517,14 @@ func TestSetEnvVar_FailsOnNoContainers(t *testing.T) {`
`517`	`517`	`}`
`518`	`518`
`519`	`519`	`func TestEnsureAgentTemplate_NilAgent(t *testing.T) {`
`520`		`- _, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, "lightspeed-agent")`
	`520`	`+ _, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, nil, "lightspeed-agent")`
`521`	`521`	`if err == nil {`
`522`	`522`	`t.Error("expected error for nil agent")`
`523`	`523`	`}`
`524`	`524`	`}`
`525`	`525`
`526`	`526`	`func TestEnsureAgentTemplate_NilLLM(t *testing.T) {`
`527`		`- _, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, "lightspeed-agent")`
	`527`	`+ _, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, nil,"lightspeed-agent")`
`528`	`528`	`if err == nil {`
`529`	`529`	`t.Error("expected error for nil LLM")`
`530`	`530`	`}`
`@@ -627,11 +627,11 @@ func TestComputeTemplateHash_DifferentBaseResourceVersion(t *testing.T) {`
`627`	`627`	`llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)`
`628`	`628`	`skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}`
`629`	`629`
`630`		`- h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")`
	`630`	`+ h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")`
`631`	`631`	`if err != nil {`
`632`	`632`	`t.Fatal(err)`
`633`	`633`	`}`
`634`		`- h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "2000", "")`
	`634`	`+ h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "2000", "")`
`635`	`635`	`if err != nil {`
`636`	`636`	`t.Fatal(err)`
`637`	`637`	`}`
`@@ -645,11 +645,11 @@ func TestComputeTemplateHash_SameBaseResourceVersion(t *testing.T) {`
`645`	`645`	`llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)`
`646`	`646`	`skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}`
`647`	`647`
`648`		`- h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")`
	`648`	`+ h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")`
`649`	`649`	`if err != nil {`
`650`	`650`	`t.Fatal(err)`
`651`	`651`	`}`
`652`		`- h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")`
	`652`	`+ h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")`
`653`	`653`	`if err != nil {`
`654`	`654`	`t.Fatal(err)`
`655`	`655`	`}`