Skip to content

Commit b69c658

Browse files
committed
feat: add dataSource PVC mount and IntelliAide proposal templates
1 parent 0468de8 commit b69c658

7 files changed

Lines changed: 314 additions & 22 deletions

File tree

api/v1alpha1/proposal_types.go

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,22 @@ func (s ProposalStep) IsZero() bool {
270270
return s.Agent == "" && s.Tools.IsZero()
271271
}
272272

273+
// DataSource references a pre-existing PersistentVolumeClaim containing
274+
// input data for this proposal (e.g., must-gather bundles, diagnostic data).
275+
// The PVC must already exist in the same namespace as the Proposal and be
276+
// pre-populated with data before the Proposal is created. The operator
277+
// mounts it read-only at a well-known path (/data/input) accessible to
278+
// all skills in the sandbox pod.
279+
type DataSource struct {
280+
// claimName is the name of the PersistentVolumeClaim to mount.
281+
// The PVC must exist in the same namespace as the Proposal.
// The value must be a valid DNS-1123 subdomain of 1 to 253 characters.
282+
// +required
283+
// +kubebuilder:validation:MinLength=1
284+
// +kubebuilder:validation:MaxLength=253
285+
// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="must be a valid DNS subdomain"
286+
ClaimName string `json:"claimName"`
287+
}
288+
273289
// ProposalSpec defines the desired state of Proposal.
274290
//
275291
// A Proposal defines the workflow shape inline, specifying which steps
@@ -284,6 +300,7 @@ func (s ProposalStep) IsZero() bool {
284300
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis == oldSelf.analysis)",message="analysis is immutable once set"
285301
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution == oldSelf.execution)",message="execution is immutable once set"
286302
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification == oldSelf.verification)",message="verification is immutable once set"
303+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataSource) || (has(self.dataSource) && self.dataSource == oldSelf.dataSource)",message="dataSource is immutable once set"
287304
type ProposalSpec struct {
288305
// request is the user's original request, alert description, or a
289306
// description of what triggered this proposal. This text is passed to
@@ -341,6 +358,15 @@ type ProposalSpec struct {
341358
// +optional
342359
Tools ToolsSpec `json:"tools,omitzero"`
343360

361+
// dataSource references a PVC containing pre-populated input data
362+
// (e.g., must-gather bundles, diagnostic data). The operator mounts
363+
// it read-only at /data/input in the sandbox pod. Skills discover
364+
// input data at this standard location.
365+
//
366+
// Immutable: input data source is fixed at creation.
367+
// +optional
368+
DataSource *DataSource `json:"dataSource,omitzero"`
369+
344370
// analysis defines per-step configuration for the analysis step,
345371
// including which agent handles it and any per-step tools.
346372
//
@@ -362,6 +388,18 @@ type ProposalSpec struct {
362388
// +optional
363389
Verification ProposalStep `json:"verification,omitzero"`
364390

391+
// timeoutMinutes sets the per-step timeout for sandbox agent calls.
392+
// This controls how long the operator waits for the sandbox pod to
393+
// become ready and for the agent to complete its work. Increase this
394+
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
395+
// Defaults to 5 minutes when omitted.
396+
//
397+
// Mutable: can be adjusted before approving a step.
398+
// +optional
399+
// +kubebuilder:validation:Minimum=1
400+
// +kubebuilder:validation:Maximum=60
401+
TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
402+
365403
// revisionFeedback is the user's free-text feedback requesting changes
366404
// to the analysis. Patching this field bumps metadata.generation, which
367405
// the operator detects (generation > observedGeneration) and triggers
@@ -374,15 +412,6 @@ type ProposalSpec struct {
374412
// +kubebuilder:validation:MaxLength=32768
375413
RevisionFeedback string `json:"revisionFeedback,omitempty"`
376414

377-
// timeoutMinutes overrides the default sandbox operation timeout for
378-
// this proposal. When set, all sandbox wait and HTTP client timeouts
379-
// use this value instead of the operator default (5 minutes).
380-
//
381-
// Immutable: timeout policy is fixed at creation.
382-
// +optional
383-
// +kubebuilder:validation:Minimum=1
384-
// +kubebuilder:validation:Maximum=120
385-
TimeoutMinutes *int32 `json:"timeoutMinutes,omitzero"`
386415
}
387416

388417
// ProposalStatus defines the observed state of Proposal. All fields are

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controller/proposal/sandbox.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ func (m *SandboxManager) buildClaim(claimName, proposalName, step, templateName
9999
func (m *SandboxManager) Claim(ctx context.Context, proposalName, step, _ string) (string, error) {
100100
log := logf.FromContext(ctx)
101101

102-
templateName, err := EnsureAgentTemplate(ctx, m.Client, m.BaseTemplateName, m.Namespace, step, m.agent, m.llm, m.tools, m.serviceAccount)
102+
templateName, err := EnsureAgentTemplate(ctx, m.Client, m.BaseTemplateName, m.Namespace, step, m.agent, m.llm, m.tools, nil, m.serviceAccount)
103103
if err != nil {
104104
return "", fmt.Errorf("%s: %w", ErrEnsureAgentTemplate, err)
105105
}

controller/proposal/sandbox_templates.go

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,14 @@ var sandboxTemplateGVK = schema.GroupVersionKind{
7373
}
7474

7575
const (
76-
llmCredsMountPath = "/var/run/secrets/llm-credentials"
77-
llmCredsVolumeName = "llm-credentials"
78-
mcpHeadersMountRoot = "/var/secrets/mcp"
79-
mcpServersEnvVar = "LIGHTSPEED_MCP_SERVERS"
76+
agentModeEnvVar = "LIGHTSPEED_MODE"
77+
llmCredsMountPath = "/var/run/secrets/llm-credentials"
78+
vertexCredsMountPath = "/var/secrets/google"
79+
vertexCredsFileName = "credentials.json"
80+
llmCredsVolumeName = "llm-credentials"
81+
mcpHeadersMountRoot = "/var/secrets/mcp"
82+
mcpServersEnvVar = "LIGHTSPEED_MCP_SERVERS"
83+
dataSourceMountPath = "/data/input"
8084

8185
LabelManaged = "agentic.openshift.io/managed"
8286
LabelBaseTemplate = "agentic.openshift.io/base-template"
@@ -92,6 +96,7 @@ type templateHashInput struct {
9296
Skills []agenticv1alpha1.SkillsSource `json:"skills"`
9397
MCPServers []agenticv1alpha1.MCPServerConfig `json:"mcpServers,omitempty"`
9498
RequiredSecrets []agenticv1alpha1.SecretRequirement `json:"requiredSecrets,omitempty"`
99+
DataSource *agenticv1alpha1.DataSource `json:"dataSource,omitempty"`
95100
Step string `json:"step"`
96101
BaseResourceVersion string `json:"baseRV"`
97102
ServiceAccount string `json:"serviceAccount"`
@@ -103,6 +108,7 @@ func computeTemplateHash(
103108
skills []agenticv1alpha1.SkillsSource,
104109
mcpServers []agenticv1alpha1.MCPServerConfig,
105110
requiredSecrets []agenticv1alpha1.SecretRequirement,
111+
dataSource *agenticv1alpha1.DataSource,
106112
step string,
107113
baseResourceVersion string,
108114
serviceAccount string,
@@ -113,6 +119,7 @@ func computeTemplateHash(
113119
Skills: skills,
114120
MCPServers: mcpServers,
115121
RequiredSecrets: requiredSecrets,
122+
DataSource: dataSource,
116123
Step: step,
117124
BaseResourceVersion: baseResourceVersion,
118125
ServiceAccount: serviceAccount,
@@ -142,6 +149,7 @@ func EnsureAgentTemplate(
142149
agent *agenticv1alpha1.Agent,
143150
llm *agenticv1alpha1.LLMProvider,
144151
tools *agenticv1alpha1.ToolsSpec,
152+
dataSource *agenticv1alpha1.DataSource,
145153
serviceAccount string,
146154
) (string, error) {
147155
log := logf.FromContext(ctx).WithName("sandbox-templates")
@@ -168,7 +176,7 @@ func EnsureAgentTemplate(
168176
requiredSecrets = tools.RequiredSecrets
169177
}
170178

171-
hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, step, base.GetResourceVersion(), serviceAccount)
179+
hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, dataSource, step, base.GetResourceVersion(), serviceAccount)
172180
if err != nil {
173181
return "", fmt.Errorf("%s: %w", ErrComputeTemplateHash, err)
174182
}
@@ -234,6 +242,12 @@ func EnsureAgentTemplate(
234242
}
235243
}
236244

245+
if dataSource != nil {
246+
if err := patchDataSource(derived, dataSource); err != nil {
247+
return "", fmt.Errorf("patch data source: %w", err)
248+
}
249+
}
250+
237251
if err := patchProbes(derived); err != nil {
238252
return "", fmt.Errorf("%s: %w", ErrPatchProbes, err)
239253
}
@@ -616,6 +630,36 @@ func addSecretVolume(tmpl *unstructured.Unstructured, volumeName, secretName str
616630
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
617631
}
618632

633+
func addPVCVolume(tmpl *unstructured.Unstructured, volumeName, claimName string) error {
634+
volumes, _, _ := unstructured.NestedSlice(tmpl.Object, "spec", "podTemplate", "spec", "volumes")
635+
vol := map[string]any{
636+
"name": volumeName,
637+
"persistentVolumeClaim": map[string]any{
638+
"claimName": claimName,
639+
},
640+
}
641+
for i, v := range volumes {
642+
existing, ok := v.(map[string]any)
643+
if !ok {
644+
continue
645+
}
646+
if existing["name"] == volumeName {
647+
volumes[i] = vol
648+
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
649+
}
650+
}
651+
volumes = append(volumes, vol)
652+
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
653+
}
654+
655+
func patchDataSource(tmpl *unstructured.Unstructured, ds *agenticv1alpha1.DataSource) error {
656+
volName := "data-source"
657+
if err := addPVCVolume(tmpl, volName, ds.ClaimName); err != nil {
658+
return fmt.Errorf("add data source PVC volume: %w", err)
659+
}
660+
return addVolumeMount(tmpl, volName, dataSourceMountPath, true)
661+
}
662+
619663
func addVolumeMount(tmpl *unstructured.Unstructured, name, mountPath string, readOnly bool) error {
620664
container, containers, err := firstContainer(tmpl)
621665
if err != nil {

controller/proposal/sandbox_templates_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ func emptyTemplate() *unstructured.Unstructured {
7777

7878
func mustHash(t *testing.T, llm *agenticv1alpha1.LLMProvider, model string, skills []agenticv1alpha1.SkillsSource, requiredSecrets []agenticv1alpha1.SecretRequirement, phase string) string {
7979
t.Helper()
80-
h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, phase, "", "")
80+
h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, nil, phase, "", "")
8181
if err != nil {
8282
t.Fatalf("computeTemplateHash: %v", err)
8383
}
@@ -517,14 +517,14 @@ func TestSetEnvVar_FailsOnNoContainers(t *testing.T) {
517517
}
518518

519519
func TestEnsureAgentTemplate_NilAgent(t *testing.T) {
520-
_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, "lightspeed-agent")
520+
_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, nil, "lightspeed-agent")
521521
if err == nil {
522522
t.Error("expected error for nil agent")
523523
}
524524
}
525525

526526
func TestEnsureAgentTemplate_NilLLM(t *testing.T) {
527-
_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, "lightspeed-agent")
527+
_, err := EnsureAgentTemplate(context.Background(), nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, nil,"lightspeed-agent")
528528
if err == nil {
529529
t.Error("expected error for nil LLM")
530530
}
@@ -627,11 +627,11 @@ func TestComputeTemplateHash_DifferentBaseResourceVersion(t *testing.T) {
627627
llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
628628
skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
629629

630-
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")
630+
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")
631631
if err != nil {
632632
t.Fatal(err)
633633
}
634-
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "2000", "")
634+
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "2000", "")
635635
if err != nil {
636636
t.Fatal(err)
637637
}
@@ -645,11 +645,11 @@ func TestComputeTemplateHash_SameBaseResourceVersion(t *testing.T) {
645645
llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
646646
skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
647647

648-
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")
648+
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")
649649
if err != nil {
650650
t.Fatal(err)
651651
}
652-
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "")
652+
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000", "")
653653
if err != nil {
654654
t.Fatal(err)
655655
}

controller/proposal/templates/analysis_query.tmpl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,31 @@
11
You are an analysis agent. Your job is to diagnose the problem, determine the root cause, and propose one or more remediation options. Do NOT attempt to fix, patch, or execute any changes — only analyze and propose.
22

3+
## Skills
4+
5+
A specialist deep-RCA pipeline is available at `/app/skills/intelliaide/SKILL.md`.
6+
7+
Use it ONLY when the request calls for:
8+
- Root cause analysis (RCA) or deeper / ML-assisted troubleshooting of a cluster issue
9+
- Must-gather collection or analysis
10+
- Investigating pod failures, etcd degradation, networking problems, storage issues, etc.
11+
- Any request that explicitly mentions "deeper analysis", "deeper troubleshooting", "root cause", "RCA", "must-gather", or "IntelliAide"
12+
13+
For routine inspection (checking pod/node status, listing events, summarising resource state,
14+
describing objects), use `kubectl`/`oc` commands directly — do NOT invoke the IntelliAide pipeline.
15+
16+
**Decision rule — apply before doing anything else:**
17+
1. Read the `## Request` section below.
18+
2. If it is a routine inspection query → proceed with `kubectl`/`oc` directly.
19+
3. If it is a deep-RCA or troubleshooting request → read the skill file with ONE atomic command:
20+
```
21+
cat /app/skills/intelliaide/SKILL.md
22+
```
23+
If the command succeeds, read the SKILL.md contents it prints
24+
and follow its workflow **exactly** instead of the instructions below.
25+
If the skill file is missing or the command fails, stop immediately and return a JSON error response — the skill is required for this kind of request and its absence is a fatal misconfiguration.
26+
27+
## Analysis requirements
28+
329
For each option you propose, include:
430
- A diagnosis with root cause and confidence level
531
- A detailed remediation plan with specific actions

0 commit comments

Comments
 (0)