Skip to content

Commit c718372

Browse files
committed
feat: add dataSource PVC mount and IntelliAide proposal templates
1 parent d81e987 commit c718372

6 files changed

Lines changed: 311 additions & 12 deletions

File tree

api/v1alpha1/proposal_types.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,22 @@ func (s ProposalStep) IsZero() bool {
264264
return s.Agent == "" && s.Tools.IsZero()
265265
}
266266

267+
// DataSource references a pre-existing PersistentVolumeClaim containing
// input data for this proposal (e.g., must-gather bundles, diagnostic data).
// The PVC must already exist in the same namespace as the Proposal and be
// pre-populated with data before the Proposal is created. The operator
// mounts it read-only at a well-known path (/data/input) accessible to
// all skills in the sandbox pod.
//
// The claim is identified by name only; the mount path is fixed and is not
// configurable through this type.
type DataSource struct {
	// claimName is the name of the PersistentVolumeClaim to mount.
	// The PVC must exist in the same namespace as the Proposal.
	// It is required, must be between 1 and 253 characters long, and must
	// be a valid DNS-1123 subdomain.
	// +required
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=253
	// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="must be a valid DNS subdomain"
	ClaimName string `json:"claimName"`
}
282+
267283
// ProposalSpec defines the desired state of Proposal.
268284
//
269285
// A Proposal defines the workflow shape inline, specifying which steps
@@ -278,6 +294,7 @@ func (s ProposalStep) IsZero() bool {
278294
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis == oldSelf.analysis)",message="analysis is immutable once set"
279295
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution == oldSelf.execution)",message="execution is immutable once set"
280296
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification == oldSelf.verification)",message="verification is immutable once set"
297+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataSource) || (has(self.dataSource) && self.dataSource == oldSelf.dataSource)",message="dataSource is immutable once set"
281298
type ProposalSpec struct {
282299
// request is the user's original request, alert description, or a
283300
// description of what triggered this proposal. This text is passed to
@@ -335,6 +352,15 @@ type ProposalSpec struct {
335352
// +optional
336353
Tools ToolsSpec `json:"tools,omitzero"`
337354

355+
// dataSource references a PVC containing pre-populated input data
356+
// (e.g., must-gather bundles, diagnostic data). The operator mounts
357+
// it read-only at /data/input in the sandbox pod. Skills discover
358+
// input data at this standard location.
359+
//
360+
// Immutable: input data source is fixed at creation.
361+
// +optional
362+
DataSource *DataSource `json:"dataSource,omitzero"`
363+
338364
// analysis defines per-step configuration for the analysis step,
339365
// including which agent handles it and any per-step tools.
340366
//
@@ -356,6 +382,18 @@ type ProposalSpec struct {
356382
// +optional
357383
Verification ProposalStep `json:"verification,omitzero"`
358384

385+
// timeoutMinutes sets the per-step timeout for sandbox agent calls.
386+
// This controls how long the operator waits for the sandbox pod to
387+
// become ready and for the agent to complete its work. Increase this
388+
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
389+
// Defaults to 5 minutes when omitted.
390+
//
391+
// Mutable: can be adjusted before approving a step.
392+
// +optional
393+
// +kubebuilder:validation:Minimum=1
394+
// +kubebuilder:validation:Maximum=60
395+
TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
396+
359397
// revisionFeedback is the user's free-text feedback requesting changes
360398
// to the analysis. Patching this field bumps metadata.generation, which
361399
// the operator detects (generation > observedGeneration) and triggers

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controller/proposal/sandbox_templates.go

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@ var sandboxTemplateGVK = schema.GroupVersionKind{
2828
const (
2929
agentModeEnvVar = "LIGHTSPEED_MODE"
3030

31-
llmCredsMountPath = "/var/run/secrets/llm-credentials"
32-
llmCredsVolumeName = "llm-credentials"
33-
mcpHeadersMountRoot = "/var/secrets/mcp"
34-
mcpServersEnvVar = "LIGHTSPEED_MCP_SERVERS"
31+
vertexCredsMountPath = "/var/secrets/google"
32+
vertexCredsFileName = "credentials.json"
33+
llmCredsVolumeName = "llm-credentials"
34+
mcpHeadersMountRoot = "/var/secrets/mcp"
35+
mcpServersEnvVar = "LIGHTSPEED_MCP_SERVERS"
36+
dataSourceMountPath = "/data/input"
3537

3638
LabelManaged = "agentic.openshift.io/managed"
3739
LabelBaseTemplate = "agentic.openshift.io/base-template"
@@ -47,6 +49,7 @@ type templateHashInput struct {
4749
Skills []agenticv1alpha1.SkillsSource `json:"skills"`
4850
MCPServers []agenticv1alpha1.MCPServerConfig `json:"mcpServers,omitempty"`
4951
RequiredSecrets []agenticv1alpha1.SecretRequirement `json:"requiredSecrets,omitempty"`
52+
DataSource *agenticv1alpha1.DataSource `json:"dataSource,omitempty"`
5053
Step string `json:"step"`
5154
BaseResourceVersion string `json:"baseRV"`
5255
}
@@ -57,6 +60,7 @@ func computeTemplateHash(
5760
skills []agenticv1alpha1.SkillsSource,
5861
mcpServers []agenticv1alpha1.MCPServerConfig,
5962
requiredSecrets []agenticv1alpha1.SecretRequirement,
63+
dataSource *agenticv1alpha1.DataSource,
6064
step string,
6165
baseResourceVersion string,
6266
) (string, error) {
@@ -66,6 +70,7 @@ func computeTemplateHash(
6670
Skills: skills,
6771
MCPServers: mcpServers,
6872
RequiredSecrets: requiredSecrets,
73+
DataSource: dataSource,
6974
Step: step,
7075
BaseResourceVersion: baseResourceVersion,
7176
}
@@ -94,6 +99,7 @@ func EnsureAgentTemplate(
9499
agent *agenticv1alpha1.Agent,
95100
llm *agenticv1alpha1.LLMProvider,
96101
tools *agenticv1alpha1.ToolsSpec,
102+
dataSource *agenticv1alpha1.DataSource,
97103
) (string, error) {
98104
log := logf.FromContext(ctx).WithName("sandbox-templates")
99105

@@ -119,7 +125,7 @@ func EnsureAgentTemplate(
119125
requiredSecrets = tools.RequiredSecrets
120126
}
121127

122-
hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, step, base.GetResourceVersion())
128+
hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, dataSource, step, base.GetResourceVersion())
123129
if err != nil {
124130
return "", fmt.Errorf("compute template hash: %w", err)
125131
}
@@ -188,6 +194,12 @@ func EnsureAgentTemplate(
188194
}
189195
}
190196

197+
if dataSource != nil {
198+
if err := patchDataSource(derived, dataSource); err != nil {
199+
return "", fmt.Errorf("patch data source: %w", err)
200+
}
201+
}
202+
191203
if err := c.Create(ctx, derived); err != nil {
192204
if apierrors.IsAlreadyExists(err) {
193205
return name, nil
@@ -531,6 +543,36 @@ func addSecretVolume(tmpl *unstructured.Unstructured, volumeName, secretName str
531543
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
532544
}
533545

546+
func addPVCVolume(tmpl *unstructured.Unstructured, volumeName, claimName string) error {
547+
volumes, _, _ := unstructured.NestedSlice(tmpl.Object, "spec", "podTemplate", "spec", "volumes")
548+
vol := map[string]any{
549+
"name": volumeName,
550+
"persistentVolumeClaim": map[string]any{
551+
"claimName": claimName,
552+
},
553+
}
554+
for i, v := range volumes {
555+
existing, ok := v.(map[string]any)
556+
if !ok {
557+
continue
558+
}
559+
if existing["name"] == volumeName {
560+
volumes[i] = vol
561+
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
562+
}
563+
}
564+
volumes = append(volumes, vol)
565+
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
566+
}
567+
568+
func patchDataSource(tmpl *unstructured.Unstructured, ds *agenticv1alpha1.DataSource) error {
569+
volName := "data-source"
570+
if err := addPVCVolume(tmpl, volName, ds.ClaimName); err != nil {
571+
return fmt.Errorf("add data source PVC volume: %w", err)
572+
}
573+
return addVolumeMount(tmpl, volName, dataSourceMountPath, true)
574+
}
575+
534576
func addVolumeMount(tmpl *unstructured.Unstructured, name, mountPath string, readOnly bool) error {
535577
container, containers, err := firstContainer(tmpl)
536578
if err != nil {

controller/proposal/sandbox_templates_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func emptyTemplate() *unstructured.Unstructured {
7676

7777
func mustHash(t *testing.T, llm *agenticv1alpha1.LLMProvider, model string, skills []agenticv1alpha1.SkillsSource, requiredSecrets []agenticv1alpha1.SecretRequirement, phase string) string {
7878
t.Helper()
79-
h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, phase, "")
79+
h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, nil, phase, "")
8080
if err != nil {
8181
t.Fatalf("computeTemplateHash: %v", err)
8282
}
@@ -516,14 +516,14 @@ func TestSetEnvVar_FailsOnNoContainers(t *testing.T) {
516516
}
517517

518518
func TestEnsureAgentTemplate_NilAgent(t *testing.T) {
519-
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil)
519+
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, nil)
520520
if err == nil {
521521
t.Error("expected error for nil agent")
522522
}
523523
}
524524

525525
func TestEnsureAgentTemplate_NilLLM(t *testing.T) {
526-
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil)
526+
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, nil)
527527
if err == nil {
528528
t.Error("expected error for nil LLM")
529529
}
@@ -626,11 +626,11 @@ func TestComputeTemplateHash_DifferentBaseResourceVersion(t *testing.T) {
626626
llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
627627
skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
628628

629-
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000")
629+
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000")
630630
if err != nil {
631631
t.Fatal(err)
632632
}
633-
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "2000")
633+
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "2000")
634634
if err != nil {
635635
t.Fatal(err)
636636
}
@@ -644,11 +644,11 @@ func TestComputeTemplateHash_SameBaseResourceVersion(t *testing.T) {
644644
llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
645645
skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
646646

647-
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000")
647+
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000")
648648
if err != nil {
649649
t.Fatal(err)
650650
}
651-
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000")
651+
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000")
652652
if err != nil {
653653
t.Fatal(err)
654654
}

controller/proposal/templates/analysis_query.tmpl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,31 @@
11
You are an analysis agent. Your job is to diagnose the problem, determine the root cause, and propose one or more remediation options. Do NOT attempt to fix, patch, or execute any changes — only analyze and propose.
22

3+
## Skills
4+
5+
A specialist deep-RCA pipeline is available at `/app/skills/intelliaide/SKILL.md`.
6+
7+
Use it ONLY when the request calls for:
8+
- Root cause analysis (RCA) or deeper / ML-assisted troubleshooting of a cluster issue
9+
- Must-gather collection or analysis
10+
- Investigating pod failures, etcd degradation, networking problems, storage issues, etc.
11+
- Any request that explicitly mentions "deeper analysis", "deeper troubleshooting", "root cause", "RCA", "must-gather", or "IntelliAide"
12+
13+
For routine inspection (checking pod/node status, listing events, summarising resource state,
14+
describing objects), use `kubectl`/`oc` commands directly — do NOT invoke the IntelliAide pipeline.
15+
16+
**Decision rule — apply before doing anything else:**
17+
1. Read the `## Request` section below.
18+
2. If it is a routine inspection query → proceed with `kubectl`/`oc` directly.
19+
3. If it is a deep-RCA or troubleshooting request → read the skill file with ONE atomic command:
20+
```
21+
cat /app/skills/intelliaide/SKILL.md
22+
```
23+
If the command prints the skill file, read it in full
24+
and follow its workflow **exactly** instead of the instructions below.
25+
If the file is missing or cannot be read, stop immediately and return a JSON error response — the skill is required for this kind of request and its absence is a fatal misconfiguration.
26+
27+
## Analysis requirements
28+
329
For each option you propose, include:
430
- A diagnosis with root cause and confidence level
531
- A detailed remediation plan with specific actions

0 commit comments

Comments
 (0)