Skip to content

Commit 31261be

Browse files
committed
feat: add dataSource PVC mount and IntelliAide proposal templates
1 parent 995cc83 commit 31261be

6 files changed

Lines changed: 306 additions & 8 deletions

File tree

api/v1alpha1/proposal_types.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,22 @@ func (s ProposalStep) IsZero() bool {
264264
return s.Agent == "" && s.Tools.IsZero()
265265
}
266266

267+
// DataSource identifies an already-existing PersistentVolumeClaim that
// holds the input data for a proposal, such as must-gather bundles or
// other diagnostic data. The claim has to live in the Proposal's own
// namespace and be filled with data before the Proposal is created.
// The operator mounts the claim read-only at the well-known path
// /data/input, where every skill in the sandbox pod can reach it.
type DataSource struct {
	// claimName is the name of the PersistentVolumeClaim to mount.
	// The claim must be present in the same namespace as the Proposal.
	// +required
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=253
	// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="must be a valid DNS subdomain"
	ClaimName string `json:"claimName"`
}
282+
267283
// ProposalSpec defines the desired state of Proposal.
268284
//
269285
// A Proposal defines the workflow shape inline, specifying which steps
@@ -278,6 +294,7 @@ func (s ProposalStep) IsZero() bool {
278294
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis == oldSelf.analysis)",message="analysis is immutable once set"
279295
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution == oldSelf.execution)",message="execution is immutable once set"
280296
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification == oldSelf.verification)",message="verification is immutable once set"
297+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataSource) || (has(self.dataSource) && self.dataSource == oldSelf.dataSource)",message="dataSource is immutable once set"
281298
type ProposalSpec struct {
282299
// request is the user's original request, alert description, or a
283300
// description of what triggered this proposal. This text is passed to
@@ -335,6 +352,15 @@ type ProposalSpec struct {
335352
// +optional
336353
Tools ToolsSpec `json:"tools,omitzero"`
337354

355+
// dataSource references a PVC containing pre-populated input data
356+
// (e.g., must-gather bundles, diagnostic data). The operator mounts
357+
// it read-only at /data/input in the sandbox pod. Skills discover
358+
// input data at this standard location.
359+
//
360+
// Immutable: input data source is fixed at creation.
361+
// +optional
362+
DataSource *DataSource `json:"dataSource,omitzero"`
363+
338364
// analysis defines per-step configuration for the analysis step,
339365
// including which agent handles it and any per-step tools.
340366
//
@@ -356,6 +382,18 @@ type ProposalSpec struct {
356382
// +optional
357383
Verification ProposalStep `json:"verification,omitzero"`
358384

385+
// timeoutMinutes sets the per-step timeout for sandbox agent calls.
386+
// This controls how long the operator waits for the sandbox pod to
387+
// become ready and for the agent to complete its work. Increase this
388+
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
389+
// Defaults to 5 minutes when omitted.
390+
//
391+
// Mutable: can be adjusted before approving a step.
392+
// +optional
393+
// +kubebuilder:validation:Minimum=1
394+
// +kubebuilder:validation:Maximum=60
395+
TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
396+
359397
// revisionFeedback is the user's free-text feedback requesting changes
360398
// to the analysis. Patching this field bumps metadata.generation, which
361399
// the operator detects (generation > observedGeneration) and triggers

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controller/proposal/sandbox_templates.go

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ const (
3333
llmCredsVolumeName = "llm-credentials"
3434
mcpHeadersMountRoot = "/var/secrets/mcp"
3535
mcpServersEnvVar = "LIGHTSPEED_MCP_SERVERS"
36+
dataSourceMountPath = "/data/input"
3637

3738
LabelManaged = "agentic.openshift.io/managed"
3839
LabelBaseTemplate = "agentic.openshift.io/base-template"
@@ -48,6 +49,7 @@ type templateHashInput struct {
4849
Skills []agenticv1alpha1.SkillsSource `json:"skills"`
4950
MCPServers []agenticv1alpha1.MCPServerConfig `json:"mcpServers,omitempty"`
5051
RequiredSecrets []agenticv1alpha1.SecretRequirement `json:"requiredSecrets,omitempty"`
52+
DataSource *agenticv1alpha1.DataSource `json:"dataSource,omitempty"`
5153
Step string `json:"step"`
5254
BaseResourceVersion string `json:"baseRV"`
5355
}
@@ -58,6 +60,7 @@ func computeTemplateHash(
5860
skills []agenticv1alpha1.SkillsSource,
5961
mcpServers []agenticv1alpha1.MCPServerConfig,
6062
requiredSecrets []agenticv1alpha1.SecretRequirement,
63+
dataSource *agenticv1alpha1.DataSource,
6164
step string,
6265
baseResourceVersion string,
6366
) (string, error) {
@@ -67,6 +70,7 @@ func computeTemplateHash(
6770
Skills: skills,
6871
MCPServers: mcpServers,
6972
RequiredSecrets: requiredSecrets,
73+
DataSource: dataSource,
7074
Step: step,
7175
BaseResourceVersion: baseResourceVersion,
7276
}
@@ -95,6 +99,7 @@ func EnsureAgentTemplate(
9599
agent *agenticv1alpha1.Agent,
96100
llm *agenticv1alpha1.LLMProvider,
97101
tools *agenticv1alpha1.ToolsSpec,
102+
dataSource *agenticv1alpha1.DataSource,
98103
) (string, error) {
99104
log := logf.FromContext(ctx).WithName("sandbox-templates")
100105

@@ -120,7 +125,7 @@ func EnsureAgentTemplate(
120125
requiredSecrets = tools.RequiredSecrets
121126
}
122127

123-
hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, step, base.GetResourceVersion())
128+
hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, dataSource, step, base.GetResourceVersion())
124129
if err != nil {
125130
return "", fmt.Errorf("compute template hash: %w", err)
126131
}
@@ -189,6 +194,12 @@ func EnsureAgentTemplate(
189194
}
190195
}
191196

197+
if dataSource != nil {
198+
if err := patchDataSource(derived, dataSource); err != nil {
199+
return "", fmt.Errorf("patch data source: %w", err)
200+
}
201+
}
202+
192203
if err := c.Create(ctx, derived); err != nil {
193204
if apierrors.IsAlreadyExists(err) {
194205
return name, nil
@@ -509,6 +520,36 @@ func addSecretVolume(tmpl *unstructured.Unstructured, volumeName, secretName str
509520
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
510521
}
511522

523+
func addPVCVolume(tmpl *unstructured.Unstructured, volumeName, claimName string) error {
524+
volumes, _, _ := unstructured.NestedSlice(tmpl.Object, "spec", "podTemplate", "spec", "volumes")
525+
vol := map[string]any{
526+
"name": volumeName,
527+
"persistentVolumeClaim": map[string]any{
528+
"claimName": claimName,
529+
},
530+
}
531+
for i, v := range volumes {
532+
existing, ok := v.(map[string]any)
533+
if !ok {
534+
continue
535+
}
536+
if existing["name"] == volumeName {
537+
volumes[i] = vol
538+
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
539+
}
540+
}
541+
volumes = append(volumes, vol)
542+
return unstructured.SetNestedSlice(tmpl.Object, volumes, "spec", "podTemplate", "spec", "volumes")
543+
}
544+
545+
func patchDataSource(tmpl *unstructured.Unstructured, ds *agenticv1alpha1.DataSource) error {
546+
volName := "data-source"
547+
if err := addPVCVolume(tmpl, volName, ds.ClaimName); err != nil {
548+
return fmt.Errorf("add data source PVC volume: %w", err)
549+
}
550+
return addVolumeMount(tmpl, volName, dataSourceMountPath, true)
551+
}
552+
512553
func addVolumeMount(tmpl *unstructured.Unstructured, name, mountPath string, readOnly bool) error {
513554
container, containers, err := firstContainer(tmpl)
514555
if err != nil {

controller/proposal/sandbox_templates_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ func emptyTemplate() *unstructured.Unstructured {
6767

6868
func mustHash(t *testing.T, llm *agenticv1alpha1.LLMProvider, model string, skills []agenticv1alpha1.SkillsSource, requiredSecrets []agenticv1alpha1.SecretRequirement, phase string) string {
6969
t.Helper()
70-
h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, phase, "")
70+
h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, nil, phase, "")
7171
if err != nil {
7272
t.Fatalf("computeTemplateHash: %v", err)
7373
}
@@ -381,14 +381,14 @@ func TestSetEnvVar_FailsOnNoContainers(t *testing.T) {
381381
}
382382

383383
func TestEnsureAgentTemplate_NilAgent(t *testing.T) {
384-
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil)
384+
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", nil, testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex), nil, nil)
385385
if err == nil {
386386
t.Error("expected error for nil agent")
387387
}
388388
}
389389

390390
func TestEnsureAgentTemplate_NilLLM(t *testing.T) {
391-
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil)
391+
_, err := EnsureAgentTemplate(nil, nil, "base", "ns", "analysis", testDefaultAgent(), nil, nil, nil)
392392
if err == nil {
393393
t.Error("expected error for nil LLM")
394394
}
@@ -491,11 +491,11 @@ func TestComputeTemplateHash_DifferentBaseResourceVersion(t *testing.T) {
491491
llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
492492
skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
493493

494-
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000")
494+
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000")
495495
if err != nil {
496496
t.Fatal(err)
497497
}
498-
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "2000")
498+
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "2000")
499499
if err != nil {
500500
t.Fatal(err)
501501
}
@@ -509,11 +509,11 @@ func TestComputeTemplateHash_SameBaseResourceVersion(t *testing.T) {
509509
llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex)
510510
skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}}
511511

512-
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000")
512+
h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000")
513513
if err != nil {
514514
t.Fatal(err)
515515
}
516-
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000")
516+
h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, nil, "analysis", "1000")
517517
if err != nil {
518518
t.Fatal(err)
519519
}

controller/proposal/templates/analysis_query.tmpl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,31 @@
11
You are an analysis agent. Your job is to diagnose the problem, determine the root cause, and propose one or more remediation options. Do NOT attempt to fix, patch, or execute any changes — only analyze and propose.
22

3+
## Skills
4+
5+
A specialist deep-RCA pipeline is available at `/app/skills/intelliaide/SKILL.md`.
6+
7+
Use it ONLY when the request calls for:
8+
- Root cause analysis (RCA) or deeper / ML-assisted troubleshooting of a cluster issue
9+
- Must-gather collection or analysis
10+
- Investigating pod failures, etcd degradation, networking problems, storage issues, etc.
11+
- Any request that explicitly mentions "deeper analysis", "deeper troubleshooting", "root cause", "RCA", "must-gather", or "IntelliAide"
12+
13+
For routine inspection (checking pod/node status, listing events, summarising resource state,
14+
describing objects), use `kubectl`/`oc` commands directly — do NOT invoke the IntelliAide pipeline.
15+
16+
**Decision rule — apply before doing anything else:**
17+
1. Read the `## Request` section below.
18+
2. If it is a routine inspection query → proceed with `kubectl`/`oc` directly.
19+
3. If it is a deep-RCA or troubleshooting request → read the skill file with ONE atomic command:
20+
```
21+
cat /app/skills/intelliaide/SKILL.md
22+
```
23+
If the command prints the skill file, read its contents carefully
24+
and follow its workflow **exactly** instead of the instructions below.
25+
If the file is missing or cannot be read, stop immediately and return a JSON error response — the skill is required for this kind of request and its absence is a fatal misconfiguration.
26+
27+
## Analysis requirements
28+
329
For each option you propose, include:
430
- A diagnosis with root cause and confidence level
531
- A detailed remediation plan with specific actions

0 commit comments

Comments
 (0)