Skip to content

Commit af4eef0

Browse files
committed
fix: per-step timeoutMinutes, tools.dataSource, and review follow-ups
1 parent 5c91a3f commit af4eef0

17 files changed

Lines changed: 578 additions & 174 deletions

.ai/spec/how/reconciler.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ Audience: AI agents. Behavioral rules and phase semantics live in **what/** spec
170170
- **`SandboxProvider`:** Swappable claim/wait/release (tests can fake). Implementations: `SandboxManager` (sandbox-claim mode), `BarePodManager` (bare-pod mode). `SetStep` provides resolved step config before each `Claim` call.
171171
- **`PodSpecBuilder`:** Shared pod-spec assembly. Produces typed `corev1.PodSpec` from image + resolved step config. Used directly by `BarePodManager`; shared helper functions also used by `EnsureAgentTemplate` (unstructured path).
172172
- **`resolveProposal`:** Produces `resolvedWorkflow` with cached `Agent` + `LLMProvider` per name; applies per-stage agent overrides from `ProposalApproval` via `getStageOverrideAgent`; `Execution`/`Verification` steps nil when corresponding spec sections are zero.
173-
- **`EnsureAgentTemplate`:** Deterministic derived `SandboxTemplate` name from hash of LLM spec, model, skills, MCP servers, required secrets, step, and **base template resourceVersion**. Patches pod template env/volumes for credentials, Vertex/Bedrock/Azure extras, skills image/paths, and MCP JSON env. GC older templates labeled for same agent+step.
173+
- **`EnsureAgentTemplate`:** Deterministic derived `SandboxTemplate` name from hash of LLM spec, model, skills, MCP servers, required secrets, dataSource PVC, step, and **base template resourceVersion**. `dataSource` is extracted from `tools.DataSource` (set in `ToolsSpec`). Patches pod template env/volumes for credentials, Vertex/Bedrock/Azure extras, skills image/paths, MCP JSON env, `LIGHTSPEED_MODE`, and optional `/data/input` PVC mount. GC older templates labeled for same agent+step.
174174

175175
---
176176

api/v1alpha1/proposal_types.go

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,22 @@ type ProposalStep struct {
258258
// for this step. Use this when different steps need different skills.
259259
// +optional
260260
Tools ToolsSpec `json:"tools,omitzero"`
261+
262+
// timeoutMinutes sets the timeout for this step's sandbox agent call.
263+
// This controls how long the operator waits for the sandbox pod to
264+
// become ready and for the agent to complete its work. Increase this
265+
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
266+
// Defaults to 5 minutes when omitted.
267+
//
268+
// Mutable: can be adjusted before approving a step.
269+
// +optional
270+
// +kubebuilder:validation:Minimum=1
271+
// +kubebuilder:validation:Maximum=60
272+
TimeoutMinutes int32 `json:"timeoutMinutes,omitempty"`
261273
}
262274

263275
func (s ProposalStep) IsZero() bool {
264-
return s.Agent == "" && s.Tools.IsZero()
276+
return s.Agent == "" && s.Tools.IsZero() && s.TimeoutMinutes == 0
265277
}
266278

267279
// DataSource references a pre-existing PersistentVolumeClaim containing
@@ -277,7 +289,11 @@ type DataSource struct {
277289
// +kubebuilder:validation:MinLength=1
278290
// +kubebuilder:validation:MaxLength=253
279291
// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="must be a valid DNS subdomain"
280-
ClaimName string `json:"claimName"`
292+
ClaimName string `json:"claimName,omitempty"`
293+
}
294+
295+
func (d DataSource) IsZero() bool {
296+
return d.ClaimName == ""
281297
}
282298

283299
// ProposalSpec defines the desired state of Proposal.
@@ -291,10 +307,9 @@ type DataSource struct {
291307
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysisOutput) || (has(self.analysisOutput) && self.analysisOutput == oldSelf.analysisOutput)",message="analysisOutput is immutable once set"
292308
// +kubebuilder:validation:XValidation:rule="!has(self.analysisOutput) || self.analysisOutput.mode != 'Minimal' || (!has(self.execution) && !has(self.verification))",message="analysisOutput mode Minimal is only allowed for analysis-only proposals (no execution or verification steps)"
293309
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.tools) || (has(self.tools) && self.tools == oldSelf.tools)",message="tools is immutable once set"
294-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis == oldSelf.analysis)",message="analysis is immutable once set"
295-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution == oldSelf.execution)",message="execution is immutable once set"
296-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification == oldSelf.verification)",message="verification is immutable once set"
297-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataSource) || (has(self.dataSource) && self.dataSource == oldSelf.dataSource)",message="dataSource is immutable once set"
310+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis.agent == oldSelf.analysis.agent && self.analysis.tools == oldSelf.analysis.tools)",message="analysis agent and tools are immutable once set"
311+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution.agent == oldSelf.execution.agent && self.execution.tools == oldSelf.execution.tools)",message="execution agent and tools are immutable once set"
312+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification.agent == oldSelf.verification.agent && self.verification.tools == oldSelf.verification.tools)",message="verification agent and tools are immutable once set"
298313
type ProposalSpec struct {
299314
// request is the user's original request, alert description, or a
300315
// description of what triggered this proposal. This text is passed to
@@ -342,25 +357,17 @@ type ProposalSpec struct {
342357
AnalysisOutput AnalysisOutput `json:"analysisOutput,omitzero"`
343358

344359
// tools defines the default tools for all steps: skills images,
345-
// MCP servers, and required secrets. Per-step tools
346-
// (analysis.tools, execution.tools, verification.tools) replace
347-
// this default for individual steps.
360+
// MCP servers, required secrets, and an optional dataSource PVC.
361+
// Per-step tools (analysis.tools, execution.tools, verification.tools)
362+
// replace this default for individual steps, so a dataSource set in
363+
// spec.analysis.tools is mounted only in the analysis sandbox.
348364
//
349365
// Immutable: the skills and secrets available to the agent are
350366
// fixed at creation. Changing tools mid-flight could violate the
351367
// assumptions of an in-progress analysis or execution.
352368
// +optional
353369
Tools ToolsSpec `json:"tools,omitzero"`
354370

355-
// dataSource references a PVC containing pre-populated input data
356-
// (e.g., must-gather bundles, diagnostic data). The operator mounts
357-
// it read-only at /data/input in the sandbox pod. Skills discover
358-
// input data at this standard location.
359-
//
360-
// Immutable: input data source is fixed at creation.
361-
// +optional
362-
DataSource *DataSource `json:"dataSource,omitzero"`
363-
364371
// analysis defines per-step configuration for the analysis step,
365372
// including which agent handles it and any per-step tools.
366373
//
@@ -382,18 +389,6 @@ type ProposalSpec struct {
382389
// +optional
383390
Verification ProposalStep `json:"verification,omitzero"`
384391

385-
// timeoutMinutes sets the per-step timeout for sandbox agent calls.
386-
// This controls how long the operator waits for the sandbox pod to
387-
// become ready and for the agent to complete its work. Increase this
388-
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
389-
// Defaults to 5 minutes when omitted.
390-
//
391-
// Mutable: can be adjusted before approving a step.
392-
// +optional
393-
// +kubebuilder:validation:Minimum=1
394-
// +kubebuilder:validation:Maximum=60
395-
TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
396-
397392
// revisionFeedback is the user's free-text feedback requesting changes
398393
// to the analysis. Patching this field bumps metadata.generation, which
399394
// the operator detects (generation > observedGeneration) and triggers

api/v1alpha1/tools_types.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,15 @@ type SecretRequirement struct {
109109
}
110110

111111
// ToolsSpec defines the tools available to an agent in its sandbox pod.
112-
// This includes skills images, MCP servers, and required secrets.
112+
// This includes skills images, MCP servers, required secrets, and an
113+
// optional data source PVC.
113114
//
114115
// ToolsSpec is specified on a Proposal either as a shared default
115116
// (spec.tools) or per-step (spec.analysis.tools, spec.execution.tools,
116117
// spec.verification.tools). Per-step tools replace the shared default
117-
// for that step.
118+
// for that step, so a dataSource set in spec.analysis.tools is mounted
119+
// only in the analysis sandbox, while one in spec.tools is mounted in
120+
// every step that does not override tools.
118121
//
119122
// +kubebuilder:validation:MinProperties=1
120123
type ToolsSpec struct {
@@ -147,8 +150,16 @@ type ToolsSpec struct {
147150
// +kubebuilder:validation:MinItems=1
148151
// +kubebuilder:validation:MaxItems=20
149152
RequiredSecrets []SecretRequirement `json:"requiredSecrets,omitempty"`
153+
154+
// dataSource references a pre-existing PersistentVolumeClaim containing
155+
// input data for this step (e.g., must-gather bundles, diagnostic data).
156+
// The PVC must already exist in the same namespace as the Proposal and be
157+
// pre-populated with data before the Proposal is created. The operator
158+
// mounts it read-only at /data/input in the sandbox pod.
159+
// +optional
160+
DataSource *DataSource `json:"dataSource,omitempty"`
150161
}
151162

152163
func (t ToolsSpec) IsZero() bool {
153-
return len(t.Skills) == 0 && len(t.MCPServers) == 0 && len(t.RequiredSecrets) == 0
164+
return len(t.Skills) == 0 && len(t.MCPServers) == 0 && len(t.RequiredSecrets) == 0 && t.DataSource == nil
154165
}

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 20 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)