Skip to content

Commit e4d7a00

Browse files
committed
fix: per-step timeoutMinutes, tools.dataSource, and review follow-ups
1 parent b69c658 commit e4d7a00

18 files changed

Lines changed: 559 additions & 196 deletions

.ai/spec/how/reconciler.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ Audience: AI agents. Behavioral rules and phase semantics live in **what/** spec
171171
- **`SandboxProvider`:** Swappable claim/wait/release (tests can fake). Implementations: `SandboxManager` (sandbox-claim mode), `BarePodManager` (bare-pod mode). `SetStep` provides resolved step config before each `Claim` call.
172172
- **`PodSpecBuilder`:** Shared pod-spec assembly. Produces typed `corev1.PodSpec` from image + resolved step config. Used directly by `BarePodManager`; shared helper functions also used by `EnsureAgentTemplate` (unstructured path).
173173
- **`resolveProposal`:** Produces `resolvedWorkflow` with cached `Agent` + `LLMProvider` per name; applies per-stage agent overrides from `ProposalApproval` via `getStageOverrideAgent`; `Execution`/`Verification` steps nil when corresponding spec sections are zero.
174-
- **`EnsureAgentTemplate`:** Deterministic derived `SandboxTemplate` name from hash of LLM spec, model, skills, MCP servers, required secrets, step, and **base template resourceVersion**. Patches pod template env/volumes for credentials, Vertex/Bedrock/Azure extras, skills image/paths, and MCP JSON env. GC older templates labeled for same agent+step.
174+
- **`EnsureAgentTemplate`:** Deterministic derived `SandboxTemplate` name from hash of LLM spec, model, skills, MCP servers, required secrets, dataSource PVC, step, and **base template resourceVersion**. `dataSource` is extracted from `tools.DataSource` (set in `ToolsSpec`). Patches pod template env/volumes for credentials, Vertex/Bedrock/Azure extras, skills image/paths, MCP JSON env, `LIGHTSPEED_MODE`, and optional `/data/input` PVC mount. GC older templates labeled for same agent+step.
175175

176176
---
177177

api/v1alpha1/proposal_types.go

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -264,10 +264,22 @@ type ProposalStep struct {
264264
// for this step. Use this when different steps need different skills.
265265
// +optional
266266
Tools ToolsSpec `json:"tools,omitzero"`
267+
268+
// timeoutMinutes sets the timeout for this step's sandbox agent call.
269+
// This controls how long the operator waits for the sandbox pod to
270+
// become ready and for the agent to complete its work. Increase this
271+
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
272+
// Defaults to 5 minutes when omitted.
273+
//
274+
// Mutable: can be adjusted at any time; the value is read when the step starts.
275+
// +optional
276+
// +kubebuilder:validation:Minimum=1
277+
// +kubebuilder:validation:Maximum=60
278+
TimeoutMinutes int32 `json:"timeoutMinutes,omitempty"`
267279
}
268280

269281
func (s ProposalStep) IsZero() bool {
270-
return s.Agent == "" && s.Tools.IsZero()
282+
return s.Agent == "" && s.Tools.IsZero() && s.TimeoutMinutes == 0
271283
}
272284

273285
// DataSource references a pre-existing PersistentVolumeClaim containing
@@ -282,8 +294,12 @@ type DataSource struct {
282294
// +required
283295
// +kubebuilder:validation:MinLength=1
284296
// +kubebuilder:validation:MaxLength=253
285-
// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="must be a valid DNS subdomain"
286-
ClaimName string `json:"claimName"`
297+
// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="must be a valid DNS subdomain: lowercase alphanumeric characters, hyphens, and dots"
298+
ClaimName string `json:"claimName,omitempty"`
299+
}
300+
301+
func (d DataSource) IsZero() bool {
302+
return d.ClaimName == ""
287303
}
288304

289305
// ProposalSpec defines the desired state of Proposal.
@@ -297,10 +313,9 @@ type DataSource struct {
297313
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysisOutput) || (has(self.analysisOutput) && self.analysisOutput == oldSelf.analysisOutput)",message="analysisOutput is immutable once set"
298314
// +kubebuilder:validation:XValidation:rule="!has(self.analysisOutput) || self.analysisOutput.mode != 'Minimal' || (!has(self.execution) && !has(self.verification))",message="analysisOutput mode Minimal is only allowed for analysis-only proposals (no execution or verification steps)"
299315
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.tools) || (has(self.tools) && self.tools == oldSelf.tools)",message="tools is immutable once set"
300-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis == oldSelf.analysis)",message="analysis is immutable once set"
301-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution == oldSelf.execution)",message="execution is immutable once set"
302-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification == oldSelf.verification)",message="verification is immutable once set"
303-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataSource) || (has(self.dataSource) && self.dataSource == oldSelf.dataSource)",message="dataSource is immutable once set"
316+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && (!has(self.analysis.agent) || !has(oldSelf.analysis.agent) || self.analysis.agent == oldSelf.analysis.agent) && (!has(self.analysis.tools) || !has(oldSelf.analysis.tools) || self.analysis.tools == oldSelf.analysis.tools))",message="analysis agent and tools are immutable once set"
317+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && (!has(self.execution.agent) || !has(oldSelf.execution.agent) || self.execution.agent == oldSelf.execution.agent) && (!has(self.execution.tools) || !has(oldSelf.execution.tools) || self.execution.tools == oldSelf.execution.tools))",message="execution agent and tools are immutable once set"
318+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && (!has(self.verification.agent) || !has(oldSelf.verification.agent) || self.verification.agent == oldSelf.verification.agent) && (!has(self.verification.tools) || !has(oldSelf.verification.tools) || self.verification.tools == oldSelf.verification.tools))",message="verification agent and tools are immutable once set"
304319
type ProposalSpec struct {
305320
// request is the user's original request, alert description, or a
306321
// description of what triggered this proposal. This text is passed to
@@ -348,25 +363,17 @@ type ProposalSpec struct {
348363
AnalysisOutput AnalysisOutput `json:"analysisOutput,omitzero"`
349364

350365
// tools defines the default tools for all steps: skills images,
351-
// MCP servers, and required secrets. Per-step tools
352-
// (analysis.tools, execution.tools, verification.tools) replace
353-
// this default for individual steps.
366+
// MCP servers, required secrets, and an optional dataSource PVC.
367+
// Per-step tools (analysis.tools, execution.tools, verification.tools)
368+
// replace this default for individual steps, so a dataSource set in
369+
// spec.analysis.tools is mounted only in the analysis sandbox.
354370
//
355371
// Immutable: the skills and secrets available to the agent are
356372
// fixed at creation. Changing tools mid-flight could violate the
357373
// assumptions of an in-progress analysis or execution.
358374
// +optional
359375
Tools ToolsSpec `json:"tools,omitzero"`
360376

361-
// dataSource references a PVC containing pre-populated input data
362-
// (e.g., must-gather bundles, diagnostic data). The operator mounts
363-
// it read-only at /data/input in the sandbox pod. Skills discover
364-
// input data at this standard location.
365-
//
366-
// Immutable: input data source is fixed at creation.
367-
// +optional
368-
DataSource *DataSource `json:"dataSource,omitzero"`
369-
370377
// analysis defines per-step configuration for the analysis step,
371378
// including which agent handles it and any per-step tools.
372379
//
@@ -388,18 +395,6 @@ type ProposalSpec struct {
388395
// +optional
389396
Verification ProposalStep `json:"verification,omitzero"`
390397

391-
// timeoutMinutes sets the per-step timeout for sandbox agent calls.
392-
// This controls how long the operator waits for the sandbox pod to
393-
// become ready and for the agent to complete its work. Increase this
394-
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
395-
// Defaults to 5 minutes when omitted.
396-
//
397-
// Mutable: can be adjusted before approving a step.
398-
// +optional
399-
// +kubebuilder:validation:Minimum=1
400-
// +kubebuilder:validation:Maximum=60
401-
TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
402-
403398
// revisionFeedback is the user's free-text feedback requesting changes
404399
// to the analysis. Patching this field bumps metadata.generation, which
405400
// the operator detects (generation > observedGeneration) and triggers
@@ -411,7 +406,6 @@ type ProposalSpec struct {
411406
// +kubebuilder:validation:MinLength=1
412407
// +kubebuilder:validation:MaxLength=32768
413408
RevisionFeedback string `json:"revisionFeedback,omitempty"`
414-
415409
}
416410

417411
// ProposalStatus defines the observed state of Proposal. All fields are

api/v1alpha1/tools_types.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,15 @@ type SecretRequirement struct {
109109
}
110110

111111
// ToolsSpec defines the tools available to an agent in its sandbox pod.
112-
// This includes skills images, MCP servers, and required secrets.
112+
// This includes skills images, MCP servers, required secrets, and an
113+
// optional data source PVC.
113114
//
114115
// ToolsSpec is specified on a Proposal either as a shared default
115116
// (spec.tools) or per-step (spec.analysis.tools, spec.execution.tools,
116117
// spec.verification.tools). Per-step tools replace the shared default
117-
// for that step.
118+
// for that step, so a dataSource set in spec.analysis.tools is mounted
119+
// only in the analysis sandbox, while one in spec.tools is mounted in
120+
// every step that does not override tools.
118121
//
119122
// +kubebuilder:validation:MinProperties=1
120123
type ToolsSpec struct {
@@ -147,8 +150,16 @@ type ToolsSpec struct {
147150
// +kubebuilder:validation:MinItems=1
148151
// +kubebuilder:validation:MaxItems=20
149152
RequiredSecrets []SecretRequirement `json:"requiredSecrets,omitempty"`
153+
154+
// dataSource references a pre-existing PersistentVolumeClaim containing
155+
// input data for this step (e.g., must-gather bundles, diagnostic data).
156+
// The PVC must already exist in the same namespace as the Proposal and be
157+
// pre-populated with data before the Proposal is created. The operator
158+
// mounts it read-only at /data/input in the sandbox pod.
159+
// +optional
160+
DataSource DataSource `json:"dataSource,omitzero"`
150161
}
151162

152163
func (t ToolsSpec) IsZero() bool {
153-
return len(t.Skills) == 0 && len(t.MCPServers) == 0 && len(t.RequiredSecrets) == 0
164+
return len(t.Skills) == 0 && len(t.MCPServers) == 0 && len(t.RequiredSecrets) == 0 && t.DataSource.IsZero()
154165
}

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 16 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)