Skip to content

Commit 4ba26ce

Browse files
committed
fix: per-step timeoutMinutes, tools.dataSource, and review follow-ups
1 parent 7d01df4 commit 4ba26ce

16 files changed

Lines changed: 557 additions & 160 deletions

.ai/spec/how/reconciler.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ Audience: AI agents. Behavioral rules and phase semantics live in **what/** spec
146146
- **`AgentCaller`:** Boundary between reconciler and runtime (stub vs sandbox+HTTP). Methods mirror workflow steps plus `ReleaseSandboxes`.
147147
- **`SandboxProvider`:** Swappable claim/wait/release (tests can fake).
148148
- **`resolveProposal`:** Produces `resolvedWorkflow` with cached `Agent` + `LLMProvider` per name; applies per-stage agent overrides from `ProposalApproval` via `getStageOverrideAgent`; `Execution`/`Verification` steps nil when corresponding spec sections are zero.
149-
- **`EnsureAgentTemplate`:** Deterministic derived `SandboxTemplate` name from hash of LLM spec, model, skills, MCP servers, required secrets, step, and **base template resourceVersion**. Patches pod template env/volumes for credentials, Vertex/Bedrock/Azure extras, skills image/paths, and MCP JSON env. GC older templates labeled for same agent+step.
149+
- **`EnsureAgentTemplate`:** Deterministic derived `SandboxTemplate` name from hash of LLM spec, model, skills, MCP servers, required secrets, dataSource PVC, step, and **base template resourceVersion**. `dataSource` is extracted from `tools.DataSource` (set in `ToolsSpec`). Patches pod template env/volumes for credentials, Vertex/Bedrock/Azure extras, skills image/paths, MCP JSON env, `LIGHTSPEED_MODE`, and optional `/data/input` PVC mount. GC older templates labeled for same agent+step.
150150

151151
---
152152

api/v1alpha1/proposal_types.go

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,22 @@ type ProposalStep struct {
258258
// for this step. Use this when different steps need different skills.
259259
// +optional
260260
Tools ToolsSpec `json:"tools,omitzero"`
261+
262+
// timeoutMinutes sets the timeout for this step's sandbox agent call.
263+
// This controls how long the operator waits for the sandbox pod to
264+
// become ready and for the agent to complete its work. Increase this
265+
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
266+
// Defaults to 5 minutes when omitted.
267+
//
268+
// Mutable: can be adjusted before approving a step.
269+
// +optional
270+
// +kubebuilder:validation:Minimum=1
271+
// +kubebuilder:validation:Maximum=60
272+
TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
261273
}
262274

263275
func (s ProposalStep) IsZero() bool {
264-
return s.Agent == "" && s.Tools.IsZero()
276+
return s.Agent == "" && s.Tools.IsZero() && s.TimeoutMinutes == nil
265277
}
266278

267279
// DataSource references a pre-existing PersistentVolumeClaim containing
@@ -294,7 +306,6 @@ type DataSource struct {
294306
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.analysis) || (has(self.analysis) && self.analysis == oldSelf.analysis)",message="analysis is immutable once set"
295307
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.execution) || (has(self.execution) && self.execution == oldSelf.execution)",message="execution is immutable once set"
296308
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.verification) || (has(self.verification) && self.verification == oldSelf.verification)",message="verification is immutable once set"
297-
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataSource) || (has(self.dataSource) && self.dataSource == oldSelf.dataSource)",message="dataSource is immutable once set"
298309
type ProposalSpec struct {
299310
// request is the user's original request, alert description, or a
300311
// description of what triggered this proposal. This text is passed to
@@ -342,25 +353,17 @@ type ProposalSpec struct {
342353
AnalysisOutput AnalysisOutput `json:"analysisOutput,omitzero"`
343354

344355
// tools defines the default tools for all steps: skills images,
345-
// MCP servers, and required secrets. Per-step tools
346-
// (analysis.tools, execution.tools, verification.tools) replace
347-
// this default for individual steps.
356+
// MCP servers, required secrets, and an optional dataSource PVC.
357+
// Per-step tools (analysis.tools, execution.tools, verification.tools)
358+
// replace this default for individual steps, so a dataSource set in
359+
// spec.analysis.tools is mounted only in the analysis sandbox.
348360
//
349361
// Immutable: the skills and secrets available to the agent are
350362
// fixed at creation. Changing tools mid-flight could violate the
351363
// assumptions of an in-progress analysis or execution.
352364
// +optional
353365
Tools ToolsSpec `json:"tools,omitzero"`
354366

355-
// dataSource references a PVC containing pre-populated input data
356-
// (e.g., must-gather bundles, diagnostic data). The operator mounts
357-
// it read-only at /data/input in the sandbox pod. Skills discover
358-
// input data at this standard location.
359-
//
360-
// Immutable: input data source is fixed at creation.
361-
// +optional
362-
DataSource *DataSource `json:"dataSource,omitzero"`
363-
364367
// analysis defines per-step configuration for the analysis step,
365368
// including which agent handles it and any per-step tools.
366369
//
@@ -382,18 +385,6 @@ type ProposalSpec struct {
382385
// +optional
383386
Verification ProposalStep `json:"verification,omitzero"`
384387

385-
// timeoutMinutes sets the per-step timeout for sandbox agent calls.
386-
// This controls how long the operator waits for the sandbox pod to
387-
// become ready and for the agent to complete its work. Increase this
388-
// for long-running tools (e.g., IntelliAide RCA takes 10-30 minutes).
389-
// Defaults to 5 minutes when omitted.
390-
//
391-
// Mutable: can be adjusted before approving a step.
392-
// +optional
393-
// +kubebuilder:validation:Minimum=1
394-
// +kubebuilder:validation:Maximum=60
395-
TimeoutMinutes *int32 `json:"timeoutMinutes,omitempty"`
396-
397388
// revisionFeedback is the user's free-text feedback requesting changes
398389
// to the analysis. Patching this field bumps metadata.generation, which
399390
// the operator detects (generation > observedGeneration) and triggers

api/v1alpha1/tools_types.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,15 @@ type SecretRequirement struct {
109109
}
110110

111111
// ToolsSpec defines the tools available to an agent in its sandbox pod.
112-
// This includes skills images, MCP servers, and required secrets.
112+
// This includes skills images, MCP servers, required secrets, and an
113+
// optional data source PVC.
113114
//
114115
// ToolsSpec is specified on a Proposal either as a shared default
115116
// (spec.tools) or per-step (spec.analysis.tools, spec.execution.tools,
116117
// spec.verification.tools). Per-step tools replace the shared default
117-
// for that step.
118+
// for that step, so a dataSource set in spec.analysis.tools is mounted
119+
// only in the analysis sandbox, while one in spec.tools is mounted in
120+
// every step that does not override tools.
118121
//
119122
// +kubebuilder:validation:MinProperties=1
120123
type ToolsSpec struct {
@@ -147,8 +150,16 @@ type ToolsSpec struct {
147150
// +kubebuilder:validation:MinItems=1
148151
// +kubebuilder:validation:MaxItems=20
149152
RequiredSecrets []SecretRequirement `json:"requiredSecrets,omitempty"`
153+
154+
// dataSource references a pre-existing PersistentVolumeClaim containing
155+
// input data for this step (e.g., must-gather bundles, diagnostic data).
156+
// The PVC must already exist in the same namespace as the Proposal and be
157+
// pre-populated with data before the Proposal is created. The operator
158+
// mounts it read-only at /data/input in the sandbox pod.
159+
// +optional
160+
DataSource *DataSource `json:"dataSource,omitzero"`
150161
}
151162

152163
func (t ToolsSpec) IsZero() bool {
153-
return len(t.Skills) == 0 && len(t.MCPServers) == 0 && len(t.RequiredSecrets) == 0
164+
return len(t.Skills) == 0 && len(t.MCPServers) == 0 && len(t.RequiredSecrets) == 0 && t.DataSource == nil
154165
}

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 25 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)