diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f3f58eb4d..6d6f80065 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -265,6 +265,7 @@ jobs: - app - cli - golang-adk + - golang-adk-full - skills-init runs-on: ubuntu-latest services: diff --git a/.github/workflows/image-scan.yaml b/.github/workflows/image-scan.yaml index 892f8cae8..42b117f79 100644 --- a/.github/workflows/image-scan.yaml +++ b/.github/workflows/image-scan.yaml @@ -31,6 +31,7 @@ jobs: - app - skills-init - golang-adk + - golang-adk-full runs-on: ubuntu-latest services: registry: diff --git a/.github/workflows/tag.yaml b/.github/workflows/tag.yaml index 620112368..6938f32e0 100644 --- a/.github/workflows/tag.yaml +++ b/.github/workflows/tag.yaml @@ -21,6 +21,7 @@ jobs: - ui - app - golang-adk + - golang-adk-full - skills-init runs-on: ubuntu-latest permissions: diff --git a/Makefile b/Makefile index 747fac545..8be88640a 100644 --- a/Makefile +++ b/Makefile @@ -46,12 +46,14 @@ UI_IMAGE_TAG ?= $(VERSION) APP_IMAGE_TAG ?= $(VERSION) KAGENT_ADK_IMAGE_TAG ?= $(VERSION) GOLANG_ADK_IMAGE_TAG ?= $(VERSION) +GOLANG_ADK_FULL_IMAGE_TAG ?= $(VERSION)-full SKILLS_INIT_IMAGE_TAG ?= $(VERSION) CONTROLLER_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(CONTROLLER_IMAGE_NAME):$(CONTROLLER_IMAGE_TAG) UI_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(UI_IMAGE_NAME):$(UI_IMAGE_TAG) APP_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(APP_IMAGE_NAME):$(APP_IMAGE_TAG) KAGENT_ADK_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(KAGENT_ADK_IMAGE_NAME):$(KAGENT_ADK_IMAGE_TAG) GOLANG_ADK_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(GOLANG_ADK_IMAGE_NAME):$(GOLANG_ADK_IMAGE_TAG) +GOLANG_ADK_FULL_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(GOLANG_ADK_IMAGE_NAME):$(GOLANG_ADK_FULL_IMAGE_TAG) SKILLS_INIT_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(SKILLS_INIT_IMAGE_NAME):$(SKILLS_INIT_IMAGE_TAG) #take from go/go.mod @@ -165,6 +167,7 @@ buildx-create: build-all: BUILD_ARGS ?= --progress=plain --builder $(BUILDX_BUILDER_NAME) --platform linux/amd64,linux/arm64 --output type=tar,dest=/dev/null build-all: buildx-create $(DOCKER_BUILDER) build $(BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) -f go/Dockerfile ./go + $(DOCKER_BUILDER) build $(BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) -f go/Dockerfile.full ./go $(DOCKER_BUILDER) build $(BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) -f ui/Dockerfile ./ui $(DOCKER_BUILDER) build $(BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) -f python/Dockerfile ./python @@ -218,13 +221,14 @@ prune-docker-images: docker images --filter dangling=true -q | xargs -r docker rmi || : .PHONY: build -build: buildx-create build-controller build-ui build-app build-golang-adk build-skills-init +build: buildx-create build-controller build-ui build-app build-golang-adk build-golang-adk-full build-skills-init @echo "Build completed successfully." @echo "Controller Image: $(CONTROLLER_IMG)" @echo "UI Image: $(UI_IMG)" @echo "App Image: $(APP_IMG)" @echo "Kagent ADK Image: $(KAGENT_ADK_IMG)" @echo "Golang ADK Image: $(GOLANG_ADK_IMG)" + @echo "Golang ADK Full Image: $(GOLANG_ADK_FULL_IMG)" @echo "Skills Init Image: $(SKILLS_INIT_IMG)" .PHONY: build-monitor @@ -246,6 +250,8 @@ build-img-versions: @echo ui=$(UI_IMG) @echo app=$(APP_IMG) @echo kagent-adk=$(KAGENT_ADK_IMG) + @echo golang-adk=$(GOLANG_ADK_IMG) + @echo golang-adk-full=$(GOLANG_ADK_FULL_IMG) @echo skills-init=$(SKILLS_INIT_IMG) .PHONY: lint @@ -254,7 +260,7 @@ lint: make -C python lint .PHONY: push -push: push-controller push-ui push-app push-kagent-adk push-golang-adk +push: push-controller push-ui push-app push-kagent-adk push-golang-adk push-golang-adk-full .PHONY: controller-manifests @@ -282,6 +288,10 @@ build-app: buildx-create build-kagent-adk build-golang-adk: buildx-create $(DOCKER_BUILDER) build $(DOCKER_BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) --build-arg BUILD_PACKAGE=adk/cmd/main.go -t $(GOLANG_ADK_IMG) -f go/Dockerfile ./go +.PHONY: build-golang-adk-full +build-golang-adk-full: buildx-create + $(DOCKER_BUILDER) build $(DOCKER_BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) --build-arg BUILD_PACKAGE=adk/cmd/main.go -t $(GOLANG_ADK_FULL_IMG) -f go/Dockerfile.full ./go + .PHONY: build-skills-init build-skills-init: buildx-create $(DOCKER_BUILDER) build $(DOCKER_BUILD_ARGS) -t $(SKILLS_INIT_IMG) -f docker/skills-init/Dockerfile docker/skills-init diff --git a/go/Dockerfile.full b/go/Dockerfile.full new file mode 100644 index 000000000..b12e6b7e7 --- /dev/null +++ b/go/Dockerfile.full @@ -0,0 +1,66 @@ +ARG BASE_IMAGE_REGISTRY=cgr.dev +ARG BUILDPLATFORM +FROM --platform=$BUILDPLATFORM $BASE_IMAGE_REGISTRY/chainguard/go:latest AS builder +ARG TARGETARCH +ARG TARGETPLATFORM +ARG BUILDPLATFORM +ARG BUILD_PACKAGE=adk/cmd/main.go + +WORKDIR /workspace +COPY go.mod go.sum . +RUN --mount=type=cache,target=/root/go/pkg/mod,rw \ + --mount=type=cache,target=/root/.cache/go-build,rw \ + go mod download + +COPY api/ api/ +COPY core/ core/ +COPY adk/ adk/ + +ARG LDFLAGS +RUN --mount=type=cache,target=/root/go/pkg/mod,rw \ + --mount=type=cache,target=/root/.cache/go-build,rw \ + echo "Building on $BUILDPLATFORM -> linux/$TARGETARCH" && \ + CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -ldflags "$LDFLAGS" -o /app "$BUILD_PACKAGE" + +FROM $BASE_IMAGE_REGISTRY/chainguard/wolfi-base:latest AS srt-builder +ARG TOOLS_PYTHON_VERSION=3.13 + +RUN --mount=type=cache,target=/var/cache/apk,rw \ + apk add --no-cache \ + bash git ca-certificates nodejs npm node-gyp bubblewrap python-${TOOLS_PYTHON_VERSION} libstdc++ + +RUN --mount=type=cache,target=/root/.npm \ + mkdir -p /opt && \ + cd /opt && \ + git clone --depth 1 --revision=ef4afdef4d711ba21a507d7f7369e305f7d3dbfa https://github.com/anthropic-experimental/sandbox-runtime.git && \ + cd sandbox-runtime && \ + npm install && \ + npm run build && \ + npm prune --omit=dev + +FROM $BASE_IMAGE_REGISTRY/chainguard/wolfi-base:latest +ARG TOOLS_PYTHON_VERSION=3.13 + +RUN --mount=type=cache,target=/var/cache/apk,rw \ + apk add --no-cache \ + bash ca-certificates curl nodejs bubblewrap socat python-${TOOLS_PYTHON_VERSION} ripgrep libstdc++ + +RUN addgroup -g 1001 goagent && \ + adduser -u 1001 -G goagent -s /bin/bash -D goagent + +COPY --from=srt-builder /opt/sandbox-runtime /opt/sandbox-runtime + +RUN chmod +x /opt/sandbox-runtime/dist/cli.js && \ + ln -s /opt/sandbox-runtime/dist/cli.js /usr/bin/srt + +WORKDIR / +COPY --from=builder /app /app +ENV PATH="/usr/bin:/opt/sandbox-runtime/node_modules/.bin:$PATH" +ARG VERSION + +LABEL org.opencontainers.image.source=https://github.com/kagent-dev/kagent +LABEL org.opencontainers.image.description="Kagent Go ADK runtime with sandbox execution dependencies." +LABEL org.opencontainers.image.version="$VERSION" + +USER 1001:1001 +ENTRYPOINT ["/app"] diff --git a/go/adk/cmd/main.go b/go/adk/cmd/main.go index ad0d28d80..6467ea440 100644 --- a/go/adk/cmd/main.go +++ b/go/adk/cmd/main.go @@ -187,6 +187,7 @@ func main() { Stream: stream, AppName: appName, Logger: logger, + RetryPolicy: agentConfig.RetryPolicy, }) // Build the agent card. diff --git a/go/adk/pkg/a2a/executor.go b/go/adk/pkg/a2a/executor.go index 39f7d0406..47867d92e 100644 --- a/go/adk/pkg/a2a/executor.go +++ b/go/adk/pkg/a2a/executor.go @@ -2,9 +2,11 @@ package a2a import ( "context" + "errors" "fmt" "maps" "os" + "time" a2atype "github.com/a2aproject/a2a-go/a2a" "github.com/a2aproject/a2a-go/a2asrv" @@ -13,6 +15,7 @@ import ( "github.com/kagent-dev/kagent/go/adk/pkg/session" "github.com/kagent-dev/kagent/go/adk/pkg/skills" "github.com/kagent-dev/kagent/go/adk/pkg/telemetry" + adkapi "github.com/kagent-dev/kagent/go/api/adk" "go.opentelemetry.io/otel/attribute" adkagent "google.golang.org/adk/agent" "google.golang.org/adk/runner" @@ -34,6 +37,7 @@ type KAgentExecutorConfig struct { AppName string SkillsDirectory string Logger logr.Logger + RetryPolicy *adkapi.RetryPolicyConfig } // KAgentExecutor implements a2asrv.AgentExecutor @@ -45,6 +49,7 @@ type KAgentExecutor struct { appName string skillsDirectory string logger logr.Logger + retryPolicy *adkapi.RetryPolicyConfig } var _ a2asrv.AgentExecutor = (*KAgentExecutor)(nil) @@ -66,6 +71,7 @@ func NewKAgentExecutor(cfg KAgentExecutorConfig) *KAgentExecutor { appName: cfg.AppName, skillsDirectory: skillsDir, logger: cfg.Logger.WithName("kagent-executor"), + retryPolicy: cfg.RetryPolicy, } } @@ -97,10 +103,65 @@ func (u *userIDInterceptor) Before(ctx context.Context, callCtx *a2asrv.CallCont return ctx, nil } +// computeRetryDelay calculates the delay for a retry attempt using exponential backoff. +func computeRetryDelay(attempt int, policy *adkapi.RetryPolicyConfig) time.Duration { + delay := policy.InitialRetryDelay * float64(int(1)< *policy.MaxRetryDelay { + delay = *policy.MaxRetryDelay + } + return time.Duration(delay * float64(time.Second)) +} + +// executeWithRetry runs fn with optional retry on failure. +// If policy is nil or MaxRetries is 0, fn is called once. +// Context cancellation is never retried. +func executeWithRetry(ctx context.Context, policy *adkapi.RetryPolicyConfig, fn func(ctx context.Context) error) error { + maxAttempts := 1 + if policy != nil { + maxAttempts += policy.MaxRetries + } + + var lastErr error + for attempt := range maxAttempts { + lastErr = fn(ctx) + if lastErr == nil { + return nil + } + + // Never retry context cancellation + if ctx.Err() != nil || errors.Is(lastErr, context.Canceled) || errors.Is(lastErr, context.DeadlineExceeded) { + return lastErr + } + + if attempt+1 < maxAttempts { + delay := computeRetryDelay(attempt, policy) + logr.FromContextOrDiscard(ctx).Info("Request failed, retrying", + "attempt", attempt+1, + "maxAttempts", maxAttempts, + "delay", delay, + "error", lastErr, + ) + select { + case <-time.After(delay): + case <-ctx.Done(): + return ctx.Err() + } + } + } + return lastErr +} + // Execute implements a2asrv.AgentExecutor. +func (e *KAgentExecutor) Execute(ctx context.Context, reqCtx *a2asrv.RequestContext, queue eventqueue.Queue) error { + return executeWithRetry(ctx, e.retryPolicy, func(ctx context.Context) error { + return e.executeOnce(ctx, reqCtx, queue) + }) +} + +// executeOnce performs a single attempt at handling an A2A request. // It follows the Python _handle_request pattern: set up session, handle HITL, // convert inbound message, run the agent loop, and emit A2A events. -func (e *KAgentExecutor) Execute(ctx context.Context, reqCtx *a2asrv.RequestContext, queue eventqueue.Queue) error { +func (e *KAgentExecutor) executeOnce(ctx context.Context, reqCtx *a2asrv.RequestContext, queue eventqueue.Queue) error { if reqCtx.Message == nil { return fmt.Errorf("A2A request message cannot be nil") } diff --git a/go/adk/pkg/a2a/executor_test.go b/go/adk/pkg/a2a/executor_test.go new file mode 100644 index 000000000..7c6d33ab7 --- /dev/null +++ b/go/adk/pkg/a2a/executor_test.go @@ -0,0 +1,130 @@ +package a2a + +import ( + "context" + "fmt" + "testing" + "time" + + adkapi "github.com/kagent-dev/kagent/go/api/adk" + "github.com/stretchr/testify/assert" +) + +func TestComputeRetryDelay(t *testing.T) { + tests := []struct { + name string + attempt int + policy *adkapi.RetryPolicyConfig + want time.Duration + }{ + { + name: "first attempt", + attempt: 0, + policy: &adkapi.RetryPolicyConfig{MaxRetries: 3, InitialRetryDelay: 1.0}, + want: 1 * time.Second, + }, + { + name: "second attempt doubles", + attempt: 1, + policy: &adkapi.RetryPolicyConfig{MaxRetries: 3, InitialRetryDelay: 1.0}, + want: 2 * time.Second, + }, + { + name: "third attempt quadruples", + attempt: 2, + policy: &adkapi.RetryPolicyConfig{MaxRetries: 3, InitialRetryDelay: 1.0}, + want: 4 * time.Second, + }, + { + name: "capped by max delay", + attempt: 3, + policy: &adkapi.RetryPolicyConfig{MaxRetries: 5, InitialRetryDelay: 1.0, MaxRetryDelay: new(5.0)}, + want: 5 * time.Second, + }, + { + name: "sub-second initial delay", + attempt: 0, + policy: &adkapi.RetryPolicyConfig{MaxRetries: 3, InitialRetryDelay: 0.5}, + want: 500 * time.Millisecond, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := computeRetryDelay(tt.attempt, tt.policy) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestExecuteWithRetry(t *testing.T) { + tests := []struct { + name string + policy *adkapi.RetryPolicyConfig + failCount int + wantCalls int + wantErr bool + }{ + { + name: "no retry policy, success", + policy: nil, + failCount: 0, + wantCalls: 1, + wantErr: false, + }, + { + name: "no retry policy, failure", + policy: nil, + failCount: 1, + wantCalls: 1, + wantErr: true, + }, + { + name: "retry policy, success on second attempt", + policy: &adkapi.RetryPolicyConfig{MaxRetries: 3, InitialRetryDelay: 0.001}, + failCount: 1, + wantCalls: 2, + wantErr: false, + }, + { + name: "retry policy, all attempts fail", + policy: &adkapi.RetryPolicyConfig{MaxRetries: 2, InitialRetryDelay: 0.001}, + failCount: 10, + wantCalls: 3, + wantErr: true, + }, + { + name: "context cancelled, no retry", + policy: &adkapi.RetryPolicyConfig{MaxRetries: 3, InitialRetryDelay: 0.001}, + failCount: -1, // special: returns context.Canceled + wantCalls: 1, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + callCount := 0 + fn := func(ctx context.Context) error { + callCount++ + if tt.failCount == -1 { + return context.Canceled + } + if callCount <= tt.failCount { + return fmt.Errorf("transient error %d", callCount) + } + return nil + } + + ctx := context.Background() + err := executeWithRetry(ctx, tt.policy, fn) + + assert.Equal(t, tt.wantCalls, callCount) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/go/adk/pkg/agent/agent.go b/go/adk/pkg/agent/agent.go index e1b475240..9ebc425f3 100644 --- a/go/adk/pkg/agent/agent.go +++ b/go/adk/pkg/agent/agent.go @@ -68,23 +68,10 @@ func CreateGoogleADKAgentWithSubagentSessionIDs(ctx context.Context, agentConfig log.Info("Wired remote A2A agent tool", "name", remoteAgent.Name, "url", remoteAgent.Url) } - // Add memory tools if memory is configured - var memoryTools []tool.Tool - if agentConfig.Memory != nil { - log.Info("Memory configuration detected, adding memory tools") - memoryTools = []tool.Tool{ - preloadmemorytool.New(), - loadmemorytool.New(), - } - } - memoryTools = append(memoryTools, remoteAgentTools...) - memoryTools = append(memoryTools, extraTools...) - - askUserTool, err := tools.NewAskUserTool() + localTools, err := buildAgentTools(agentConfig, remoteAgentTools, extraTools, log) if err != nil { - return nil, nil, fmt.Errorf("failed to create ask_user tool: %w", err) + return nil, nil, err } - memoryTools = append(memoryTools, askUserTool) if agentConfig.Model == nil { return nil, nil, fmt.Errorf("model configuration is required") @@ -126,7 +113,7 @@ func CreateGoogleADKAgentWithSubagentSessionIDs(ctx context.Context, agentConfig Instruction: agentConfig.Instruction, Model: llmModel, IncludeContents: llmagent.IncludeContentsDefault, - Tools: memoryTools, + Tools: localTools, Toolsets: toolsets, BeforeToolCallbacks: beforeToolCallbacks, AfterToolCallbacks: []llmagent.AfterToolCallback{ @@ -156,6 +143,36 @@ func CreateGoogleADKAgentWithSubagentSessionIDs(ctx context.Context, agentConfig return llmAgent, subagentSessionIDs, nil } +func buildAgentTools(agentConfig *adk.AgentConfig, remoteAgentTools, extraTools []tool.Tool, log logr.Logger) ([]tool.Tool, error) { + var localTools []tool.Tool + if agentConfig.Memory != nil { + log.Info("Memory configuration detected, adding memory tools") + localTools = []tool.Tool{ + preloadmemorytool.New(), + loadmemorytool.New(), + } + } + localTools = append(localTools, remoteAgentTools...) + localTools = append(localTools, extraTools...) + + skillsDirectory := strings.TrimSpace(os.Getenv("KAGENT_SKILLS_FOLDER")) + if skillsDirectory != "" { + skillsTools, err := tools.NewSkillsTools(skillsDirectory) + if err != nil { + return nil, fmt.Errorf("failed to create skills tools: %w", err) + } + localTools = append(localTools, skillsTools...) + log.Info("Wired local skills tools", "skillsDirectory", skillsDirectory, "toolCount", len(skillsTools)) + } + + askUserTool, err := tools.NewAskUserTool() + if err != nil { + return nil, fmt.Errorf("failed to create ask_user tool: %w", err) + } + localTools = append(localTools, askUserTool) + return localTools, nil +} + // CreateLLM creates an adkmodel.LLM from the model configuration. // This is exported to allow reuse of model creation logic (e.g., for memory summarization). func CreateLLM(ctx context.Context, m adk.Model, log logr.Logger) (adkmodel.LLM, error) { diff --git a/go/adk/pkg/agent/agent_test.go b/go/adk/pkg/agent/agent_test.go index 5d9cc7125..4f4c5fb45 100644 --- a/go/adk/pkg/agent/agent_test.go +++ b/go/adk/pkg/agent/agent_test.go @@ -2,8 +2,11 @@ package agent import ( "encoding/json" + "os" + "path/filepath" "testing" + "github.com/go-logr/logr" "github.com/kagent-dev/kagent/go/adk/pkg/models" "github.com/kagent-dev/kagent/go/api/adk" ) @@ -283,6 +286,42 @@ func TestConfigDeserialization_Bedrock(t *testing.T) { } } +func TestBuildAgentTools_WiresSkillsToolsFromEnv(t *testing.T) { + skillsDir := t.TempDir() + skillDir := filepath.Join(skillsDir, "csv-to-json") + if err := os.MkdirAll(skillDir, 0755); err != nil { + t.Fatalf("failed to create skill dir: %v", err) + } + if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`--- +name: csv-to-json +description: Convert CSV into JSON. +--- + +Use the script in scripts/convert.py. +`), 0644); err != nil { + t.Fatalf("failed to write SKILL.md: %v", err) + } + + t.Setenv("KAGENT_SKILLS_FOLDER", skillsDir) + t.Setenv("KAGENT_SRT_SETTINGS_PATH", filepath.Join(t.TempDir(), "srt-settings.json")) + + tools, err := buildAgentTools(&adk.AgentConfig{}, nil, nil, logr.Discard()) + if err != nil { + t.Fatalf("buildAgentTools() error = %v", err) + } + + got := map[string]bool{} + for _, tool := range tools { + got[tool.Name()] = true + } + + for _, name := range []string{"skills", "read_file", "write_file", "edit_file", "bash", "ask_user"} { + if !got[name] { + t.Errorf("expected tool %q to be registered", name) + } + } +} + // TestAgentConfigFieldUsage is a smoke test that ensures AgentConfig structures // used by agents exercise all relevant fields. This test acts as a canary: if a // new field is added to AgentConfig but not reflected in this test configuration, diff --git a/go/adk/pkg/config/config_usage.go b/go/adk/pkg/config/config_usage.go index 82a83038d..126400261 100644 --- a/go/adk/pkg/config/config_usage.go +++ b/go/adk/pkg/config/config_usage.go @@ -35,6 +35,10 @@ import ( // - Currently disabled in Go controller (see adk_api_translator.go:533) // - Would enable SandboxedLocalCodeExecutor if true // +// Agent.Spec.Sandbox.Network -> AgentConfig.Network +// - Translated into the mounted srt-settings.json consumed by sandboxed execution +// - When omitted, sandboxed execution remains deny-by-default for outbound network access +// // Agent.Spec.A2AConfig.Skills -> Not in config.json, handled separately // - Skills are added via SkillsPlugin in Python // - In go-adk, skills are handled via KAGENT_SKILLS_FOLDER env var @@ -72,6 +76,7 @@ func ValidateAgentConfigUsageWithLogger(config *adk.AgentConfig, logger logr.Log "modelType", config.Model.GetType(), "stream", config.Stream, "executeCode", config.ExecuteCode, + "hasNetworkConfig", config.Network != nil, "httpToolsCount", len(config.HttpTools), "sseToolsCount", len(config.SseTools), "remoteAgentsCount", len(config.RemoteAgents)) @@ -116,6 +121,7 @@ func GetAgentConfigSummary(config *adk.AgentConfig) string { summary += fmt.Sprintf(" Instruction: %d chars\n", len(config.Instruction)) summary += fmt.Sprintf(" Stream: %v\n", config.Stream) summary += fmt.Sprintf(" ExecuteCode: %v\n", config.ExecuteCode) + summary += fmt.Sprintf(" HasNetworkConfig: %v\n", config.Network != nil) summary += fmt.Sprintf(" HttpTools: %d\n", len(config.HttpTools)) summary += fmt.Sprintf(" SseTools: %d\n", len(config.SseTools)) summary += fmt.Sprintf(" RemoteAgents: %d\n", len(config.RemoteAgents)) diff --git a/go/adk/pkg/skills/discovery_test.go b/go/adk/pkg/skills/discovery_test.go index bd3106537..01e4aad2f 100644 --- a/go/adk/pkg/skills/discovery_test.go +++ b/go/adk/pkg/skills/discovery_test.go @@ -252,6 +252,7 @@ func TestLoadSkillContent_NoSkillMD(t *testing.T) { func TestSkillExecution_Integration(t *testing.T) { sessionDir, _ := createSkillTestEnv(t) defer os.RemoveAll(filepath.Dir(sessionDir)) + defer os.RemoveAll(installFakeSRT(t)) // 1. "Upload" a file for the skill to process inputCSVPath := filepath.Join(sessionDir, "uploads", "data.csv") @@ -262,7 +263,11 @@ func TestSkillExecution_Integration(t *testing.T) { // 2. Execute the skill's core command command := "python skills/csv-to-json/scripts/convert.py uploads/data.csv outputs/result.json" - result, err := ExecuteCommand(context.Background(), command, sessionDir) + executor, err := NewCommandExecutorFromEnv() + if err != nil { + t.Fatalf("NewCommandExecutorFromEnv() error = %v", err) + } + result, err := executor.ExecuteCommand(context.Background(), command, sessionDir) if err != nil { // Python might not be available, skip this test t.Skipf("Python not available or command failed: %v", err) diff --git a/go/adk/pkg/skills/shell.go b/go/adk/pkg/skills/shell.go index b9eb2305e..d69ffe619 100644 --- a/go/adk/pkg/skills/shell.go +++ b/go/adk/pkg/skills/shell.go @@ -12,6 +12,12 @@ import ( "time" ) +const srtSettingsPathEnv = "KAGENT_SRT_SETTINGS_PATH" + +type CommandExecutor struct { + srtArgs []string +} + // ReadFileContent reads a file with line numbers. func ReadFileContent(path string, offset, limit int) (string, error) { file, err := os.Open(path) @@ -102,8 +108,24 @@ func EditFileContent(path string, oldString, newString string, replaceAll bool) return os.WriteFile(path, []byte(newContent), 0644) } +func resolveSRTSettingsArgs() ([]string, error) { + settingsPath := strings.TrimSpace(os.Getenv(srtSettingsPathEnv)) + if settingsPath == "" { + return nil, fmt.Errorf("%s is not set", srtSettingsPathEnv) + } + return []string{"--settings", settingsPath}, nil +} + +func NewCommandExecutorFromEnv() (*CommandExecutor, error) { + srtArgs, err := resolveSRTSettingsArgs() + if err != nil { + return nil, err + } + return &CommandExecutor{srtArgs: srtArgs}, nil +} + // ExecuteCommand executes a shell command. -func ExecuteCommand(ctx context.Context, command string, workingDir string) (string, error) { +func (e *CommandExecutor) ExecuteCommand(ctx context.Context, command string, workingDir string) (string, error) { timeout := 30 * time.Second if strings.Contains(command, "python") { timeout = 60 * time.Second @@ -112,9 +134,8 @@ func ExecuteCommand(ctx context.Context, command string, workingDir string) (str ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - // In the python version, it uses 'srt' for sandboxing. - // Here we'll execute the command directly but you might want to wrap it in a sandbox. - cmd := exec.CommandContext(ctx, "bash", "-c", command) + args := append(append([]string{}, e.srtArgs...), "bash", "-c", command) + cmd := exec.CommandContext(ctx, "srt", args...) cmd.Dir = workingDir var stdout, stderr bytes.Buffer diff --git a/go/adk/pkg/skills/shell_test.go b/go/adk/pkg/skills/shell_test.go index 831b2e6e5..be7f9d6c6 100644 --- a/go/adk/pkg/skills/shell_test.go +++ b/go/adk/pkg/skills/shell_test.go @@ -17,6 +17,26 @@ func createTempDir(t *testing.T) string { return tmpDir } +func installFakeSRT(t *testing.T) string { + t.Helper() + + tmpDir := createTempDir(t) + scriptPath := filepath.Join(tmpDir, "srt") + script := "#!/bin/sh\nif [ \"$1\" = \"--settings\" ]; then\n shift 2\nfi\nexec \"$@\"\n" + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("Failed to write fake srt: %v", err) + } + + settingsPath := filepath.Join(tmpDir, "srt-settings.json") + if err := os.WriteFile(settingsPath, []byte(`{"network":{"allowedDomains":[],"deniedDomains":[]},"filesystem":{"denyRead":[],"allowWrite":[".","/tmp"],"denyWrite":[]}}`), 0644); err != nil { + t.Fatalf("Failed to write fake srt settings: %v", err) + } + + t.Setenv("PATH", tmpDir+string(os.PathListSeparator)+os.Getenv("PATH")) + t.Setenv(srtSettingsPathEnv, settingsPath) + return tmpDir +} + func TestReadFileContent(t *testing.T) { tmpDir := createTempDir(t) defer os.RemoveAll(tmpDir) @@ -287,8 +307,13 @@ func TestEditFileContent(t *testing.T) { func TestExecuteCommand(t *testing.T) { tmpDir := createTempDir(t) defer os.RemoveAll(tmpDir) + defer os.RemoveAll(installFakeSRT(t)) ctx := context.Background() + executor, err := NewCommandExecutorFromEnv() + if err != nil { + t.Fatalf("NewCommandExecutorFromEnv() error = %v", err) + } tests := []struct { name string @@ -369,7 +394,7 @@ func TestExecuteCommand(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result, err := ExecuteCommand(ctx, tt.command, tt.workingDir) + result, err := executor.ExecuteCommand(ctx, tt.command, tt.workingDir) if tt.wantErr { if err == nil { t.Error("Expected error, got nil") @@ -388,6 +413,18 @@ func TestExecuteCommand(t *testing.T) { } } +func TestExecuteCommand_RequiresMountedSRTSettings(t *testing.T) { + t.Setenv(srtSettingsPathEnv, "") + + _, err := NewCommandExecutorFromEnv() + if err == nil { + t.Fatal("expected error when SRT settings path is missing") + } + if !strings.Contains(err.Error(), srtSettingsPathEnv+" is not set") { + t.Fatalf("unexpected error: %v", err) + } +} + func TestExecuteCommand_Timeout(t *testing.T) { // Skip this test if running in CI or if test timeout is too short // This test requires at least 35 seconds to run properly @@ -397,8 +434,13 @@ func TestExecuteCommand_Timeout(t *testing.T) { tmpDir := createTempDir(t) defer os.RemoveAll(tmpDir) + defer os.RemoveAll(installFakeSRT(t)) ctx := context.Background() + executor, err := NewCommandExecutorFromEnv() + if err != nil { + t.Fatalf("NewCommandExecutorFromEnv() error = %v", err) + } // Test timeout for long-running command // The timeout is 30 seconds for non-python commands @@ -407,7 +449,7 @@ func TestExecuteCommand_Timeout(t *testing.T) { command := "sleep 31" // This should timeout after 30 seconds start := time.Now() - result, err := ExecuteCommand(ctx, command, tmpDir) + result, err := executor.ExecuteCommand(ctx, command, tmpDir) elapsed := time.Since(start) // When a command times out, ExecuteCommand should return an error diff --git a/go/adk/pkg/skills/skills_tools.go b/go/adk/pkg/skills/skills_tools.go index c4da0ea91..0ddea1756 100644 --- a/go/adk/pkg/skills/skills_tools.go +++ b/go/adk/pkg/skills/skills_tools.go @@ -37,11 +37,20 @@ func (t *SkillsTool) Execute(ctx context.Context, command string) (string, error // BashTool provides shell command execution in skills context type BashTool struct { SkillsDirectory string + executor *CommandExecutor } // NewBashTool creates a new BashTool -func NewBashTool(skillsDirectory string) *BashTool { - return &BashTool{SkillsDirectory: skillsDirectory} +func NewBashTool(skillsDirectory string) (*BashTool, error) { + executor, err := NewCommandExecutorFromEnv() + if err != nil { + return nil, err + } + + return &BashTool{ + SkillsDirectory: skillsDirectory, + executor: executor, + }, nil } // Execute executes a bash command in the skills context @@ -52,7 +61,7 @@ func (t *BashTool) Execute(ctx context.Context, command string, sessionID string return "", fmt.Errorf("failed to get session path: %w", err) } - return ExecuteCommand(ctx, command, sessionPath) + return t.executor.ExecuteCommand(ctx, command, sessionPath) } // FileTools provides file operation tools diff --git a/go/adk/pkg/tools/skills.go b/go/adk/pkg/tools/skills.go new file mode 100644 index 000000000..d8fa1bec2 --- /dev/null +++ b/go/adk/pkg/tools/skills.go @@ -0,0 +1,365 @@ +package tools + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + skillruntime "github.com/kagent-dev/kagent/go/adk/pkg/skills" + "google.golang.org/adk/tool" + "google.golang.org/adk/tool/functiontool" +) + +const ( + readFileDescription = `Reads a file from the filesystem with line numbers. + +Usage: +- Provide a path to the file (absolute or relative to your working directory) +- Returns content with line numbers (format: LINE_NUMBER|CONTENT) +- Optional offset and limit parameters for reading specific line ranges +- Lines longer than 2000 characters are truncated +- Always read a file before editing it +- You can read from skills/ directory, uploads/, outputs/, or any file in your session` + + writeFileDescription = `Writes content to a file on the filesystem. + +Usage: +- Provide a path (absolute or relative to working directory) and content to write +- Overwrites existing files +- Creates parent directories if needed +- For existing files, read them first using read_file +- Prefer editing existing files over writing new ones +- You can write to your working directory, outputs/, or any writable location +- Note: skills/ directory is read-only` + + editFileDescription = `Performs exact string replacements in files. + +Usage: +- You must read the file first using read_file +- Provide path (absolute or relative to working directory) +- When editing, preserve exact indentation from the file content +- Do NOT include line number prefixes in old_string or new_string +- old_string must be unique unless replace_all=true +- Use replace_all to rename variables/strings throughout the file +- old_string and new_string must be different +- Note: skills/ directory is read-only` + + bashDescription = `Execute bash commands in the skills environment with sandbox protection. + +Working Directory & Structure: +- Commands run in a temporary session directory: /tmp/kagent/{session_id}/ +- /skills -> All skills are available here (read-only). +- Your current working directory and /skills are added to PYTHONPATH. + +Python Imports (CRITICAL): +- To import from a skill, use the name of the skill. + Example: from skills_name.module import function +- If the skills name contains a dash '-', you need to use importlib to import it. + Example: + import importlib + skill_module = importlib.import_module('skill-name.module') + +For file operations: +- Use read_file, write_file, and edit_file for interacting with the filesystem. + +Timeouts: +- python scripts: 60s +- other commands: 30s` +) + +type skillsInput struct { + Command string `json:"command"` +} + +type bashInput struct { + Command string `json:"command"` + Description string `json:"description,omitempty"` +} + +type readFileInput struct { + FilePath string `json:"file_path"` + Offset int `json:"offset,omitempty"` + Limit int `json:"limit,omitempty"` +} + +type writeFileInput struct { + FilePath string `json:"file_path"` + Content string `json:"content"` +} + +type editFileInput struct { + FilePath string `json:"file_path"` + OldString string `json:"old_string"` + NewString string `json:"new_string"` + ReplaceAll bool `json:"replace_all,omitempty"` +} + +func NewSkillsTools(skillsDirectory string) ([]tool.Tool, error) { + skillsDirectory = strings.TrimSpace(skillsDirectory) + if skillsDirectory == "" { + return nil, nil + } + + absSkillsDir, err := filepath.Abs(skillsDirectory) + if err != nil { + return nil, fmt.Errorf("failed to resolve skills directory %q: %w", skillsDirectory, err) + } + if _, err := os.Stat(absSkillsDir); err != nil { + return nil, fmt.Errorf("failed to access skills directory %q: %w", absSkillsDir, err) + } + + discoveredSkills, err := skillruntime.DiscoverSkills(absSkillsDir) + if err != nil { + return nil, fmt.Errorf("failed to discover skills: %w", err) + } + commandExecutor, err := skillruntime.NewCommandExecutorFromEnv() + if err != nil { + return nil, fmt.Errorf("failed to configure bash sandbox: %w", err) + } + + skillsTool, err := functiontool.New(functiontool.Config{ + Name: "skills", + Description: skillruntime.GenerateSkillsToolDescription(discoveredSkills), + }, func(ctx tool.Context, in skillsInput) (string, error) { + skillName := strings.TrimSpace(in.Command) + if skillName == "" { + return "Error: No skill name provided", nil + } + + content, err := skillruntime.LoadSkillContent(absSkillsDir, skillName) + if err != nil { + return fmt.Sprintf("Error loading skill '%s': %v", skillName, err), nil + } + + return fmt.Sprintf( + "The %q skill is loading\n\nBase directory for this skill: %s\n\n%s\n\n---\nThe skill has been loaded. Follow the instructions above and use the bash tool to execute commands.", + skillName, + filepath.Join(absSkillsDir, skillName), + content, + ), nil + }) + if err != nil { + return nil, fmt.Errorf("failed to create skills tool: %w", err) + } + + readFileTool, err := functiontool.New(functiontool.Config{ + Name: "read_file", + Description: readFileDescription, + }, func(ctx tool.Context, in readFileInput) (string, error) { + path, err := resolveReadPath(ctx.SessionID(), absSkillsDir, in.FilePath) + if err != nil { + return fmt.Sprintf("Error reading file %s: %v", strings.TrimSpace(in.FilePath), err), nil + } + + content, err := skillruntime.ReadFileContent(path, in.Offset, in.Limit) + if err != nil { + return fmt.Sprintf("Error reading file %s: %v", strings.TrimSpace(in.FilePath), err), nil + } + return content, nil + }) + if err != nil { + return nil, fmt.Errorf("failed to create read_file tool: %w", err) + } + + writeFileTool, err := functiontool.New(functiontool.Config{ + Name: "write_file", + Description: writeFileDescription, + }, func(ctx tool.Context, in writeFileInput) (string, error) { + path, err := resolveWritePath(ctx.SessionID(), absSkillsDir, in.FilePath) + if err != nil { + return fmt.Sprintf("Error writing file %s: %v", strings.TrimSpace(in.FilePath), err), nil + } + + if err := skillruntime.WriteFileContent(path, in.Content); err != nil { + return fmt.Sprintf("Error writing file %s: %v", strings.TrimSpace(in.FilePath), err), nil + } + return fmt.Sprintf("Successfully wrote file: %s", path), nil + }) + if err != nil { + return nil, fmt.Errorf("failed to create write_file tool: %w", err) + } + + editFileTool, err := functiontool.New(functiontool.Config{ + Name: "edit_file", + Description: editFileDescription, + }, func(ctx tool.Context, in editFileInput) (string, error) { + path, err := resolveEditPath(ctx.SessionID(), absSkillsDir, in.FilePath) + if err != nil { + return fmt.Sprintf("Error editing file %s: %v", strings.TrimSpace(in.FilePath), err), nil + } + + if err := skillruntime.EditFileContent(path, in.OldString, in.NewString, in.ReplaceAll); err != nil { + return fmt.Sprintf("Error editing file %s: %v", strings.TrimSpace(in.FilePath), err), nil + } + return fmt.Sprintf("Successfully edited file: %s", path), nil + }) + if err != nil { + return nil, fmt.Errorf("failed to create edit_file tool: %w", err) + } + + bashTool, err := functiontool.New(functiontool.Config{ + Name: "bash", + Description: bashDescription, + }, func(ctx tool.Context, in bashInput) (string, error) { + command := strings.TrimSpace(in.Command) + if command == "" { + return "Error: No command provided", nil + } + + sessionPath, err := skillruntime.GetSessionPath(ctx.SessionID(), absSkillsDir) + if err != nil { + return fmt.Sprintf("Error executing command %q: %v", command, err), nil + } + + result, err := commandExecutor.ExecuteCommand(ctx, command, sessionPath) + if err != nil { + return fmt.Sprintf("Error executing command %q: %v", command, err), nil + } + return result, nil + }) + if err != nil { + return nil, fmt.Errorf("failed to create bash tool: %w", err) + } + + return []tool.Tool{skillsTool, readFileTool, writeFileTool, editFileTool, bashTool}, nil +} + +func resolveReadPath(sessionID, skillsDirectory, requestedPath string) (string, error) { + sessionPath, err := skillruntime.GetSessionPath(sessionID, skillsDirectory) + if err != nil { + return "", err + } + + candidate, err := resolveRequestedPath(sessionPath, requestedPath) + if err != nil { + return "", err + } + + resolvedCandidate, err := filepath.EvalSymlinks(candidate) + if err != nil { + return "", err + } + + sessionRoot, err := filepath.Abs(sessionPath) + if err != nil { + return "", err + } + skillsRoot, err := filepath.EvalSymlinks(skillsDirectory) + if err != nil { + return "", err + } + + if !isWithinRoot(resolvedCandidate, sessionRoot) && !isWithinRoot(resolvedCandidate, skillsRoot) { + return "", fmt.Errorf("path %q is outside the allowed roots", requestedPath) + } + + return resolvedCandidate, nil +} + +func resolveEditPath(sessionID, skillsDirectory, requestedPath string) (string, error) { + sessionPath, err := skillruntime.GetSessionPath(sessionID, skillsDirectory) + if err != nil { + return "", err + } + + candidate, err := resolveRequestedPath(sessionPath, requestedPath) + if err != nil { + return "", err + } + + resolvedCandidate, err := filepath.EvalSymlinks(candidate) + if err != nil { + return "", err + } + + sessionRoot, err := filepath.Abs(sessionPath) + if err != nil { + return "", err + } + if !isWithinRoot(resolvedCandidate, sessionRoot) { + return "", fmt.Errorf("path %q is outside the writable session directory", requestedPath) + } + + return resolvedCandidate, nil +} + +func resolveWritePath(sessionID, skillsDirectory, requestedPath string) (string, error) { + sessionPath, err := skillruntime.GetSessionPath(sessionID, skillsDirectory) + if err != nil { + return "", err + } + + candidate, err := resolveRequestedPath(sessionPath, requestedPath) + if err != nil { + return "", err + } + + resolvedCandidate, err := resolvePathWithExistingParents(candidate) + if err != nil { + return "", err + } + + sessionRoot, err := filepath.Abs(sessionPath) + if err != nil { + return "", err + } + if !isWithinRoot(resolvedCandidate, sessionRoot) { + return "", fmt.Errorf("path %q is outside the writable session directory", requestedPath) + } + + return resolvedCandidate, nil +} + +func resolveRequestedPath(basePath, requestedPath string) (string, error) { + requestedPath = strings.TrimSpace(requestedPath) + if requestedPath == "" { + return "", fmt.Errorf("no file path provided") + } + + candidate := requestedPath + if !filepath.IsAbs(candidate) { + candidate = filepath.Join(basePath, candidate) + } + return filepath.Abs(candidate) +} + +func resolvePathWithExistingParents(path string) (string, error) { + absPath, err := filepath.Abs(path) + if err != nil { + return "", err + } + + current := absPath + for { + if _, err := os.Lstat(current); err == nil { + resolvedBase, err := filepath.EvalSymlinks(current) + if err != nil { + return "", err + } + if current == absPath { + return filepath.Clean(resolvedBase), nil + } + + relativeSuffix, err := filepath.Rel(current, absPath) + if err != nil { + return "", err + } + return filepath.Clean(filepath.Join(resolvedBase, relativeSuffix)), nil + } else if !os.IsNotExist(err) { + return "", err + } + + parent := filepath.Dir(current) + if parent == current { + return "", fmt.Errorf("failed to resolve path %q", path) + } + current = parent + } +} + +func isWithinRoot(path, root string) bool { + path = filepath.Clean(path) + root = filepath.Clean(root) + return path == root || strings.HasPrefix(path, root+string(filepath.Separator)) +} diff --git a/go/adk/pkg/tools/skills_test.go b/go/adk/pkg/tools/skills_test.go new file mode 100644 index 000000000..1e4758c9e --- /dev/null +++ b/go/adk/pkg/tools/skills_test.go @@ -0,0 +1,72 @@ +package tools + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestResolveReadPath_AllowsSymlinkedSkillsDirectory(t *testing.T) { + t.Setenv("TMPDIR", t.TempDir()) + skillsDir := t.TempDir() + skillFile := filepath.Join(skillsDir, "script.py") + if err := os.WriteFile(skillFile, []byte("print('ok')\n"), 0644); err != nil { + t.Fatalf("failed to write skill file: %v", err) + } + + sessionID := fmt.Sprintf("%s-read", t.Name()) + resolved, err := resolveReadPath(sessionID, skillsDir, "skills/script.py") + if err != nil { + t.Fatalf("resolveReadPath() error = %v", err) + } + if resolved != skillFile { + t.Fatalf("resolveReadPath() = %q, want %q", resolved, skillFile) + } +} + +func TestResolveWritePath_BlocksSkillsSymlink(t *testing.T) { + t.Setenv("TMPDIR", t.TempDir()) + skillsDir := t.TempDir() + sessionID := fmt.Sprintf("%s-write", t.Name()) + _, err := resolveWritePath(sessionID, skillsDir, "skills/new-file.txt") + if err == nil { + t.Fatal("expected write through skills symlink to be rejected") + } + if !strings.Contains(err.Error(), "outside the writable session directory") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestNewSkillsTools_ReturnsExpectedToolSet(t *testing.T) { + skillsDir := t.TempDir() + t.Setenv("KAGENT_SRT_SETTINGS_PATH", filepath.Join(t.TempDir(), "srt-settings.json")) + skillDir := filepath.Join(skillsDir, "demo") + if err := os.MkdirAll(skillDir, 0755); err != nil { + t.Fatalf("failed to create skill dir: %v", err) + } + if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`--- +name: demo +description: Demo skill. +--- +`), 0644); err != nil { + t.Fatalf("failed to write skill metadata: %v", err) + } + + tools, err := NewSkillsTools(skillsDir) + if err != nil { + t.Fatalf("NewSkillsTools() error = %v", err) + } + + got := map[string]bool{} + for _, tool := range tools { + got[tool.Name()] = true + } + + for _, name := range []string{"skills", "read_file", "write_file", "edit_file", "bash"} { + if !got[name] { + t.Errorf("expected tool %q to be present", name) + } + } +} diff --git a/go/api/adk/types.go b/go/api/adk/types.go index aee673f09..54f9d7005 100644 --- a/go/api/adk/types.go +++ b/go/api/adk/types.go @@ -385,6 +385,10 @@ type MemoryConfig struct { Embedding *EmbeddingConfig `json:"embedding,omitempty"` } +type NetworkConfig struct { + AllowedDomains []string `json:"allowed_domains,omitempty"` +} + // AgentContextConfig is the context management configuration that flows through config.json to the Python runtime. type AgentContextConfig struct { Compaction *AgentCompressionConfig `json:"compaction,omitempty"` @@ -427,6 +431,13 @@ func (c *AgentCompressionConfig) UnmarshalJSON(data []byte) error { return nil } +// RetryPolicyConfig configures retry behavior for agent request executions. +type RetryPolicyConfig struct { + MaxRetries int `json:"max_retries"` + InitialRetryDelay float64 `json:"initial_retry_delay_seconds"` + MaxRetryDelay *float64 `json:"max_retry_delay_seconds,omitempty"` +} + // See `python/packages/kagent-adk/src/kagent/adk/types.py` for the python version of this type AgentConfig struct { Model Model `json:"model"` @@ -438,7 +449,9 @@ type AgentConfig struct { ExecuteCode *bool `json:"execute_code,omitempty"` Stream *bool `json:"stream,omitempty"` Memory *MemoryConfig `json:"memory,omitempty"` + Network *NetworkConfig `json:"network,omitempty"` ContextConfig *AgentContextConfig `json:"context_config,omitempty"` + RetryPolicy *RetryPolicyConfig `json:"retry_policy,omitempty"` } // GetStream returns the stream value or default if not set @@ -468,7 +481,9 @@ func (a *AgentConfig) UnmarshalJSON(data []byte) error { ExecuteCode *bool `json:"execute_code,omitempty"` Stream *bool `json:"stream,omitempty"` Memory json.RawMessage `json:"memory"` + Network *NetworkConfig `json:"network,omitempty"` ContextConfig *AgentContextConfig `json:"context_config,omitempty"` + RetryPolicy *RetryPolicyConfig `json:"retry_policy,omitempty"` } if err := json.Unmarshal(data, &tmp); err != nil { return err @@ -496,7 +511,9 @@ func (a *AgentConfig) UnmarshalJSON(data []byte) error { a.ExecuteCode = tmp.ExecuteCode a.Stream = tmp.Stream a.Memory = memory + a.Network = tmp.Network a.ContextConfig = tmp.ContextConfig + a.RetryPolicy = tmp.RetryPolicy return nil } diff --git a/go/api/adk/types_test.go b/go/api/adk/types_test.go index f38cef504..d1d9c8c06 100644 --- a/go/api/adk/types_test.go +++ b/go/api/adk/types_test.go @@ -247,6 +247,40 @@ func TestMarshalJSON_TypeSpecificFields(t *testing.T) { }) } +func TestAgentConfig_UnmarshalJSON_Network(t *testing.T) { + configJSON := `{ + "model": { + "type": "openai", + "model": "gpt-4o" + }, + "description": "test agent", + "instruction": "you are helpful", + "network": { + "allowed_domains": ["api.example.com", "*.example.org"] + } + }` + + var cfg AgentConfig + if err := json.Unmarshal([]byte(configJSON), &cfg); err != nil { + t.Fatalf("failed to unmarshal config: %v", err) + } + + if cfg.Network == nil { + t.Fatal("network config is nil") + } + + if len(cfg.Network.AllowedDomains) != 2 { + t.Fatalf("allowed domains len = %d, want 2", len(cfg.Network.AllowedDomains)) + } + + if cfg.Network.AllowedDomains[0] != "api.example.com" { + t.Errorf("allowed_domains[0] = %q, want %q", cfg.Network.AllowedDomains[0], "api.example.com") + } + if cfg.Network.AllowedDomains[1] != "*.example.org" { + t.Errorf("allowed_domains[1] = %q, want %q", cfg.Network.AllowedDomains[1], "*.example.org") + } +} + func TestParseModel_Roundtrip(t *testing.T) { tests := []struct { name string diff --git a/go/api/config/crd/bases/kagent.dev_agents.yaml b/go/api/config/crd/bases/kagent.dev_agents.yaml index e68d93e30..8180fda2d 100644 --- a/go/api/config/crd/bases/kagent.dev_agents.yaml +++ b/go/api/config/crd/bases/kagent.dev_agents.yaml @@ -10166,6 +10166,26 @@ spec: rule: '!has(self.systemMessage) || !has(self.systemMessageFrom)' description: type: string + sandbox: + description: |- + Sandbox configures sandboxed execution behavior shared across runtimes. + This is intended for sandboxed declarative execution today, and can also + be consumed by BYO agents. + properties: + network: + description: |- + Network configures outbound network access for sandboxed execution paths. + When unset or when allowedDomains is empty, outbound access is denied by default. + properties: + allowedDomains: + description: |- + AllowedDomains lists the domains that sandboxed execution may contact. + Wildcards such as *.example.com are supported by the sandbox runtime. + items: + type: string + type: array + type: object + type: object skills: description: |- Skills to load into the agent. They will be pulled from the specified container images. diff --git a/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml b/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml index 35d6b08ec..9118e971b 100644 --- a/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml +++ b/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml @@ -7816,6 +7816,26 @@ spec: rule: '!has(self.systemMessage) || !has(self.systemMessageFrom)' description: type: string + sandbox: + description: |- + Sandbox configures sandboxed execution behavior shared across runtimes. + This is intended for sandboxed declarative execution today, and can also + be consumed by BYO agents. + properties: + network: + description: |- + Network configures outbound network access for sandboxed execution paths. + When unset or when allowedDomains is empty, outbound access is denied by default. + properties: + allowedDomains: + description: |- + AllowedDomains lists the domains that sandboxed execution may contact. + Wildcards such as *.example.com are supported by the sandbox runtime. + items: + type: string + type: array + type: object + type: object skills: description: |- Skills to load into the agent. They will be pulled from the specified container images. diff --git a/go/api/v1alpha2/agent_types.go b/go/api/v1alpha2/agent_types.go index e41ab979b..3d38a282f 100644 --- a/go/api/v1alpha2/agent_types.go +++ b/go/api/v1alpha2/agent_types.go @@ -69,6 +69,12 @@ type AgentSpec struct { // +optional Skills *SkillForAgent `json:"skills,omitempty"` + // Sandbox configures sandboxed execution behavior shared across runtimes. + // This is intended for sandboxed declarative execution today, and can also + // be consumed by BYO agents. + // +optional + Sandbox *SandboxConfig `json:"sandbox,omitempty"` + // AllowedNamespaces defines which namespaces are allowed to reference this Agent as a tool. // This follows the Gateway API pattern for cross-namespace route attachments. // If not specified, only Agents in the same namespace can reference this Agent as a tool. @@ -203,6 +209,27 @@ type DeclarativeAgentSpec struct { // This includes event compaction (compression) and context caching. // +optional Context *ContextConfig `json:"context,omitempty"` + + // RetryPolicy configures retry behavior for failed agent request executions. + // When set, failed requests are retried with exponential backoff. + // +optional + RetryPolicy *RetryPolicySpec `json:"retryPolicy,omitempty"` +} + +// SandboxConfig configures sandboxed execution behavior. +type SandboxConfig struct { + // Network configures outbound network access for sandboxed execution paths. + // When unset or when allowedDomains is empty, outbound access is denied by default. + // +optional + Network *NetworkConfig `json:"network,omitempty"` +} + +// NetworkConfig configures outbound network access for sandboxed execution paths. +type NetworkConfig struct { + // AllowedDomains lists the domains that sandboxed execution may contact. + // Wildcards such as *.example.com are supported by the sandbox runtime. + // +optional + AllowedDomains []string `json:"allowedDomains,omitempty"` } // ContextConfig configures context management for an agent. @@ -253,6 +280,21 @@ type ContextSummarizerConfig struct { PromptTemplate *string `json:"promptTemplate,omitempty"` } +// RetryPolicySpec configures retry behavior for failed agent request executions. +// When set, failed requests are retried with exponential backoff. +type RetryPolicySpec struct { + // Maximum number of retry attempts. 0 means no retries. + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:default=0 + MaxRetries *int `json:"maxRetries,omitempty"` + // Initial delay between retries. Uses Go duration format (e.g., "1s", "500ms"). + // +kubebuilder:default="1s" + InitialRetryDelay *string `json:"initialRetryDelay,omitempty"` + // Maximum delay between retries (caps exponential growth). Optional. + // +optional + MaxRetryDelay *string `json:"maxRetryDelay,omitempty"` +} + // PromptTemplateSpec configures prompt template processing for an agent's system message. type PromptTemplateSpec struct { // DataSources defines the ConfigMaps whose keys can be included in the systemMessage diff --git a/go/api/v1alpha2/zz_generated.deepcopy.go b/go/api/v1alpha2/zz_generated.deepcopy.go index ed186ec34..67f7c4b53 100644 --- a/go/api/v1alpha2/zz_generated.deepcopy.go +++ b/go/api/v1alpha2/zz_generated.deepcopy.go @@ -165,6 +165,11 @@ func (in *AgentSpec) DeepCopyInto(out *AgentSpec) { *out = new(SkillForAgent) (*in).DeepCopyInto(*out) } + if in.Sandbox != nil { + in, out := &in.Sandbox, &out.Sandbox + *out = new(SandboxConfig) + (*in).DeepCopyInto(*out) + } if in.AllowedNamespaces != nil { in, out := &in.AllowedNamespaces, &out.AllowedNamespaces *out = new(AllowedNamespaces) @@ -490,6 +495,11 @@ func (in *DeclarativeAgentSpec) DeepCopyInto(out *DeclarativeAgentSpec) { *out = new(ContextConfig) (*in).DeepCopyInto(*out) } + if in.RetryPolicy != nil { + in, out := &in.RetryPolicy, &out.RetryPolicy + *out = new(RetryPolicySpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeclarativeAgentSpec. @@ -883,6 +893,26 @@ func (in *ModelProviderConfigStatus) DeepCopy() *ModelProviderConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NetworkConfig) DeepCopyInto(out *NetworkConfig) { + *out = *in + if in.AllowedDomains != nil { + in, out := &in.AllowedDomains, &out.AllowedDomains + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkConfig. +func (in *NetworkConfig) DeepCopy() *NetworkConfig { + if in == nil { + return nil + } + out := new(NetworkConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OllamaConfig) DeepCopyInto(out *OllamaConfig) { *out = *in @@ -1110,6 +1140,36 @@ func (in *RemoteMCPServerStatus) DeepCopy() *RemoteMCPServerStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RetryPolicySpec) DeepCopyInto(out *RetryPolicySpec) { + *out = *in + if in.MaxRetries != nil { + in, out := &in.MaxRetries, &out.MaxRetries + *out = new(int) + **out = **in + } + if in.InitialRetryDelay != nil { + in, out := &in.InitialRetryDelay, &out.InitialRetryDelay + *out = new(string) + **out = **in + } + if in.MaxRetryDelay != nil { + in, out := &in.MaxRetryDelay, &out.MaxRetryDelay + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RetryPolicySpec. +func (in *RetryPolicySpec) DeepCopy() *RetryPolicySpec { + if in == nil { + return nil + } + out := new(RetryPolicySpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SandboxAgent) DeepCopyInto(out *SandboxAgent) { *out = *in @@ -1169,6 +1229,26 @@ func (in *SandboxAgentList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SandboxConfig) DeepCopyInto(out *SandboxConfig) { + *out = *in + if in.Network != nil { + in, out := &in.Network, &out.Network + *out = new(NetworkConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SandboxConfig. +func (in *SandboxConfig) DeepCopy() *SandboxConfig { + if in == nil { + return nil + } + out := new(SandboxConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SecretReference) DeepCopyInto(out *SecretReference) { *out = *in diff --git a/go/core/internal/controller/translator/agent/adk_api_translator_test.go b/go/core/internal/controller/translator/agent/adk_api_translator_test.go index 0d19b5a93..1c4f10835 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator_test.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator_test.go @@ -1257,6 +1257,22 @@ func Test_AdkApiTranslator_ContextConfig(t *testing.T) { assert.Equal(t, adk.ModelTypeOpenAI, cfg.ContextConfig.Compaction.SummarizerModel.GetType()) }, }, + { + name: "network allowlist", + agent: func() *v1alpha2.Agent { + agent := makeAgent(nil) + agent.Spec.Sandbox = &v1alpha2.SandboxConfig{ + Network: &v1alpha2.NetworkConfig{ + AllowedDomains: []string{"api.example.com", "*.example.org"}, + }, + } + return agent + }(), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.Network) + assert.Equal(t, []string{"api.example.com", "*.example.org"}, cfg.Network.AllowedDomains) + }, + }, } for _, tt := range tests { @@ -1290,6 +1306,128 @@ func Test_AdkApiTranslator_ContextConfig(t *testing.T) { } } +func Test_AdkApiTranslator_RetryPolicy(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-model", + Namespace: "default", + }, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4", + Provider: v1alpha2.ModelProviderOpenAI, + }, + } + + makeAgent := func(retryPolicy *v1alpha2.RetryPolicySpec) *v1alpha2.Agent { + return &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "test-agent", Namespace: "default"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Test agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "You are a test agent", + ModelConfig: "test-model", + RetryPolicy: retryPolicy, + }, + }, + } + } + + tests := []struct { + name string + agent *v1alpha2.Agent + wantErr bool + errContains string + assertConfig func(t *testing.T, cfg *adk.AgentConfig) + }{ + { + name: "no retry policy", + agent: makeAgent(nil), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + assert.Nil(t, cfg.RetryPolicy) + }, + }, + { + name: "basic retry policy", + agent: makeAgent(&v1alpha2.RetryPolicySpec{ + MaxRetries: new(3), + InitialRetryDelay: new("1s"), + }), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.RetryPolicy) + assert.Equal(t, 3, cfg.RetryPolicy.MaxRetries) + assert.Equal(t, 1.0, cfg.RetryPolicy.InitialRetryDelay) + assert.Nil(t, cfg.RetryPolicy.MaxRetryDelay) + }, + }, + { + name: "retry policy with max delay", + agent: makeAgent(&v1alpha2.RetryPolicySpec{ + MaxRetries: new(5), + InitialRetryDelay: new("500ms"), + MaxRetryDelay: new("30s"), + }), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.RetryPolicy) + assert.Equal(t, 5, cfg.RetryPolicy.MaxRetries) + assert.Equal(t, 0.5, cfg.RetryPolicy.InitialRetryDelay) + require.NotNil(t, cfg.RetryPolicy.MaxRetryDelay) + assert.Equal(t, 30.0, *cfg.RetryPolicy.MaxRetryDelay) + }, + }, + { + name: "invalid initial retry delay", + agent: makeAgent(&v1alpha2.RetryPolicySpec{ + MaxRetries: new(3), + InitialRetryDelay: new("not-a-duration"), + }), + wantErr: true, + errContains: "invalid initialRetryDelay", + }, + { + name: "invalid max retry delay", + agent: makeAgent(&v1alpha2.RetryPolicySpec{ + MaxRetries: new(3), + InitialRetryDelay: new("1s"), + MaxRetryDelay: new("bad"), + }), + wantErr: true, + errContains: "invalid maxRetryDelay", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(modelConfig.DeepCopy()). + Build() + + defaultModel := types.NamespacedName{Namespace: "default", Name: "test-model"} + trans := translator.NewAdkApiTranslator(kubeClient, defaultModel, nil, "", nil) + outputs, err := translator.TranslateAgent(context.Background(), trans, tt.agent) + + if tt.wantErr { + require.Error(t, err) + if tt.errContains != "" { + assert.Contains(t, err.Error(), tt.errContains) + } + return + } + + require.NoError(t, err) + require.NotNil(t, outputs) + require.NotNil(t, outputs.Config) + if tt.assertConfig != nil { + tt.assertConfig(t, outputs.Config) + } + }) + } +} + func Test_AdkApiTranslator_SandboxAgent_defaultEmitsSandbox(t *testing.T) { ctx := context.Background() scheme := schemev1.Scheme diff --git a/go/core/internal/controller/translator/agent/compiler.go b/go/core/internal/controller/translator/agent/compiler.go index 403014b34..8f175caac 100644 --- a/go/core/internal/controller/translator/agent/compiler.go +++ b/go/core/internal/controller/translator/agent/compiler.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "slices" + "time" "github.com/kagent-dev/kagent/go/api/adk" "github.com/kagent-dev/kagent/go/api/v1alpha2" @@ -14,6 +15,7 @@ import ( // AgentManifestInputs holds the translated data needed to emit Kubernetes resources. type AgentManifestInputs struct { Config *adk.AgentConfig + Sandbox *v1alpha2.SandboxConfig Deployment *resolvedDeployment AgentCard *server.AgentCard SecretHashBytes []byte @@ -101,6 +103,7 @@ func (a *adkApiTranslator) CompileAgent( return &AgentManifestInputs{ Config: cfg, + Sandbox: spec.Sandbox, Deployment: dep, AgentCard: card, SecretHashBytes: secretHashBytes, @@ -174,6 +177,12 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent v1alp Stream: new(spec.Declarative.Stream), } + if spec.Sandbox != nil && spec.Sandbox.Network != nil { + cfg.Network = &adk.NetworkConfig{ + AllowedDomains: append([]string(nil), spec.Sandbox.Network.AllowedDomains...), + } + } + // Translate context management configuration if spec.Declarative.Context != nil { contextCfg := &adk.AgentContextConfig{} @@ -236,6 +245,15 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent v1alp } } + // Translate retry policy configuration + if spec.Declarative.RetryPolicy != nil { + retryPolicy, err := translateRetryPolicy(spec.Declarative.RetryPolicy) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to translate retry policy: %w", err) + } + cfg.RetryPolicy = retryPolicy + } + for _, tool := range spec.Declarative.Tools { headers, err := tool.ResolveHeaders(ctx, a.kube, agent.GetNamespace()) if err != nil { @@ -318,3 +336,34 @@ func (a *adkApiTranslator) resolveRawSystemMessage(ctx context.Context, agent v1 } return "", fmt.Errorf("at least one system message source (SystemMessage or SystemMessageFrom) must be specified") } + +func translateRetryPolicy(spec *v1alpha2.RetryPolicySpec) (*adk.RetryPolicyConfig, error) { + if spec == nil { + return nil, nil + } + + cfg := &adk.RetryPolicyConfig{} + + if spec.MaxRetries != nil { + cfg.MaxRetries = *spec.MaxRetries + } + + if spec.InitialRetryDelay != nil { + d, err := time.ParseDuration(*spec.InitialRetryDelay) + if err != nil { + return nil, fmt.Errorf("invalid initialRetryDelay %q: %w", *spec.InitialRetryDelay, err) + } + cfg.InitialRetryDelay = d.Seconds() + } + + if spec.MaxRetryDelay != nil { + d, err := time.ParseDuration(*spec.MaxRetryDelay) + if err != nil { + return nil, fmt.Errorf("invalid maxRetryDelay %q: %w", *spec.MaxRetryDelay, err) + } + secs := d.Seconds() + cfg.MaxRetryDelay = &secs + } + + return cfg, nil +} diff --git a/go/core/internal/controller/translator/agent/deployments.go b/go/core/internal/controller/translator/agent/deployments.go index d1275fd8b..e3c2c6bdb 100644 --- a/go/core/internal/controller/translator/agent/deployments.go +++ b/go/core/internal/controller/translator/agent/deployments.go @@ -139,10 +139,14 @@ func resolveInlineDeployment(agent v1alpha2.AgentObject, mdd *modelDeploymentDat registry = spec.ImageRegistry } - // Get repository based on runtime repository := getRuntimeImageRepository(runtime) - image := fmt.Sprintf("%s/%s:%s", registry, repository, DefaultImageConfig.Tag) + tag := DefaultImageConfig.Tag + if runtime == v1alpha2.DeclarativeRuntime_Go && needsSRTSettings(agent, specRef.Sandbox) { + tag += "-full" + } + + image := fmt.Sprintf("%s/%s:%s", registry, repository, tag) imagePullPolicy := corev1.PullPolicy(DefaultImageConfig.PullPolicy) if spec.ImagePullPolicy != "" { diff --git a/go/core/internal/controller/translator/agent/manifest_builder.go b/go/core/internal/controller/translator/agent/manifest_builder.go index e146e3204..b5b44d6a1 100644 --- a/go/core/internal/controller/translator/agent/manifest_builder.go +++ b/go/core/internal/controller/translator/agent/manifest_builder.go @@ -57,7 +57,7 @@ func (a *adkApiTranslator) BuildManifest( outputs := &AgentOutputs{} manifestCtx := newManifestContext(agent, inputs.Deployment) - configSecret, err := a.buildConfigSecret(manifestCtx, inputs.Config, inputs.AgentCard, inputs.SecretHashBytes) + configSecret, err := a.buildConfigSecret(manifestCtx, inputs.Config, inputs.Sandbox, inputs.AgentCard, inputs.SecretHashBytes) if err != nil { return nil, err } @@ -67,7 +67,7 @@ func (a *adkApiTranslator) BuildManifest( outputs.Manifest = append(outputs.Manifest, sa) } - podRuntime, err := buildPodRuntime(manifestCtx, inputs.Config, configSecret.volumes, configSecret.mounts) + podRuntime, err := buildPodRuntime(manifestCtx, inputs.Config, inputs.Sandbox, configSecret.volumes, configSecret.mounts) if err != nil { return nil, err } @@ -127,32 +127,48 @@ func (m manifestContext) objectMeta() metav1.ObjectMeta { func (a *adkApiTranslator) buildConfigSecret( manifestCtx manifestContext, cfg *adk.AgentConfig, + sandboxCfg *v1alpha2.SandboxConfig, card *server.AgentCard, modelConfigSecretHashBytes []byte, ) (*configSecretInputs, error) { cfgJSON := "" agentCard := "" + srtSettingsJSON := "" var configHash uint64 var volumes []corev1.Volume var mounts []corev1.VolumeMount - if cfg != nil && card != nil { + if cfg != nil { bCfg, err := json.Marshal(cfg) if err != nil { return nil, err } + cfgJSON = string(bCfg) + } + if card != nil { bCard, err := json.Marshal(card) if err != nil { return nil, err } + agentCard = string(bCard) + } + if needsSRTSettings(manifestCtx.agent, sandboxCfg) { + bSRTSettings, err := buildSRTSettingsJSON(sandboxCfg) + if err != nil { + return nil, err + } + srtSettingsJSON = string(bSRTSettings) + } + if cfg != nil || srtSettingsJSON != "" { secretData := modelConfigSecretHashBytes if secretData == nil { secretData = []byte{} } - configHash = computeConfigHash(bCfg, bCard, secretData) - cfgJSON = string(bCfg) - agentCard = string(bCard) + hashData := make([]byte, 0, len(secretData)+len(srtSettingsJSON)) + hashData = append(hashData, secretData...) + hashData = append(hashData, srtSettingsJSON...) + configHash = computeConfigHash([]byte(cfgJSON), []byte(agentCard), hashData) volumes = []corev1.Volume{{ Name: "config", VolumeSource: corev1.VolumeSource{ @@ -166,10 +182,7 @@ func (a *adkApiTranslator) buildConfigSecret( secret: &corev1.Secret{ TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Secret"}, ObjectMeta: manifestCtx.objectMeta(), - StringData: map[string]string{ - "config.json": cfgJSON, - "agent-card.json": agentCard, - }, + StringData: buildConfigSecretData(cfgJSON, agentCard, srtSettingsJSON), }, configHash: configHash, volumes: volumes, @@ -177,6 +190,17 @@ func (a *adkApiTranslator) buildConfigSecret( }, nil } +func buildConfigSecretData(cfgJSON, agentCard, srtSettingsJSON string) map[string]string { + data := map[string]string{ + "config.json": cfgJSON, + "agent-card.json": agentCard, + } + if srtSettingsJSON != "" { + data["srt-settings.json"] = srtSettingsJSON + } + return data +} + func buildServiceAccount(manifestCtx manifestContext) *corev1.ServiceAccount { serviceAccountName := manifestCtx.deployment.ServiceAccountName if serviceAccountName == nil || *serviceAccountName != manifestCtx.agent.GetName() { @@ -214,6 +238,7 @@ func buildServiceAccount(manifestCtx manifestContext) *corev1.ServiceAccount { func buildPodRuntime( manifestCtx manifestContext, cfg *adk.AgentConfig, + sandboxCfg *v1alpha2.SandboxConfig, secretVolumes []corev1.Volume, secretMounts []corev1.VolumeMount, ) (*podRuntimeInputs, error) { @@ -236,6 +261,13 @@ func buildPodRuntime( MountPath: "/var/run/secrets/tokens", }) + if needsSRTSettings(manifestCtx.agent, sandboxCfg) { + sharedEnv = append(sharedEnv, corev1.EnvVar{ + Name: env.KagentSRTSettingsPath.Name(), + Value: env.KagentSRTSettingsPath.DefaultValue(), + }) + } + envVars := append([]corev1.EnvVar{}, manifestCtx.deployment.Env...) envVars = append(envVars, sharedEnv...) @@ -248,6 +280,38 @@ func buildPodRuntime( }, nil } +func needsSRTSettings(agent v1alpha2.AgentObject, sandboxCfg *v1alpha2.SandboxConfig) bool { + spec := agent.GetAgentSpec() + if spec.Type == v1alpha2.AgentType_BYO { + return sandboxCfg != nil + } + if spec.Skills != nil { + return true + } + return spec.Declarative != nil && + spec.Declarative.ExecuteCodeBlocks != nil && + *spec.Declarative.ExecuteCodeBlocks +} + +func buildSRTSettingsJSON(sandboxCfg *v1alpha2.SandboxConfig) ([]byte, error) { + allowedDomains := []string{} + if sandboxCfg != nil && sandboxCfg.Network != nil { + allowedDomains = append(allowedDomains, sandboxCfg.Network.AllowedDomains...) + } + + return json.Marshal(map[string]any{ + "network": map[string]any{ + "allowedDomains": allowedDomains, + "deniedDomains": []string{}, + }, + "filesystem": map[string]any{ + "denyRead": []string{}, + "allowWrite": []string{".", "/tmp"}, + "denyWrite": []string{}, + }, + }) +} + func collectSharedEnv(agent v1alpha2.AgentObject) []corev1.EnvVar { sharedEnv := make([]corev1.EnvVar, 0, 8) sharedEnv = append(sharedEnv, collectOtelEnvFromProcess()...) diff --git a/go/core/internal/controller/translator/agent/manifest_builder_test.go b/go/core/internal/controller/translator/agent/manifest_builder_test.go new file mode 100644 index 000000000..8c01a2af5 --- /dev/null +++ b/go/core/internal/controller/translator/agent/manifest_builder_test.go @@ -0,0 +1,119 @@ +package agent + +import ( + "encoding/json" + "testing" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestBuildSRTSettingsJSON_DefaultDenyConfig(t *testing.T) { + got, err := buildSRTSettingsJSON(nil) + if err != nil { + t.Fatalf("buildSRTSettingsJSON() error = %v", err) + } + + var settings map[string]any + if err := json.Unmarshal(got, &settings); err != nil { + t.Fatalf("failed to unmarshal settings: %v", err) + } + + network, ok := settings["network"].(map[string]any) + if !ok { + t.Fatalf("settings.network missing or wrong type: %#v", settings["network"]) + } + if got := network["allowedDomains"]; len(got.([]any)) != 0 { + t.Fatalf("allowedDomains = %#v, want empty list", got) + } + if got := network["deniedDomains"]; len(got.([]any)) != 0 { + t.Fatalf("deniedDomains = %#v, want empty list", got) + } + + filesystem, ok := settings["filesystem"].(map[string]any) + if !ok { + t.Fatalf("settings.filesystem missing or wrong type: %#v", settings["filesystem"]) + } + if got := filesystem["denyRead"]; len(got.([]any)) != 0 { + t.Fatalf("denyRead = %#v, want empty list", got) + } + if got := filesystem["allowWrite"].([]any); len(got) != 2 || got[0] != "." || got[1] != "/tmp" { + t.Fatalf("allowWrite = %#v, want ['.','/tmp']", got) + } + if got := filesystem["denyWrite"]; len(got.([]any)) != 0 { + t.Fatalf("denyWrite = %#v, want empty list", got) + } +} + +func TestNeedsSRTSettings(t *testing.T) { + declarativeAgent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "decl", Namespace: "default"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Declarative: &v1alpha2.DeclarativeAgentSpec{}, + }, + } + skillsAgent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "skills", Namespace: "default"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Declarative: &v1alpha2.DeclarativeAgentSpec{}, + Skills: &v1alpha2.SkillForAgent{Refs: []string{"example.com/skill:latest"}}, + }, + } + executeCode := true + codeAgent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "code", Namespace: "default"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Declarative: &v1alpha2.DeclarativeAgentSpec{ + ExecuteCodeBlocks: &executeCode, + }, + }, + } + byoAgent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "byo", Namespace: "default"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_BYO, + BYO: &v1alpha2.BYOAgentSpec{}, + }, + } + + if needsSRTSettings(declarativeAgent, nil) { + t.Fatal("declarative agents without sandboxed execution should not get srt settings") + } + if !needsSRTSettings(skillsAgent, nil) { + t.Fatal("declarative agents with skills should get srt settings") + } + if !needsSRTSettings(codeAgent, nil) { + t.Fatal("declarative agents with executeCodeBlocks should get srt settings") + } + if needsSRTSettings(byoAgent, nil) { + t.Fatal("BYO agents should not get srt settings unless sandbox config is set") + } + if !needsSRTSettings(byoAgent, &v1alpha2.SandboxConfig{}) { + t.Fatal("BYO agents with sandbox config should get srt settings") + } +} + +func TestBuildConfigSecretData_OmitsEmptySRTSettings(t *testing.T) { + data := buildConfigSecretData(`{"app":"ok"}`, `{"card":"ok"}`, "") + + if data["config.json"] == "" { + t.Fatal("config.json should be present") + } + if data["agent-card.json"] == "" { + t.Fatal("agent-card.json should be present") + } + if _, ok := data["srt-settings.json"]; ok { + t.Fatal("srt-settings.json should be omitted when empty") + } +} + +func TestBuildConfigSecretData_IncludesSRTSettingsWhenPresent(t *testing.T) { + data := buildConfigSecretData(`{"app":"ok"}`, `{"card":"ok"}`, `{"network":{}}`) + + if got := data["srt-settings.json"]; got == "" { + t.Fatal("srt-settings.json should be present when non-empty") + } +} diff --git a/go/core/internal/controller/translator/agent/runtime_test.go b/go/core/internal/controller/translator/agent/runtime_test.go index 52be85a45..9bf8425a7 100644 --- a/go/core/internal/controller/translator/agent/runtime_test.go +++ b/go/core/internal/controller/translator/agent/runtime_test.go @@ -83,6 +83,7 @@ func TestRuntime_GoRuntime(t *testing.T) { require.Len(t, deployment.Spec.Template.Spec.Containers, 1) container := deployment.Spec.Template.Spec.Containers[0] assert.Contains(t, container.Image, "golang-adk", "Image should use golang-adk repository") + assert.NotContains(t, container.Image, "-full", "Go runtime without sandboxed execution should use the distroless tag") // Verify Go runtime readiness probe timings (fast startup) require.NotNil(t, container.ReadinessProbe) @@ -91,6 +92,72 @@ func TestRuntime_GoRuntime(t *testing.T) { assert.Equal(t, int32(1), container.ReadinessProbe.PeriodSeconds, "Go runtime should have 1s period") } +func TestRuntime_GoRuntimeWithSkillsUsesFullImageTag(t *testing.T) { + ctx := context.Background() + + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-go-skills-agent", + Namespace: "test", + }, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Declarative: &v1alpha2.DeclarativeAgentSpec{ + Runtime: v1alpha2.DeclarativeRuntime_Go, + SystemMessage: "Test Go agent with skills", + ModelConfig: "test-model", + }, + Skills: &v1alpha2.SkillForAgent{ + Refs: []string{"example.com/skill:latest"}, + }, + }, + } + + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-model", + Namespace: "test", + }, + Spec: v1alpha2.ModelConfigSpec{ + Provider: "OpenAI", + Model: "gpt-4o", + }, + } + + scheme := schemev1.Scheme + err := v1alpha2.AddToScheme(scheme) + require.NoError(t, err) + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(agent, modelConfig). + Build() + + defaultModel := types.NamespacedName{ + Namespace: "test", + Name: "test-model", + } + translatorInstance := translator.NewAdkApiTranslator(kubeClient, defaultModel, nil, "", nil) + + result, err := translator.TranslateAgent(ctx, translatorInstance, agent) + require.NoError(t, err) + require.NotNil(t, result) + + var deployment *appsv1.Deployment + for _, obj := range result.Manifest { + if dep, ok := obj.(*appsv1.Deployment); ok { + deployment = dep + break + } + } + require.NotNil(t, deployment, "Deployment should be in manifest") + + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + container := deployment.Spec.Template.Spec.Containers[0] + assert.Contains(t, container.Image, "golang-adk", "Image should use golang-adk repository") + assert.Contains(t, container.Image, "-full", "Go runtime with skills should use the full image tag") +} + func TestRuntime_PythonRuntime(t *testing.T) { ctx := context.Background() @@ -317,4 +384,77 @@ func TestRuntime_CustomRepositoryPath(t *testing.T) { require.Len(t, deployment.Spec.Template.Spec.Containers, 1) container := deployment.Spec.Template.Spec.Containers[0] assert.Contains(t, container.Image, "my-registry.com/custom/golang-adk", "Image should use custom repository with golang-adk") + assert.NotContains(t, container.Image, "-full", "Go runtime without sandboxed execution should keep the base tag") +} + +func TestRuntime_CustomRepositoryPath_WithSkillsUsesFullTag(t *testing.T) { + ctx := context.Background() + + originalRepo := translator.DefaultImageConfig.Repository + defer func() { + translator.DefaultImageConfig.Repository = originalRepo + }() + translator.DefaultImageConfig.Repository = "my-registry.com/custom/app" + + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-custom-repo-skills-agent", + Namespace: "test", + }, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Declarative: &v1alpha2.DeclarativeAgentSpec{ + Runtime: v1alpha2.DeclarativeRuntime_Go, + SystemMessage: "Test Go agent with custom repo and skills", + ModelConfig: "test-model", + }, + Skills: &v1alpha2.SkillForAgent{ + Refs: []string{"example.com/skill:latest"}, + }, + }, + } + + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-model", + Namespace: "test", + }, + Spec: v1alpha2.ModelConfigSpec{ + Provider: "OpenAI", + Model: "gpt-4o", + }, + } + + scheme := schemev1.Scheme + err := v1alpha2.AddToScheme(scheme) + require.NoError(t, err) + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(agent, modelConfig). + Build() + + defaultModel := types.NamespacedName{ + Namespace: "test", + Name: "test-model", + } + translatorInstance := translator.NewAdkApiTranslator(kubeClient, defaultModel, nil, "", nil) + + result, err := translator.TranslateAgent(ctx, translatorInstance, agent) + require.NoError(t, err) + require.NotNil(t, result) + + var deployment *appsv1.Deployment + for _, obj := range result.Manifest { + if dep, ok := obj.(*appsv1.Deployment); ok { + deployment = dep + break + } + } + require.NotNil(t, deployment, "Deployment should be in manifest") + + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + container := deployment.Spec.Template.Spec.Containers[0] + assert.Contains(t, container.Image, "my-registry.com/custom/golang-adk", "Image should use custom repository with golang-adk") + assert.Contains(t, container.Image, "-full", "Go runtime with skills should use the full tag") } diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_code.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_code.json index bb75cf120..675b20862 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_code.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_code.json @@ -62,7 +62,8 @@ }, "stringData": { "agent-card.json": "{\"name\":\"agent_with_code\",\"description\":\"\",\"url\":\"http://agent-with-code.test:8080\",\"version\":\"\",\"capabilities\":{\"streaming\":true,\"pushNotifications\":false,\"stateTransitionHistory\":true},\"defaultInputModes\":[\"text\"],\"defaultOutputModes\":[\"text\"],\"skills\":[]}", - "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"headers\":{\"User-Agent\":\"kagent/1.0\"},\"base_url\":\"\",\"max_tokens\":1024,\"reasoning_effort\":\"low\",\"temperature\":0.7,\"top_p\":0.95},\"description\":\"\",\"instruction\":\"You are a helpful assistant.\",\"execute_code\":true,\"stream\":false}" + "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"headers\":{\"User-Agent\":\"kagent/1.0\"},\"base_url\":\"\",\"max_tokens\":1024,\"reasoning_effort\":\"low\",\"temperature\":0.7,\"top_p\":0.95},\"description\":\"\",\"instruction\":\"You are a helpful assistant.\",\"execute_code\":true,\"stream\":false}", + "srt-settings.json": "{\"filesystem\":{\"allowWrite\":[\".\",\"/tmp\"],\"denyRead\":[],\"denyWrite\":[]},\"network\":{\"allowedDomains\":[],\"deniedDomains\":[]}}" } }, { @@ -131,7 +132,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "17236809883120755669" + "kagent.dev/config-hash": "3112549247443018156" }, "labels": { "app": "kagent", @@ -177,6 +178,10 @@ { "name": "KAGENT_URL", "value": "http://kagent-controller.kagent:8083" + }, + { + "name": "KAGENT_SRT_SETTINGS_PATH", + "value": "/config/srt-settings.json" } ], "image": "cr.kagent.dev/kagent-dev/kagent/app:dev", diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json index 9edfe2a8d..ba9eb7369 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json @@ -61,7 +61,8 @@ }, "stringData": { "agent-card.json": "{\"name\":\"git_skills_agent\",\"description\":\"\",\"url\":\"http://git-skills-agent.test:8080\",\"version\":\"\",\"capabilities\":{\"streaming\":true,\"pushNotifications\":false,\"stateTransitionHistory\":true},\"defaultInputModes\":[\"text\"],\"defaultOutputModes\":[\"text\"],\"skills\":[]}", - "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"headers\":{\"User-Agent\":\"kagent/1.0\"},\"base_url\":\"\",\"max_tokens\":1024,\"reasoning_effort\":\"low\",\"temperature\":0.7,\"top_p\":0.95},\"description\":\"\",\"instruction\":\"You are a helpful assistant with skills from git.\",\"stream\":false}" + "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"headers\":{\"User-Agent\":\"kagent/1.0\"},\"base_url\":\"\",\"max_tokens\":1024,\"reasoning_effort\":\"low\",\"temperature\":0.7,\"top_p\":0.95},\"description\":\"\",\"instruction\":\"You are a helpful assistant with skills from git.\",\"stream\":false}", + "srt-settings.json": "{\"filesystem\":{\"allowWrite\":[\".\",\"/tmp\"],\"denyRead\":[],\"denyWrite\":[]},\"network\":{\"allowedDomains\":[],\"deniedDomains\":[]}}" } }, { @@ -130,7 +131,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "9999974246309522967" + "kagent.dev/config-hash": "10549266842660390977" }, "labels": { "app": "kagent", @@ -180,6 +181,10 @@ { "name": "KAGENT_SKILLS_FOLDER", "value": "/skills" + }, + { + "name": "KAGENT_SRT_SETTINGS_PATH", + "value": "/config/srt-settings.json" } ], "image": "cr.kagent.dev/kagent-dev/kagent/app:dev", diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json index b5d432949..350b4cc3a 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json @@ -61,7 +61,8 @@ }, "stringData": { "agent-card.json": "{\"name\":\"skills_agent\",\"description\":\"\",\"url\":\"http://skills-agent.test:8080\",\"version\":\"\",\"capabilities\":{\"streaming\":true,\"pushNotifications\":false,\"stateTransitionHistory\":true},\"defaultInputModes\":[\"text\"],\"defaultOutputModes\":[\"text\"],\"skills\":[]}", - "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"headers\":{\"User-Agent\":\"kagent/1.0\"},\"base_url\":\"\",\"max_tokens\":1024,\"reasoning_effort\":\"low\",\"temperature\":0.7,\"top_p\":0.95},\"description\":\"\",\"instruction\":\"You are a helpful assistant.\",\"stream\":false}" + "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"headers\":{\"User-Agent\":\"kagent/1.0\"},\"base_url\":\"\",\"max_tokens\":1024,\"reasoning_effort\":\"low\",\"temperature\":0.7,\"top_p\":0.95},\"description\":\"\",\"instruction\":\"You are a helpful assistant.\",\"stream\":false}", + "srt-settings.json": "{\"filesystem\":{\"allowWrite\":[\".\",\"/tmp\"],\"denyRead\":[],\"denyWrite\":[]},\"network\":{\"allowedDomains\":[],\"deniedDomains\":[]}}" } }, { @@ -130,7 +131,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "3277683458140603261" + "kagent.dev/config-hash": "6814466927027660546" }, "labels": { "app": "kagent", @@ -180,6 +181,10 @@ { "name": "KAGENT_SKILLS_FOLDER", "value": "/skills" + }, + { + "name": "KAGENT_SRT_SETTINGS_PATH", + "value": "/config/srt-settings.json" } ], "image": "cr.kagent.dev/kagent-dev/kagent/app:dev", diff --git a/go/core/pkg/env/kagent.go b/go/core/pkg/env/kagent.go index 9d6786aed..be8c62d41 100644 --- a/go/core/pkg/env/kagent.go +++ b/go/core/pkg/env/kagent.go @@ -46,6 +46,13 @@ var ( ComponentAgentRuntime, ) + KagentSRTSettingsPath = RegisterStringVar( + "KAGENT_SRT_SETTINGS_PATH", + "/config/srt-settings.json", + "Path to the mounted srt settings file used by sandboxed execution.", + ComponentAgentRuntime, + ) + KagentPropagateToken = RegisterStringVar( "KAGENT_PROPAGATE_TOKEN", "", diff --git a/go/core/test/e2e/invoke_api_test.go b/go/core/test/e2e/invoke_api_test.go index 23b08ffe2..b8de767d7 100644 --- a/go/core/test/e2e/invoke_api_test.go +++ b/go/core/test/e2e/invoke_api_test.go @@ -22,6 +22,7 @@ import ( "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/internal/a2a" + "github.com/kagent-dev/kagent/go/core/internal/utils" e2emocks "github.com/kagent-dev/kagent/go/core/test/e2e/mocks" "github.com/kagent-dev/kmcp/api/v1alpha1" "github.com/kagent-dev/mockllm" @@ -159,6 +160,7 @@ type AgentOptions struct { Stream bool Env []corev1.EnvVar Skills *v1alpha2.SkillForAgent + Sandbox *v1alpha2.SandboxConfig ExecuteCode *bool Runtime *v1alpha2.DeclarativeRuntime Memory *v1alpha2.MemorySpec @@ -498,6 +500,10 @@ func generateAgent(modelConfigName string, tools []*v1alpha2.Tool, opts AgentOpt agent.Spec.Declarative.Memory = opts.Memory } + if opts.Sandbox != nil { + agent.Spec.Sandbox = opts.Sandbox + } + if opts.PromptTemplate != nil { agent.Spec.Declarative.PromptTemplate = opts.PromptTemplate } @@ -1140,6 +1146,56 @@ func TestE2EInvokeSkillInAgent(t *testing.T) { runSyncTest(t, a2aClient, "make me a kebab", "Pick it up from around the corner", nil) } +func TestE2EDeclarativeAgentNetworkAllowlistWithSkills(t *testing.T) { + runDeclarativeAgentNetworkAllowlistWithSkills(t, "python", nil) +} + +func TestE2EGoDeclarativeAgentNetworkAllowlistWithSkills(t *testing.T) { + goRuntime := v1alpha2.DeclarativeRuntime_Go + runDeclarativeAgentNetworkAllowlistWithSkills(t, "go", &goRuntime) +} + +func runDeclarativeAgentNetworkAllowlistWithSkills(t *testing.T, runtimeName string, runtimeOverride *v1alpha2.DeclarativeRuntime) { + baseURL, stopServer := setupMockServer(t, "mocks/invoke_skill_network.json") + defer stopServer() + + cli := setupK8sClient(t, false) + modelCfg := setupModelConfig(t, cli, baseURL) + + controllerHost := fmt.Sprintf("%s.%s", utils.GetControllerName(), utils.GetResourceNamespace()) + + t.Run(runtimeName+"/deny_by_default", func(t *testing.T) { + agent := setupAgentWithOptions(t, cli, modelCfg.Name, nil, AgentOptions{ + Runtime: runtimeOverride, + Skills: &v1alpha2.SkillForAgent{ + InsecureSkipVerify: true, + Refs: []string{"kind-registry:5000/kebab-maker:latest"}, + }, + }) + + a2aClient := setupA2AClient(t, agent) + runSyncTest(t, a2aClient, "check the controller health with bash", "python and node are available; network denied", nil) + }) + + t.Run(runtimeName+"/allowlist_enables_access", func(t *testing.T) { + agent := setupAgentWithOptions(t, cli, modelCfg.Name, nil, AgentOptions{ + Runtime: runtimeOverride, + Skills: &v1alpha2.SkillForAgent{ + InsecureSkipVerify: true, + Refs: []string{"kind-registry:5000/kebab-maker:latest"}, + }, + Sandbox: &v1alpha2.SandboxConfig{ + Network: &v1alpha2.NetworkConfig{ + AllowedDomains: []string{controllerHost}, + }, + }, + }) + + a2aClient := setupA2AClient(t, agent) + runSyncTest(t, a2aClient, "check the controller health with bash", "python and node are available; controller health is ok", nil) + }) +} + func TestE2EInvokePassthroughAgent(t *testing.T) { // Setup mock server with header matching — the mock only responds // if the Authorization header contains our passthrough token. @@ -1413,6 +1469,38 @@ func TestE2EIAgentRunsCode(t *testing.T) { runSyncTest(t, a2aClient, "write some code", "hello, world!", nil) } +func TestE2ESandboxAgentNetworkAllowlistWithExecuteCode(t *testing.T) { + baseURL, stopServer := setupMockServer(t, "mocks/run_code_network.json") + defer stopServer() + + cli := setupK8sClient(t, false) + modelCfg := setupModelConfig(t, cli, baseURL) + controllerHost := fmt.Sprintf("%s.%s", utils.GetControllerName(), utils.GetResourceNamespace()) + + t.Run("deny_by_default", func(t *testing.T) { + agent := setupSandboxAgentWithOptions(t, cli, modelCfg.Name, nil, AgentOptions{ + ExecuteCode: new(true), + }) + + a2aClient := setupSandboxA2AClient(t, agent) + runSyncTest(t, a2aClient, "check the controller health in python", "NETWORK_DENIED", nil) + }) + + t.Run("allowlist_enables_access", func(t *testing.T) { + agent := setupSandboxAgentWithOptions(t, cli, modelCfg.Name, nil, AgentOptions{ + ExecuteCode: new(true), + Sandbox: &v1alpha2.SandboxConfig{ + Network: &v1alpha2.NetworkConfig{ + AllowedDomains: []string{controllerHost}, + }, + }, + }) + + a2aClient := setupSandboxA2AClient(t, agent) + runSyncTest(t, a2aClient, "check the controller health in python", "controller health is ok", nil) + }) +} + func cleanup(t *testing.T, cli client.Client, obj ...client.Object) { t.Cleanup(func() { for _, o := range obj { diff --git a/go/core/test/e2e/mocks/invoke_skill_network.json b/go/core/test/e2e/mocks/invoke_skill_network.json new file mode 100644 index 000000000..f830be48f --- /dev/null +++ b/go/core/test/e2e/mocks/invoke_skill_network.json @@ -0,0 +1,92 @@ +{ + "openai": [ + { + "name": "initial_request", + "match": { + "match_type": "contains", + "message": { + "content": "check the controller health with bash", + "role": "user" + } + }, + "response": { + "id": "chatcmpl-1", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "role": "assistant", + "message": { + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "bash", + "arguments": "{\"command\": \"python -c \\\"print('PY_OK')\\\" && node -e \\\"console.log('NODE_OK')\\\" && python - <<'PY'\\nimport os\\nimport urllib.request\\nurl = os.environ['KAGENT_URL'] + '/health'\\ntry:\\n urllib.request.urlopen(url, timeout=5).read()\\n print('NETWORK_ALLOWED')\\nexcept Exception:\\n print('NETWORK_DENIED')\\nPY\"}" + } + } + ] + }, + "finish_reason": "tool_calls" + } + ] + } + }, + { + "name": "bash_response_denied", + "match": { + "match_type": "contains", + "message": { + "content": "PY_OK\nNODE_OK\nNETWORK_DENIED", + "role": "tool", + "tool_call_id": "call_1" + } + }, + "response": { + "id": "chatcmpl-2", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "role": "assistant", + "message": { + "content": "python and node are available; network denied" + } + } + ] + } + }, + { + "name": "bash_response_allowed", + "match": { + "match_type": "contains", + "message": { + "content": "PY_OK\nNODE_OK\nNETWORK_ALLOWED", + "role": "tool", + "tool_call_id": "call_1" + } + }, + "response": { + "id": "chatcmpl-3", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "role": "assistant", + "message": { + "content": "python and node are available; controller health is ok" + } + } + ] + } + } + ] +} diff --git a/go/core/test/e2e/mocks/run_code_network.json b/go/core/test/e2e/mocks/run_code_network.json new file mode 100644 index 000000000..49df76511 --- /dev/null +++ b/go/core/test/e2e/mocks/run_code_network.json @@ -0,0 +1,79 @@ +{ + "openai": [ + { + "name": "initial_request", + "match": { + "match_type": "contains", + "message": { + "content": "check the controller health in python", + "role": "user" + } + }, + "response": { + "id": "chatcmpl-1", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "role": "assistant", + "message": { + "content": "```python\nimport os\nimport urllib.request\nurl = os.environ['KAGENT_URL'] + '/health'\ntry:\n print(urllib.request.urlopen(url, timeout=5).read().decode().strip())\nexcept Exception:\n print('NETWORK_DENIED')\n```" + } + } + ] + } + }, + { + "name": "code_execution_denied", + "match": { + "match_type": "contains", + "message": { + "content": "NETWORK_DENIED", + "role": "user" + } + }, + "response": { + "id": "executioncmpl-1", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "role": "assistant", + "message": { + "content": "NETWORK_DENIED" + } + } + ] + } + }, + { + "name": "code_execution_allowed", + "match": { + "match_type": "contains", + "message": { + "content": "\"status\":\"OK\"", + "role": "user" + } + }, + "response": { + "id": "executioncmpl-2", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "role": "assistant", + "message": { + "content": "controller health is ok" + } + } + ] + } + } + ] +} diff --git a/helm/kagent-crds/templates/kagent.dev_agents.yaml b/helm/kagent-crds/templates/kagent.dev_agents.yaml index e68d93e30..8180fda2d 100644 --- a/helm/kagent-crds/templates/kagent.dev_agents.yaml +++ b/helm/kagent-crds/templates/kagent.dev_agents.yaml @@ -10166,6 +10166,26 @@ spec: rule: '!has(self.systemMessage) || !has(self.systemMessageFrom)' description: type: string + sandbox: + description: |- + Sandbox configures sandboxed execution behavior shared across runtimes. + This is intended for sandboxed declarative execution today, and can also + be consumed by BYO agents. + properties: + network: + description: |- + Network configures outbound network access for sandboxed execution paths. + When unset or when allowedDomains is empty, outbound access is denied by default. + properties: + allowedDomains: + description: |- + AllowedDomains lists the domains that sandboxed execution may contact. + Wildcards such as *.example.com are supported by the sandbox runtime. + items: + type: string + type: array + type: object + type: object skills: description: |- Skills to load into the agent. They will be pulled from the specified container images. diff --git a/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml b/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml index 35d6b08ec..9118e971b 100644 --- a/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml +++ b/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml @@ -7816,6 +7816,26 @@ spec: rule: '!has(self.systemMessage) || !has(self.systemMessageFrom)' description: type: string + sandbox: + description: |- + Sandbox configures sandboxed execution behavior shared across runtimes. + This is intended for sandboxed declarative execution today, and can also + be consumed by BYO agents. + properties: + network: + description: |- + Network configures outbound network access for sandboxed execution paths. + When unset or when allowedDomains is empty, outbound access is denied by default. + properties: + allowedDomains: + description: |- + AllowedDomains lists the domains that sandboxed execution may contact. + Wildcards such as *.example.com are supported by the sandbox runtime. + items: + type: string + type: array + type: object + type: object skills: description: |- Skills to load into the agent. They will be pulled from the specified container images. diff --git a/python/packages/kagent-adk/src/kagent/adk/_a2a.py b/python/packages/kagent-adk/src/kagent/adk/_a2a.py index 4329d94d1..02de5b71f 100644 --- a/python/packages/kagent-adk/src/kagent/adk/_a2a.py +++ b/python/packages/kagent-adk/src/kagent/adk/_a2a.py @@ -138,9 +138,10 @@ def create_runner() -> Runner: if not local and http_client is not None: task_store = KAgentTaskStore(http_client) + retry_policy = self.agent_config.retry_policy if self.agent_config else None agent_executor = A2aAgentExecutor( runner=create_runner, - config=A2aAgentExecutorConfig(stream=self.stream), + config=A2aAgentExecutorConfig(stream=self.stream, retry_policy=retry_policy), task_store=task_store, ) diff --git a/python/packages/kagent-adk/src/kagent/adk/_agent_executor.py b/python/packages/kagent-adk/src/kagent/adk/_agent_executor.py index 726e5ad46..131df61fb 100644 --- a/python/packages/kagent-adk/src/kagent/adk/_agent_executor.py +++ b/python/packages/kagent-adk/src/kagent/adk/_agent_executor.py @@ -53,6 +53,7 @@ ) from ._mcp_toolset import is_anyio_cross_task_cancel_scope_error +from .types import RetryPolicyConfig from ._remote_a2a_tool import SubagentSessionProvider from .converters.event_converter import convert_event_to_a2a_events, serialize_metadata_value from .converters.part_converter import convert_a2a_part_to_genai_part, convert_genai_part_to_a2a_part @@ -65,6 +66,15 @@ class A2aAgentExecutorConfig(BaseModel): """Configuration for the KAgent A2aAgentExecutor.""" stream: bool = False + retry_policy: Optional["RetryPolicyConfig"] = None + + +def _compute_retry_delay(attempt: int, policy: "RetryPolicyConfig") -> float: + """Compute retry delay using exponential backoff.""" + delay = policy.initial_retry_delay_seconds * (2**attempt) + if policy.max_retry_delay_seconds is not None: + delay = min(delay, policy.max_retry_delay_seconds) + return delay def _kagent_request_converter(request, _part_converter=None): @@ -163,42 +173,61 @@ async def execute( context: RequestContext, event_queue: EventQueue, ): - """Executes an A2A request and publishes updates to the event queue - specified. It runs as following: - * Takes the input from the A2A request - * Convert the input to ADK input content, and runs the ADK agent - * Collects output events of the underlying ADK Agent - * Converts the ADK output events into A2A task updates - * Publishes the updates back to A2A server via event queue - """ - try: - await self._execute_impl(context, event_queue) - except asyncio.CancelledError as e: - # anyio cancel scope corruption (from MCP session cleanup in a - # different task context) calls Task.cancel() on the current - # task. CancelledError can escape from multiple places: the - # outer try body, the inner except handler (if the task's - # cancellation counter > 1), or the finally block's - # _safe_close_runner (which re-raises CancelledError). - # This top-level guard ensures CancelledError never propagates - # to _run_event_stream in the A2A SDK, which would produce a - # 500 Internal Server Error. - current_task = asyncio.current_task() - if current_task is not None: - # Clear all pending cancellation requests so subsequent - # awaits (e.g. publishing the failure event) don't re-raise. - while current_task.uncancel() > 0: - pass - logger.error( - "CancelledError escaped execute, converting to failed status: %s", - e, - exc_info=True, - ) - await self._publish_failed_status_event( - context, - event_queue, - str(e) or "A2A request execution was cancelled.", - ) + """Executes an A2A request with optional retry on failure.""" + retry_policy = self._kagent_config.retry_policy if self._kagent_config else None + max_attempts = 1 + (retry_policy.max_retries if retry_policy else 0) + last_error: Exception | None = None + + for attempt in range(max_attempts): + try: + await self._execute_impl(context, event_queue) + return # success + except asyncio.CancelledError as e: + # anyio cancel scope corruption (from MCP session cleanup in a + # different task context) calls Task.cancel() on the current + # task. CancelledError can escape from multiple places: the + # outer try body, the inner except handler (if the task's + # cancellation counter > 1), or the finally block's + # _safe_close_runner (which re-raises CancelledError). + # This top-level guard ensures CancelledError never propagates + # to _run_event_stream in the A2A SDK, which would produce a + # 500 Internal Server Error. + # Never retry cancellations. + current_task = asyncio.current_task() + if current_task is not None: + # Clear all pending cancellation requests so subsequent + # awaits (e.g. publishing the failure event) don't re-raise. + while current_task.uncancel() > 0: + pass + logger.error( + "CancelledError escaped execute, converting to failed status: %s", + e, + exc_info=True, + ) + await self._publish_failed_status_event( + context, + event_queue, + str(e) or "A2A request execution was cancelled.", + ) + return + except Exception as e: + last_error = e + if attempt + 1 < max_attempts: + delay = _compute_retry_delay(attempt, retry_policy) + logger.warning( + "Request failed (attempt %d/%d), retrying in %.1fs: %s", + attempt + 1, + max_attempts, + delay, + e, + ) + await asyncio.sleep(delay) + + # All attempts exhausted — publish failure (matches original behavior) + if last_error is not None: + logger.error("All %d attempts failed: %s", max_attempts, last_error, exc_info=True) + error_message = str(last_error) + await self._publish_failed_status_event(context, event_queue, error_message) async def _execute_impl( self, diff --git a/python/packages/kagent-adk/src/kagent/adk/cli.py b/python/packages/kagent-adk/src/kagent/adk/cli.py index 838d70134..e22ed7ef9 100644 --- a/python/packages/kagent-adk/src/kagent/adk/cli.py +++ b/python/packages/kagent-adk/src/kagent/adk/cli.py @@ -44,6 +44,13 @@ def maybe_add_skills(root_agent: BaseAgent): add_skills_tool_to_agent(skills_directory, root_agent) +def maybe_add_skills_with_config(root_agent: BaseAgent, agent_config: Optional[AgentConfig] = None): + skills_directory = os.getenv("KAGENT_SKILLS_FOLDER", None) + if skills_directory: + logger.info(f"Adding skills from directory: {skills_directory}") + add_skills_tool_to_agent(skills_directory, root_agent) + + @app.command() def static( host: str = "127.0.0.1", @@ -75,7 +82,7 @@ def static( def root_agent_factory() -> BaseAgent: root_agent = agent_config.to_agent(app_cfg.name, sts_integration) - maybe_add_skills(root_agent) + maybe_add_skills_with_config(root_agent, agent_config) return root_agent @@ -149,7 +156,7 @@ def root_agent_factory() -> BaseAgent: if sts_integration: add_to_agent(sts_integration, root_agent) - maybe_add_skills(root_agent) + maybe_add_skills_with_config(root_agent, agent_config) return root_agent @@ -213,7 +220,7 @@ async def test_agent(agent_config: AgentConfig, agent_card: AgentCard, task: str def root_agent_factory() -> BaseAgent: root_agent = agent_config.to_agent(app_cfg.name, sts_integration) - maybe_add_skills(root_agent) + maybe_add_skills_with_config(root_agent, agent_config) return root_agent app = KAgentApp( diff --git a/python/packages/kagent-adk/src/kagent/adk/sandbox_code_executer.py b/python/packages/kagent-adk/src/kagent/adk/sandbox_code_executer.py index 40dd61d33..bdd67ba79 100644 --- a/python/packages/kagent-adk/src/kagent/adk/sandbox_code_executer.py +++ b/python/packages/kagent-adk/src/kagent/adk/sandbox_code_executer.py @@ -22,7 +22,8 @@ from pydantic import Field from typing_extensions import override -from kagent.skills.shell import _sanitize_env +from kagent.adk.artifacts.session_path import get_session_path +from kagent.skills.shell import _get_srt_settings_args, _sanitize_env class SandboxedLocalCodeExecutor(BaseCodeExecutor): @@ -52,14 +53,17 @@ def execute_code( """Executes the given code in a sandboxed local context. uses the srt command to sandbox""" output = "" error = "" + srt_args = _get_srt_settings_args() + working_dir = get_session_path(session_id=invocation_context.session.id) try: # Execute the provided code by piping it to `python -` inside the sandbox. proc = subprocess.run( - ["srt", "python", "-"], + ["srt", *srt_args, "python", "-"], input=code_execution_input.code, capture_output=True, text=True, + cwd=working_dir, env=_sanitize_env(), ) output = proc.stdout or "" @@ -71,7 +75,6 @@ def execute_code( except Exception as e: output = "" error = f"Unexpected error during execution: {e}" - # Collect the final result. return CodeExecutionResult( stdout=output, diff --git a/python/packages/kagent-adk/src/kagent/adk/tools/bash_tool.py b/python/packages/kagent-adk/src/kagent/adk/tools/bash_tool.py index a15f90a2d..27f45d16f 100644 --- a/python/packages/kagent-adk/src/kagent/adk/tools/bash_tool.py +++ b/python/packages/kagent-adk/src/kagent/adk/tools/bash_tool.py @@ -65,7 +65,11 @@ async def run_async(self, *, args: Dict[str, Any], tool_context: ToolContext) -> try: working_dir = get_session_path(session_id=tool_context.session.id) - result = await execute_command(command, working_dir, self.skills_directory) + result = await execute_command( + command, + working_dir, + self.skills_directory, + ) logger.info(f"Executed bash command: {command}, description: {description}") return result except Exception as e: diff --git a/python/packages/kagent-adk/src/kagent/adk/tools/skills_plugin.py b/python/packages/kagent-adk/src/kagent/adk/tools/skills_plugin.py index 024869690..bf7e899da 100644 --- a/python/packages/kagent-adk/src/kagent/adk/tools/skills_plugin.py +++ b/python/packages/kagent-adk/src/kagent/adk/tools/skills_plugin.py @@ -12,7 +12,10 @@ logger = logging.getLogger("kagent_adk." + __name__) -def add_skills_tool_to_agent(skills_directory: str | Path, agent: BaseAgent) -> None: +def add_skills_tool_to_agent( + skills_directory: str | Path, + agent: BaseAgent, +) -> None: """Utility function to add Skills and Bash tools to a given agent. Args: diff --git a/python/packages/kagent-adk/src/kagent/adk/types.py b/python/packages/kagent-adk/src/kagent/adk/types.py index 74ef7f46f..f2eb57c6e 100644 --- a/python/packages/kagent-adk/src/kagent/adk/types.py +++ b/python/packages/kagent-adk/src/kagent/adk/types.py @@ -254,6 +254,16 @@ class MemoryConfig(BaseModel): embedding: EmbeddingConfig | None = None # Embedding model config for memory tools. +class RetryPolicyConfig(BaseModel): + """Retry policy configuration for agent request executions.""" + + max_retries: int = 0 + initial_retry_delay_seconds: float = 1.0 + max_retry_delay_seconds: float | None = None +class NetworkConfig(BaseModel): + allowed_domains: list[str] = Field(default_factory=list) + + class AgentConfig(BaseModel): model: ModelUnion = Field(discriminator="type") description: str @@ -264,7 +274,9 @@ class AgentConfig(BaseModel): execute_code: bool | None = None stream: bool | None = None # Refers to LLM response streaming, not A2A streaming memory: MemoryConfig | None = None # Memory configuration + network: NetworkConfig | None = None context_config: ContextConfig | None = None + retry_policy: RetryPolicyConfig | None = None # Retry policy configuration def to_agent(self, name: str, sts_integration: Optional[ADKTokenPropagationPlugin] = None) -> Agent: if name is None or not str(name).strip(): diff --git a/python/packages/kagent-adk/tests/unittests/test_context_config.py b/python/packages/kagent-adk/tests/unittests/test_context_config.py index 512974dab..236ba6e5f 100644 --- a/python/packages/kagent-adk/tests/unittests/test_context_config.py +++ b/python/packages/kagent-adk/tests/unittests/test_context_config.py @@ -109,6 +109,13 @@ def test_round_trip_serialization(self): assert parsed.compaction.overlap_size == 2 assert parsed.compaction.token_threshold == 1000 + def test_network_config(self): + data = json.loads(_make_agent_config_json()) + data["network"] = {"allowed_domains": ["api.example.com", "*.example.org"]} + config = AgentConfig.model_validate(data) + assert config.network is not None + assert config.network.allowed_domains == ["api.example.com", "*.example.org"] + class TestBuildAdkContextConfigs: def test_compaction_only(self): diff --git a/python/packages/kagent-adk/tests/unittests/test_retry_policy.py b/python/packages/kagent-adk/tests/unittests/test_retry_policy.py new file mode 100644 index 000000000..4998efa43 --- /dev/null +++ b/python/packages/kagent-adk/tests/unittests/test_retry_policy.py @@ -0,0 +1,187 @@ +import asyncio +import json +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic import ValidationError + +from kagent.adk._agent_executor import A2aAgentExecutor, A2aAgentExecutorConfig, _compute_retry_delay +from kagent.adk.types import AgentConfig, RetryPolicyConfig + + +def _make_agent_config_json(**retry_kwargs) -> dict: + config = { + "model": {"type": "openai", "model": "gpt-4"}, + "description": "test agent", + "instruction": "test instruction", + } + if retry_kwargs: + config["retry_policy"] = retry_kwargs + return config + + +class TestRetryPolicyConfigParsing: + def test_no_retry_policy(self): + config = AgentConfig.model_validate(_make_agent_config_json()) + assert config.retry_policy is None + + def test_retry_policy_defaults(self): + data = _make_agent_config_json(max_retries=3) + config = AgentConfig.model_validate(data) + assert config.retry_policy is not None + assert config.retry_policy.max_retries == 3 + assert config.retry_policy.initial_retry_delay_seconds == 1.0 + assert config.retry_policy.max_retry_delay_seconds is None + + def test_retry_policy_all_fields(self): + data = _make_agent_config_json( + max_retries=5, + initial_retry_delay_seconds=0.5, + max_retry_delay_seconds=30.0, + ) + config = AgentConfig.model_validate(data) + assert config.retry_policy.max_retries == 5 + assert config.retry_policy.initial_retry_delay_seconds == 0.5 + assert config.retry_policy.max_retry_delay_seconds == 30.0 + + def test_retry_policy_json_roundtrip(self): + data = _make_agent_config_json( + max_retries=3, + initial_retry_delay_seconds=2.0, + max_retry_delay_seconds=60.0, + ) + config = AgentConfig.model_validate(data) + dumped = json.loads(config.model_dump_json()) + assert dumped["retry_policy"]["max_retries"] == 3 + assert dumped["retry_policy"]["initial_retry_delay_seconds"] == 2.0 + assert dumped["retry_policy"]["max_retry_delay_seconds"] == 60.0 + + +class TestRetryExecution: + @pytest.mark.asyncio + async def test_no_retry_on_success(self): + """Successful execution should not retry.""" + mock_runner = MagicMock() + executor = A2aAgentExecutor( + runner=mock_runner, + config=A2aAgentExecutorConfig( + stream=False, + retry_policy=RetryPolicyConfig(max_retries=3, initial_retry_delay_seconds=0.01), + ), + ) + context = MagicMock() + context.message = MagicMock() + context.current_task = None + context.task_id = "test-task" + context.context_id = "test-context" + event_queue = AsyncMock() + + with patch.object(executor, "_execute_impl", new_callable=AsyncMock) as mock_impl: + await executor.execute(context, event_queue) + assert mock_impl.call_count == 1 + + @pytest.mark.asyncio + async def test_retry_on_failure(self): + """Failed execution should retry up to max_retries.""" + mock_runner = MagicMock() + executor = A2aAgentExecutor( + runner=mock_runner, + config=A2aAgentExecutorConfig( + stream=False, + retry_policy=RetryPolicyConfig(max_retries=2, initial_retry_delay_seconds=0.01), + ), + ) + context = MagicMock() + context.message = MagicMock() + context.current_task = None + context.task_id = "test-task" + context.context_id = "test-context" + event_queue = AsyncMock() + + with patch.object( + executor, + "_execute_impl", + new_callable=AsyncMock, + side_effect=RuntimeError("transient error"), + ) as mock_impl: + await executor.execute(context, event_queue) + # 1 initial + 2 retries = 3 total + assert mock_impl.call_count == 3 + + @pytest.mark.asyncio + async def test_no_retry_on_cancelled_error(self): + """CancelledError should never be retried.""" + mock_runner = MagicMock() + executor = A2aAgentExecutor( + runner=mock_runner, + config=A2aAgentExecutorConfig( + stream=False, + retry_policy=RetryPolicyConfig(max_retries=3, initial_retry_delay_seconds=0.01), + ), + ) + context = MagicMock() + context.message = MagicMock() + context.current_task = None + context.task_id = "test-task" + context.context_id = "test-context" + event_queue = AsyncMock() + + with patch.object( + executor, + "_execute_impl", + new_callable=AsyncMock, + side_effect=asyncio.CancelledError("cancelled"), + ) as mock_impl: + await executor.execute(context, event_queue) + assert mock_impl.call_count == 1 + + @pytest.mark.asyncio + async def test_retry_succeeds_on_second_attempt(self): + """Retry should stop after first success.""" + mock_runner = MagicMock() + executor = A2aAgentExecutor( + runner=mock_runner, + config=A2aAgentExecutorConfig( + stream=False, + retry_policy=RetryPolicyConfig(max_retries=3, initial_retry_delay_seconds=0.01), + ), + ) + context = MagicMock() + context.message = MagicMock() + context.current_task = None + context.task_id = "test-task" + context.context_id = "test-context" + event_queue = AsyncMock() + + call_count = 0 + + async def fail_then_succeed(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise RuntimeError("transient error") + + with patch.object(executor, "_execute_impl", new_callable=AsyncMock, side_effect=fail_then_succeed): + await executor.execute(context, event_queue) + assert call_count == 2 + + +class TestRetryDelayComputation: + def test_exponential_backoff(self): + policy = RetryPolicyConfig(max_retries=5, initial_retry_delay_seconds=1.0) + assert _compute_retry_delay(0, policy) == 1.0 + assert _compute_retry_delay(1, policy) == 2.0 + assert _compute_retry_delay(2, policy) == 4.0 + assert _compute_retry_delay(3, policy) == 8.0 + + def test_exponential_backoff_with_max(self): + policy = RetryPolicyConfig( + max_retries=5, + initial_retry_delay_seconds=1.0, + max_retry_delay_seconds=5.0, + ) + assert _compute_retry_delay(0, policy) == 1.0 + assert _compute_retry_delay(1, policy) == 2.0 + assert _compute_retry_delay(2, policy) == 4.0 + assert _compute_retry_delay(3, policy) == 5.0 # capped + assert _compute_retry_delay(4, policy) == 5.0 # still capped diff --git a/python/packages/kagent-adk/tests/unittests/test_sandbox_code_executor.py b/python/packages/kagent-adk/tests/unittests/test_sandbox_code_executor.py new file mode 100644 index 000000000..5adeb229e --- /dev/null +++ b/python/packages/kagent-adk/tests/unittests/test_sandbox_code_executor.py @@ -0,0 +1,29 @@ +from types import SimpleNamespace +from unittest.mock import patch + +from google.adk.code_executors.code_execution_utils import CodeExecutionInput + +from kagent.adk.sandbox_code_executer import SandboxedLocalCodeExecutor + + +def test_execute_code_uses_session_working_directory(): + executor = SandboxedLocalCodeExecutor() + invocation_context = SimpleNamespace(session=SimpleNamespace(id="session-123")) + code_input = CodeExecutionInput(code="print('ok')") + + with ( + patch("kagent.adk.sandbox_code_executer.get_session_path", return_value="/tmp/kagent/session-123"), + patch( + "kagent.adk.sandbox_code_executer._get_srt_settings_args", + return_value=["--settings", "/config/srt-settings.json"], + ), + patch("kagent.adk.sandbox_code_executer._sanitize_env", return_value={}), + patch("kagent.adk.sandbox_code_executer.subprocess.run") as mock_run, + ): + mock_run.return_value = SimpleNamespace(stdout="ok\n", stderr="") + result = executor.execute_code(invocation_context, code_input) + + assert result.stdout == "ok\n" + assert result.stderr == "" + _, kwargs = mock_run.call_args + assert kwargs["cwd"] == "/tmp/kagent/session-123" diff --git a/python/packages/kagent-skills/src/kagent/skills/shell.py b/python/packages/kagent-skills/src/kagent/skills/shell.py index a133d4739..6c89d8df3 100644 --- a/python/packages/kagent-skills/src/kagent/skills/shell.py +++ b/python/packages/kagent-skills/src/kagent/skills/shell.py @@ -159,6 +159,14 @@ def _sanitize_env(env: dict[str, str] | None = None) -> dict[str, str]: return {k: v for k, v in source.items() if k not in _SECRET_ENV_NAMES and not _SECRET_PATTERNS.search(k)} +def _get_srt_settings_args() -> list[str]: + """Return srt settings args using the mounted config path.""" + settings_path_env = os.environ.get("KAGENT_SRT_SETTINGS_PATH", "").strip() + if not settings_path_env: + raise ValueError("KAGENT_SRT_SETTINGS_PATH is not set") + return ["--settings", settings_path_env] + + def _get_command_timeout_seconds(command: str) -> float: """Determine appropriate timeout for a command.""" if "python " in command or "python3 " in command: @@ -167,7 +175,11 @@ def _get_command_timeout_seconds(command: str) -> float: return 30.0 # 30 seconds for other commands -async def execute_command(command: str, working_dir: Path, skills_dir: Path = Path("/skills")) -> str: +async def execute_command( + command: str, + working_dir: Path, + skills_dir: Path = Path("/skills"), +) -> str: """Executes a shell command in a sandboxed environment.""" timeout = _get_command_timeout_seconds(command) @@ -187,9 +199,12 @@ async def execute_command(command: str, working_dir: Path, skills_dir: Path = Pa env["PATH"] = f"{bash_venv_bin}:{env.get('PATH', '')}" env["VIRTUAL_ENV"] = bash_venv_path + srt_args = _get_srt_settings_args() + try: process = await asyncio.create_subprocess_exec( "srt", + *srt_args, "sh", "-c", command, diff --git a/python/packages/kagent-skills/src/kagent/tests/unittests/test_skill_execution.py b/python/packages/kagent-skills/src/kagent/tests/unittests/test_skill_execution.py index 22119e5a1..17e731e82 100644 --- a/python/packages/kagent-skills/src/kagent/tests/unittests/test_skill_execution.py +++ b/python/packages/kagent-skills/src/kagent/tests/unittests/test_skill_execution.py @@ -1,3 +1,4 @@ +import os import json import shutil import tempfile @@ -15,7 +16,7 @@ read_file_content, write_file_content, ) -from kagent.skills.shell import _sanitize_env +from kagent.skills.shell import _get_srt_settings_args, _sanitize_env @pytest.fixture @@ -104,10 +105,29 @@ async def test_skill_core_logic(skill_test_env: Path): input_csv_path = session_dir / "uploads" / "data.csv" input_csv_path.write_text("id,name\n1,Alice\n2,Bob\n") + fake_bin_dir = session_dir.parent / "bin" + fake_bin_dir.mkdir() + fake_srt = fake_bin_dir / "srt" + fake_srt.write_text('#!/bin/sh\nif [ "$1" = "--settings" ]; then\n shift 2\nfi\nexec "$@"\n') + fake_srt.chmod(0o755) + + settings_path = session_dir.parent / "srt-settings.json" + settings_path.write_text( + '{"network":{"allowedDomains":[],"deniedDomains":[]},"filesystem":{"denyRead":[],"allowWrite":[".","/tmp"],"denyWrite":[]}}' + ) + # 2. Execute the skill's core command, just as an agent would # We use the centralized `execute_command` function directly command = "python skills/csv-to-json/scripts/convert.py uploads/data.csv outputs/result.json" - result = await execute_command(command, working_dir=session_dir, skills_dir=Path("/skills")) + with patch.dict( + "os.environ", + { + "KAGENT_SRT_SETTINGS_PATH": str(settings_path), + "PATH": f"{fake_bin_dir}:{os.environ.get('PATH', '')}", + }, + clear=False, + ): + result = await execute_command(command, working_dir=session_dir, skills_dir=Path("/skills")) assert "Successfully converted" in result @@ -144,6 +164,7 @@ async def mock_exec(*args, **kwargs): injection_payload = 'ls"; cat /etc/passwd; echo "pwned' with ( + patch.dict("os.environ", {"KAGENT_SRT_SETTINGS_PATH": "/config/srt-settings.json"}, clear=False), patch("asyncio.create_subprocess_shell") as mock_shell, patch("asyncio.create_subprocess_exec", side_effect=mock_exec), ): @@ -156,11 +177,27 @@ async def mock_exec(*args, **kwargs): args = captured["args"] # The first argument should still be the sandbox runner. assert args[0] == "srt" + assert args[1] == "--settings" # The injection payload must appear exactly once as its own argument. assert injection_payload in args assert list(args).count(injection_payload) == 1 +def test_get_srt_settings_args_uses_mounted_path(): + """Mounted srt settings should be used when the env var is present.""" + with patch.dict("os.environ", {"KAGENT_SRT_SETTINGS_PATH": "/config/srt-settings.json"}, clear=True): + args = _get_srt_settings_args() + + assert args == ["--settings", "/config/srt-settings.json"] + + +def test_get_srt_settings_args_requires_mounted_path(): + """Sandbox execution should require the mounted settings path.""" + with patch.dict("os.environ", {}, clear=True): + with pytest.raises(ValueError, match="KAGENT_SRT_SETTINGS_PATH is not set"): + _get_srt_settings_args() + + # --- Path traversal tests --- @@ -329,7 +366,11 @@ async def mock_exec(*args, **kwargs): } with ( - patch.dict("os.environ", env_overrides, clear=True), + patch.dict( + "os.environ", + {**env_overrides, "KAGENT_SRT_SETTINGS_PATH": "/config/srt-settings.json"}, + clear=True, + ), patch("asyncio.create_subprocess_exec", side_effect=mock_exec), ): await execute_command("echo hello", working_dir=tmp_path)