Skip to content

Commit 0eddee2

Browse files
committed
Merge branch 'main' into jetc/feat/a2a-v1-migration-a
2 parents 579d89f + 7fb3aa6 commit 0eddee2

201 files changed

Lines changed: 15152 additions & 1697 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ jobs:
8181
- name: Install agent-sandbox
8282
run: |
8383
kubectl apply -f "https://github.com/kubernetes-sigs/agent-sandbox/releases/download/${AGENT_SANDBOX_VERSION}/manifest.yaml"
84-
kubectl wait --for=condition=Established crd/sandboxes.agents.x-k8s.io --timeout=90s
84+
timeout 90s bash -c 'until [ "$(kubectl get crd sandboxes.agents.x-k8s.io -o jsonpath="{.status.conditions[?(@.type==\"Established\")].status}" 2>/dev/null)" = "True" ]; do sleep 1; done'
8585
kubectl rollout status deployment/agent-sandbox-controller -n agent-sandbox-system --timeout=120s
8686
kubectl wait --for=condition=Ready pod -l app=agent-sandbox-controller -n agent-sandbox-system --timeout=120s
8787

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ build-golang-adk-full: buildx-create
279279
.PHONY: build-skills-init
280280
build-skills-init: ## Build and push the skills-init image
281281
build-skills-init: buildx-create
282-
$(DOCKER_BUILDER) $(DOCKER_BUILD_ARGS) -t $(SKILLS_INIT_IMG) -f docker/skills-init/Dockerfile docker/skills-init
282+
$(DOCKER_BUILDER) $(DOCKER_BUILD_ARGS) -t $(SKILLS_INIT_IMG) -f docker/skills-init/Dockerfile ./go
283283
$(DOCKER_PUSH) $(SKILLS_INIT_IMG)
284284

285285
.PHONY: push

docker/skills-init/Dockerfile

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,39 @@
1-
### Stage 0: build krane
2-
FROM golang:1.26-alpine AS krane-builder
3-
4-
ENV KRANE_VERSION=v0.21.2
5-
WORKDIR /build
6-
7-
RUN apk add --no-cache git && \
8-
git clone --depth 1 --branch $KRANE_VERSION \
9-
https://github.com/google/go-containerregistry.git
10-
11-
WORKDIR /build/go-containerregistry/cmd/krane
12-
13-
RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /build/krane .
14-
1+
### Stage 0: build the skills-init Go binary
2+
ARG BASE_IMAGE_REGISTRY=cgr.dev
3+
ARG BUILDPLATFORM
4+
FROM --platform=$BUILDPLATFORM $BASE_IMAGE_REGISTRY/chainguard/go:latest AS builder
5+
ARG TARGETARCH
6+
ARG TARGETOS
7+
8+
WORKDIR /workspace
9+
10+
COPY go.mod go.sum ./
11+
RUN --mount=type=cache,target=/root/go/pkg/mod,rw \
12+
--mount=type=cache,target=/root/.cache/go-build,rw \
13+
go mod download
14+
15+
COPY api/ api/
16+
COPY core/ core/
17+
COPY adk/ adk/
18+
19+
ARG LDFLAGS
20+
RUN --mount=type=cache,target=/root/go/pkg/mod,rw \
21+
--mount=type=cache,target=/root/.cache/go-build,rw \
22+
CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \
23+
go build -a -trimpath -ldflags "$LDFLAGS" -o /skills-init ./core/cmd/skills-init
24+
25+
### Stage 1: runtime
1526
FROM alpine:3.23
1627

1728
ARG PYTHON_UID=1001
1829
ARG PYTHON_GID=1001
1930

20-
RUN apk upgrade --no-cache && apk add --no-cache git jq
21-
COPY --from=krane-builder /build/krane /usr/local/bin/krane
31+
# git is invoked by skills-init via exec.Command with an argv vector — never
32+
# through a shell — so the only attack surface here is git itself. OCI fetch
33+
# uses the in-process go-containerregistry library, so krane and jq are gone.
34+
RUN apk upgrade --no-cache && apk add --no-cache git openssh-client ca-certificates
35+
36+
COPY --from=builder /skills-init /usr/local/bin/skills-init
2237

2338
# Run as the same UID/GID as the main agent container (python user) so that
2439
# files written to the shared /skills volume are readable by the main container.
@@ -28,3 +43,5 @@ RUN addgroup -g ${PYTHON_GID} pythongroup && \
2843
adduser -u ${PYTHON_UID} -G pythongroup -s /bin/sh -D python
2944

3045
USER ${PYTHON_UID}:${PYTHON_GID}
46+
47+
ENTRYPOINT ["/usr/local/bin/skills-init"]
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Substrate AgentHarness Lifecycle
2+
3+
This branch should use a single ownership model for `runtime: substrate` harnesses.
4+
5+
## Ownership
6+
7+
- Platform/Helm owns `WorkerPool` capacity.
8+
- kagent owns the generated per-harness `ActorTemplate`.
9+
- kagent owns the per-harness actor lifecycle through `ate-api`.
10+
- Substrate owns the `WorkerPool` deployment and the `ActorTemplate` golden snapshot process.
11+
12+
kagent should not create or delete `WorkerPool` resources from the `AgentHarness` reconciler. A chart may optionally install a default `WorkerPool`, and the controller may use that default when `spec.substrate.workerPoolRef` is unset.
13+
14+
## Spec Shape
15+
16+
`AgentHarness.spec.substrate` should contain only harness-level inputs:
17+
18+
- `workerPoolRef`, optional; falls back to the configured controller default.
19+
- `snapshotsConfig`, optional; defaults to `gs://ate-snapshots/<namespace>/<name>`.
20+
- `workloadImage`, optional.
21+
- exactly one of `gatewayToken` or `gatewayTokenSecretRef`.
22+
23+
There is no `actorTemplateRef`. kagent always generates the `ActorTemplate`, so adopting an external template is not part of the workflow.
24+
25+
## Status
26+
27+
Use top-level Kubernetes conditions for progress:
28+
29+
- `Accepted`
30+
- `ActorTemplateReady`
31+
- `ActorReady`
32+
- `Ready`
33+
34+
`Ready` is the aggregate condition. Specific blockers should be reflected in `reason` and `message`.
35+
36+
Do not store ownership booleans or cleanup markers in annotations or status. Ownership is deterministic:
37+
38+
- `WorkerPool` is external.
39+
- generated `ActorTemplate` is owned by the `AgentHarness` through an owner reference.
40+
41+
## Reconcile
42+
43+
The substrate reconcile path should:
44+
45+
1. Resolve `workerPoolRef` from spec or controller default.
46+
2. Verify the `WorkerPool` exists.
47+
3. Create or update the generated `ActorTemplate` with an owner reference to the `AgentHarness`.
48+
4. Wait for `ActorTemplate.status.phase == Ready`.
49+
5. Create or resume the actor through `ate-api`.
50+
6. Mark `ActorReady` and aggregate `Ready`.
51+
52+
## Delete
53+
54+
The finalizer should:
55+
56+
1. Delete the harness actor recorded in `status.backendRef.id`.
57+
2. Read the generated `ActorTemplate` and delete the golden actor identified by its `status.goldenActorID`, if present.
58+
3. Remove the finalizer.
59+
60+
Kubernetes garbage collection deletes the generated `ActorTemplate` through the owner reference. kagent does not delete `WorkerPool`.

examples/modelconfig-with-tls.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,5 +386,5 @@ roleRef:
386386
# 6. Troubleshooting:
387387
# - See https://kagent.dev/docs for detailed debugging steps
388388
# - Check agent logs: kubectl logs deployment/agent-<name>
389-
# - Verify Secret is mounted: kubectl exec deployment/agent-<name> -- ls /etc/ssl/certs/custom/
390-
# - Test certificate: kubectl exec deployment/agent-<name> -- openssl x509 -in /etc/ssl/certs/custom/ca.crt -text -noout
389+
# - Verify Secret is mounted: kubectl exec deployment/agent-<name> -- ls /etc/ssl/certs/custom/corp-ca/
390+
# - Test certificate: kubectl exec deployment/agent-<name> -- openssl x509 -in /etc/ssl/certs/custom/corp-ca/ca.crt -text -noout
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# OpenClaw on Agent Substrate
2+
3+
## 1. Install Substrate on your Kind cluster
4+
5+
You can clone the kagent fork of substrate [here](https://github.com/kagent-dev/substrate).
6+
7+
These instructions use a Kind cluster called `kind` (`KIND_CLUSTER_NAME=kind`).
8+
9+
```bash
10+
cd substrate
11+
12+
./hack/create-kind-cluster.sh
13+
./hack/install-ate-kind.sh --deploy-ate-system
14+
```
15+
16+
`--deploy-ate-system` installs the **control plane only** (ate-api, ate-controller, atelet, atenet, …). Your registry catalog will show `ateapi-*`, `atelet-*`, etc., but **not** ateom until you build it.
17+
18+
Build and push **ateom-gvisor** (required for the WorkerPool `ateomImage`):
19+
20+
```bash
21+
# build the ateom-gvisor image from the substrate repo root
22+
export KO_DOCKER_REPO=localhost:5001
23+
export KO_DEFAULTPLATFORMS=linux/$(go env GOARCH)
24+
./hack/run-tool.sh ko build -B ./cmd/ateom-gvisor
25+
```
26+
27+
## 2. kagent AgentHarness with substrate runtime
28+
29+
kagent generates a per-harness `ActorTemplate` and uses an existing `WorkerPool`.
30+
31+
Install kagent (Substrate must already be running in the cluster):
32+
33+
```bash
34+
export KIND_CLUSTER_NAME=kind
35+
make helm-install KAGENT_HELM_EXTRA_ARGS="\
36+
--set controller.substrate.enabled=true \
37+
--set controller.substrate.ateApiEndpoint=dns:///api.ate-system.svc:443 \
38+
--set controller.substrate.ateApiInsecure=true \
39+
--set substrateWorkerPool.create=true \
40+
--set substrateWorkerPool.ateomImage=localhost:5001/ateom-gvisor:latest"
41+
```
42+
43+
The generated `ActorTemplate` uses `controller.substrate.pauseImage`, `controller.substrate.runscAMD64URL`, `controller.substrate.runscAMD64SHA256`, `controller.substrate.runscARM64URL`, and `controller.substrate.runscARM64SHA256` from the Helm values. Override them with `--set` or a values file when you need to pin a different gVisor build.
44+
45+
Create a harness. If `snapshotsConfig` is omitted, kagent defaults it to `gs://ate-snapshots/<namespace>/<agentharnessname>`.
46+
47+
- **Worker pool** — reference an existing pool (`workerPoolRef`) or configure a controller default WorkerPool
48+
- **Gateway token** — required per harness with either `gatewayToken` or `gatewayTokenSecretRef`
49+
50+
```yaml
51+
apiVersion: kagent.dev/v1alpha2
52+
kind: AgentHarness
53+
metadata:
54+
name: peterj-claw
55+
namespace: kagent
56+
spec:
57+
runtime: substrate
58+
backend: openclaw
59+
description: OpenClaw on Agent Substrate
60+
modelConfigRef: default-model-config
61+
substrate:
62+
# Optional: defaults to gs://ate-snapshots/kagent/peterj-claw
63+
# snapshotsConfig:
64+
# location: gs://ate-snapshots/kagent/peterj-claw
65+
66+
# Required unless the controller has a default WorkerPool configured.
67+
workerPoolRef:
68+
name: kagent-default
69+
70+
# Required: configure the OpenClaw gateway token for this harness.
71+
# Use either gatewayToken or gatewayTokenSecretRef. The Secret must contain key "token".
72+
gatewayToken: test-token
73+
74+
# gatewayTokenSecretRef:
75+
# name: openclaw-gateway-token
76+
77+
# Optional: override the sandbox image used in the ActorTemplate (must be digest-pinned).
78+
# workloadImage: ghcr.io/kagent-dev/nemoclaw/sandbox-base@sha256:d52bee415dc4c0dba7164f9eabe727574c056d4f211781f20af249707883a3b4
79+
```
80+
81+
kagent creates an `ActorTemplate` that looks roughly like this:
82+
83+
```yaml
84+
apiVersion: ate.dev/v1alpha1
85+
kind: ActorTemplate
86+
metadata:
87+
name: peterj-claw
88+
namespace: kagent
89+
labels:
90+
app.kubernetes.io/managed-by: kagent
91+
kagent.dev/agent-harness: peterj-claw
92+
spec:
93+
pauseImage: gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da
94+
runsc:
95+
amd64:
96+
url: gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc
97+
sha256Hash: a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63
98+
arm64:
99+
url: gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc
100+
sha256Hash: 1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9
101+
workerPoolRef:
102+
name: kagent-default
103+
namespace: kagent
104+
snapshotsConfig:
105+
location: gs://ate-snapshots/kagent/peterj-claw
106+
containers:
107+
- name: openclaw
108+
image: ghcr.io/kagent-dev/nemoclaw/sandbox-base@sha256:d52bee415dc4c0dba7164f9eabe727574c056d4f211781f20af249707883a3b4
109+
ports:
110+
- containerPort: 80
111+
command:
112+
- /bin/sh
113+
- -c
114+
- |
115+
# Generated by kagent:
116+
# 1. writes ~/.openclaw/openclaw.json from modelConfigRef/channels/gateway token
117+
# 2. configures gateway.controlUi.basePath for the kagent proxy path
118+
# 3. starts `openclaw gateway run --port 80 --allow-unconfigured`
119+
# 4. waits for the gateway and tails the log
120+
env:
121+
- name: HOME
122+
value: /root
123+
```
124+
125+
The generated `command` contains a base64-encoded `openclaw.json`, so the live object will be more verbose than the abbreviated example above. `pauseImage`, runsc URLs and hashes, and the default workload image come from controller/Helm configuration unless overridden on the `AgentHarness`; the gateway token comes from `spec.substrate.gatewayToken` or `gatewayTokenSecretRef`. kagent also sets `gateway.controlUi.basePath` to `/api/agentharnesses/<namespace>/<name>/gateway` so OpenClaw serves the Control UI under the same path kagent proxies.
126+
127+
When `modelConfigRef` or `spec.channels` are set, credentials are **not** copied into the ActorTemplate or `openclaw.json` as plaintext. kagent writes `valueFrom.secretKeyRef` (or inline `value` for harness inline tokens) on the ActorTemplate container env; Substrate `ate-api` resolves those refs at actor resume. In `openclaw.json`, kagent uses OpenClaw [env SecretRefs](https://docs.openclaw.ai/gateway/secrets) (`{source:"env",provider:"default",id:"<VAR>"}`) for `models.providers.*.apiKey`, `channels.telegram.accounts.*.botToken`, and `channels.slack.accounts.*.botToken` / `appToken`. When keys change, update the Secret and recreate the ActorTemplate golden snapshot.
128+
129+
With `controller.substrate.enabled=true`, the kagent Helm chart installs a namespace-scoped Role and RoleBinding so `ate-api-server` (in `ate-system` by default) can `get` Secrets and ConfigMaps referenced by generated ActorTemplates. Harnesses in other namespaces need that namespace listed in `rbac.namespaces` (or a matching RoleBinding applied manually).
130+
131+
Port-forward the UI:
132+
133+
```bash
134+
kubectl port-forward -n kagent svc/kagent-ui 8001:8080
135+
```
136+
137+
Navigate to the deployed agent harness. If the OpenClaw Control UI asks for a gateway connection, use:
138+
139+
- Gateway URL: `http://localhost:8001/api/agentharnesses/kagent/peterj-claw/gateway/`
140+
- Gateway token: `test-token`
141+
142+
The gateway URL must include the trailing slash. The token is the value configured in `spec.substrate.gatewayToken`, or the Secret value referenced by `spec.substrate.gatewayTokenSecretRef`; enter it in the token/credentials field rather than relying on a `token` query parameter.
143+
144+
kagent proxies UI traffic to the actor OpenClaw gateway through Substrate's **atenet-router** (Envoy) using the actor `Host` header (`<actor-id>.actors.resources.substrate.ate.dev`). The default router URL is `http://atenet-router.ate-system.svc:80`; override with `controller.substrate.atenetRouterURL` when needed.

go/.dockerignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Build artifacts
22
./bin/
33
bin/
4+
.cache/
5+
**/.cache/
46

57
# Test files
68
*_test.go
@@ -45,4 +47,4 @@ PROJECT
4547
hack/
4648

4749
# Makefiles
48-
Makefile
50+
Makefile

go/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ setup-envtest: envtest ## Download the binaries required for ENVTEST in the loca
177177
exit 1; \
178178
}
179179

180+
.PHONY: envtest-path
181+
envtest-path: envtest ## Print the path to the envtest binaries (downloads them if absent).
182+
@$(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path
183+
180184
.PHONY: envtest
181185
envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
182186
$(ENVTEST): $(LOCALBIN)

0 commit comments

Comments
 (0)