Skip to content
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Handler entry tests: `cdk/test/handlers/orchestrate-task.test.ts`, `create-task.
- **Editing on `main` directly** — ALWAYS create a worktree with a feature branch for changes, even trivial ones. Main should stay clean; all work flows through worktree → branch → PR → merge.
- **Git worktrees** — Always **`git fetch origin main`** before creating a new worktree to ensure you branch from the latest remote state. `node_modules/` and `agent/.venv/` are per-tree (not shared). Run **`mise run install`** in each new worktree before building. All CDK path references (`__dirname`-relative) and mise `config_roots` resolve correctly without extra setup.
- **Bumping Cedar engines in isolation** — `cedarpy` (Python, `agent/pyproject.toml`) and `@cedar-policy/cedar-wasm` (TypeScript, `cdk/package.json`) are two language bindings over the same Cedar Rust core. They MUST move together; even patch-version drift between bindings can yield divergent `(decision, matching_rule_ids)` on the same `(policy, input)` — invisible to per-side unit tests, caught (only) by `contracts/cedar-parity/` golden fixtures in CI. If you bump one engine you MUST bump the other to a tested-compatible version AND refresh the parity fixtures in the same commit. Both pins are EXACT (no `^`/`~`). See `docs/design/CEDAR_HITL_GATES.md` §15.6 (decision #23) and the parity-contract banner in `mise.toml`. **DO NOT** accept upstream's "Update branch" or auto-merge suggestions on cedarpy without verifying parity with cedar-wasm.
- **Dropping outbound SDK solution attribution on a new AWS client (#319)** — every outbound AWS API call carries two `User-Agent` segments: `app/uksb-wt64nei4u6#{stack}` and `md/uksb-wt64nei4u6#{component}`. The `app/` segment is **SDK-native** — it comes from the `AWS_SDK_UA_APP_ID` env var (CDK sets it on every Lambda via `SolutionUaAspect`, plus the AgentCore runtime and ECS container), so new clients get it for free as long as they run on a surface where CDK threads that env. The `md/` segment is the per-surface label and must be carried explicitly: in `agent/src/` build clients via `aws_session.tenant_client`/`tenant_resource` (tenant data) or `aws_session.platform_client` (ambient-chain calls) — never naked `boto3.client(...)`; in `cdk/src/handlers/` spread `...abcaUserAgent()` from `shared/ua.ts` into the client constructor; in `cli/src/` spread `...abcaUserAgent()` from `cli/src/ua.ts`. The three `ua` modules (`agent/src/ua.py`, `cdk/src/handlers/shared/ua.ts`, `cli/src/ua.ts`) MUST stay identical in solution id, wire format, and sanitization. (Customer opt-out: `-c sdkUaAppId=''` at deploy, or export `AWS_SDK_UA_APP_ID=''` for the CLI.)

### Tech stack

Expand Down
67 changes: 56 additions & 11 deletions agent/src/aws_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def _build_scoped_session(role_arn: str) -> Any:
)
from botocore.session import get_session as get_botocore_session

import ua

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
task_id = _tags.get("task_id", "")
# Role session name must be <=64 chars and match [\w+=,.@-]. task_id is a
Expand All @@ -138,8 +140,9 @@ def _build_scoped_session(role_arn: str) -> Any:

# A dedicated STS client built from the *ambient* (compute-role) chain.
# This is the role-chaining caller; the assumed SessionRole credentials it
# returns must NOT be used to build it, or refresh would recurse.
sts_client = boto3.client("sts", region_name=region)
# returns must NOT be used to build it, or refresh would recurse. Carries
# the static md/ UA segment so the assume-role call is attributed too.
sts_client = boto3.client("sts", region_name=region, config=ua.client_config())

def _refresh() -> dict[str, str]:
resp = sts_client.assume_role(
Expand All @@ -158,6 +161,10 @@ def _refresh() -> dict[str, str]:
}

botocore_session = get_botocore_session()
# Static md/ solution-attribution segment at the session level: it
# propagates to every client AND resource derived from this session, so
# all tenant-data calls carry it. (#319)
botocore_session.user_agent_extra = ua.static_user_agent_extra()
# Deferred: the first assume_role happens on first credential use, not now,
# so a transient STS hiccup at startup doesn't crash the agent before it
# has even begun.
Expand Down Expand Up @@ -209,10 +216,19 @@ def get_session() -> Any:
) from exc
else:
# Scoping not requested (local/dev/tests, or pre-provisioning):
# plain ambient session, behaviorally identical to pre-feature code.
_session = boto3.Session(
region_name=os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
)
# plain ambient session. Built from an explicit botocore session so
# the static md/ solution-attribution segment rides every derived
# client/resource (propagation requires the botocore session). (#319)
from botocore.session import get_session as get_botocore_session

import ua

botocore_session = get_botocore_session()
botocore_session.user_agent_extra = ua.static_user_agent_extra()
region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
if region:
botocore_session.set_config_variable("region", region)
_session = boto3.Session(botocore_session=botocore_session)
_scoped = False
return _session

Expand All @@ -224,20 +240,35 @@ def is_scoped() -> bool:
return bool(_scoped)


def _merge_ua_config(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Return ``kwargs`` with the static md/ UA merged into any ``config``.

    Three cases, keyed on the caller's ``config`` entry:

    * absent / ``None`` -- a ``Config`` carrying only the md/ segment is
      supplied;
    * present without its own ``user_agent_extra`` -- the caller's options are
      kept and the md/ segment is merged in;
    * present *with* a ``user_agent_extra`` -- the md/ segment is appended to
      the caller's value instead of replacing it. ``Config.merge`` lets the
      argument's options win, so a plain merge would silently drop the
      caller's own User-Agent suffix.

    Args:
        kwargs: Keyword arguments destined for ``boto3.client`` /
            ``boto3.resource``. Mutated in place (callers pass their own
            freshly-collected ``**kwargs`` dict).

    Returns:
        The same ``kwargs`` dict, with ``config`` set. (#319)
    """
    import ua

    ua_config = ua.client_config()
    existing = kwargs.get("config")
    if existing is None:
        kwargs["config"] = ua_config
        return kwargs

    md_segment = ua.static_user_agent_extra()
    caller_extra = getattr(existing, "user_agent_extra", None)
    if isinstance(caller_extra, str) and caller_extra:
        if md_segment in caller_extra.split():
            # Caller already carries the attribution; don't emit it twice.
            return kwargs
        from botocore.config import Config

        # Keep the caller's suffix and add ours after it.
        ua_config = Config(user_agent_extra=f"{caller_extra} {md_segment}")

    kwargs["config"] = existing.merge(ua_config)
    return kwargs


def tenant_client(service_name: str, **kwargs: Any) -> Any:
"""boto3 client for tenant data.

When the per-task SessionRole is configured, the client is built from the
tag-scoped, refreshable session. Otherwise it delegates directly to
``boto3.client`` — behaviorally identical to the pre-feature code path
(and transparent to callers/tests that mock ``boto3.client``).
tag-scoped, refreshable session (which already carries the static md/ UA at
the session level). Otherwise it delegates directly to ``boto3.client`` —
behaviorally identical to the pre-feature code path (transparent to
callers/tests that mock ``boto3.client``) but with the md/ UA merged in.
"""
session = get_session()
if is_scoped():
return session.client(service_name, **kwargs)
import boto3

return boto3.client(service_name, **kwargs)
return boto3.client(service_name, **_merge_ua_config(kwargs))


def tenant_resource(service_name: str, **kwargs: Any) -> Any:
Expand All @@ -247,4 +278,18 @@ def tenant_resource(service_name: str, **kwargs: Any) -> Any:
return session.resource(service_name, **kwargs)
import boto3

return boto3.resource(service_name, **kwargs)
return boto3.resource(service_name, **_merge_ua_config(kwargs))


def platform_client(service_name: str, **kwargs: Any) -> Any:
    """Build a boto3 client for **platform** (non-tenant) calls.

    Intended for call sites that deliberately bypass the scoped session
    (CloudWatch Logs, Secrets Manager, bedrock-agentcore). Those talk to
    platform resources rather than tenant data, so they run on the compute
    role's ambient credential chain, yet they must still carry the static md/
    solution-attribution segment. Any ``config`` the caller passes is merged
    with the UA config rather than discarded. (#319)
    """
    import boto3

    attributed_kwargs = _merge_ua_config(kwargs)
    return boto3.client(service_name, **attributed_kwargs)
9 changes: 5 additions & 4 deletions agent/src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def resolve_github_token() -> str:
return cached
secret_arn = os.environ.get("GITHUB_TOKEN_SECRET_ARN")
if secret_arn:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("secretsmanager", region_name=region)
client = platform_client("secretsmanager", region_name=region)
resp = client.get_secret_value(SecretId=secret_arn)
token = resp["SecretString"]
# Cache in env so downstream tools (git, gh CLI) work unchanged
Expand Down Expand Up @@ -101,14 +101,15 @@ def resolve_linear_api_token(channel_metadata: dict[str, str] | None = None) ->
import json
from datetime import datetime, timedelta

import boto3
from botocore.exceptions import BotoCoreError, ClientError
except ImportError as e:
log("WARN", f"resolve_linear_api_token: boto3 unavailable ({e}); skipping")
# nosemgrep: py-silent-success-masking -- optional Linear MCP; boto3 unavailable
return ""

sm = boto3.client("secretsmanager", region_name=region)
from aws_session import platform_client

sm = platform_client("secretsmanager", region_name=region)

def _fetch_token() -> dict | None:
"""Fetch + parse the per-workspace OAuth secret.
Expand Down
4 changes: 2 additions & 2 deletions agent/src/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ def _get_client():
global _client
if _client is not None:
return _client
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
if not region:
raise ValueError("AWS_REGION or AWS_DEFAULT_REGION must be set for memory operations")
_client = boto3.client("bedrock-agentcore", region_name=region)
_client = platform_client("bedrock-agentcore", region_name=region)
return _client


Expand Down
8 changes: 4 additions & 4 deletions agent/src/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,10 @@ def _warn_cw_write_blocking(log_group: str, task_id: str | None, stamped: str) -
covers both writers.
"""
try:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)

stream = f"server_warn/{task_id or 'server'}"
with _ctx_for_debug.suppress(client.exceptions.ResourceAlreadyExistsException):
Expand All @@ -193,10 +193,10 @@ def _warn_cw_write_blocking(log_group: str, task_id: str | None, stamped: str) -
def _debug_cw_write_blocking(log_group: str, task_id: str | None, stamped: str) -> None:
"""Blocking CloudWatch write — only called from a background thread."""
try:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)

stream = f"server_debug/{task_id or 'server'}"
with _ctx_for_debug.suppress(client.exceptions.ResourceAlreadyExistsException):
Expand Down
4 changes: 2 additions & 2 deletions agent/src/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ def _log_error_cw_blocking(log_group: str, task_id: str | None, stamped: str) ->
fire on the absence of the expected stream, not on this helper).
"""
try:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)
stream = f"agent_error/{task_id or 'unknown'}"
with contextlib.suppress(client.exceptions.ResourceAlreadyExistsException):
client.create_log_stream(logGroupName=log_group, logStreamName=stream)
Expand Down
8 changes: 4 additions & 4 deletions agent/src/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ def _emit_metrics_to_cloudwatch(json_payload: dict) -> None:
try:
import contextlib

import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)

task_id = json_payload.get("task_id", "unknown")
log_stream = f"metrics/{task_id}"
Expand Down Expand Up @@ -164,10 +164,10 @@ def _ensure_client(self):

import contextlib

import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
self._client = boto3.client("logs", region_name=region)
self._client = platform_client("logs", region_name=region)

log_stream = f"trajectory/{self._task_id}"
with contextlib.suppress(self._client.exceptions.ResourceAlreadyExistsException):
Expand Down
76 changes: 76 additions & 0 deletions agent/src/ua.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Outbound AWS SDK User-Agent solution attribution (#319).

Every AWS API call made by the agent carries two ABCA solution-attribution
segments in the ``User-Agent`` header:

app/uksb-wt64nei4u6#{STACKNAME} <- native AWS_SDK_UA_APP_ID env (no code here)
md/uksb-wt64nei4u6#agent <- static, baked once at construction

**The ``app/`` segment is emitted by the SDK itself.** Both botocore and the
JS v3 SDK read the ``AWS_SDK_UA_APP_ID`` environment variable natively and
render it as ``app/{value}`` (botocore ``configprovider.py`` maps it to the
``user_agent_appid`` config; the value charset *includes* ``#``, so the
``uksb-wt64nei4u6#{stack}`` form survives verbatim). CDK sets that env var on
every Lambda / AgentCore runtime / ECS container, so this module contributes
**nothing** to ``app/`` — and a customer can suppress it by setting the env
var to the empty string. (This is the key simplification over the original
``/``-separated design, which had to bypass the native field because ``/`` is
not a legal app-id character. Using ``#`` keeps it native.)

This module owns only the **static ``md/`` segment** — a stable
per-component label baked once via ``user_agent_extra`` at session/client
construction. There is intentionally no per-request trace handle and no
event/middleware machinery: connection pools are never re-pinned, and
request correlation is owned by X-Ray / structured-log request ids (#245),
not the User-Agent.

The TypeScript counterparts are ``cdk/src/handlers/shared/ua.ts`` and
``cli/src/ua.ts`` — the solution id, wire format, and sanitization rules
must stay identical across all three.
"""

from __future__ import annotations

import string
from typing import Any

# AWS solution-attribution id for ABCA. Also appears (deploy-time
# counterpart, #292) in the CloudFormation stack description in
# ``cdk/src/main.ts`` and in the TS mirrors of this module. Per-surface
# literal by design.
# Rendered by ``static_user_agent_extra()`` as the part of the ``md/``
# segment before the ``#`` separator.
SOLUTION_ID = "uksb-wt64nei4u6"

# Stable per-component label: this surface IS the Python agent runtime.
# Passed through ``sanitize_ua_value()`` and rendered after the ``#``
# separator of the ``md/`` segment.
COMPONENT = "agent"

# Characters allowed in a User-Agent product token (the RFC 7230 token
# alphabet), with '#' intentionally left out: '#' is this scheme's
# structural separator, so excluding it stops a hostile component/label
# value from injecting extra segments.
_ALLOWED = frozenset(string.ascii_letters + string.digits + "!$%&'*+-.^_`|~")


def sanitize_ua_value(raw: str) -> str:
    """Return ``raw`` with each char outside the UA-token alphabet turned into ``-``.

    Non-ASCII characters are outside the alphabet and are replaced too. The
    result always has the same length as the input.
    """
    cleaned = []
    for ch in raw:
        cleaned.append(ch if ch in _ALLOWED else "-")
    return "".join(cleaned)


def static_user_agent_extra() -> str:
    """Build the static ``md/`` segment baked in at client/session construction.

    The result is always ``md/{SOLUTION_ID}#{COMPONENT}``, with the component
    label sanitized first. The ``app/`` segment is not produced here: the SDK
    contributes it on its own from ``AWS_SDK_UA_APP_ID``.
    """
    component_label = sanitize_ua_value(COMPONENT)
    return f"md/{SOLUTION_ID}#{component_label}"


def client_config() -> Any:
    """Return a ``botocore.config.Config`` holding only the static ``md/`` segment.

    Meant for direct ``boto3.client(...)`` call sites that are not built from
    a shared session (see ``aws_session.platform_client``). It is designed to
    be merged: a caller that already has a ``Config`` should combine the two
    with ``.merge(...)``.
    """
    from botocore.config import Config

    md_segment = static_user_agent_extra()
    return Config(user_agent_extra=md_segment)
55 changes: 55 additions & 0 deletions agent/tests/test_aws_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,58 @@ def test_overlong_value_truncated_to_256(self, monkeypatch):
assert len(tags["repo"]) == _MAX_TAG_VALUE_LEN == 256
# Untruncated values are passed through unchanged.
assert tags["user_id"] == "u-1"


class TestSolutionUserAgent:
    """Verify the static md/ solution-attribution segment (#319) reaches every client."""

    def test_platform_client_carries_md_segment(self, monkeypatch):
        """``platform_client`` hands boto3 a config carrying the md/ segment."""
        monkeypatch.setenv("AWS_REGION", "us-east-1")
        from aws_session import platform_client

        with patch("boto3.client", return_value=MagicMock(name="logs")) as client_factory:
            platform_client("logs", region_name="us-east-1")

        passed_config = client_factory.call_args.kwargs["config"]
        assert passed_config.user_agent_extra == "md/uksb-wt64nei4u6#agent"

    def test_unscoped_tenant_client_carries_md_segment(self, monkeypatch):
        """With no session role configured, ``tenant_client`` still attributes."""
        # SESSION_ROLE_ARN is not set here, so the unscoped branch runs and
        # the call is delegated to boto3.client.
        monkeypatch.setenv("AWS_REGION", "us-east-1")
        from aws_session import tenant_client

        with patch("boto3.client", return_value=MagicMock(name="ddb")) as client_factory:
            tenant_client("dynamodb")

        passed_config = client_factory.call_args.kwargs["config"]
        assert passed_config.user_agent_extra == "md/uksb-wt64nei4u6#agent"

    def test_caller_config_is_merged_not_overwritten(self, monkeypatch):
        """A caller-supplied ``Config`` keeps its own settings after the merge."""
        from botocore.config import Config

        monkeypatch.setenv("AWS_REGION", "us-east-1")
        from aws_session import platform_client

        caller_config = Config(read_timeout=7)
        with patch("boto3.client", return_value=MagicMock()) as client_factory:
            platform_client("logs", config=caller_config)

        passed_config = client_factory.call_args.kwargs["config"]
        # The caller's timeout and the md/ segment must both be present.
        assert passed_config.read_timeout == 7
        assert passed_config.user_agent_extra == "md/uksb-wt64nei4u6#agent"

    def test_scoped_session_sets_session_level_extra(self, monkeypatch):
        """The scoped path stamps the md/ segment on the botocore session itself."""
        monkeypatch.setenv("AWS_REGION", "us-east-1")
        monkeypatch.setenv(SESSION_ROLE_ARN_ENV, "arn:aws:iam::111122223333:role/abca-session")
        configure_session(user_id="u-1", repo="owner/repo", task_id="t-abc")

        stub_botocore_session = MagicMock(name="botocore-session")
        with (
            patch("boto3.client", return_value=MagicMock(name="sts")),
            patch("boto3.Session", return_value=MagicMock(name="boto3-session")),
            patch("botocore.credentials.DeferredRefreshableCredentials"),
            patch("botocore.session.get_session", return_value=stub_botocore_session),
        ):
            get_session()

        assert stub_botocore_session.user_agent_extra == "md/uksb-wt64nei4u6#agent"
Loading
Loading