Skip to content

Commit 0382d47

Browse files
Merge branch 'main' into feat/eslint-10-upgrade
2 parents 92e6911 + 1efe4a1 commit 0382d47

63 files changed

Lines changed: 11595 additions & 134 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

agent/src/attachments.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
"""Attachment download and integrity verification.
2+
3+
Downloads attachments from S3 using version-pinned reads and verifies
4+
SHA-256 checksums against the orchestrator-provided values. Files are
5+
placed in a workspace subdirectory for the agent to reference.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import hashlib
11+
import os
12+
from pathlib import Path
13+
from typing import Literal
14+
from urllib.parse import urlparse
15+
16+
from pydantic import BaseModel, ConfigDict
17+
18+
from shell import log
19+
20+
ATTACHMENTS_DIR = ".attachments"
21+
22+
23+
class PreparedAttachment(BaseModel):
24+
"""An attachment downloaded to the local filesystem and verified."""
25+
26+
model_config = ConfigDict(frozen=True, extra="forbid")
27+
28+
attachment_id: str
29+
type: Literal["image", "file", "url"]
30+
content_type: str
31+
filename: str
32+
local_path: str
33+
size_bytes: int
34+
token_estimate: int | None = None
35+
36+
37+
def download_attachments(
38+
attachments: list,
39+
workspace: str,
40+
) -> list[PreparedAttachment]:
41+
"""Download all attachments from S3 and verify integrity.
42+
43+
Args:
44+
attachments: List of AttachmentConfig models from TaskConfig.
45+
workspace: The agent workspace root (e.g., /workspace).
46+
47+
Returns:
48+
List of PreparedAttachment with local file paths.
49+
50+
Raises:
51+
RuntimeError: If any attachment fails download or integrity check.
52+
"""
53+
if not attachments:
54+
return []
55+
56+
import boto3
57+
58+
attachments_dir = Path(workspace) / ATTACHMENTS_DIR
59+
attachments_dir.mkdir(parents=True, exist_ok=True)
60+
61+
s3_client = boto3.client("s3")
62+
prepared: list[PreparedAttachment] = []
63+
64+
try:
65+
for att in attachments:
66+
local_path = _download_single(att, attachments_dir, s3_client)
67+
prepared.append(
68+
PreparedAttachment(
69+
attachment_id=att.attachment_id,
70+
type=att.type,
71+
content_type=att.content_type,
72+
filename=att.filename,
73+
local_path=str(local_path),
74+
size_bytes=att.size_bytes,
75+
token_estimate=att.token_estimate,
76+
)
77+
)
78+
except Exception:
79+
import shutil
80+
81+
shutil.rmtree(attachments_dir, ignore_errors=True)
82+
raise
83+
84+
log("TASK", f"Downloaded {len(prepared)} attachment(s) to {attachments_dir}")
85+
return prepared
86+
87+
88+
def _download_single(att, attachments_dir: Path, s3_client) -> Path:
89+
"""Download a single attachment and verify its SHA-256 checksum."""
90+
# Parse s3_uri (s3://bucket/key)
91+
parsed = urlparse(att.s3_uri)
92+
bucket = parsed.netloc
93+
key = parsed.path.lstrip("/")
94+
95+
# Unique subdirectory per attachment to avoid filename collisions
96+
dest_dir = attachments_dir / att.attachment_id
97+
dest_dir.mkdir(parents=True, exist_ok=True)
98+
local_path = dest_dir / att.filename
99+
100+
log(
101+
"TASK",
102+
f"Downloading attachment '{att.filename}' "
103+
f"(s3://{bucket}/{key}, version={att.s3_version_id})",
104+
)
105+
106+
# Download with pinned VersionId to prevent TOCTOU
107+
response = s3_client.get_object(
108+
Bucket=bucket,
109+
Key=key,
110+
VersionId=att.s3_version_id,
111+
)
112+
content = response["Body"].read()
113+
114+
# Verify SHA-256 integrity
115+
actual_checksum = hashlib.sha256(content).hexdigest()
116+
if actual_checksum != att.checksum_sha256:
117+
raise RuntimeError(
118+
f"Attachment '{att.filename}' integrity check failed: "
119+
f"expected SHA-256 {att.checksum_sha256}, got {actual_checksum}. "
120+
f"The file may have been tampered with."
121+
)
122+
123+
# Verify size matches
124+
if len(content) != att.size_bytes:
125+
raise RuntimeError(
126+
f"Attachment '{att.filename}' size mismatch: "
127+
f"expected {att.size_bytes} bytes, got {len(content)} bytes."
128+
)
129+
130+
# Write to local filesystem
131+
local_path.write_bytes(content)
132+
os.chmod(str(local_path), 0o444) # Read-only
133+
134+
log("TASK", f" Verified: {att.filename} ({len(content)} bytes, SHA-256 OK)")
135+
return local_path

agent/src/config.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import sys
55
import uuid
66

7-
from models import TaskConfig, TaskType
7+
from models import AttachmentConfig, TaskConfig, TaskType
88
from shell import log
99

1010
AGENT_WORKSPACE = os.environ.get("AGENT_WORKSPACE", "/workspace")
@@ -114,6 +114,7 @@ def build_config(
114114
initial_approvals: list[str] | None = None,
115115
initial_approval_gate_count: int = 0,
116116
approval_gate_cap: int | None = None,
117+
attachments: list[dict] | None = None,
117118
) -> TaskConfig:
118119
"""Build and validate configuration from explicit parameters.
119120
@@ -149,6 +150,17 @@ def build_config(
149150
if errors:
150151
raise ValueError("; ".join(errors))
151152

153+
# Validate attachment descriptors into typed models (Pydantic validation
154+
# surfaces schema mismatches between the orchestrator and agent early).
155+
validated_attachments: list[AttachmentConfig] = []
156+
if attachments:
157+
for i, raw_att in enumerate(attachments):
158+
try:
159+
validated_attachments.append(AttachmentConfig.model_validate(raw_att))
160+
except Exception as e:
161+
log("ERROR", f"Attachment[{i}] validation failed: {e}")
162+
raise ValueError(f"Attachment[{i}] validation failed: {e}") from e
163+
152164
return TaskConfig(
153165
repo_url=resolved_repo_url,
154166
issue_number=resolved_issue_number,
@@ -172,6 +184,7 @@ def build_config(
172184
initial_approvals=initial_approvals or [],
173185
initial_approval_gate_count=initial_approval_gate_count,
174186
approval_gate_cap=approval_gate_cap,
187+
attachments=validated_attachments,
175188
)
176189

177190

agent/src/models.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,39 @@ class MemoryContext(BaseModel):
6161
# (see cdk/src/handlers/shared/context-hydration.ts).
6262
SUPPORTED_HYDRATED_CONTEXT_VERSION = 1
6363

64+
# Attachment types — mirrors AttachmentType in cdk/src/handlers/shared/types.ts.
65+
AttachmentType = Literal["image", "file", "url"]
66+
67+
68+
class AttachmentConfig(BaseModel):
69+
"""Attachment descriptor from the orchestrator — mirrors AgentAttachmentPayload in types.ts."""
70+
71+
model_config = ConfigDict(frozen=True, extra="forbid")
72+
73+
attachment_id: str
74+
type: AttachmentType
75+
content_type: str
76+
filename: str
77+
s3_uri: str
78+
s3_version_id: str
79+
size_bytes: int
80+
source_url: str | None = None
81+
token_estimate: int | None = None
82+
checksum_sha256: str
83+
84+
@model_validator(mode="after")
85+
def _validate_integrity_fields(self) -> Self:
86+
if not self.s3_version_id:
87+
raise ValueError("s3_version_id is required for integrity verification")
88+
if not self.checksum_sha256:
89+
raise ValueError("checksum_sha256 is required for integrity verification")
90+
# checksum must be lowercase hex (SHA-256 = 64 hex chars)
91+
if len(self.checksum_sha256) != 64 or not all(
92+
c in "0123456789abcdef" for c in self.checksum_sha256
93+
):
94+
raise ValueError("checksum_sha256 must be a 64-character lowercase hex string")
95+
return self
96+
6497

6598
class HydratedContext(BaseModel):
6699
"""Orchestrator context JSON — keep in sync with HydratedContext in context-hydration.ts."""
@@ -150,6 +183,9 @@ class TaskConfig(BaseModel):
150183
approval_gate_cap: int | None = None
151184
issue: GitHubIssue | None = None
152185
base_branch: str | None = None
186+
# Attachments from the orchestrator payload (Phase 3). Validated as
187+
# AttachmentConfig models. Empty list for tasks without attachments.
188+
attachments: list[AttachmentConfig] = Field(default_factory=list)
153189

154190
@model_validator(mode="after")
155191
def _validate_trace_requires_user_id(self) -> Self:

agent/src/pipeline.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,32 @@ def _chain_prior_agent_error(agent_result: AgentResult | None, exc: BaseExceptio
5757
return tail
5858

5959

60+
def _inject_attachment_context(prompt: str, prepared_attachments: list) -> str:
61+
"""Append attachment file references to the user prompt.
62+
63+
Images are referenced by absolute path so the agent can view them
64+
with the Read tool (which supports multimodal image reading).
65+
File attachments are similarly referenced by path.
66+
"""
67+
lines = ["\n\n---\n\n**Attachments provided with this task:**\n"]
68+
for att in prepared_attachments:
69+
size_kb = att.size_bytes / 1024
70+
if att.type == "image":
71+
lines.append(
72+
f"- **Image:** `{att.filename}` ({size_kb:.1f} KB, {att.content_type}) "
73+
f"— View with: `Read {att.local_path}`"
74+
)
75+
else:
76+
lines.append(
77+
f"- **File:** `{att.filename}` ({size_kb:.1f} KB, {att.content_type}) "
78+
f"— Read with: `Read {att.local_path}`"
79+
)
80+
lines.append(
81+
"\nUse the Read tool to view these files. Image files will be displayed visually when read."
82+
)
83+
return prompt + "\n".join(lines)
84+
85+
6086
def _maybe_upload_trace(
6187
config: TaskConfig,
6288
trajectory,
@@ -252,6 +278,7 @@ def run_task(
252278
channel_metadata: dict[str, str] | None = None,
253279
trace: bool = False,
254280
user_id: str = "",
281+
attachments: list[dict] | None = None,
255282
) -> dict:
256283
"""Run the full agent pipeline and return a serialized result dict.
257284
@@ -290,6 +317,7 @@ def run_task(
290317
initial_approvals=initial_approvals,
291318
initial_approval_gate_count=initial_approval_gate_count,
292319
approval_gate_cap=approval_gate_cap,
320+
attachments=attachments,
293321
)
294322

295323
# Inject Cedar policies into config for the PolicyEngine in runner.py
@@ -440,6 +468,33 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
440468
config.channel_metadata,
441469
)
442470

471+
# Download attachments from S3 (version-pinned, integrity-verified)
472+
prepared_attachments: list = []
473+
if config.attachments:
474+
from attachments import download_attachments
475+
476+
try:
477+
with task_span("task.attachment_download"):
478+
prepared_attachments = download_attachments(
479+
config.attachments, setup.repo_dir
480+
)
481+
progress.write_agent_milestone(
482+
"attachments_downloaded",
483+
f"count={len(prepared_attachments)}",
484+
)
485+
except RuntimeError as e:
486+
log("ERROR", f"Attachment integrity check failed: {e}")
487+
raise RuntimeError(
488+
f"Attachment download/verification failed: {e}. "
489+
"The task cannot proceed without valid attachments."
490+
) from e
491+
except Exception as e:
492+
err_type = type(e).__name__
493+
log("ERROR", f"Attachment download failed: {err_type}: {e}")
494+
raise RuntimeError(
495+
f"Failed to download task attachments from S3: {err_type}: {e}"
496+
) from e
497+
443498
# Log discovered repo-level project configuration
444499
# (all files loaded by setting_sources=["project"])
445500
repo_dir = setup.repo_dir
@@ -449,6 +504,13 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
449504
else:
450505
log("TASK", "No repo-level project configuration found")
451506

507+
# Inject attachment references into the prompt so the agent knows
508+
# about available files. Images are read natively by the agent's
509+
# Read tool (multimodal support). File attachments are referenced
510+
# by path for the agent to read as needed.
511+
if prepared_attachments:
512+
prompt = _inject_attachment_context(prompt, prepared_attachments)
513+
452514
# Run agent
453515
disk_before = get_disk_usage(AGENT_WORKSPACE)
454516
start_time = time.time()

agent/src/server.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ def _run_task_background(
352352
channel_metadata: dict[str, str] | None = None,
353353
trace: bool = False,
354354
user_id: str = "",
355+
attachments: list[dict] | None = None,
355356
) -> None:
356357
"""Run the agent task in a background thread."""
357358
global _background_pipeline_failed
@@ -405,6 +406,7 @@ def _run_task_background(
405406
channel_metadata=channel_metadata,
406407
trace=trace,
407408
user_id=user_id,
409+
attachments=attachments,
408410
)
409411
_background_pipeline_failed = False
410412
except Exception as e:
@@ -492,6 +494,7 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
492494
approval_gate_cap = None
493495
channel_source = inp.get("channel_source", "") or ""
494496
channel_metadata = inp.get("channel_metadata") or {}
497+
attachments = inp.get("attachments") or []
495498
# ``trace`` is strictly opt-in (design §10.1). Accept only real
496499
# booleans from the orchestrator — a string "false" would otherwise
497500
# flip the flag on.
@@ -556,6 +559,7 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
556559
"channel_metadata": channel_metadata,
557560
"trace": trace,
558561
"user_id": user_id,
562+
"attachments": attachments,
559563
}
560564

561565

0 commit comments

Comments
 (0)