|
| 1 | +"""Attachment download and integrity verification. |
| 2 | +
|
| 3 | +Downloads attachments from S3 using version-pinned reads and verifies |
| 4 | +SHA-256 checksums against the orchestrator-provided values. Files are |
| 5 | +placed in a workspace subdirectory for the agent to reference. |
| 6 | +""" |
| 7 | + |
| 8 | +from __future__ import annotations |
| 9 | + |
| 10 | +import hashlib |
| 11 | +import os |
| 12 | +from pathlib import Path |
| 13 | +from typing import Literal |
| 14 | +from urllib.parse import urlparse |
| 15 | + |
| 16 | +from pydantic import BaseModel, ConfigDict |
| 17 | + |
| 18 | +from shell import log |
| 19 | + |
| 20 | +ATTACHMENTS_DIR = ".attachments" |
| 21 | + |
| 22 | + |
class PreparedAttachment(BaseModel):
    """Record describing one attachment that now exists on local disk.

    Instances are immutable (``frozen=True``) and reject any field that is
    not declared below (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Identifier of the attachment, copied from the source attachment config.
    attachment_id: str
    # Attachment category; restricted to the three literal values below.
    type: Literal["image", "file", "url"]
    # Content type string carried over from the source attachment config.
    content_type: str
    # Name of the file as stored inside the attachment's directory.
    filename: str
    # Filesystem path of the downloaded file, stored as a string.
    local_path: str
    # Size of the attachment in bytes.
    size_bytes: int
    # Optional value passed through from the attachment config; presumably an
    # estimated token count for the content -- confirm with the producer.
    token_estimate: int | None = None
| 35 | + |
| 36 | + |
def download_attachments(
    attachments: list,
    workspace: str,
) -> list[PreparedAttachment]:
    """Fetch every attachment from S3 into the workspace and verify each one.

    Files are stored under ``<workspace>/<ATTACHMENTS_DIR>``. The operation is
    all-or-nothing: if any attachment fails, that directory is removed again
    and the original exception is re-raised.

    Args:
        attachments: Attachment config objects to download. An empty or
            ``None`` value short-circuits and nothing is created on disk.
        workspace: The agent workspace root (e.g., /workspace).

    Returns:
        One PreparedAttachment per input attachment, in input order.

    Raises:
        RuntimeError: If an attachment fails its integrity or size check.
        Exception: Any error raised while downloading is propagated as-is.
    """
    if not attachments:
        return []

    # Imported lazily so the empty case above never requires boto3.
    import boto3

    target_root = Path(workspace) / ATTACHMENTS_DIR
    target_root.mkdir(parents=True, exist_ok=True)

    client = boto3.client("s3")

    def _prepare(item) -> PreparedAttachment:
        # Download first; the record is only built for a verified file.
        stored_at = _download_single(item, target_root, client)
        return PreparedAttachment(
            attachment_id=item.attachment_id,
            type=item.type,
            content_type=item.content_type,
            filename=item.filename,
            local_path=str(stored_at),
            size_bytes=item.size_bytes,
            token_estimate=item.token_estimate,
        )

    try:
        results: list[PreparedAttachment] = [_prepare(item) for item in attachments]
    except Exception:
        import shutil

        # Leave no partially populated attachment directory behind.
        shutil.rmtree(target_root, ignore_errors=True)
        raise

    log("TASK", f"Downloaded {len(results)} attachment(s) to {target_root}")
    return results
| 86 | + |
| 87 | + |
def _contained_path(parent: Path, name: str, filename: str) -> Path:
    """Return ``parent / name`` after checking it stays strictly inside *parent*."""
    candidate = parent / name
    # Resolve both sides so "..", absolute names and symlinks are all judged
    # by where the path really lands on disk.
    if parent.resolve() not in candidate.resolve().parents:
        raise RuntimeError(
            f"Attachment '{filename}' has an unsafe path component "
            f"{name!r}: it resolves outside {parent}."
        )
    return candidate


def _download_single(att, attachments_dir: Path, s3_client) -> Path:
    """Download a single attachment, verify it, and store it read-only.

    The object is fetched with a pinned ``VersionId``, held in memory, and
    only written to disk after both the SHA-256 checksum and the byte size
    match the expected values, so unverified content never reaches the
    workspace.

    Args:
        att: Attachment config object. The attributes read are ``s3_uri``,
            ``s3_version_id``, ``attachment_id``, ``filename``,
            ``checksum_sha256`` and ``size_bytes``.
        attachments_dir: Root directory under which a per-attachment
            subdirectory is created.
        s3_client: Object exposing ``get_object(Bucket=, Key=, VersionId=)``
            that returns a mapping with a readable, closable ``"Body"``.

    Returns:
        Path of the written file:
        ``attachments_dir / att.attachment_id / att.filename``.

    Raises:
        RuntimeError: If the S3 URI is malformed, if the attachment id or
            filename would resolve outside its directory, or if the checksum
            or size does not match.
    """
    # Split s3://bucket/key by hand: urlparse would cut the key at the first
    # "?" or "#", both of which are legal characters in S3 object keys.
    scheme, _, remainder = att.s3_uri.partition("://")
    bucket, _, raw_key = remainder.partition("/")
    key = raw_key.lstrip("/")
    if scheme.lower() != "s3" or not bucket or not key:
        raise RuntimeError(
            f"Attachment '{att.filename}' has an invalid S3 URI: {att.s3_uri!r}"
        )

    # Unique subdirectory per attachment to avoid filename collisions. Both
    # path components are validated before anything is created or fetched.
    dest_dir = _contained_path(attachments_dir, att.attachment_id, att.filename)
    local_path = _contained_path(dest_dir, att.filename, att.filename)
    dest_dir.mkdir(parents=True, exist_ok=True)

    log(
        "TASK",
        f"Downloading attachment '{att.filename}' "
        f"(s3://{bucket}/{key}, version={att.s3_version_id})",
    )

    # Download with pinned VersionId to prevent TOCTOU
    response = s3_client.get_object(
        Bucket=bucket,
        Key=key,
        VersionId=att.s3_version_id,
    )
    body = response["Body"]
    try:
        content = body.read()
    finally:
        # Release the underlying connection even if the read fails.
        body.close()

    # Verify SHA-256 integrity
    actual_checksum = hashlib.sha256(content).hexdigest()
    if actual_checksum != att.checksum_sha256:
        raise RuntimeError(
            f"Attachment '{att.filename}' integrity check failed: "
            f"expected SHA-256 {att.checksum_sha256}, got {actual_checksum}. "
            f"The file may have been tampered with."
        )

    # Verify size matches
    if len(content) != att.size_bytes:
        raise RuntimeError(
            f"Attachment '{att.filename}' size mismatch: "
            f"expected {att.size_bytes} bytes, got {len(content)} bytes."
        )

    # A previous run leaves the file read-only (0o444); remove it first so a
    # repeated download does not fail with PermissionError.
    try:
        local_path.unlink()
    except FileNotFoundError:
        pass

    # Write to local filesystem
    local_path.write_bytes(content)
    os.chmod(str(local_path), 0o444)  # Read-only

    log("TASK", f"  Verified: {att.filename} ({len(content)} bytes, SHA-256 OK)")
    return local_path
0 commit comments