|
2 | 2 |
|
3 | 3 | This module defines the Skill dataclass and provides classmethods for |
4 | 4 | discovering, parsing, and loading skills from the filesystem, raw content, |
5 | | -or HTTPS URLs. Skills are directories containing a SKILL.md file with YAML |
6 | | -frontmatter metadata and markdown instructions. |
| 5 | +HTTPS URLs, or Amazon S3. Skills are directories containing a SKILL.md file |
| 6 | +with YAML frontmatter metadata and markdown instructions. |
7 | 7 | """ |
8 | 8 |
|
9 | 9 | from __future__ import annotations |
10 | 10 |
|
11 | 11 | import logging |
12 | 12 | import re |
| 13 | +import tempfile |
13 | 14 | import urllib.error |
14 | 15 | import urllib.request |
| 16 | +from concurrent.futures import ThreadPoolExecutor, as_completed |
15 | 17 | from dataclasses import dataclass, field |
16 | 18 | from pathlib import Path |
17 | 19 | from typing import Any |
18 | 20 |
|
| 21 | +import boto3 |
19 | 22 | import yaml |
20 | 23 |
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Skill names: lowercase ASCII letters, digits and hyphens; the first and last
# characters must be a letter or digit (so no leading/trailing hyphen).
_SKILL_NAME_PATTERN = re.compile(r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$")
# Presumably the upper bound on skill-name length enforced during validation;
# the code that uses it is outside this view.
_MAX_SKILL_NAME_LENGTH = 64
# Thread-pool size used when downloading skill files from S3 in parallel.
_S3_MIRROR_MAX_WORKERS = 4
# Process-wide cache mapping (bucket, prefix-as-passed) to the local directory
# the skills were mirrored into. Entries are never evicted by the code visible
# here. NOTE(review): no lock guards this dict in the visible code -- confirm
# whether concurrent callers are expected.
_S3_MIRROR_CACHE: dict[tuple[str, str | None], Path] = {}
25 | 30 |
|
26 | 31 |
|
27 | 32 | def _find_skill_md(skill_dir: Path) -> Path: |
@@ -204,6 +209,172 @@ def _build_skill_from_frontmatter( |
204 | 209 | ) |
205 | 210 |
|
206 | 211 |
|
def _mirror_skills_from_s3(
    bucket: str,
    prefix: str | None = None,
    *,
    s3_client: Any = None,
    local_dir: str | Path | None = None,
) -> Path | None:
    """Mirror skill directories from S3 to a local filesystem path.

    Lists every object under ``prefix``, picks out the directories that hold a
    SKILL.md file, and downloads each of them (SKILL.md plus nested files such
    as scripts/, references/, assets/) in parallel using a thread pool.

    The resulting directory is cached per ``(bucket, prefix)`` pair for the
    lifetime of the process. A cached entry is reused only when it still
    exists on disk and the caller did not request a different ``local_dir``;
    otherwise the skills are mirrored again and the cache entry is replaced.
    "No skills found" results are not cached.

    Args:
        bucket: S3 bucket name.
        prefix: Optional key prefix to scope the scan (e.g. "agents/orchestrator/").
            If None, scans the entire bucket.
        s3_client: Optional pre-configured boto3 S3 client. If None, a default
            client is created via ``boto3.client("s3")``.
        local_dir: Optional local directory to mirror skills into. If None, a
            temporary directory is created.

    Returns:
        Path to the local directory containing mirrored skill subdirectories,
        or None if no skills were found.

    Raises:
        Exception: Whatever the S3 client raises while listing or downloading
            is propagated unchanged.
    """
    cache_key = (bucket, prefix)
    cached = _S3_MIRROR_CACHE.get(cache_key)
    if cached is not None:
        # An explicit local_dir must be honoured: returning a mirror that lives
        # somewhere else would silently ignore the caller's request.
        wants_other_dir = local_dir is not None and Path(local_dir) != cached
        # The mirror may have been removed since it was cached (e.g. temp-dir cleanup).
        if cached.is_dir() and not wants_other_dir:
            logger.debug("bucket=<%s>, prefix=<%s> | s3 mirror cache hit", bucket, prefix or "")
            return cached

    if s3_client is None:
        s3_client = boto3.client("s3")

    # Normalize prefix so it is either empty or ends with exactly one "/"
    normalized_prefix = ""
    if prefix:
        normalized_prefix = prefix.rstrip("/") + "/"

    # Discover all objects under the prefix
    objects = _s3_list_all_objects(s3_client, bucket, normalized_prefix)

    if not objects:
        logger.warning("bucket=<%s>, prefix=<%s> | no objects found", bucket, normalized_prefix)
        return None

    # Find skill directories (those containing SKILL.md)
    skill_dirs = _s3_find_skill_directories(objects, normalized_prefix)

    if not skill_dirs:
        logger.warning("bucket=<%s>, prefix=<%s> | no SKILL.md files found", bucket, normalized_prefix)
        return None

    logger.info(
        "bucket=<%s>, prefix=<%s>, count=<%d> | found skills: %s",
        bucket,
        normalized_prefix,
        len(skill_dirs),
        ", ".join(skill_dirs),
    )

    dest = _s3_resolve_local_dir(local_dir)
    download_tasks = _s3_build_download_tasks(objects, skill_dirs, normalized_prefix)
    _s3_download_parallel(s3_client, bucket, download_tasks, dest)

    # Only cache after every download succeeded, so a failed mirror is retried.
    _S3_MIRROR_CACHE[cache_key] = dest
    return dest
| 283 | + |
| 284 | + |
def _s3_resolve_local_dir(local_dir: str | Path | None) -> Path:
    """Return the directory that S3 skills should be mirrored into.

    When ``local_dir`` is given it is created (including missing parents) if
    necessary and returned as a ``Path``. When it is None, a fresh temporary
    directory with the ``strands-s3-skills-`` prefix is created instead.
    """
    if local_dir is not None:
        target = Path(local_dir)
        target.mkdir(parents=True, exist_ok=True)
        return target

    return Path(tempfile.mkdtemp(prefix="strands-s3-skills-"))
| 293 | + |
| 294 | + |
def _s3_list_all_objects(s3_client: Any, bucket: str, prefix: str) -> list[str]:
    """Return every object key under ``prefix`` in ``bucket``.

    Uses the client's ``list_objects_v2`` paginator so that result sets
    spanning several pages are fully collected. Pages without a ``Contents``
    entry contribute nothing.
    """
    page_iter = s3_client.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=prefix)
    return [entry["Key"] for page in page_iter for entry in page.get("Contents", [])]
| 306 | + |
| 307 | + |
def _s3_find_skill_directories(object_keys: list[str], prefix: str) -> list[str]:
    """Find skill directory names (relative to prefix) that contain SKILL.md.

    A key counts as a skill marker when, after stripping ``prefix``, it has the
    shape ``<skill-name>/SKILL.md`` (the file name is matched
    case-insensitively). Directory names that are empty, ``.`` or ``..`` are
    ignored: joined onto the local mirror directory they would resolve to an
    absolute path, the mirror root, or its parent rather than a skill folder.

    Args:
        object_keys: Object keys as listed from S3; each is expected to start
            with ``prefix``.
        prefix: Normalized key prefix (empty, or ending with "/").

    Returns:
        A sorted, de-duplicated list of skill directory names
        (e.g. ["code-review", "pdf-processing"]).
    """
    unsafe_names = {"", ".", ".."}
    found: set[str] = set()

    for key in object_keys:
        parts = key[len(prefix) :].split("/")

        # We expect exactly: <skill-name>/SKILL.md (or skill.md)
        if len(parts) != 2 or parts[1].lower() != "skill.md":
            continue

        if parts[0] in unsafe_names:
            continue

        found.add(parts[0])

    return sorted(found)
| 324 | + |
| 325 | + |
def _s3_build_download_tasks(
    object_keys: list[str],
    skill_dirs: list[str],
    prefix: str,
) -> list[tuple[str, str]]:
    """Build a list of (s3_key, relative_local_path) tuples to download.

    Only includes objects that belong to a discovered skill directory.
    Directory placeholder keys (ending in "/") are skipped. Keys whose relative
    path contains a ``..`` segment or starts with "/" are skipped with a
    warning, because joining them onto the destination directory would write
    outside of it.

    Args:
        object_keys: Object keys as listed from S3; each is expected to start
            with ``prefix``.
        skill_dirs: Skill directory names relative to ``prefix``.
        prefix: Normalized key prefix (empty, or ending with "/").

    Returns:
        Download tasks in the same order as ``object_keys``.
    """
    skill_dir_set = set(skill_dirs)
    tasks: list[tuple[str, str]] = []

    for key in object_keys:
        # Keys ending in "/" are folder markers, not files.
        if key.endswith("/"):
            continue

        relative = key[len(prefix) :]
        parts = relative.split("/")

        if len(parts) < 2 or parts[0] not in skill_dir_set:
            continue

        # An empty first segment makes the path absolute; ".." climbs out of
        # the mirror directory. Neither may ever reach the filesystem.
        if not parts[0] or ".." in parts:
            logging.getLogger(__name__).warning("key=<%s> | skipping object with unsafe path", key)
            continue

        tasks.append((key, relative))

    return tasks
| 347 | + |
| 348 | + |
def _s3_download_parallel(
    s3_client: Any,
    bucket: str,
    tasks: list[tuple[str, str]],
    dest: Path,
) -> None:
    """Download files from S3 in parallel using a thread pool.

    Each task is an ``(s3_key, relative_path)`` pair; the object is written to
    ``dest / relative_path``, creating parent directories as needed. Does
    nothing when ``tasks`` is empty.

    Args:
        s3_client: Client exposing ``download_file(bucket, key, filename)``.
        bucket: S3 bucket name.
        tasks: Download tasks as ``(s3_key, relative_path)`` tuples.
        dest: Local directory that relative paths are resolved against.

    Raises:
        Exception: The first download error is logged and re-raised. Downloads
            that have not started yet are cancelled; ones already running are
            allowed to finish before the error propagates.
    """
    if not tasks:
        return

    def _download_one(task: tuple[str, str]) -> str:
        s3_key, relative_path = task
        local_path = dest / relative_path
        local_path.parent.mkdir(parents=True, exist_ok=True)
        s3_client.download_file(bucket, s3_key, str(local_path))
        return relative_path

    with ThreadPoolExecutor(max_workers=_S3_MIRROR_MAX_WORKERS) as executor:
        futures = {executor.submit(_download_one, t): t for t in tasks}
        for future in as_completed(futures):
            try:
                path = future.result()
            except Exception:
                # Leaving the ``with`` block waits for every submitted future;
                # cancel the queued ones so a failed mirror does not keep
                # downloading. cancel() is a no-op for running/finished futures.
                for pending in futures:
                    pending.cancel()
                s3_key, _ = futures[future]
                logger.warning("bucket=<%s>, key=<%s> | failed to download", bucket, s3_key)
                raise
            logger.debug("s3_path=<%s> | downloaded", path)
| 376 | + |
| 377 | + |
207 | 378 | @dataclass |
208 | 379 | class Skill: |
209 | 380 | r"""Represents an agent skill with metadata and instructions. |
@@ -422,3 +593,62 @@ def from_directory(cls, skills_dir: str | Path, *, strict: bool = False) -> list |
422 | 593 |
|
423 | 594 | logger.debug("path=<%s>, count=<%d> | loaded skills from directory", skills_dir, len(skills)) |
424 | 595 | return skills |
| 596 | + |
| 597 | + @classmethod |
| 598 | + def from_s3( |
| 599 | + cls, |
| 600 | + bucket: str, |
| 601 | + prefix: str | None = None, |
| 602 | + *, |
| 603 | + s3_client: Any = None, |
| 604 | + local_dir: str | Path | None = None, |
| 605 | + strict: bool = False, |
| 606 | + ) -> list[Skill]: |
| 607 | + """Load skills from an Amazon S3 bucket. |
| 608 | +
|
| 609 | + Scans ``bucket`` (under ``prefix`` if given) for directories containing |
| 610 | + a SKILL.md file, mirrors each skill directory (including subdirectories |
| 611 | + like scripts/, references/, assets/) to a local path, then delegates to |
| 612 | + :meth:`from_directory` for parsing. |
| 613 | +
|
| 614 | + Results are cached per (bucket, prefix) pair for the lifetime of the |
| 615 | + process — subsequent calls with the same arguments return the cached |
| 616 | + list without re-downloading. |
| 617 | +
|
| 618 | + Example:: |
| 619 | +
|
| 620 | + from strands.vended_plugins.skills import Skill |
| 621 | +
|
| 622 | + # Load all skills under a prefix |
| 623 | + skills = Skill.from_s3("my-bucket", prefix="agents/director/") |
| 624 | +
|
| 625 | + # Load from bucket root with a custom S3 client |
| 626 | + import boto3 |
| 627 | + s3 = boto3.client("s3", region_name="eu-west-1") |
| 628 | + skills = Skill.from_s3("my-bucket", s3_client=s3) |
| 629 | +
|
| 630 | + Args: |
| 631 | + bucket: S3 bucket name. |
| 632 | + prefix: Optional key prefix to scope the scan (e.g. "agents/director/"). |
| 633 | + If None, scans the entire bucket. |
| 634 | + s3_client: Optional pre-configured boto3 S3 client. If None, a default |
| 635 | + client is created via ``boto3.client("s3")``. |
| 636 | + local_dir: Optional local directory to mirror skills into. If None, a |
| 637 | + temporary directory is created. |
| 638 | + strict: If True, raise on skill validation issues. If False (default), |
| 639 | + warn and load anyway. Passed through to :meth:`from_directory`. |
| 640 | +
|
| 641 | + Returns: |
| 642 | + List of Skill instances loaded from S3. |
| 643 | + """ |
| 644 | + local_path = _mirror_skills_from_s3( |
| 645 | + bucket=bucket, |
| 646 | + prefix=prefix, |
| 647 | + s3_client=s3_client, |
| 648 | + local_dir=local_dir, |
| 649 | + ) |
| 650 | + |
| 651 | + if local_path is None: |
| 652 | + return [] |
| 653 | + |
| 654 | + return cls.from_directory(local_path, strict=strict) |
0 commit comments