Skip to content

Commit 573c613

Browse files
committed
feat: add Skill.from_s3 classmethod for dynamic loading skills from S3
1 parent 559b2a0 commit 573c613

2 files changed

Lines changed: 586 additions & 2 deletions

File tree

src/strands/vended_plugins/skills/skill.py

Lines changed: 232 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,31 @@
22
33
This module defines the Skill dataclass and provides classmethods for
44
discovering, parsing, and loading skills from the filesystem, raw content,
5-
or HTTPS URLs. Skills are directories containing a SKILL.md file with YAML
6-
frontmatter metadata and markdown instructions.
5+
HTTPS URLs, or Amazon S3. Skills are directories containing a SKILL.md file
6+
with YAML frontmatter metadata and markdown instructions.
77
"""
88

99
from __future__ import annotations
1010

1111
import logging
1212
import re
13+
import tempfile
1314
import urllib.error
1415
import urllib.request
16+
from concurrent.futures import ThreadPoolExecutor, as_completed
1517
from dataclasses import dataclass, field
1618
from pathlib import Path
1719
from typing import Any
1820

21+
import boto3
1922
import yaml
2023

2124
logger = logging.getLogger(__name__)
2225

2326
_SKILL_NAME_PATTERN = re.compile(r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$")
2427
_MAX_SKILL_NAME_LENGTH = 64
28+
# Worker-thread count for the thread pool that downloads skill files from S3.
_S3_MIRROR_MAX_WORKERS = 4
# Process-wide cache of already-mirrored locations: (bucket, prefix) -> local directory.
_S3_MIRROR_CACHE: dict[tuple[str, str | None], Path] = {}
2530

2631

2732
def _find_skill_md(skill_dir: Path) -> Path:
@@ -204,6 +209,172 @@ def _build_skill_from_frontmatter(
204209
)
205210

206211

212+
def _mirror_skills_from_s3(
    bucket: str,
    prefix: str | None = None,
    *,
    s3_client: Any = None,
    local_dir: str | Path | None = None,
) -> Path | None:
    """Mirror skill directories from S3 to a local filesystem path.

    Discovers all SKILL.md files under the given prefix, then downloads each
    skill directory (SKILL.md + subdirectories like scripts/, references/,
    assets/) in parallel using a thread pool.

    Results are cached per (bucket, normalized prefix) pair for the lifetime
    of the process. Subsequent calls for the same location return the
    previously mirrored directory without re-downloading, unless that
    directory no longer exists on disk or the caller explicitly asks for a
    different ``local_dir``; in both cases the skills are mirrored again.

    Args:
        bucket: S3 bucket name.
        prefix: Optional key prefix to scope the scan (e.g. "agents/orchestrator/").
            If None, scans the entire bucket. A trailing slash is optional.
        s3_client: Optional pre-configured boto3 S3 client. If None, a default
            client is created via ``boto3.client("s3")``.
        local_dir: Optional local directory to mirror skills into. If None, a
            temporary directory is created.

    Returns:
        Path to the local directory containing mirrored skill subdirectories,
        or None if no skills were found.

    Raises:
        Exception: Whatever the S3 client raises while listing or downloading
            objects is propagated unchanged.
    """
    # Normalize before building the cache key so that "skills" and "skills/"
    # (and None and "") share a single cache entry instead of mirroring twice.
    normalized_prefix = prefix.rstrip("/") + "/" if prefix else ""
    cache_key = (bucket, normalized_prefix or None)

    cached = _S3_MIRROR_CACHE.get(cache_key)
    if cached is not None:
        if not cached.is_dir():
            # The mirror was removed (e.g. temp-dir cleanup); the entry is stale.
            _S3_MIRROR_CACHE.pop(cache_key, None)
        elif local_dir is None or Path(local_dir) == cached:
            logger.debug("bucket=<%s>, prefix=<%s> | s3 mirror cache hit", bucket, prefix or "")
            return cached
        # Otherwise the caller asked for a different local_dir: honour it by
        # mirroring again below rather than returning the old location.

    if s3_client is None:
        s3_client = boto3.client("s3")

    # Discover all objects under the prefix
    objects = _s3_list_all_objects(s3_client, bucket, normalized_prefix)

    if not objects:
        logger.warning("bucket=<%s>, prefix=<%s> | no objects found", bucket, normalized_prefix)
        return None

    # Find skill directories (those containing SKILL.md)
    skill_dirs = _s3_find_skill_directories(objects, normalized_prefix)

    if not skill_dirs:
        logger.warning("bucket=<%s>, prefix=<%s> | no SKILL.md files found", bucket, normalized_prefix)
        return None

    logger.info(
        "bucket=<%s>, prefix=<%s>, count=<%d> | found skills: %s",
        bucket,
        normalized_prefix,
        len(skill_dirs),
        ", ".join(skill_dirs),
    )

    dest = _s3_resolve_local_dir(local_dir)
    download_tasks = _s3_build_download_tasks(objects, skill_dirs, normalized_prefix)
    _s3_download_parallel(s3_client, bucket, download_tasks, dest)

    # Only cache after every download succeeded, so a failed mirror is retried.
    _S3_MIRROR_CACHE[cache_key] = dest
    return dest
283+
284+
285+
def _s3_resolve_local_dir(local_dir: str | Path | None) -> Path:
286+
"""Resolve or create the local directory for S3 mirroring."""
287+
if local_dir is None:
288+
path = Path(tempfile.mkdtemp(prefix="strands-s3-skills-"))
289+
else:
290+
path = Path(local_dir)
291+
path.mkdir(parents=True, exist_ok=True)
292+
return path
293+
294+
295+
def _s3_list_all_objects(s3_client: Any, bucket: str, prefix: str) -> list[str]:
296+
"""List all object keys under a prefix, handling pagination."""
297+
keys: list[str] = []
298+
paginator = s3_client.get_paginator("list_objects_v2")
299+
pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
300+
301+
for page in pages:
302+
for obj in page.get("Contents", []):
303+
keys.append(obj["Key"])
304+
305+
return keys
306+
307+
308+
def _s3_find_skill_directories(object_keys: list[str], prefix: str) -> list[str]:
309+
"""Find skill directory names (relative to prefix) that contain SKILL.md.
310+
311+
Returns a sorted list of skill directory names (e.g. ["code-review", "pdf-processing"]).
312+
"""
313+
skill_dirs: list[str] = []
314+
315+
for key in object_keys:
316+
relative = key[len(prefix) :]
317+
parts = relative.split("/")
318+
319+
# We expect: <skill-name>/SKILL.md (or skill.md)
320+
if len(parts) == 2 and parts[1].lower() == "skill.md":
321+
skill_dirs.append(parts[0])
322+
323+
return sorted(set(skill_dirs))
324+
325+
326+
def _s3_build_download_tasks(
327+
object_keys: list[str],
328+
skill_dirs: list[str],
329+
prefix: str,
330+
) -> list[tuple[str, str]]:
331+
"""Build a list of (s3_key, relative_local_path) tuples to download.
332+
333+
Only includes objects that belong to a discovered skill directory.
334+
"""
335+
skill_dir_set = set(skill_dirs)
336+
tasks: list[tuple[str, str]] = []
337+
338+
for key in object_keys:
339+
relative = key[len(prefix) :]
340+
parts = relative.split("/")
341+
342+
if parts[0] in skill_dir_set and len(parts) >= 2:
343+
if not key.endswith("/"):
344+
tasks.append((key, relative))
345+
346+
return tasks
347+
348+
349+
def _s3_download_parallel(
350+
s3_client: Any,
351+
bucket: str,
352+
tasks: list[tuple[str, str]],
353+
dest: Path,
354+
) -> None:
355+
"""Download files from S3 in parallel using a thread pool."""
356+
if not tasks:
357+
return
358+
359+
def _download_one(task: tuple[str, str]) -> str:
360+
s3_key, relative_path = task
361+
local_path = dest / relative_path
362+
local_path.parent.mkdir(parents=True, exist_ok=True)
363+
s3_client.download_file(bucket, s3_key, str(local_path))
364+
return relative_path
365+
366+
with ThreadPoolExecutor(max_workers=_S3_MIRROR_MAX_WORKERS) as executor:
367+
futures = {executor.submit(_download_one, t): t for t in tasks}
368+
for future in as_completed(futures):
369+
try:
370+
path = future.result()
371+
logger.debug("s3_path=<%s> | downloaded", path)
372+
except Exception:
373+
s3_key, _ = futures[future]
374+
logger.warning("bucket=<%s>, key=<%s> | failed to download", bucket, s3_key)
375+
raise
376+
377+
207378
@dataclass
208379
class Skill:
209380
r"""Represents an agent skill with metadata and instructions.
@@ -422,3 +593,62 @@ def from_directory(cls, skills_dir: str | Path, *, strict: bool = False) -> list
422593

423594
logger.debug("path=<%s>, count=<%d> | loaded skills from directory", skills_dir, len(skills))
424595
return skills
596+
597+
@classmethod
def from_s3(
    cls,
    bucket: str,
    prefix: str | None = None,
    *,
    s3_client: Any = None,
    local_dir: str | Path | None = None,
    strict: bool = False,
) -> list[Skill]:
    """Load skills from an Amazon S3 bucket.

    Scans ``bucket`` (under ``prefix`` if given) for directories containing
    a SKILL.md file, mirrors each skill directory (including subdirectories
    like scripts/, references/, assets/) to a local path, then delegates to
    :meth:`from_directory` for parsing.

    The mirrored directory is cached per (bucket, prefix) pair for the
    lifetime of the process, so repeated calls do not re-download. The
    skills themselves are not cached: every call re-parses the local
    mirror through :meth:`from_directory` and returns a new list.

    Example::

        from strands.vended_plugins.skills import Skill

        # Load all skills under a prefix
        skills = Skill.from_s3("my-bucket", prefix="agents/director/")

        # Load from bucket root with a custom S3 client
        import boto3
        s3 = boto3.client("s3", region_name="eu-west-1")
        skills = Skill.from_s3("my-bucket", s3_client=s3)

    Args:
        bucket: S3 bucket name.
        prefix: Optional key prefix to scope the scan (e.g. "agents/director/").
            If None, scans the entire bucket.
        s3_client: Optional pre-configured boto3 S3 client. If None, a default
            client is created via ``boto3.client("s3")``.
        local_dir: Optional local directory to mirror skills into. If None, a
            temporary directory is created.
        strict: If True, raise on skill validation issues. If False (default),
            warn and load anyway. Passed through to :meth:`from_directory`.

    Returns:
        List of Skill instances loaded from S3. Empty if no objects or no
        SKILL.md files were found under the prefix.
    """
    local_path = _mirror_skills_from_s3(
        bucket=bucket,
        prefix=prefix,
        s3_client=s3_client,
        local_dir=local_dir,
    )

    # None means nothing was mirrored (no objects / no SKILL.md found).
    if local_path is None:
        return []

    return cls.from_directory(local_path, strict=strict)

0 commit comments

Comments
 (0)