|
16 | 16 |
|
17 | 17 | from __future__ import annotations |
18 | 18 |
|
| 19 | +import logging |
19 | 20 | import pathlib |
20 | 21 | from typing import Union |
21 | 22 |
|
| 23 | +from google.cloud import storage |
| 24 | +from pydantic import ValidationError |
22 | 25 | import yaml |
23 | 26 |
|
24 | 27 | from . import models |
@@ -58,6 +61,39 @@ def _load_dir(directory: pathlib.Path) -> dict[str, str]: |
58 | 61 | return files |
59 | 62 |
|
60 | 63 |
|
| 64 | +def _parse_skill_md_content(content: str) -> tuple[dict, str]: |
| 65 | + """Parse SKILL.md from raw content string. |
| 66 | +
|
| 67 | + Args: |
| 68 | + content: The string content of SKILL.md. |
| 69 | +
|
| 70 | + Returns: |
| 71 | + Tuple of (parsed_frontmatter_dict, body_string). |
| 72 | +
|
| 73 | + Raises: |
| 74 | + ValueError: If SKILL.md is invalid. |
| 75 | + """ |
| 76 | + if not content.startswith("---"): |
| 77 | + raise ValueError("SKILL.md must start with YAML frontmatter (---)") |
| 78 | + |
| 79 | + parts = content.split("---", 2) |
| 80 | + if len(parts) < 3: |
| 81 | + raise ValueError("SKILL.md frontmatter not properly closed with ---") |
| 82 | + |
| 83 | + frontmatter_str = parts[1] |
| 84 | + body = parts[2].strip() |
| 85 | + |
| 86 | + try: |
| 87 | + parsed = yaml.safe_load(frontmatter_str) |
| 88 | + except yaml.YAMLError as e: |
| 89 | + raise ValueError(f"Invalid YAML in frontmatter: {e}") from e |
| 90 | + |
| 91 | + if not isinstance(parsed, dict): |
| 92 | + raise ValueError("SKILL.md frontmatter must be a YAML mapping") |
| 93 | + |
| 94 | + return parsed, body |
| 95 | + |
| 96 | + |
61 | 97 | def _parse_skill_md( |
62 | 98 | skill_dir: pathlib.Path, |
63 | 99 | ) -> tuple[dict, str, pathlib.Path]: |
@@ -87,23 +123,7 @@ def _parse_skill_md( |
87 | 123 | raise FileNotFoundError(f"SKILL.md not found in '{skill_dir}'.") |
88 | 124 |
|
89 | 125 | content = skill_md.read_text(encoding="utf-8") |
90 | | - if not content.startswith("---"): |
91 | | - raise ValueError("SKILL.md must start with YAML frontmatter (---)") |
92 | | - |
93 | | - parts = content.split("---", 2) |
94 | | - if len(parts) < 3: |
95 | | - raise ValueError("SKILL.md frontmatter not properly closed with ---") |
96 | | - |
97 | | - frontmatter_str = parts[1] |
98 | | - body = parts[2].strip() |
99 | | - |
100 | | - try: |
101 | | - parsed = yaml.safe_load(frontmatter_str) |
102 | | - except yaml.YAMLError as e: |
103 | | - raise ValueError(f"Invalid YAML in frontmatter: {e}") from e |
104 | | - |
105 | | - if not isinstance(parsed, dict): |
106 | | - raise ValueError("SKILL.md frontmatter must be a YAML mapping") |
| 126 | + parsed, body = _parse_skill_md_content(content) |
107 | 127 |
|
108 | 128 | return parsed, body, skill_md |
109 | 129 |
|
@@ -198,7 +218,7 @@ def _validate_skill_dir( |
198 | 218 |
|
199 | 219 | try: |
200 | 220 | frontmatter = models.Frontmatter.model_validate(parsed) |
201 | | - except Exception as e: |
| 221 | + except ValidationError as e: |
202 | 222 | problems.append(f"Frontmatter validation error: {e}") |
203 | 223 | return problems |
204 | 224 |
|
@@ -232,3 +252,140 @@ def _read_skill_properties( |
232 | 252 | skill_dir = pathlib.Path(skill_dir).resolve() |
233 | 253 | parsed, _, _ = _parse_skill_md(skill_dir) |
234 | 254 | return models.Frontmatter.model_validate(parsed) |
| 255 | + |
| 256 | + |
def _list_skills_in_gcs_dir(
    bucket_name: str,
    skills_base_path: str = "",
) -> dict[str, models.Frontmatter]:
  """List skills in a GCS directory.

  Every immediate "sub-directory" of ``skills_base_path`` that contains a
  ``SKILL.md`` object is treated as a skill. Skills whose manifest cannot be
  decoded, parsed or validated are logged and skipped so that one broken
  skill does not prevent the others from being listed.

  Args:
    bucket_name: Name of the GCS bucket.
    skills_base_path: Base directory within the bucket (e.g., 'path/to/skills').

  Returns:
    Dictionary mapping skill IDs to their frontmatter.
  """
  client = storage.Client()
  bucket = client.bucket(bucket_name)

  base_prefix = skills_base_path.strip("/")
  if base_prefix:
    base_prefix += "/"

  iterator = bucket.list_blobs(prefix=base_prefix, delimiter="/")
  # We must consume the iterator to populate the prefixes attribute
  for _ in iterator:
    pass
  logging.info("Found %s skills in GCS.", iterator.prefixes)

  skills = {}
  for skill_prefix in sorted(iterator.prefixes):
    manifest_blob = bucket.blob(f"{skill_prefix}SKILL.md")
    if not manifest_blob.exists():
      # A prefix without a manifest is not a skill directory.
      continue

    skill_id = skill_prefix.rstrip("/").split("/")[-1]
    try:
      # The download is inside the guarded region on purpose: a manifest
      # that is not valid text raises UnicodeDecodeError (a ValueError
      # subclass) and must be skipped like any other invalid skill.
      content = manifest_blob.download_as_text()
      parsed, _ = _parse_skill_md_content(content)
      skills[skill_id] = models.Frontmatter.model_validate(parsed)
    except (ValueError, ValidationError) as e:
      # log invalid skills during listing and skip them
      logging.warning(
          "Skipping invalid skill '%s' in bucket '%s': %s",
          skill_id,
          bucket_name,
          e,
      )
  return skills
| 303 | + |
| 304 | + |
def _load_skill_from_gcs_dir(
    bucket_name: str,
    skill_id: str,
    skills_base_path: str = "",
) -> models.Skill:
  """Load a complete skill from a GCS directory.

  Reads ``SKILL.md`` plus every object under the skill's ``references/``,
  ``assets/`` and ``scripts/`` prefixes.

  Args:
    bucket_name: Name of the GCS bucket.
    skill_id: The ID of the skill (directory name). Leading and trailing
      slashes are ignored.
    skills_base_path: Base directory within the bucket (e.g., 'path/to/skills').

  Returns:
    Skill object with all components loaded.

  Raises:
    FileNotFoundError: If the skill directory or SKILL.md is not found.
    ValueError: If SKILL.md is invalid or the skill name does not match
      the directory name.
  """
  client = storage.Client()
  bucket = client.bucket(bucket_name)

  base_prefix = skills_base_path.strip("/")
  if base_prefix:
    base_prefix += "/"

  # Normalize once so the object prefix and the name check below agree;
  # otherwise an ID such as "my-skill/" would look up "my-skill//SKILL.md".
  skill_path = skill_id.strip("/")
  skill_dir_prefix = f"{base_prefix}{skill_path}/"
  manifest_blob = bucket.blob(f"{skill_dir_prefix}SKILL.md")

  if not manifest_blob.exists():
    raise FileNotFoundError(
        f"SKILL.md not found at gs://{bucket_name}/{skill_dir_prefix}SKILL.md"
    )

  content = manifest_blob.download_as_text()
  parsed, body = _parse_skill_md_content(content)
  frontmatter = models.Frontmatter.model_validate(parsed)

  # Validate that skill name matches the directory name
  skill_name_expected = skill_path.split("/")[-1]
  if skill_name_expected != frontmatter.name:
    raise ValueError(
        f"Skill name '{frontmatter.name}' does not match directory"
        f" name '{skill_name_expected}'."
    )

  def _load_files_in_dir(subdir: str) -> dict[str, Union[str, bytes]]:
    """Return {relative path: content} for every object under `subdir`."""
    prefix = f"{skill_dir_prefix}{subdir}/"
    result = {}

    for blob in bucket.list_blobs(prefix=prefix):
      relative_path = blob.name[len(prefix) :]
      # Skip "directory placeholder" objects: the prefix itself and any
      # nested object whose name ends with "/" carry no file content.
      if not relative_path or relative_path.endswith("/"):
        continue

      try:
        result[relative_path] = blob.download_as_text()
      except UnicodeDecodeError:
        # Not text: keep the raw bytes instead.
        result[relative_path] = blob.download_as_bytes()
    return result

  references = _load_files_in_dir("references")
  assets = _load_files_in_dir("assets")
  raw_scripts = _load_files_in_dir("scripts")

  scripts = {}
  for name, src in raw_scripts.items():
    if isinstance(src, bytes):
      try:
        src = src.decode("utf-8")
      except UnicodeDecodeError:
        # Binary scripts cannot be represented as source text; skip them,
        # but leave a trace so the omission is not silent.
        logging.warning(
            "Skipping non-text script '%s' of skill '%s' in bucket '%s'.",
            name,
            skill_name_expected,
            bucket_name,
        )
        continue
    scripts[name] = models.Script(src=src)

  resources = models.Resources(
      references=references,
      assets=assets,
      scripts=scripts,
  )

  return models.Skill(
      frontmatter=frontmatter,
      instructions=body,
      resources=resources,
  )
0 commit comments