skills: handle YAML block-scalar descriptions in frontmatter

jamesbroadhead · jamesbroadhead · commit e3a3dc150e51 · 2026-05-12T21:53:17.000Z
The previous regex-only parser in extract_description_from_skill()
captured the YAML block-scalar indicator (`>-`) verbatim, so any SKILL.md
that wrote `description: >-\n  multi-line content` produced a manifest
entry of `">-"`. The new ai-dev-kit import (PR #533) brought two such
files — databricks-dbsql and databricks-execution-compute — which
landed corrupted descriptions in manifest.json and corrupted
short_description / default_prompt in agents/openai.yaml.

Walk the frontmatter line by line: if the value is a block-scalar
indicator (|, |-, |+, >, >-, >+), aggregate the indented continuation
lines (folded with spaces for `>`-style, newlines for `|`-style).

Regenerate manifest.json and the two affected agents/openai.yaml stubs.

Co-authored-by: Isaac
diff --git a/experimental/databricks-dbsql/agents/openai.yaml b/experimental/databricks-dbsql/agents/openai.yaml
@@ -1,7 +1,7 @@
 interface:
   display_name: "Databricks Dbsql"
-  short_description: ">-"
+  short_description: "Databricks SQL (DBSQL) advanced features and SQL warehouse capabilities. This skill MUST be invoked when the user mentions: \"DBSQL\", \"Databricks SQL\", \"SQL warehouse\", \"SQL scripting\", \"stored proc..."
   icon_small: "./assets/databricks.svg"
   icon_large: "./assets/databricks.png"
   brand_color: "#FF3621"
-  default_prompt: "Use $databricks-dbsql for >-."
+  default_prompt: "Use $databricks-dbsql for databricks sql (dbsql) advanced features and sql warehouse capabilities. this skill must be invoked when the user mentions: \"dbsql\", \"databricks sql\", \"sql warehouse\", \"sql scripting\", \"stored proc."
diff --git a/experimental/databricks-execution-compute/agents/openai.yaml b/experimental/databricks-execution-compute/agents/openai.yaml
@@ -1,7 +1,7 @@
 interface:
   display_name: "Databricks Execution Compute"
-  short_description: ">-"
+  short_description: "Execute code and manage compute on Databricks."
   icon_small: "./assets/databricks.svg"
   icon_large: "./assets/databricks.png"
   brand_color: "#FF3621"
-  default_prompt: "Use $databricks-execution-compute for >-."
+  default_prompt: "Use $databricks-execution-compute for execute code and manage compute on databricks."
diff --git a/manifest.json b/manifest.json
@@ -1,6 +1,6 @@
 {
   "version": "2",
-  "updated_at": "2026-05-12T21:15:40Z",
+  "updated_at": "2026-05-12T21:53:00Z",
   "skills": {
     "databricks-apps": {
       "version": "0.1.1",
@@ -265,9 +265,9 @@
     },
     "databricks-dbsql": {
       "version": "0.0.1",
-      "description": ">-",
+      "description": "Databricks SQL (DBSQL) advanced features and SQL warehouse capabilities. This skill MUST be invoked when the user mentions: \"DBSQL\", \"Databricks SQL\", \"SQL warehouse\", \"SQL scripting\", \"stored procedure\", \"CALL procedure\", \"materialized view\", \"CREATE MATERIALIZED VIEW\", \"pipe syntax\", \"|>\", \"geospatial\", \"H3\", \"ST_\", \"spatial SQL\", \"collation\", \"COLLATE\", \"ai_query\", \"ai_classify\", \"ai_extract\", \"ai_gen\", \"AI function\", \"http_request\", \"remote_query\", \"read_files\", \"Lakehouse Federation\", \"recursive CTE\", \"WITH RECURSIVE\", \"multi-statement transaction\", \"temp table\", \"temporary view\", \"pipe operator\". SHOULD also invoke when the user asks about SQL best practices, data modeling patterns, or advanced SQL features on Databricks.",
       "experimental": true,
-      "updated_at": "2026-05-12T21:15:40Z",
+      "updated_at": "2026-05-12T21:53:00Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -294,9 +294,9 @@
     },
     "databricks-execution-compute": {
       "version": "0.0.1",
-      "description": ">-",
+      "description": "Execute code and manage compute on Databricks. Use this skill when the user mentions: \"run code\", \"execute\", \"run on databricks\", \"serverless\", \"no cluster\", \"run python\", \"run scala\", \"run sql\", \"run R\", \"run file\", \"push and run\", \"notebook run\", \"batch script\", \"model training\", \"run script on cluster\", \"create cluster\", \"new cluster\", \"resize cluster\", \"modify cluster\", \"delete cluster\", \"terminate cluster\", \"create warehouse\", \"new warehouse\", \"resize warehouse\", \"delete warehouse\", \"node types\", \"runtime versions\", \"DBR versions\", \"spin up compute\", \"provision cluster\".",
       "experimental": true,
-      "updated_at": "2026-05-12T21:15:40Z",
+      "updated_at": "2026-05-12T21:53:00Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -357,7 +357,7 @@
       "version": "0.0.1",
       "description": "Patterns and best practices for Lakebase Autoscaling (next-gen managed PostgreSQL). Use when creating or managing Lakebase Autoscaling projects, configuring autoscaling compute or scale-to-zero, working with database branching for dev/test workflows, implementing reverse ETL via synced tables, or connecting applications to Lakebase with OAuth credentials.",
       "experimental": true,
-      "updated_at": "2026-05-12T21:15:40Z",
+      "updated_at": "2026-05-12T21:16:50Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
diff --git a/scripts/skills.py b/scripts/skills.py
@@ -194,8 +194,17 @@ def check_assets_synced(repo_root: Path) -> list[str]:
 # Manifest generation
 # ---------------------------------------------------------------------------
 
+_BLOCK_SCALAR_INDICATORS = {"|", "|-", "|+", ">", ">-", ">+"}
+
+
 def extract_description_from_skill(skill_path: Path) -> str:
-    """Best-effort extraction of `description:` from SKILL.md frontmatter."""
+    """Best-effort extraction of `description:` from SKILL.md frontmatter.
+
+    Handles plain (`description: foo`), quoted (`description: "foo"`), and
+    block-scalar (`description: >-` followed by indented lines) values. The
+    regex-only version captured the block-scalar indicator verbatim, which
+    corrupted manifest entries and Codex marketplace metadata.
+    """
     skill_md = skill_path / "SKILL.md"
     if not skill_md.exists():
         return ""
@@ -205,9 +214,24 @@ def extract_description_from_skill(skill_path: Path) -> str:
     end_idx = content.find("---", 3)
     if end_idx == -1:
         return ""
-    frontmatter = content[3:end_idx]
-    match = re.search(r'description:\s*["\']?(.+?)["\']?\s*$', frontmatter, re.MULTILINE)
-    return match.group(1).strip() if match else ""
+    lines = content[3:end_idx].splitlines()
+    for i, line in enumerate(lines):
+        m = re.match(r'^description:\s*(.*?)\s*$', line)
+        if not m:
+            continue
+        value = m.group(1)
+        if value in _BLOCK_SCALAR_INDICATORS:
+            collected = []
+            for cont in lines[i + 1:]:
+                if cont and not cont[0].isspace():
+                    break
+                stripped = cont.strip()
+                if stripped:
+                    collected.append(stripped)
+            joiner = " " if value.startswith(">") else "\n"
+            return joiner.join(collected)
+        return value.strip().strip('"').strip("'")
+    return ""
 
 
 # Markers that separate the "what this skill does" lead-in from the