Skip to content

Commit e3a3dc1

Browse files
skills: handle YAML block-scalar descriptions in frontmatter
The previous regex-only parser in extract_description_from_skill() captured the YAML block-scalar indicator (`>-`) verbatim, so any SKILL.md that wrote `description: >-\n multi-line content` produced a manifest entry of `">-"`. The new ai-dev-kit import (PR #533) brought two such files — databricks-dbsql and databricks-execution-compute — which landed corrupted descriptions in manifest.json and corrupted short_description / default_prompt in agents/openai.yaml.

Walk the frontmatter line by line: if the value is a block-scalar indicator (|, |-, |+, >, >-, >+), aggregate the indented continuation lines (folded with spaces for `>`-style, newlines for `|`-style).

Regenerate manifest.json and the two affected agents/openai.yaml stubs.

Co-authored-by: Isaac
1 parent 6fc33f7 commit e3a3dc1

4 files changed

Lines changed: 38 additions & 14 deletions

File tree

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
interface:
22
display_name: "Databricks Dbsql"
3-
short_description: ">-"
3+
short_description: "Databricks SQL (DBSQL) advanced features and SQL warehouse capabilities. This skill MUST be invoked when the user mentions: \"DBSQL\", \"Databricks SQL\", \"SQL warehouse\", \"SQL scripting\", \"stored proc..."
44
icon_small: "./assets/databricks.svg"
55
icon_large: "./assets/databricks.png"
66
brand_color: "#FF3621"
7-
default_prompt: "Use $databricks-dbsql for >-."
7+
default_prompt: "Use $databricks-dbsql for databricks sql (dbsql) advanced features and sql warehouse capabilities. this skill must be invoked when the user mentions: \"dbsql\", \"databricks sql\", \"sql warehouse\", \"sql scripting\", \"stored proc."
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
interface:
22
display_name: "Databricks Execution Compute"
3-
short_description: ">-"
3+
short_description: "Execute code and manage compute on Databricks."
44
icon_small: "./assets/databricks.svg"
55
icon_large: "./assets/databricks.png"
66
brand_color: "#FF3621"
7-
default_prompt: "Use $databricks-execution-compute for >-."
7+
default_prompt: "Use $databricks-execution-compute for execute code and manage compute on databricks."

manifest.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"version": "2",
3-
"updated_at": "2026-05-12T21:15:40Z",
3+
"updated_at": "2026-05-12T21:53:00Z",
44
"skills": {
55
"databricks-apps": {
66
"version": "0.1.1",
@@ -265,9 +265,9 @@
265265
},
266266
"databricks-dbsql": {
267267
"version": "0.0.1",
268-
"description": ">-",
268+
"description": "Databricks SQL (DBSQL) advanced features and SQL warehouse capabilities. This skill MUST be invoked when the user mentions: \"DBSQL\", \"Databricks SQL\", \"SQL warehouse\", \"SQL scripting\", \"stored procedure\", \"CALL procedure\", \"materialized view\", \"CREATE MATERIALIZED VIEW\", \"pipe syntax\", \"|>\", \"geospatial\", \"H3\", \"ST_\", \"spatial SQL\", \"collation\", \"COLLATE\", \"ai_query\", \"ai_classify\", \"ai_extract\", \"ai_gen\", \"AI function\", \"http_request\", \"remote_query\", \"read_files\", \"Lakehouse Federation\", \"recursive CTE\", \"WITH RECURSIVE\", \"multi-statement transaction\", \"temp table\", \"temporary view\", \"pipe operator\". SHOULD also invoke when the user asks about SQL best practices, data modeling patterns, or advanced SQL features on Databricks.",
269269
"experimental": true,
270-
"updated_at": "2026-05-12T21:15:40Z",
270+
"updated_at": "2026-05-12T21:53:00Z",
271271
"files": [
272272
"SKILL.md",
273273
"agents/openai.yaml",
@@ -294,9 +294,9 @@
294294
},
295295
"databricks-execution-compute": {
296296
"version": "0.0.1",
297-
"description": ">-",
297+
"description": "Execute code and manage compute on Databricks. Use this skill when the user mentions: \"run code\", \"execute\", \"run on databricks\", \"serverless\", \"no cluster\", \"run python\", \"run scala\", \"run sql\", \"run R\", \"run file\", \"push and run\", \"notebook run\", \"batch script\", \"model training\", \"run script on cluster\", \"create cluster\", \"new cluster\", \"resize cluster\", \"modify cluster\", \"delete cluster\", \"terminate cluster\", \"create warehouse\", \"new warehouse\", \"resize warehouse\", \"delete warehouse\", \"node types\", \"runtime versions\", \"DBR versions\", \"spin up compute\", \"provision cluster\".",
298298
"experimental": true,
299-
"updated_at": "2026-05-12T21:15:40Z",
299+
"updated_at": "2026-05-12T21:53:00Z",
300300
"files": [
301301
"SKILL.md",
302302
"agents/openai.yaml",
@@ -357,7 +357,7 @@
357357
"version": "0.0.1",
358358
"description": "Patterns and best practices for Lakebase Autoscaling (next-gen managed PostgreSQL). Use when creating or managing Lakebase Autoscaling projects, configuring autoscaling compute or scale-to-zero, working with database branching for dev/test workflows, implementing reverse ETL via synced tables, or connecting applications to Lakebase with OAuth credentials.",
359359
"experimental": true,
360-
"updated_at": "2026-05-12T21:15:40Z",
360+
"updated_at": "2026-05-12T21:16:50Z",
361361
"files": [
362362
"SKILL.md",
363363
"agents/openai.yaml",

scripts/skills.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,17 @@ def check_assets_synced(repo_root: Path) -> list[str]:
194194
# Manifest generation
195195
# ---------------------------------------------------------------------------
196196

197+
# Exact `description:` values treated as YAML block-scalar headers: literal
# (`|`) or folded (`>`), bare or with a `-` / `+` suffix. Matching is by set
# membership, so any other spelling (e.g. `>2` or a trailing comment) is not
# recognised as a block scalar.
_BLOCK_SCALAR_INDICATORS = {"|", "|-", "|+", ">", ">-", ">+"}
198+
199+
197200
def extract_description_from_skill(skill_path: Path) -> str:
198-
"""Best-effort extraction of `description:` from SKILL.md frontmatter."""
201+
"""Best-effort extraction of `description:` from SKILL.md frontmatter.
202+
203+
Handles plain (`description: foo`), quoted (`description: "foo"`), and
204+
block-scalar (`description: >-` followed by indented lines) values. The
205+
regex-only version captured the block-scalar indicator verbatim, which
206+
corrupted manifest entries and Codex marketplace metadata.
207+
"""
199208
skill_md = skill_path / "SKILL.md"
200209
if not skill_md.exists():
201210
return ""
@@ -205,9 +214,24 @@ def extract_description_from_skill(skill_path: Path) -> str:
205214
end_idx = content.find("---", 3)
206215
if end_idx == -1:
207216
return ""
208-
frontmatter = content[3:end_idx]
209-
match = re.search(r'description:\s*["\']?(.+?)["\']?\s*$', frontmatter, re.MULTILINE)
210-
return match.group(1).strip() if match else ""
217+
lines = content[3:end_idx].splitlines()
218+
for i, line in enumerate(lines):
219+
m = re.match(r'^description:\s*(.*?)\s*$', line)
220+
if not m:
221+
continue
222+
value = m.group(1)
223+
if value in _BLOCK_SCALAR_INDICATORS:
224+
collected = []
225+
for cont in lines[i + 1:]:
226+
if cont and not cont[0].isspace():
227+
break
228+
stripped = cont.strip()
229+
if stripped:
230+
collected.append(stripped)
231+
joiner = " " if value.startswith(">") else "\n"
232+
return joiner.join(collected)
233+
return value.strip().strip('"').strip("'")
234+
return ""
211235

212236

213237
# Markers that separate the "what this skill does" lead-in from the

0 commit comments

Comments
 (0)