fix(dev,html-app): adjust skills index to align with Cloudflare Agent Skills Discovery RFC

azasypkin · azasypkin · commit afb487999281 · 2026-05-17T18:50:28.000+02:00
diff --git a/dev/tools/AGENTS.md b/dev/tools/AGENTS.md
@@ -319,11 +319,11 @@ porting between repos stays trivial.
 
 ### URL shape
 
-| Surface           | URL                                          | Content type     | Source on disk                          |
-|-------------------|----------------------------------------------|------------------|-----------------------------------------|
-| Tool page         | `https://{{TOOLS_HOST}}/<path>`              | `text/html`      | `dev/tools/<name>.html`                 |
-| Per-tool skill    | `https://{{TOOLS_HOST}}/<path>.md`           | `text/markdown`  | `dev/tools/<name>.skill.md`             |
-| Aggregate index   | `https://{{TOOLS_HOST}}/llms.txt`            | `text/markdown`  | generated at deploy time from .html metadata; also the destination of `/`'s `Accept: text/markdown` redirect |
+| Surface         | URL                                | Content type    | Source on disk                                                                                               |
+|-----------------|------------------------------------|-----------------|--------------------------------------------------------------------------------------------------------------|
+| Tool page       | `https://{{TOOLS_HOST}}/<path>`    | `text/html`     | `dev/tools/<name>.html`                                                                                      |
+| Per-tool skill  | `https://{{TOOLS_HOST}}/<path>.md` | `text/markdown` | `dev/tools/<name>.skill.md`                                                                                  |
+| Aggregate index | `https://{{TOOLS_HOST}}/llms.txt`  | `text/markdown` | generated at deploy time from .html metadata; also the destination of `/`'s `Accept: text/markdown` redirect |
 
 Two **separate responders** per tool: one for the HTML body, one for the markdown.
 This avoids fragile content-negotiation, keeps the HTML body under
@@ -448,12 +448,12 @@ checklist asks every agent-friendly site to publish. None of them require any
 per-tool authoring; they are derived 1:1 from the same HTML registry +
 `*.skill.md` directory listing as `llms.txt`.
 
-| URL                                            | Content type        | Source of truth                            | Responder env var                                  |
-|------------------------------------------------|---------------------|--------------------------------------------|-----------------------------------------------------|
-| `/robots.txt`                                  | `text/plain`        | `buildRobotsTxt()` in `deploy.ts`          | `SECUTILS_HTML_APP_RESPONDER_ID_ROBOTS_TXT`         |
-| `/sitemap.xml`                                 | `application/xml`   | `buildSitemapXml()` in `deploy.ts`         | `SECUTILS_HTML_APP_RESPONDER_ID_SITEMAP_XML`        |
-| `/.well-known/agent-skills/index.json`         | `application/json`  | `buildAgentSkillsIndex()` in `deploy.ts`   | `SECUTILS_HTML_APP_RESPONDER_ID_AGENT_SKILLS_INDEX` |
-| `Link:` headers on `/`                         | (HTTP response headers) | hard-coded `indexLinkHeaders` in `deploy.ts` | (no extra responder; pinned via index settings)   |
+| URL                                    | Content type            | Source of truth                              | Responder env var                                   |
+|----------------------------------------|-------------------------|----------------------------------------------|-----------------------------------------------------|
+| `/robots.txt`                          | `text/plain`            | `buildRobotsTxt()` in `deploy.ts`            | `SECUTILS_HTML_APP_RESPONDER_ID_ROBOTS_TXT`         |
+| `/sitemap.xml`                         | `application/xml`       | `buildSitemapXml()` in `deploy.ts`           | `SECUTILS_HTML_APP_RESPONDER_ID_SITEMAP_XML`        |
+| `/.well-known/agent-skills/index.json` | `application/json`      | `buildAgentSkillsIndex()` in `deploy.ts`     | `SECUTILS_HTML_APP_RESPONDER_ID_AGENT_SKILLS_INDEX` |
+| `Link:` headers on `/`                 | (HTTP response headers) | hard-coded `indexLinkHeaders` in `deploy.ts` | (no extra responder; pinned via index settings)     |
 
 #### `/robots.txt`
 
@@ -486,12 +486,31 @@ engines respect it as a hint, not a contract.
 #### `/.well-known/agent-skills/index.json`
 
 [Cloudflare's Agent Skills Discovery RFC v0.2.0](https://github.com/cloudflare/agent-skills-discovery-rfc)
-shape: `$schema` field plus a `skills` array where each entry has `name`,
-`type: "skill"`, `description` (mirrors the HTML's `su-tool-description`),
-`url` (the live `<path>.md` URL), and `sha256` of the deployed skill body.
-The hash is computed from the **substituted** Markdown body that actually
-ships, so an agent that's already cached the skill can detect updates with
-a single GET.
+shape: `$schema` URI (pinned to the canonical
+`https://schemas.agentskills.io/discovery/0.2.0/schema.json` - the spec
+requires strict clients to match it exactly) plus a `skills` array where
+each entry has:
+
+- `name` - the **frontmatter `name:` value from the SKILL.md**, not the
+  file slug. This is the canonical Agent Skills identifier (e.g.
+  `pem-certificate-decoder`, `mock-response`); the slug (`pem`, `echo`) is
+  a deploy-time path concern and would diverge from the promo site's
+  `/.well-known/agent-skills/index.json`, which keys off the same field.
+  `deploy.ts` parses the frontmatter at index build time and **fails the
+  deploy** if any skill is missing a `name:` or if two skills collide on
+  it - agents cache by name, so a collision corrupts that cache.
+- `type: "skill-md"` - the v0.2.0 RFC requires `"skill-md"` or
+  `"archive"`; strict clients silently skip unrecognized values. Earlier
+  deploys used `"skill"`, which would have made every entry invisible to
+  a literal RFC implementation.
+- `description` - mirrors the HTML's `su-tool-description` `<meta>` so
+  marketing/SEO/agent copy stays in sync from one source.
+- `url` - the live `<path>.md` URL.
+- `digest: "sha256:<hex>"` - per the RFC's "Integrity and Verification"
+  section. The hash is computed from the **substituted** Markdown body
+  that actually ships, so an agent that's already cached the skill can
+  detect updates with a single GET. Earlier deploys emitted a bare
+  `sha256: <hex>` field instead, which strict clients would not recognise.
 
 #### `Link:` headers on `/`
 
diff --git a/dev/tools/deploy.ts b/dev/tools/deploy.ts
@@ -250,16 +250,47 @@ function buildSitemapXml(tools: ToolMeta[], toolsHost: string): string {
 
 // `/.well-known/agent-skills/index.json` -- Cloudflare's Agent Skills
 // Discovery RFC v0.2.0 format (https://github.com/cloudflare/agent-skills-discovery-rfc).
-// One entry per deployed `<slug>.skill.md`. The sha256 of each skill body is
-// included so agent skill loaders can detect updates without re-fetching.
+// One entry per deployed `<slug>.skill.md`. The digest of each skill body is
+// included (as `sha256:<hex>` per the spec) so agent skill loaders can detect
+// updates without re-fetching the skill body.
+//
+// Strict v0.2.0 conformance (earlier deploys got these wrong; fixed after
+// review feedback from Cloudflare):
+//   - `$schema` is the canonical `https://schemas.agentskills.io/...` URL,
+//     not the `agentskills.io/schema/...` variant.
+//   - `type` is `"skill-md"` (was `"skill"`).
+//   - Integrity field is `digest: "sha256:<hex>"` (was a bare `sha256: <hex>`).
+//   - `name` is taken from the SKILL.md YAML frontmatter `name:` field, NOT
+//     from the file slug. The slug is a deploy-time path concern; the skill's
+//     canonical identifier (e.g. `pem-certificate-decoder`, `mock-response`)
+//     lives in the SKILL.md itself, where it must match the Agent Skills
+//     spec naming rules and stay in sync with the promo site's
+//     `/.well-known/agent-skills/index.json`, which keys off the same field.
 type SkillIndexEntry = {
   name: string;
-  type: "skill";
+  type: "skill-md";
   description: string;
   url: string;
-  sha256: string;
+  digest: string;
 };
 
+// Extracts the `name:` value from the SKILL.md YAML frontmatter, i.e. the
+// first `---`-delimited block at the top of the file (we generate it that
+// way ourselves). Returns `undefined` when there is no frontmatter or no
+// non-empty `name:` value; the caller treats that as a hard error rather
+// than falling back to the slug, since clients cache skills by `name`.
+// `[ \t]*` (not `\s*`) keeps the match on one line: under the `m` flag `\s*`
+// would skip past an empty `name:` and capture the next key as the name.
+const FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---/;
+const FRONTMATTER_NAME_RE = /^name:[ \t]*["']?([^"'\r\n]+?)["']?[ \t]*$/m;
+
+function extractSkillName(body: string): string | undefined {
+  const block = FRONTMATTER_RE.exec(body);
+  if (!block) return undefined;
+  const name = FRONTMATTER_NAME_RE.exec(block[1])?.[1]?.trim();
+  return name || undefined;
+}
+
 function buildAgentSkillsIndex(
   tools: ToolMeta[],
   skillBodies: Map<string, string>,
@@ -269,19 +300,35 @@ function buildAgentSkillsIndex(
   // non-promoted skills are still served at `<path>.md` for direct fetching.
   const ordered = tools.filter((t) => t.promote && t.path !== "/");
   const skills: SkillIndexEntry[] = [];
+  const seenNames = new Set<string>();
   for (const t of ordered) {
     const body = skillBodies.get(t.slug);
     if (!body) continue;
+    const name = extractSkillName(body);
+    if (!name) {
+      throw new Error(
+        `agent-skills index: ${t.slug}.skill.md is missing a \`name:\` field in its YAML frontmatter`,
+      );
+    }
+    if (seenNames.has(name)) {
+      throw new Error(
+        `agent-skills index: duplicate skill name "${name}" -- two SKILL.md files share the same frontmatter \`name:\``,
+      );
+    }
+    seenNames.add(name);
+    const hex = createHash("sha256").update(body, "utf-8").digest("hex");
     skills.push({
-      name: t.slug,
-      type: "skill",
+      name,
+      type: "skill-md",
       description: t.description,
       url: `https://${toolsHost}${t.path}.md`,
-      sha256: createHash("sha256").update(body, "utf-8").digest("hex"),
+      digest: `sha256:${hex}`,
     });
   }
   const doc = {
-    $schema: "https://agentskills.io/schema/v0.2.0/index.schema.json",
+    // Canonical RFC v0.2.0 schema URL. See
+    // https://github.com/cloudflare/agent-skills-discovery-rfc for the spec.
+    $schema: "https://schemas.agentskills.io/discovery/0.2.0/schema.json",
     skills,
   };
   return JSON.stringify(doc, null, 2) + "\n";
diff --git a/e2e/tools/registry.spec.ts b/e2e/tools/registry.spec.ts
@@ -114,16 +114,33 @@ test.describe('Tools registry - cross-cutting agent-discovery artefacts', () =>
     expect(r.headers()['content-type'] ?? '').toMatch(/application\/json/);
     const doc = await r.json();
     expect(doc).toHaveProperty('$schema');
-    expect(doc.$schema).toMatch(/agentskills/);
+    // The `$schema` URI is an opaque identifier per RFC v0.2.0; strict
+    // clients MUST match it exactly. We pin to the canonical Cloudflare URL.
+    expect(doc.$schema).toBe('https://schemas.agentskills.io/discovery/0.2.0/schema.json');
     expect(Array.isArray(doc.skills)).toBe(true);
     expect(doc.skills.length, 'should list at least one skill').toBeGreaterThan(0);
     for (const skill of doc.skills) {
-      expect(skill, 'every entry must be type=skill').toMatchObject({ type: 'skill' });
-      expect(skill.name, 'name must be non-empty').toMatch(/\S/);
+      // RFC v0.2.0: every entry MUST declare `type: "skill-md"` or `"archive"`.
+      // Earlier deploys used a non-spec `"skill"` value, which strict clients
+      // would silently skip; we now emit the spec-compliant value.
+      expect(skill, 'every entry must be type=skill-md').toMatchObject({ type: 'skill-md' });
+      // Agent Skills naming spec: 1-64 chars, lowercase alphanumeric + hyphens,
+      // no leading/trailing/consecutive hyphens.
+      expect(skill.name, 'name must conform to Agent Skills naming spec').toMatch(
+        /^[a-z0-9]+(-[a-z0-9]+)*$/,
+      );
+      expect(skill.name.length).toBeLessThanOrEqual(64);
       expect(skill.description, 'description must be non-empty').toMatch(/\S/);
       expect(skill.url, 'url must point at our tools host .md').toMatch(new RegExp(`^https://${TOOLS_HOST}/.+\\.md$`));
-      expect(skill.sha256, 'sha256 must be 64 hex chars').toMatch(/^[0-9a-f]{64}$/);
+      // RFC v0.2.0 §"Integrity and Verification": digest is `sha256:<hex>`,
+      // not a bare hex string under a `sha256` field.
+      expect(skill.digest, 'digest must be sha256:<hex>').toMatch(/^sha256:[0-9a-f]{64}$/);
+      expect(skill).not.toHaveProperty('sha256');
     }
+    // The skill `name` MUST be unique across the index: agents cache by name,
+    // and duplicates corrupt that cache.
+    const names = doc.skills.map((s: { name: string }) => s.name);
+    expect(new Set(names).size, 'skill names must be unique').toBe(names.length);
     // Non-promoted tools must not advertise their skill in the discovery
     // index (their `<path>.md` is still served for direct fetching).
     const urls = doc.skills.map((s: { url: string }) => s.url);