chore: add root pickled.yml for agent-legibility checks

caio-pizzol · caio-pizzol · commit bd431911c322 · 2026-05-25T19:21:41.000-03:00
Pickled (https://docs.pickled.dev) runs scripted scenarios across a matrix of interfaces, sources, and toolsets and scores answers with deterministic checks. This config covers one scenario today (the custom React toolbar question) across two interfaces (Claude Code haiku, OpenAI Responses), two docs sources (none, the official SuperDoc llms-full.txt bundle), and four toolsets (none, web, the official SuperDoc Mintlify docs MCP, Context7 MCP): 2 x 2 x 4 = 16 cells per run. It sits alongside evals/ rather than under it: evals/ is the Promptfoo suite that scores the SuperDoc tool surface; this is the outside-in view of how agents talk about SuperDoc when asked to build with it. Run with: bunx @pickled-dev/cli check .
diff --git a/pickled.yml b/pickled.yml
@@ -0,0 +1,91 @@
+# 🥒 pickled.yml - measure what agents understand about SuperDoc
+#
+# Pickled runs the scenarios below across a matrix of interfaces,
+# sources, and toolsets, then scores answers with deterministic checks.
+# Each cell isolates one context-delivery path (docs link / web tools /
+# MCP server) so the report shows where agents do well and where they
+# do not.
+#
+# Quick start:
+#   bunx @pickled-dev/cli check .
+#
+# Docs: https://docs.pickled.dev
+
+tool:  # the product whose agent legibility this config measures
+  name: superdoc
+  description: "Document engine for the modern web (.docx-native editor + SDK + MCP)"
+
+docs:
+  sources:
+    # Official SuperDoc docs bundle. Injected only in cells whose source
+    # is `superdoc_docs`; in `none` cells no docs are injected, so the
+    # toolset (if any) is the only context-delivery path.
+    superdoc_docs: https://docs.superdoc.dev/llms-full.txt
+
+targets:
+  # Claude Code via the Agent SDK. Cheap, fast, matches how most
+  # external users first try SuperDoc inside their IDE.
+  quick:  # referenced by this key from the scenario matrix `interfaces`
+    category: cli
+    provider: claude-code
+    model: claude-haiku-4-5
+    maxTurns: 10  # NOTE(review): presumably caps agent turns per cell; confirm
+
+  # OpenAI Responses API. The other interface that today supports both
+  # `web` and `mcp` toolsets, so the matrix can cover the same context
+  # modes across two providers.
+  openai_api:  # referenced by this key from the scenario matrix `interfaces`
+    category: api
+    provider: openai
+    model: gpt-5.2
+    temperature: 0  # NOTE(review): presumably to reduce run-to-run variance; confirm
+    maxTokens: 4096
+
+toolsets:
+  none: {}  # baseline: no tools attached
+
+  # Each interface's built-in web tools. On Claude Code this scopes to
+  # WebSearch + WebFetch; on OpenAI it uses the server-side web_search.
+  web:
+    webSearch: true
+    webFetch: true
+
+  # SuperDoc's official Mintlify docs MCP server. Public HTTP endpoint,
+  # no auth. Exposes search_super_doc + query_docs_filesystem_super_doc
+  # so the agent can search docs and read pages as files.
+  superdoc_mintlify_mcp:
+    mcpServers:
+      superdoc:
+        type: http
+        url: https://docs.superdoc.dev/mcp
+
+  # Third-party Context7 index. Requires CONTEXT7_API_KEY in the env.
+  # Kept as a comparison surface alongside the official Mintlify server.
+  context7_mcp:
+    mcpServers:
+      context7:
+        type: http
+        url: https://mcp.context7.com/mcp
+        headers:
+          CONTEXT7_API_KEY: "${CONTEXT7_API_KEY}"  # quoted so the placeholder always stays a string
+
+scenarios:
+  # Custom React toolbar. The correct answer names SuperDocUIProvider
+  # and useSuperDocUI from the superdoc/ui/react surface. The wrong
+  # answers name the legacy headless toolbar (createHeadlessToolbar)
+  # or reach for activeEditor.commands.
+  - name: "Custom React toolbar surface"
+    prompt: "I am building with SuperDoc in React and want to add a custom toolbar. Which SuperDoc surface should I use, what should I import, and what should I avoid?"
+    matrix:  # 2 interfaces x 2 sources x 4 toolsets = 16 cells
+      interfaces: [quick, openai_api]  # keys defined under `targets`
+      sources: [none, superdoc_docs]  # `superdoc_docs` is defined under `docs.sources`
+      toolsets: [none, web, superdoc_mintlify_mcp, context7_mcp]  # keys defined under `toolsets`
+    expected:
+      symbols:  # identifiers the correct answer names
+        - "SuperDocUIProvider"
+        - "useSuperDocUI"
+      paths:  # import surface the correct answer uses
+        - "superdoc/ui/react"
+      excludes:  # NOTE(review): prompt asks "what should I avoid?"; confirm naming these as things to avoid is not penalized
+        - "createHeadlessToolbar"
+        - "activeEditor.commands"